例えば25行のファイルを8分割したい場合、split コマンドで3行ごとの分割を指定すると 3×8+1 で9分割になり、4行ごとの分割を指定すると 4×6+1 で7分割になってしまい、微妙に困る。そんな時、分割数を指定して分割できる自前のコマンドを持っていると楽ができるかもしれない。
1.shellで分割
#! /bin/sh
# split2.sh - split a text file into a given NUMBER of parts.
#
# Usage : split2.sh divisor input base
#   divisor  number of output files to create (positive decimal integer)
#   input    file to split
#   base     output name prefix; parts are written to <base>00, <base>01, ...
#
# The first (total % divisor) parts receive one extra line, so part sizes
# differ by at most one line.  One "Part[..]" summary line is printed per
# part.  The copying itself is done by a single sed run that carries one
# "START,ENDw FILE" command per non-empty part.
if [ "$#" != 3 ]
then
    echo "Usage : split2.sh divisor input base"
    exit 1
fi
divisor=$1
input=$2
base=$3

# Accept only a plain run of decimal digits.
case $divisor in
    ''|*[!0-9]*)
        echo "not a number! [$divisor]" >&2
        exit 1
        ;;
esac
# expr reads its operand as decimal, so a leading zero ("08") is normalised
# here and never reaches the $(( )) arithmetic below as an octal literal.
divisor=$(expr "$divisor" + 0)
if [ "$divisor" -lt 1 ]
then
    echo "divisor must be 1 or more [$divisor]" >&2
    exit 1
fi
if [ ! -r "$input" ]
then
    echo "Can't open file[$input]" >&2
    exit 1
fi

# NR also counts a final line that lacks a trailing newline (wc -l does not),
# so that line is not lost from the last part.  Reading via stdin keeps awk
# from interpreting an odd file name as an option or assignment.
total=$(awk 'END { print NR }' < "$input")
quotient=$((total / divisor))
toomuch=$((total % divisor))

# The sed arguments are collected in the positional parameters so that file
# names are passed through verbatim, without a second round of shell parsing.
set --
c_end=0
no=0
while [ "$no" -lt "$divisor" ]
do
    c_start=$((c_end + 1))
    if [ "$no" -lt "$toomuch" ]
    then
        c_end=$((c_start + quotient))
    else
        c_end=$((c_start + quotient - 1))
    fi
    file=$(printf "%s%02d" "$base" "$no")
    if [ "$c_end" -ge "$c_start" ]
    then
        set -- "$@" -e "${c_start},${c_end}w ${file}"
    else
        # Empty part (more parts than lines): just create an empty file
        # instead of handing sed a reversed range.
        : > "$file" || exit 1
    fi
    printf "Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n" \
        "$((no + 1))" "$c_start" "$c_end" "$((c_end - c_start + 1))"
    no=$((no + 1))
done

# With an empty input there is nothing for sed to do.
if [ "$#" -gt 0 ]
then
    sed -n "$@" < "$input" || exit 1
fi
exit 0
2.perlで分割
#! /usr/bin/perl
# split2.pl - split a text file into a given NUMBER of parts.
#
# Usage : split2.pl divisor input base
#   divisor  number of output files to create (positive decimal integer)
#   input    file to split
#   base     output name prefix; parts are written to <base>00, <base>01, ...
#
# The input is read twice: once to count its lines, once to copy them.
# The first (total % divisor) parts receive one extra line.  A "Part[..]"
# summary line is printed whenever a new part is started.
use strict;
use warnings;

if (scalar(@ARGV) != 3) {
    printf(STDERR "Usage : split2.pl divisor input base\n");
    exit(1);
}
my ($divisor, $input, $base) = @ARGV;

# A zero or non-numeric divisor would otherwise end in a division by zero.
if ($divisor !~ /\A[0-9]+\z/ || $divisor < 1) {
    printf(STDERR "not a positive number! [%s]\n", $divisor);
    exit(1);
}
$divisor = int($divisor);

# Pass 1: count the lines.  Three-argument open keeps characters such as
# '>' or '|' in a file name from being interpreted as an open mode.
my $total = 0;
open(my $counter, '<', $input) or do {
    printf(STDERR "Can't open file[%s]\n", $input);
    exit(1);
};
while (defined(my $ignored = <$counter>)) {
    $total++;
}
close($counter);

my $quotient = int($total / $divisor);
my $toomuch  = $total % $divisor;

# Every output file is created up front so that parts which end up empty
# still exist.  Lexical handles are kept in an array indexed by part number.
my @out;
my @out_name;
foreach my $no (0 .. $divisor - 1) {
    my $file = sprintf("%s%02d", $base, $no);
    open(my $fh, '>', $file) or do {
        printf(STDERR "Can't open file[%s]\n", $file);
        exit(1);
    };
    push(@out, $fh);
    push(@out_name, $file);
}

# Pass 2: copy each line to the part whose range contains it.
open(my $src, '<', $input) or do {
    printf(STDERR "Can't open file[%s]\n", $input);
    exit(1);
};
my ($c_start, $c_end, $part, $cnt) = (0, 0, 0, 0);
while (defined(my $line = <$src>)) {
    $cnt++;
    if ($cnt > $c_end) {
        # Current range exhausted: compute the range of the next part.
        $c_start = $c_end + 1;
        $c_end   = $c_start + $quotient - ($part < $toomuch ? 0 : 1);
        printf("Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
               $part + 1, $c_start, $c_end, $c_end - $c_start + 1);
        $part++;
    }
    print { $out[$part - 1] } $line or do {
        printf(STDERR "Can't write file[%s]\n", $out_name[$part - 1]);
        exit(1);
    };
}
close($src);

# Buffered write errors only surface at close time, so check it.
foreach my $no (0 .. $#out) {
    close($out[$no]) or do {
        printf(STDERR "Can't close file[%s]\n", $out_name[$no]);
        exit(1);
    };
}
3.rubyで分割
#! /usr/bin/ruby
# split2.rb - split a text file into a given NUMBER of parts.
#
# Usage : split2.rb divisor input base
#   divisor  number of output files to create (positive decimal integer)
#   input    file to split
#   base     output name prefix; parts are written to <base>00, <base>01, ...
#
# The input is read twice: once to count its lines, once to copy them.
# The first (total % divisor) parts receive one extra line.  A "Part[..]"
# summary line is printed whenever a new part is started.
if (ARGV.length != 3)
  printf("Usage : split2.rb divisor input base\n")
  exit(1)
end

# String#to_i silently maps garbage to 0, which would end in a
# ZeroDivisionError below; insist on plain digits and a value >= 1.
unless ARGV[0] =~ /\A[0-9]+\z/ && ARGV[0].to_i >= 1
  $stderr.printf("not a positive number! [%s]\n", ARGV[0])
  exit(1)
end
divisor = ARGV[0].to_i
input = ARGV[1]
base = ARGV[2]

# Pass 1: count the lines.
total = 0
File.open(input, "r") do |f|
  f.each_line { total += 1 }
end

quotient, toomuch = total.divmod(divisor)

fplist = []
begin
  # File.open (not Kernel#open) so that a base name starting with "|" is
  # treated as a file name and never as a command to run.
  divisor.times do |no|
    fplist << File.open("%s%02d" % [base, no], "w")
  end

  # Pass 2: copy each line to the part whose range contains it.
  c_start = 0
  c_end = 0
  part = 0
  cnt = 0
  File.open(input, "r") do |f|
    f.each_line do |line|
      cnt += 1
      if cnt > c_end
        # Current range exhausted: compute the range of the next part.
        c_start = c_end + 1
        c_end = c_start + quotient - (part < toomuch ? 0 : 1)
        printf("Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
               part + 1, c_start, c_end, c_end - c_start + 1)
        part += 1
      end
      fplist[part - 1].print(line)
    end
  end
ensure
  # Close whatever was opened, even when an exception interrupted the copy.
  fplist.each { |fp| fp.close unless fp.closed? }
end
4.pythonで分割
#! /usr/bin/python
"""split2.py - split a text file into a given NUMBER of parts.

Usage : split2.py divisor input base

Parts are written to <base>00, <base>01, ...  The first (total % divisor)
parts receive one extra line, so part sizes differ by at most one line.
"""
import sys


def split(divisor, input_path, base):
    """Distribute the lines of ``input_path`` over ``divisor`` output files.

    The input is read twice: once to count its lines and once to copy them.
    All output files are created up front, so parts that end up empty still
    exist.  A "Part[..]" summary line is printed whenever a new part starts.

    Args:
        divisor: number of parts to produce; must be >= 1.
        input_path: path of the file to split.
        base: prefix of the output files; part ``n`` goes to ``"%s%02d"``.

    Raises:
        ValueError: if ``divisor`` is smaller than 1.
        IOError/OSError: if a file cannot be opened, read or written.
    """
    if divisor < 1:
        raise ValueError("divisor must be a positive integer: %r" % (divisor,))

    # Binary mode copies the bytes untouched and keeps line iteration
    # independent of the platform's text encoding.
    total = 0
    with open(input_path, "rb") as src:
        for _ in src:
            total += 1

    quotient, toomuch = divmod(total, divisor)

    fplist = []
    try:
        for no in range(divisor):
            fplist.append(open("%s%02d" % (base, no), "wb"))

        c_start = 0
        c_end = 0
        part = 0
        cnt = 0
        with open(input_path, "rb") as src:
            for line in src:
                cnt += 1
                if cnt > c_end:
                    # Current range exhausted: compute the next part's range.
                    c_start = c_end + 1
                    c_end = c_start + quotient
                    if part >= toomuch:
                        c_end -= 1
                    print("Part[%d] Start[%08d] End[%08d] Cnt[%08d]"
                          % (part + 1, c_start, c_end, c_end - c_start + 1))
                    part += 1
                fplist[part - 1].write(line)
    finally:
        # Close whatever was opened, even if the copy was interrupted.
        for out in fplist:
            out.close()


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) != 4:
        print("Usage : split2.py divisor input base")
        return 1
    try:
        divisor = int(argv[1])
    except ValueError:
        divisor = 0
    if divisor < 1:
        sys.stderr.write("not a positive number! [%s]\n" % argv[1])
        return 1
    split(divisor, argv[2], argv[3])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
5.lispで分割
#! /usr/bin/clisp
;;; Split a text file into a given NUMBER of parts.
;;;
;;; Usage: split.lisp divisor input base
;;;   divisor  number of output files to create (positive integer)
;;;   input    file to split
;;;   base     output name prefix; parts go to <base>00, <base>01, ...
;;;
;;; The input is read twice: once to count its lines, once to copy them.
;;; The first (total mod divisor) parts receive one extra line.  A "Part[..]"
;;; summary line is printed whenever a new part is started.
(if (not (equal (length *args*) 3))
    (progn
      (format t "Usage split.lisp divisor input base~%")
      (exit 1)))
;; IGNORE-ERRORS turns a malformed number into NIL instead of an
;; unhandled parse error.
(setq divisor (ignore-errors (parse-integer (car *args*))))
;; A divisor below 1 would end in a division by zero further down.
(if (or (null divisor) (< divisor 1))
    (progn
      (format t "not a positive number! [~A]~%" (car *args*))
      (exit 1)))
(setq input (car (cdr *args*)))
(setq base (caddr *args*))
(setq c_start 0)
(setq c_end 0)
(setq total 0)
(setq quotient 0)
(setq toomuch 0)
(setq part 0)
(setq fplist '())
;; Pass 1: count the lines.
(let ((in (open input :direction :input)))
  (loop for line = (read-line in nil)
        while line do (setq total (+ total 1)))
  (close in))
(setq quotient (floor total divisor))
(setq toomuch (mod total divisor))
;; Open every output stream up front.  PUSH builds the list back to front.
(dotimes (no divisor)
  (let ((out (open (format nil "~A~2,'0D" base no) :direction :output)))
    (push out fplist)))
;; NREVERSE may destroy its argument; only its return value is guaranteed
;; to be the reversed list, so it has to be stored back.
(setq fplist (nreverse fplist))
;; Pass 2: copy each line to the part whose range contains it.
(let ((in (open input :direction :input))
      (cnt 0)
      (line ""))
  (loop
    (setq line (read-line in nil))
    ;; RETURN only leaves the loop, so the streams below still get closed.
    (if (null line)
        (return))
    (setq cnt (+ cnt 1))
    (if (> cnt c_end)
        (progn
          ;; Current range exhausted: compute the range of the next part.
          (setq c_start (+ c_end 1))
          (if (< part toomuch)
              (setq c_end (+ c_start quotient))
              (setq c_end (+ c_start quotient -1)))
          (setq part (+ part 1))
          (format t "Part[~D] Start[~8,'0D] End[~8,'0D] Cnt[~8,'0D]~%"
                  part c_start c_end (+ (- c_end c_start) 1))))
    (format (elt fplist (- part 1)) "~A~%" line))
  (close in))
;; Close all output streams.
(dotimes (no divisor)
  (close (pop fplist)))
6.javaで分割
import java.io.*;
import java.util.*;
/**
 * Splits a text file into a given number of parts.
 *
 * <p>Usage: {@code java split2 divisor input base}. Parts are written to
 * {@code <base>00}, {@code <base>01}, ... The first {@code total % divisor}
 * parts receive one extra line, so part sizes differ by at most one line.
 */
class split2 {
    /**
     * Distributes the lines of {@code input} over {@code divisor} files.
     *
     * <p>The input is read twice: once to count its lines and once to copy
     * them. All output files are created up front, so parts that end up
     * empty still exist. Lines are read with {@code readLine()} and written
     * back followed by {@code "\n"}. A "Part[..]" summary line is printed
     * whenever a new part is started.
     *
     * @param divisor number of parts to produce; must be positive
     * @param input   path of the file to split
     * @param base    prefix of the output file names
     * @throws IllegalArgumentException if {@code divisor} is not positive
     * @throws IOException if a file cannot be opened, read or written
     */
    void split(int divisor, String input, String base) throws Exception {
        if (divisor <= 0) {
            throw new IllegalArgumentException(
                "divisor must be positive: " + divisor);
        }
        int total = countLines(input);
        int quotient = total / divisor;
        int toomuch = total % divisor;

        List<PrintWriter> fplist = new ArrayList<PrintWriter>();
        BufferedReader br = null;
        try {
            for (int no = 0; no < divisor; no++) {
                String file = String.format("%s%02d", base, no);
                fplist.add(new PrintWriter(
                    new BufferedWriter(new FileWriter(file))));
            }

            int c_start = 0;
            int c_end = 0;
            int part = 0;
            int cnt = 0;
            String line;
            br = new BufferedReader(new FileReader(input));
            while ((line = br.readLine()) != null) {
                cnt++;
                if (cnt > c_end) {
                    // Current range exhausted: compute the next part's range.
                    c_start = c_end + 1;
                    c_end = c_start + quotient - (part < toomuch ? 0 : 1);
                    System.out.printf(
                        "Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
                        part + 1, c_start, c_end, c_end - c_start + 1);
                    part++;
                }
                fplist.get(part - 1).printf("%s\n", line);
            }

            // PrintWriter never throws on write failures; it only records
            // them, so ask explicitly before reporting success.
            for (int no = 0; no < fplist.size(); no++) {
                if (fplist.get(no).checkError()) {
                    throw new IOException("write error on file["
                        + String.format("%s%02d", base, no) + "]");
                }
            }
        } finally {
            // Release every stream even when the copy was interrupted.
            try {
                if (br != null) {
                    br.close();
                }
            } finally {
                for (PrintWriter pw : fplist) {
                    pw.close();
                }
            }
        }
    }

    /** Returns the number of lines {@code readLine()} yields for the file. */
    private int countLines(String input) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(input));
        try {
            int total = 0;
            while (br.readLine() != null) {
                total++;
            }
            return total;
        } finally {
            br.close();
        }
    }

    /** Command-line entry point: {@code split2 divisor input base}. */
    public static void main(String args[]) throws Exception {
        if (args.length != 3) {
            System.out.printf("Usage : split2 divisor input base\n");
            System.exit(1);
        }
        int divisor = 0;
        try {
            divisor = Integer.parseInt(args[0]);
        } catch (NumberFormatException e) {
            divisor = 0;  // reported together with non-positive values below
        }
        if (divisor <= 0) {
            System.err.printf("not a positive number! [%s]\n", args[0]);
            System.exit(1);
        }
        String input = args[1];
        String base = args[2];
        split2 obj = new split2();
        obj.split(divisor, input, base);
    }
}
7.C言語で分割
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * split - distribute the lines of a text file over `divisor` output files.
 *
 * divisor : number of parts to produce; must be 1 or more
 * input   : path of the file to split
 * base    : prefix of the output files; part n is written to "<base>NN"
 *
 * The input is read twice: once to count its lines and once to copy them.
 * The first (total % divisor) parts receive one extra line.  All output
 * files are created up front, so parts that end up empty still exist.
 * A "Part[..]" summary line is printed whenever a new part is started.
 *
 * Returns 0 on success and 1 on any error (a message goes to stderr).
 */
int
split(int divisor, char *input, char *base)
{
    FILE **fplist = NULL;   /* one output stream per part */
    FILE *fp = NULL;        /* input stream */
    char *file = NULL;      /* scratch buffer for output file names */
    size_t file_size = 0;
    size_t len = 0;
    int c_start = 0;
    int c_end = 0;
    int quotient = 0;
    int toomuch = 0;
    int total = 0;
    int part = 0;
    int cnt = 0;
    int no = 0;
    int err = 0;
    int line_start = 1;     /* next fgets() chunk begins a new line */
    int status = 1;
    char buf[2048];

    if (divisor <= 0) {
        fprintf(stderr,"divisor must be 1 or more [%d]\n",divisor);
        return(1);
    }

    /* The prefix plus the decimal digits of any int plus NUL fits here. */
    file_size = strlen(base) + 32;
    file = malloc(file_size);
    fplist = calloc((size_t)divisor, sizeof(FILE *));
    if (!file || !fplist) {
        fprintf(stderr,"calloc error!\n");
        goto cleanup;
    }

    /* Pass 1: count the lines. */
    fp = fopen(input,"r");
    if (!fp) {
        fprintf(stderr,"Can't open file[%s]\n",input);
        goto cleanup;
    }
    while(fgets(buf, sizeof(buf), fp)) {
        /*
         * A line longer than the buffer arrives in several chunks; only
         * the chunk that follows a newline starts a new line.
         */
        if (line_start) {
            total++;
        }
        len = strlen(buf);
        line_start = (len > 0 && buf[len - 1] == '\n');
    }
    err = ferror(fp);
    if (fclose(fp)) {
        err = 1;
    }
    fp = NULL;
    if (err) {
        fprintf(stderr,"Can't read file[%s]\n",input);
        goto cleanup;
    }

    quotient = total / divisor;
    toomuch = total % divisor;

    for(no=0;no<divisor;no++) {
        snprintf(file, file_size, "%s%02d", base, no);
        fplist[no] = fopen(file,"w");
        if (!fplist[no]) {
            fprintf(stderr,"Can't open file[%s]\n",file);
            goto cleanup;
        }
    }

    /* Pass 2: copy each line to the part whose range contains it. */
    fp = fopen(input,"r");
    if (!fp) {
        fprintf(stderr,"Can't open file[%s]\n",input);
        goto cleanup;
    }
    cnt = 0;
    line_start = 1;
    while(fgets(buf, sizeof(buf), fp)) {
        if (line_start) {
            cnt++;
            if (cnt > c_end) {
                /* Current range exhausted: compute the next part's range. */
                c_start = c_end + 1;
                c_end = c_start + quotient - ((part < toomuch) ? 0 : 1);
                printf("Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
                       part+1, c_start, c_end, c_end-c_start+1);
                part++;
            }
        }
        len = strlen(buf);
        line_start = (len > 0 && buf[len - 1] == '\n');
        /* fputs() reports failure with EOF. */
        if (fputs(buf, fplist[part-1]) == EOF) {
            fprintf(stderr,"part [%d] write error\n",part);
            goto cleanup;
        }
    }
    err = ferror(fp);
    if (fclose(fp)) {
        err = 1;
    }
    fp = NULL;
    if (err) {
        fprintf(stderr,"Can't read file[%s]\n",input);
        goto cleanup;
    }

    /* Buffered write errors only surface at close time, so check it. */
    status = 0;
    for(no=0;no<divisor;no++) {
        err = fclose(fplist[no]);
        fplist[no] = NULL;
        if (err) {
            snprintf(file, file_size, "%s%02d", base, no);
            fprintf(stderr,"Can't close file[%s]\n",file);
            status = 1;
        }
    }

cleanup:
    /* Release whatever is still held, on success and on every error path. */
    if (fp) {
        fclose(fp);
    }
    if (fplist) {
        for(no=0;no<divisor;no++) {
            if (fplist[no]) {
                fclose(fplist[no]);
            }
        }
        free(fplist);
    }
    free(file);
    return(status);
}
/*
 * main - command-line entry point: split2 divisor input base
 *
 * Validates that the divisor is a positive decimal number and passes the
 * arguments to split().  The exit status is 1 on a usage or validation
 * error, otherwise whatever split() returns.
 */
int
main(int argc, char *argv[])
{
    int divisor = 0;
    char *p = NULL;

    if (argc != 4) {
        fprintf(stderr,"Usage : split2 divisor input base\n");
        exit(1);
    }
    for(p=argv[1];(*p)!='\0';p++) {
        /* isdigit() needs an unsigned char value; plain char may be negative. */
        if (!isdigit((unsigned char)*p)) {
            fprintf(stderr,"not a number! [%s]\n",argv[1]);
            exit(1);
        }
    }
    /*
     * Skip leading zeros, then allow at most nine digits so that atoi()
     * cannot overflow (assumes int is at least 32 bits wide).
     */
    for(p=argv[1];(*p)=='0';p++) {
        ;
    }
    if (strlen(p) > 9) {
        fprintf(stderr,"number too large! [%s]\n",argv[1]);
        exit(1);
    }
    divisor = atoi(argv[1]);
    /* Also rejects the empty string and "0", which pass the digit check. */
    if (divisor <= 0) {
        fprintf(stderr,"not a positive number! [%s]\n",argv[1]);
        exit(1);
    }
    /* Pass argv straight through: no fixed-size copies that could overflow. */
    return(split(divisor, argv[2], argv[3]));
}
8.速度比較
=== sh === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.696s user 0m0.460s sys 0m2.140s === perl === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.122s user 0m1.736s sys 0m0.216s === ruby === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.510s user 0m2.048s sys 0m0.256s === python === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m1.199s user 0m0.832s sys 0m0.228s === lisp === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] 
Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m24.201s user 0m21.632s sys 0m2.012s === java === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.631s user 0m2.936s sys 0m0.468s === c === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m0.484s user 0m0.236s sys 0m0.224s






