例えば25行のファイルを8分割したい場合、split の行数指定では思い通りにならない。3行ごとの分割を指定すると「3行×8個+1行×1個」で9分割になり、4行ごとの分割を指定すると「4行×6個+1行×1個」で7分割になってしまい、微妙に困る。そんな時、分割数を指定して分けるコマンドを自前で持っていると楽ができるかもしれない。
1.shellで分割
#! /bin/sh
#
# split2.sh - split a text file into a fixed NUMBER of parts.
#
# Usage : split2.sh divisor input base
#   divisor  number of output files (positive integer)
#   input    file to split
#   base     output name prefix; parts are written to <base>00, <base>01, ...
#
# The first (total % divisor) parts receive one extra line so that every
# input line ends up in exactly one part.  A "Part[..]" summary line is
# printed for each part.
#
# Limitation: <base> must not contain a newline (it is embedded in a sed
# script, one "w" command per line).

if [ "$#" -ne 3 ]
then
    echo "Usage : split2.sh divisor input base" >&2
    exit 1
fi

divisor=$1
input=$2
base=$3

# Accept only a positive decimal integer; anything else would otherwise
# surface later as a division by zero or a syntax error.
case "$divisor" in
    ''|*[!0-9]*)
        echo "not a number! [$divisor]" >&2
        exit 1
        ;;
esac
# expr parses "08" as decimal, whereas $(( )) would treat it as octal.
divisor=`expr "$divisor" + 0`
if [ "$divisor" -le 0 ]
then
    echo "not a number! [$1]" >&2
    exit 1
fi

if [ ! -r "$input" ]
then
    echo "Can't open file[$input]" >&2
    exit 1
fi

# awk's NR counts a final line that lacks a trailing newline; "wc -l" only
# counts newline characters and would silently drop that line.
# Input is passed by redirection so odd file names (leading '-', embedded
# '=') are never interpreted as options or awk assignments.
total=`awk 'END { print NR }' < "$input"`
quotient=$((total / divisor))
toomuch=$((total % divisor))

c_end=0
no=0
script=

while [ "$no" -lt "$divisor" ]
do
    c_start=$((c_end + 1))
    if [ "$no" -lt "$toomuch" ]
    then
        # One of the leading parts that absorb the remainder.
        c_end=$((c_start + quotient))
    else
        c_end=$((c_start + quotient - 1))
    fi
    cnt=$((c_end - c_start + 1))
    file=`printf "%s%02d" "$base" "$no"`

    printf "Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n" \
        "$((no + 1))" "$c_start" "$c_end" "$cnt"

    if [ "$cnt" -gt 0 ]
    then
        # One sed "w" command per line; the file name runs to end of line,
        # so spaces in <base> are safe.
        script="${script}${c_start},${c_end}w ${file}
"
    else
        # Empty part: create the (empty) file without giving sed a
        # reversed address range.
        : > "$file"
    fi
    no=$((no + 1))
done

# A single sed pass writes every non-empty part.
if [ -n "$script" ]
then
    sed -n -e "$script" < "$input"
fi
2.perlで分割
#! /usr/bin/perl
#
# split2.pl - split a text file into a fixed NUMBER of parts.
#
# Usage : split2.pl divisor input base
#   divisor  number of output files (positive integer)
#   input    file to split
#   base     output name prefix; parts are written to <base>00, <base>01, ...
#
# The first (total % divisor) parts receive one extra line.  A "Part[..]"
# summary line is printed when each part is started.

use strict;
use warnings;

if (scalar(@ARGV) != 3) {
    printf(STDERR "Usage : split2.pl divisor input base\n");
    exit(1);
}

my ($divisor, $input, $base) = @ARGV;

# Reject non-numeric and zero divisors up front (would divide by zero below).
if ($divisor !~ /^[0-9]+$/ || $divisor == 0) {
    printf(STDERR "not a number! [%s]\n", $divisor);
    exit(1);
}
$divisor = int($divisor);

# Pass 1: count the input lines.
my $in;
if (!open($in, '<', $input)) {
    printf(STDERR "Can't open file[%s]\n", $input);
    exit(1);
}
my $total = 0;
$total++ while (<$in>);
close($in);

my $quotient = int($total / $divisor);
my $toomuch  = $total % $divisor;

# Open every output file; lexical handles are kept in an array indexed by
# part number instead of symbolic references named after the file.
my @out;
foreach my $no (0 .. $divisor - 1) {
    my $file = sprintf("%s%02d", $base, $no);
    my $fh;
    if (!open($fh, '>', $file)) {
        printf(STDERR "Can't open file[%s]\n", $file);
        exit(1);
    }
    push(@out, $fh);
}

# Pass 2: distribute the lines.
if (!open($in, '<', $input)) {
    printf(STDERR "Can't open file[%s]\n", $input);
    exit(1);
}

my ($c_start, $c_end, $part, $cnt) = (0, 0, 0, 0);
while (my $line = <$in>) {
    $cnt++;
    # Guard on $part so a file that grew since pass 1 spills into the last
    # part instead of indexing past the end of @out.
    if ($cnt > $c_end && $part < $divisor) {
        $c_start = $c_end + 1;
        # Leading parts absorb the remainder, one extra line each.
        $c_end = ($part < $toomuch) ? $c_start + $quotient
                                    : $c_start + $quotient - 1;
        printf("Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
               $part + 1, $c_start, $c_end, $c_end - $c_start + 1);
        $part++;
    }
    if (!print { $out[$part - 1] } $line) {
        printf(STDERR "part [%d] write error\n", $part);
        exit(1);
    }
}
close($in);

foreach my $no (0 .. $divisor - 1) {
    if (!close($out[$no])) {
        printf(STDERR "Can't close file[%s]\n", sprintf("%s%02d", $base, $no));
        exit(1);
    }
}
3.rubyで分割
#! /usr/bin/ruby
#
# split2.rb - split a text file into a fixed NUMBER of parts.
#
# Usage : split2.rb divisor input base
#   divisor  number of output files (positive integer)
#   input    file to split
#   base     output name prefix; parts are written to <base>00, <base>01, ...
#
# The first (total % divisor) parts receive one extra line.  A "Part[..]"
# summary line is printed when each part is started.

if ARGV.length != 3
  printf("Usage : split2.rb divisor input base\n")
  exit(1)
end

# String#to_i silently maps junk to 0, which would later raise
# ZeroDivisionError; parse strictly and reject non-positive values.
begin
  divisor = Integer(ARGV[0], 10)
rescue ArgumentError
  divisor = 0
end
if divisor <= 0
  STDERR.printf("not a number! [%s]\n", ARGV[0])
  exit(1)
end

input = ARGV[1]
base  = ARGV[2]

# Pass 1: count the input lines.
total = 0
File.open(input, "r") do |f|
  f.each_line { total += 1 }
end

quotient = total / divisor
toomuch  = total % divisor

c_start = 0
c_end   = 0
part    = 0
cnt     = 0
fplist  = []

begin
  divisor.times do |no|
    fplist << File.open("%s%02d" % [base, no], "w")
  end

  # Pass 2: distribute the lines.
  File.open(input, "r") do |f|
    f.each_line do |line|
      cnt += 1
      # Guard on part so a file that grew since pass 1 spills into the last
      # part instead of indexing past the end of fplist.
      if cnt > c_end && part < divisor
        c_start = c_end + 1
        # Leading parts absorb the remainder, one extra line each.
        c_end = part < toomuch ? c_start + quotient : c_start + quotient - 1
        printf("Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
               part + 1, c_start, c_end, c_end - c_start + 1)
        part += 1
      end
      fplist[part - 1].write(line)
    end
  end
ensure
  # Close whatever was opened, even if an exception interrupted the split.
  fplist.each { |out| out.close unless out.closed? }
end
4.pythonで分割
#! /usr/bin/python
"""split2.py - split a text file into a fixed NUMBER of parts.

Usage : split2.py divisor input base

Parts are written to <base>00, <base>01, ...  The first (total % divisor)
parts receive one extra line.  Runs unchanged on Python 2.6+ and Python 3.
"""
import sys


def split_file(divisor, path, base):
    """Split the file at ``path`` into ``divisor`` parts named ``base`` + NN.

    Files are read and written in binary mode so that line contents
    (including the encoding and a missing final newline) are copied
    byte-for-byte.  A "Part[..]" summary line is printed when each part is
    started.

    Args:
        divisor: number of output files; must be a positive integer.
        path: name of the file to split.
        base: output file name prefix.

    Returns:
        A list of length ``divisor`` with the number of lines written to
        each part.

    Raises:
        ValueError: if ``divisor`` is not positive.
        IOError/OSError: if a file cannot be opened, read or written.
    """
    if divisor <= 0:
        raise ValueError("divisor must be a positive integer: %r" % (divisor,))

    # Pass 1: count the input lines.
    with open(path, "rb") as src:
        total = sum(1 for _ in src)

    # divmod keeps the arithmetic integral on Python 3 as well ("/" would
    # yield a float there).
    quotient, toomuch = divmod(total, divisor)

    counts = [0] * divisor
    outputs = []
    try:
        for no in range(divisor):
            outputs.append(open("%s%02d" % (base, no), "wb"))

        c_start = 0
        c_end = 0
        part = 0
        # Pass 2: distribute the lines.
        with open(path, "rb") as src:
            for cnt, line in enumerate(src, 1):
                # Guard on part so a file that grew since pass 1 spills into
                # the last part instead of raising IndexError.
                if cnt > c_end and part < divisor:
                    c_start = c_end + 1
                    # Leading parts absorb the remainder, one extra line each.
                    if part < toomuch:
                        c_end = c_start + quotient
                    else:
                        c_end = c_start + quotient - 1
                    print("Part[%d] Start[%08d] End[%08d] Cnt[%08d]"
                          % (part + 1, c_start, c_end, c_end - c_start + 1))
                    part += 1
                outputs[part - 1].write(line)
                counts[part - 1] += 1
    finally:
        # Close whatever was opened, even if the split was interrupted.
        for out in outputs:
            out.close()
    return counts


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) != 4:
        print("Usage : split2.py divisor input base")
        return 1
    try:
        divisor = int(argv[1])
    except ValueError:
        divisor = 0
    if divisor <= 0:
        sys.stderr.write("not a number! [%s]\n" % argv[1])
        return 1
    split_file(divisor, argv[2], argv[3])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
5.lispで分割
#! /usr/bin/clisp
;;; split.lisp - split a text file into a fixed NUMBER of parts.
;;;
;;; Usage: split.lisp divisor input base
;;;   divisor  number of output files (positive integer)
;;;   input    file to split
;;;   base     output name prefix; parts are written to <base>00, <base>01, ...
;;;
;;; The first (mod total divisor) parts receive one extra line.  A "Part[..]"
;;; summary line is printed when each part is started.
;;; *args* and exit are the CLISP extensions the script already relied on.

(when (/= (length *args*) 3)
  (format t "Usage split.lisp divisor input base~%")
  (exit 1))

(defun count-lines (path)
  "Return the number of lines in the file named PATH."
  (with-open-file (in path :direction :input)
    (loop for line = (read-line in nil)
          while line
          count t)))

(defun split-file (divisor input base)
  "Distribute the lines of INPUT over DIVISOR files named BASE followed by NN."
  (let ((outs (make-array divisor :initial-element nil))
        (c-start 0)
        (c-end 0)
        (part 0)
        (cnt 0))
    (multiple-value-bind (quotient toomuch) (floor (count-lines input) divisor)
      (unwind-protect
           (progn
             ;; A vector indexed by part number gives O(1) access and does
             ;; not depend on what NREVERSE leaves in the original variable.
             (dotimes (no divisor)
               (setf (aref outs no)
                     (open (format nil "~A~2,'0D" base no)
                           :direction :output
                           :if-exists :supersede)))
             (with-open-file (in input :direction :input)
               (loop
                 (multiple-value-bind (line missing-newline-p)
                     (read-line in nil)
                   ;; RETURN (not QUIT) so the streams are closed normally.
                   (when (null line)
                     (return))
                   (incf cnt)
                   ;; Guard on PART so a file that grew since the counting
                   ;; pass spills into the last part.
                   (when (and (> cnt c-end) (< part divisor))
                     (setq c-start (1+ c-end))
                     ;; Leading parts absorb the remainder, one extra line each.
                     (setq c-end (if (< part toomuch)
                                     (+ c-start quotient)
                                     (+ c-start quotient -1)))
                     (incf part)
                     (format t "Part[~D] Start[~8,'0D] End[~8,'0D] Cnt[~8,'0D]~%"
                             part c-start c-end (1+ (- c-end c-start))))
                   (let ((out (aref outs (1- part))))
                     (write-string line out)
                     ;; Do not invent a newline the input did not have.
                     (unless missing-newline-p
                       (terpri out)))))))
        ;; Close whatever was opened, even on a non-local exit.
        (loop for out across outs
              when out do (close out))))))

(let ((divisor (parse-integer (first *args*) :junk-allowed t)))
  ;; PARSE-INTEGER yields NIL for junk; zero would divide by zero in FLOOR.
  (unless (and divisor (plusp divisor))
    (format *error-output* "not a number! [~A]~%" (first *args*))
    (exit 1))
  (split-file divisor (second *args*) (third *args*)))
6.javaで分割
import java.io.*;
import java.util.*;

/**
 * Splits a text file into a fixed NUMBER of parts.
 *
 * <p>Usage: {@code java split2 divisor input base}. Parts are written to
 * {@code <base>00}, {@code <base>01}, ... The first {@code total % divisor}
 * parts receive one extra line.
 */
class split2 {

    /**
     * Splits {@code input} into {@code divisor} files named {@code base} + NN.
     * A "Part[..]" summary line is printed when each part is started.
     * Lines are read with {@link BufferedReader#readLine()} and written back
     * terminated by a single {@code '\n'}.
     *
     * @param divisor number of output files; must be positive
     * @param input   name of the file to split
     * @param base    output file name prefix
     * @throws IllegalArgumentException if {@code divisor} is not positive
     * @throws IOException              if a file cannot be read or written
     */
    void split(int divisor, String input, String base) throws Exception {
        if (divisor <= 0) {
            throw new IllegalArgumentException(
                "divisor must be positive: " + divisor);
        }

        int total = countLines(input);
        int quotient = total / divisor;
        int toomuch = total % divisor;

        List<PrintWriter> fplist = new ArrayList<PrintWriter>(divisor);
        BufferedReader br = null;
        try {
            for (int no = 0; no < divisor; no++) {
                String file = String.format("%s%02d", base, no);
                fplist.add(new PrintWriter(
                    new BufferedWriter(new FileWriter(file))));
            }

            br = new BufferedReader(new FileReader(input));
            int c_start = 0;
            int c_end = 0;
            int part = 0;
            int cnt = 0;
            String line;
            while ((line = br.readLine()) != null) {
                cnt++;
                // Guard on part so a file that grew since the counting pass
                // spills into the last part instead of indexing out of range.
                if (cnt > c_end && part < divisor) {
                    c_start = c_end + 1;
                    // Leading parts absorb the remainder, one extra line each.
                    c_end = (part < toomuch) ? c_start + quotient
                                             : c_start + quotient - 1;
                    System.out.printf(
                        "Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
                        part + 1, c_start, c_end, c_end - c_start + 1);
                    part++;
                }
                PrintWriter pw = fplist.get(part - 1);
                pw.print(line);
                pw.print('\n');
            }

            // PrintWriter never throws on write; surface failures explicitly.
            for (int no = 0; no < divisor; no++) {
                if (fplist.get(no).checkError()) {
                    throw new IOException(
                        String.format("part [%d] write error", no + 1));
                }
            }
        } finally {
            // Release every handle even when the split is interrupted.
            if (br != null) {
                br.close();
            }
            for (PrintWriter pw : fplist) {
                pw.close();
            }
        }
    }

    /** Returns the number of lines in the file named {@code input}. */
    private int countLines(String input) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(input));
        try {
            int total = 0;
            while (br.readLine() != null) {
                total++;
            }
            return total;
        } finally {
            br.close();
        }
    }

    public static void main(String args[]) throws Exception {
        if (args.length != 3) {
            System.out.printf("Usage : split2 divisor input base\n");
            System.exit(1);
        }
        int divisor = 0;
        try {
            divisor = Integer.parseInt(args[0]);
        } catch (NumberFormatException e) {
            divisor = 0;
        }
        // Zero would divide by zero; junk would otherwise end in a stack trace.
        if (divisor <= 0) {
            System.err.printf("not a number! [%s]\n", args[0]);
            System.exit(1);
        }
        String input = args[1];
        String base = args[2];
        split2 obj = new split2();
        obj.split(divisor, input, base);
    }
}
7.C言語で分割
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * split2 - split a text file into a fixed NUMBER of parts.
 *
 * Usage : split2 divisor input base
 *
 * Parts are written to <base>00, <base>01, ...  The first
 * (total % divisor) parts receive one extra line.
 */

/* Returns nonzero when the chunk read by fgets() ends a line. */
static int ends_line(const char *buf)
{
    size_t len = strlen(buf);
    return len > 0 && buf[len - 1] == '\n';
}

/*
 * Split `input` into `divisor` files named `base` followed by a two-digit
 * part number.  A "Part[..]" summary line is printed when each part is
 * started.
 *
 * Returns 0 on success, 1 on any error (a message is written to stderr).
 */
int split(int divisor, char *input, char *base)
{
    FILE **fplist = NULL;
    FILE *fp = NULL;
    char *file = NULL;
    size_t file_len = 0;
    int c_start = 0;
    int c_end = 0;
    int quotient = 0;
    int toomuch = 0;
    int total = 0;
    int part = 0;
    int cnt = 0;
    int no = 0;
    int opened = 0;
    int in_line = 0;
    int rc = 1;
    char buf[2048];

    if (divisor <= 0) {
        fprintf(stderr, "invalid divisor [%d]\n", divisor);
        return 1;
    }

    fplist = calloc((size_t)divisor, sizeof(FILE *));
    /* Room for the prefix, the widest possible int and the terminator. */
    file_len = strlen(base) + 16;
    file = malloc(file_len);
    if (!fplist || !file) {
        fprintf(stderr, "calloc error!\n");
        goto done;
    }

    /* Pass 1: count lines.  A line longer than buf arrives in several
     * chunks, so count a line only when its terminating newline is seen
     * (plus a final unterminated line, if any). */
    fp = fopen(input, "r");
    if (!fp) {
        fprintf(stderr, "Can't open file[%s]\n", input);
        goto done;
    }
    while (fgets(buf, sizeof(buf), fp)) {
        in_line = !ends_line(buf);
        if (!in_line) {
            total++;
        }
    }
    if (in_line) {
        total++;
    }
    if (fclose(fp)) {
        fp = NULL;
        fprintf(stderr, "Can't close file[%s]\n", input);
        goto done;
    }
    fp = NULL;

    quotient = total / divisor;
    toomuch = total % divisor;

    for (no = 0; no < divisor; no++) {
        snprintf(file, file_len, "%s%02d", base, no);
        fplist[no] = fopen(file, "w");
        if (!fplist[no]) {
            fprintf(stderr, "Can't open file[%s]\n", file);
            goto done;
        }
        opened++;
    }

    /* Pass 2: distribute the lines. */
    fp = fopen(input, "r");
    if (!fp) {
        fprintf(stderr, "Can't open file[%s]\n", input);
        goto done;
    }
    in_line = 0;
    while (fgets(buf, sizeof(buf), fp)) {
        if (!in_line) {
            /* This chunk starts a new line. */
            cnt++;
            /* Guard on part so a file that grew since pass 1 spills into
             * the last part instead of indexing past fplist. */
            if (cnt > c_end && part < divisor) {
                c_start = c_end + 1;
                /* Leading parts absorb the remainder, one line each. */
                if (part < toomuch) {
                    c_end = c_start + quotient;
                } else {
                    c_end = c_start + quotient - 1;
                }
                printf("Part[%d] Start[%08d] End[%08d] Cnt[%08d]\n",
                       part + 1, c_start, c_end, c_end - c_start + 1);
                part++;
            }
        }
        in_line = !ends_line(buf);
        /* fputs reports failure with EOF; fprintf's return value is a
         * character count and is never a reliable "false" on error. */
        if (fputs(buf, fplist[part - 1]) == EOF) {
            fprintf(stderr, "part [%d] write error\n", part);
            goto done;
        }
    }
    if (fclose(fp)) {
        fp = NULL;
        fprintf(stderr, "Can't close file[%s]\n", input);
        goto done;
    }
    fp = NULL;

    rc = 0;

done:
    if (fp) {
        fclose(fp);
    }
    if (fplist) {
        for (no = 0; no < opened; no++) {
            /* fclose flushes buffered data, so a failure here is a lost
             * write and must fail the whole operation. */
            if (fclose(fplist[no])) {
                snprintf(file, file_len, "%s%02d", base, no);
                fprintf(stderr, "Can't close file[%s]\n", file);
                rc = 1;
            }
        }
    }
    free(file);
    free(fplist);
    return rc;
}

int main(int argc, char *argv[])
{
    long value = 0;
    char *end = NULL;

    if (argc != 4) {
        fprintf(stderr, "Usage : split2 divisor input base\n");
        exit(1);
    }

    /* Digits only (no sign, no whitespace), positive, and within int. */
    errno = 0;
    if (isdigit((unsigned char)argv[1][0])) {
        value = strtol(argv[1], &end, 10);
    }
    if (end == NULL || *end != '\0' || errno != 0 ||
        value <= 0 || value > INT_MAX) {
        fprintf(stderr, "not a number! [%s]\n", argv[1]);
        exit(1);
    }

    /* argv strings are used directly; copying them into fixed-size
     * buffers could overflow on long paths. */
    return split((int)value, argv[2], argv[3]);
}
8.速度比較
=== sh === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.696s user 0m0.460s sys 0m2.140s === perl === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.122s user 0m1.736s sys 0m0.216s === ruby === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.510s user 0m2.048s sys 0m0.256s === python === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m1.199s user 0m0.832s sys 0m0.228s === lisp === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] 
Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m24.201s user 0m21.632s sys 0m2.012s === java === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m2.631s user 0m2.936s sys 0m0.468s === c === Part[1] Start[00000001] End[00125000] Cnt[00125000] Part[2] Start[00125001] End[00250000] Cnt[00125000] Part[3] Start[00250001] End[00375000] Cnt[00125000] Part[4] Start[00375001] End[00500000] Cnt[00125000] Part[5] Start[00500001] End[00625000] Cnt[00125000] Part[6] Start[00625001] End[00750000] Cnt[00125000] Part[7] Start[00750001] End[00875000] Cnt[00125000] Part[8] Start[00875001] End[01000000] Cnt[00125000] real 0m0.484s user 0m0.236s sys 0m0.224s
0 件のコメント:
コメントを投稿