diff options
-rwxr-xr-x | train-test-split | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/train-test-split b/train-test-split
index 4d8153a..cf52455 100755
--- a/train-test-split
+++ b/train-test-split
@@ -9,6 +9,7 @@ conf = Trollop::options do
   opt :size, "one size", :type => :int, :required => true
   opt :repeat, "number of repetitions", :type => :int, :default => 1
   opt :prefix, "prefix for output files", :type => :string
+  opt :sets, "number of sets", :type => :int, :default => 1
 end
 fn = conf[:foreign]
 fn_ext = fn.split('.').last
@@ -28,18 +29,23 @@ prefix = conf[:prefix]
 a = (0..nlines_e-1).to_a
 i = 0
 conf[:repeat].times {
+  if conf[:repeat] == 1
+    infix = ""
+  else
+    infix = ".#{i}"
+  end
   b = a.sample(size)
   ax = a.reject{|j| b.include? j}
   `mkdir split_#{i}`
-  new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}"
-  new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}"
+  new_f = WriteFile.new "split_#{i}/#{prefix}.train#{infix}.#{fn_ext}"
+  new_e = WriteFile.new "split_#{i}/#{prefix}.train#{infix}.#{en_ext}"
   ax.each { |j|
     new_f.write f[j]
     new_e.write e[j]
   }
   new_f.close; new_e.close
-  new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}"
-  new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}"
+  new_f = WriteFile.new "split_#{i}/#{prefix}.devtest#{infix}.#{fn_ext}"
+  new_e = WriteFile.new "split_#{i}/#{prefix}.devtest#{infix}.#{en_ext}"
   b.each { |j|
     new_f.write f[j]
     new_e.write e[j]