summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xtrain-test-split14
1 files changed, 10 insertions, 4 deletions
diff --git a/train-test-split b/train-test-split
index 4d8153a..cf52455 100755
--- a/train-test-split
+++ b/train-test-split
@@ -9,6 +9,7 @@ conf = Trollop::options do
opt :size, "one size", :type => :int, :required => true
opt :repeat, "number of repetitions", :type => :int, :default => 1
opt :prefix, "prefix for output files", :type => :string
+ opt :sets, "number of sets", :type => :int, :default => 1
end
fn = conf[:foreign]
fn_ext = fn.split('.').last
@@ -28,18 +29,23 @@ prefix = conf[:prefix]
a = (0..nlines_e-1).to_a
i = 0
conf[:repeat].times {
+ if conf[:repeat] == 1
+ infix = ""
+ else
+ infix = ".#{i}"
+ end
b = a.sample(size)
ax = a.reject{|j| b.include? j}
`mkdir split_#{i}`
- new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}"
- new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}"
+ new_f = WriteFile.new "split_#{i}/#{prefix}.train#{infix}.#{fn_ext}"
+ new_e = WriteFile.new "split_#{i}/#{prefix}.train#{infix}.#{en_ext}"
ax.each { |j|
new_f.write f[j]
new_e.write e[j]
}
new_f.close; new_e.close
- new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}"
- new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}"
+ new_f = WriteFile.new "split_#{i}/#{prefix}.devtest#{infix}.#{fn_ext}"
+ new_e = WriteFile.new "split_#{i}/#{prefix}.devtest#{infix}.#{en_ext}"
b.each { |j|
new_f.write f[j]
new_e.write e[j]