summary | refs | log | tree | commit | diff
path: root/train-test-split
diff options
context:
space:
mode:
author Patrick Simianer <pks@pks.rocks> 2019-07-28 16:39:55 +0200
committer Patrick Simianer <pks@pks.rocks> 2019-07-28 16:39:55 +0200
commit 43aa6586f5807aeb99d8e08f996707d92e18774e (patch)
tree 7eeea850d1be460e041097c5511dc7c28495becd /train-test-split
parent 71278b098dcea1a5fb8520ab08b086cded0764fe (diff)
infix for repeated
Diffstat (limited to 'train-test-split')
-rwxr-xr-x train-test-split 14
1 files changed, 10 insertions, 4 deletions
diff --git a/train-test-split b/train-test-split
index 4d8153a..cf52455 100755
--- a/train-test-split
+++ b/train-test-split
@@ -9,6 +9,7 @@ conf = Trollop::options do
opt :size, "one size", :type => :int, :required => true
opt :repeat, "number of repetitions", :type => :int, :default => 1
opt :prefix, "prefix for output files", :type => :string
+ opt :sets, "number of sets", :type => :int, :default => 1
end
fn = conf[:foreign]
fn_ext = fn.split('.').last
@@ -28,18 +29,23 @@ prefix = conf[:prefix]
a = (0..nlines_e-1).to_a
i = 0
conf[:repeat].times {
+ if conf[:repeat] == 1
+ infix = ""
+ else
+ infix = ".#{i}"
+ end
b = a.sample(size)
ax = a.reject{|j| b.include? j}
`mkdir split_#{i}`
- new_f = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{fn_ext}"
- new_e = WriteFile.new "split_#{i}/#{prefix}.train.#{i}.#{en_ext}"
+ new_f = WriteFile.new "split_#{i}/#{prefix}.train#{infix}.#{fn_ext}"
+ new_e = WriteFile.new "split_#{i}/#{prefix}.train#{infix}.#{en_ext}"
ax.each { |j|
new_f.write f[j]
new_e.write e[j]
}
new_f.close; new_e.close
- new_f = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{fn_ext}"
- new_e = WriteFile.new "split_#{i}/#{prefix}.test.#{i}.#{en_ext}"
+ new_f = WriteFile.new "split_#{i}/#{prefix}.devtest#{infix}.#{fn_ext}"
+ new_e = WriteFile.new "split_#{i}/#{prefix}.devtest#{infix}.#{en_ext}"
b.each { |j|
new_f.write f[j]
new_e.write e[j]