summaryrefslogtreecommitdiff
path: root/add-seg
diff options
context:
space:
mode:
Diffstat (limited to 'add-seg')
-rwxr-xr-xadd-seg36
1 files changed, 36 insertions, 0 deletions
diff --git a/add-seg b/add-seg
new file mode 100755
index 0000000..e5db580
--- /dev/null
+++ b/add-seg
@@ -0,0 +1,36 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+require 'zipf'
+
+o = Trollop::options do
+ opt :grammar, "(Absolute) path of folder containing grammars.", :type => :string, :short => '-g', :default => nil
+ opt :loo, "leave one out", :type => :bool, :default => false
+ opt :start_id, "start with this id", :type => :int, :default => 0, :short => '-i'
+ opt :nogz, "grammar files not gzipped", :type => :bool, :default => false
+ opt :index, "number according to index", :type => :string, :default => nil
+end
+
+index = []
+if o[:index]
+ index = ReadFile.readlines_strip(o[:index]).map{ |i| i.to_i }
+end
+
+i = o[:start_id]
+j = 0
+while line = STDIN.gets
+ ext = '.gz'
+ ext = '' if o[:nogz]
+ s = "<seg"
+ if o[:loo] then s += " exclude=\"#{i}\"" end
+ if index.size > 0
+ if o[:grammar] then s += " grammar=\"#{o[:grammar]}/grammar.#{index[j]}#{ext}\"" end
+ puts s + " id=\"#{index[j]}\"> #{line.strip} </seg>"
+ else
+ if o[:grammar] then s += " grammar=\"#{o[:grammar]}/grammar.#{i}#{ext}\"" end
+ puts s + " id=\"#{i}\"> #{line.strip} </seg>"
+ end
+ i += 1
+ j += 1
+end
+