summary refs log tree commit diff
path: root/add_seg
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-09-21 14:52:55 +0100
committer Patrick Simianer <p@simianer.de> 2014-09-21 14:52:55 +0100
commit 8e3802303408f6ad4b1299a59fc27f3954174992 (patch)
tree 6fca89d6cc0d33f0df9fcc2d3ed532a60150f410 /add_seg
parent c14ba05de38f77ca2465c989b5e055a8f24f1d45 (diff)
add_seg: option to use pre-defined index
Diffstat (limited to 'add_seg')
-rwxr-xr-x add_seg 21
1 files changed, 15 insertions, 6 deletions
diff --git a/add_seg b/add_seg
index 684a236..c7fd521 100755
--- a/add_seg
+++ b/add_seg
@@ -1,26 +1,35 @@
#!/usr/bin/env ruby
require 'trollop'
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
+require 'zipf'
cfg = Trollop::options do
opt :grammar, "(Absolute) path of folder containing grammars.", :type => :string, :short => '-g', :required => true
opt :loo, "leave one out", :type => :bool, :default => false
opt :start_id, "start with this id", :type => :int, :default => 0, :short => '-i'
opt :nogz, "grammar files not gzipped", :type => :bool, :default => false
+ opt :index, "number according to index", :type => :string, :default => nil
+end
+
+index = []
+if cfg[:index]
+ index = ReadFile.readlines_strip(cfg[:index]).map{ |i| i.to_i }
end
i = cfg[:start_id]
+j = 0
while line = STDIN.gets
ext = '.gz'
ext = '' if cfg[:nogz]
s = "<seg"
if cfg[:loo] then s += " exclude=\"#{i}\"" end
if cfg[:grammar] then s += " grammar=\"#{cfg[:grammar]}/grammar.#{i}#{ext}\"" end
- puts s + " id=\"#{i}\"> #{line.strip} </seg>"
- i+=1
+ if index.size > 0
+ puts s + " id=\"#{index[j]}\"> #{line.strip} </seg>"
+ else
+ puts s + " id=\"#{i}\"> #{line.strip} </seg>"
+ end
+ i += 1
+ j += 1
end