diff options
author | Patrick Simianer <p@simianer.de> | 2014-09-21 14:52:55 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-09-21 14:52:55 +0100 |
commit | 8e3802303408f6ad4b1299a59fc27f3954174992 (patch) | |
tree | 6fca89d6cc0d33f0df9fcc2d3ed532a60150f410 | |
parent | c14ba05de38f77ca2465c989b5e055a8f24f1d45 (diff) |
add_seg: option to use pre-defined index
-rwxr-xr-x | add_seg | 21 |
1 files changed, 15 insertions, 6 deletions
```diff
@@ -1,26 +1,35 @@
 #!/usr/bin/env ruby
 
 require 'trollop'
-
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
+require 'zipf'
 
 cfg = Trollop::options do
   opt :grammar, "(Absolute) path of folder containing grammars.", :type => :string, :short => '-g', :required => true
   opt :loo, "leave one out", :type => :bool, :default => false
   opt :start_id, "start with this id", :type => :int, :default => 0, :short => '-i'
   opt :nogz, "grammar files not gzipped", :type => :bool, :default => false
+  opt :index, "number according to index", :type => :string, :default => nil
+end
+
+index = []
+if cfg[:index]
+  index = ReadFile.readlines_strip(cfg[:index]).map{ |i| i.to_i }
 end
 
 i = cfg[:start_id]
+j = 0
 while line = STDIN.gets
   ext = '.gz'
   ext = '' if cfg[:nogz]
   s = "<seg"
   if cfg[:loo] then s += " exclude=\"#{i}\"" end
   if cfg[:grammar] then s += " grammar=\"#{cfg[:grammar]}/grammar.#{i}#{ext}\"" end
-  puts s + " id=\"#{i}\"> #{line.strip} </seg>"
-  i+=1
+  if index.size > 0
+    puts s + " id=\"#{index[j]}\"> #{line.strip} </seg>"
+  else
+    puts s + " id=\"#{i}\"> #{line.strip} </seg>"
+  end
+  i += 1
+  j += 1
 end
 
```