diff options
author | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
commit | 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 (patch) | |
tree | 5a06ee7de98640a39244b57bb369697176b44ebf /make-rule-features | |
parent | 69949dda35c3ea21d8e926e5f0a596a0a0f61c6a (diff) |
mv
Diffstat (limited to 'make-rule-features')
-rwxr-xr-x | make-rule-features | 44 |
1 files changed, 44 insertions, 0 deletions
#!/usr/bin/env ruby

# make-rule-features: reads rule lines from STDIN and prints one line per
# distinct feature name derived from each rule's source and target side.
#
# `ngrams` and `splitpipe` are not defined in this file; presumably they are
# provided by the `zipf` library required below -- confirm against that gem.

require 'zipf'

# Matches a non-terminal placeholder such as "[X,1]"; the digit (1-9) is
# captured as group 1.
NONTERMINAL = /\[X,([1-9])\]/

# Builds the rule-identity feature for one source/target pair.
#
# Every non-terminal in +src+ becomes "NX" (its index is dropped), every
# non-terminal in +tgt+ becomes "N" followed by its index, and spaces on both
# sides become underscores.
#
# @param src [String] source side of the rule
# @param tgt [String] target side of the rule
# @return [String] a feature name of the form "R:X:<src>:<tgt>"
def mkrf(src, tgt)
  s = src.gsub(NONTERMINAL, 'NX').tr(' ', '_')
  t = tgt.gsub(NONTERMINAL, 'N\1').tr(' ', '_')
  "R:X:#{s}:#{t}"
end

# Builds the bigram features for one side of a rule.
#
# When +t+ is "S" every non-terminal is rewritten to a bare "X"; for any
# other +t+ the index is kept ("X1", "X2", ...). The text is then wrapped in
# "<r>" / "</r>" boundary tokens and handed to +ngrams+; each n-gram it
# yields is joined with "_" and prefixed with "RB<t>:".
#
# @param s [String] one side of the rule (the argument is not modified)
# @param t [String] side tag used in the feature prefix ("S" or "T" here)
# @return [Array<String>] the feature names, in the order +ngrams+ yields them
def mkrbf(s, t)
  replacement = t == 'S' ? 'X' : 'X\1'
  # A fresh, unfrozen string is passed on, as in the original implementation.
  wrapped = "<r> #{s.gsub(NONTERMINAL, replacement)} </r>"
  feats = []
  ngrams(wrapped, 2, true) { |ng| feats << "RB#{t}:#{ng.join('_')}" }
  feats
end

# A Hash is used as an insertion-ordered set, so each feature is printed
# once, in first-seen order.
features = {}
while (line = STDIN.gets)
  line = line.strip
  next if line.empty?

  # Only the second and third fields are used.
  _, src, tgt = splitpipe(line)
  if src.nil? || tgt.nil?
    # Previously this case crashed with NoMethodError on nil and discarded
    # every feature collected so far; report the line and carry on instead.
    warn "make-rule-features: skipping malformed line #{STDIN.lineno}: #{line}"
    next
  end

  src = src.strip
  tgt = tgt.strip
  mkrbf(src, 'S').each { |f| features[f] = true }
  mkrbf(tgt, 'T').each { |f| features[f] = true }
  features[mkrf(src, tgt)] = true
end

features.each_key { |f| puts f }