summary refs log tree commit diff
path: root/make-rule-features
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2016-07-05 11:01:46 +0200
committer Patrick Simianer <p@simianer.de> 2016-07-05 11:01:46 +0200
commit 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 (patch)
tree 5a06ee7de98640a39244b57bb369697176b44ebf /make-rule-features
parent 69949dda35c3ea21d8e926e5f0a596a0a0f61c6a (diff)
mv
Diffstat (limited to 'make-rule-features')
-rwxr-xr-x make-rule-features 44
1 files changed, 44 insertions, 0 deletions
diff --git a/make-rule-features b/make-rule-features
new file mode 100755
index 0000000..7adb6e9
--- /dev/null
+++ b/make-rule-features
@@ -0,0 +1,44 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+def mkrf src, tgt # Builds one whole-rule feature name, "R:X:<src>:<tgt>", from the two sides of a rule.
+ s = src.gsub /\[X,[1-9]\]/, "NX" # source side: every "[X,d]" (d = 1..9) becomes "NX"; the index is dropped
+ t = tgt.gsub /\[X,([1-9])\]/,'N\1' # target side: "[X,d]" becomes "N<d>"; the index is kept
+ return "R:X:#{s.gsub(" ","_")}:#{t.gsub(" ","_")}" # spaces on both sides are replaced by "_"
+end
+
+def mkrbf s, t # Returns an array of "RB<t>:<a>_<b>" feature names built from the 2-grams of rule side s; t is the side tag.
+ s = String.new s # work on a copy so the gsub! calls below do not mutate the caller's string
+ if t == "S" # the driver script passes "S" for the source side and "T" for the target side
+ s.gsub! /\[X,[1-9]\]/, "X" # tag "S": every "[X,d]" becomes "X"; the index is dropped
+ else
+ s.gsub! /\[X,([1-9])\]/, 'X\1' # any other tag: "[X,d]" becomes "X<d>"; the index is kept
+ end
+ s.reverse! # reverse, append " >r<", reverse again: the net effect is to prepend "<r> "
+ s += " >r<"
+ s.reverse!
+ s += " </r>" # append the closing marker, giving "<r> ... </r>"
+ a = []
+ ngrams(s, 2, true) { |ng| # ngrams is not defined in this file (presumably from zipf); meaning of the third argument: TODO confirm
+ a << "RB#{t}:#{ng.join "_"}" # each yielded n-gram is joined with "_" and prefixed with "RB<t>:"
+ }
+ return a
+end
+
+h = {} # used as a set: keys are feature names, values are always true
+while line = STDIN.gets # read STDIN line by line; gets returns nil at EOF, which ends the loop
+ _,src,tgt,_,_ = splitpipe line.strip # splitpipe is not defined in this file (presumably from zipf); fields 2 and 3 are used as src and tgt
+ src.strip! # NOTE(review): src/tgt are nil if splitpipe yields fewer than 3 fields, and strip! would then raise
+ tgt.strip!
+ mkrbf(src, "S").each { |f| # 2-gram features for the source side
+ h[f] = true
+ }
+ mkrbf(tgt, "T").each { |f| # 2-gram features for the target side
+ h[f] = true
+ }
+ h [mkrf(src, tgt)] = true # whole-rule feature; h is a local, so "h [...]" is still an index assignment
+end
+
+h.keys.each { |f| puts f } # print every distinct feature name once, one per line
+