#!/usr/bin/env ruby
#
# make-rule-features
#
# Provenance (recovered from the flattened cgit patch export this file was
# captured from):
#   commit:  2b1d7f881c19c4d4b5afae194e02d3300c7675d0
#   author:  Patrick Simianer
#   date:    Tue, 5 Jul 2016 11:01:46 +0200
#   subject: mv
#   change:  new file `make-rule-features`, mode 100755, blob 7adb6e9,
#            44 insertions
#   export:  cgit v1.2.3
#
# Reads lines from STDIN, pulls a source side and a target side out of each
# line, and prints every distinct feature name derived from them, one per
# line, in first-seen order.
#
# `splitpipe` and `ngrams` are not defined here; they are expected to come
# from the `zipf` gem required below.
# NOTE(review): presumably `splitpipe` splits a grammar rule on its pipe
# separator and `ngrams(str, 2, true)` yields arrays of two tokens -- confirm
# against the zipf sources.

require 'zipf'

# Builds the single whole-rule feature name for a source/target pair.
#
# Every `[X,<digit 1-9>]` placeholder becomes `NX` on the source side and
# `N<digit>` on the target side; spaces on both sides become underscores.
#
# @param src [String] source side of the rule
# @param tgt [String] target side of the rule
# @return [String] feature name of the form "R:X:<source>:<target>"
def mkrf(src, tgt)
  source_part = src.gsub(/\[X,[1-9]\]/, "NX").tr(" ", "_")
  target_part = tgt.gsub(/\[X,([1-9])\]/, 'N\1').tr(" ", "_")
  "R:X:#{source_part}:#{target_part}"
end

# Builds the bigram feature names for one side of a rule.
#
# When +t+ is "S" each `[X,<digit>]` placeholder collapses to a bare `X`;
# for any other +t+ the digit is kept (`X<digit>`). The argument itself is
# left untouched.
#
# @param s [String] one side of the rule
# @param t [String] side tag; it is embedded verbatim in the feature prefix
# @return [Array<String>] one "RB<t>:<joined n-gram>" entry per n-gram
#   yielded by `ngrams`
def mkrbf(s, t)
  simplified =
    if t == "S"
      s.gsub(/\[X,[1-9]\]/, "X")
    else
      s.gsub(/\[X,([1-9])\]/, 'X\1')
    end

  # Leading "<r> " marker plus a trailing single space.
  # NOTE(review): the trailing pad is a bare space in this capture; a closing
  # marker may have been lost when the patch was exported -- confirm against
  # the upstream repository before changing it.
  padded = "<r> #{simplified} "

  features = []
  ngrams(padded, 2, true) do |gram|
    features << "RB#{t}:#{gram.join('_')}"
  end
  features
end

# Hash used as an insertion-ordered set of feature names.
seen = {}

STDIN.each_line do |line|
  # Only the second and third fields of the split line are used.
  _, src, tgt = splitpipe(line.strip)
  src.strip!
  tgt.strip!

  # Same insertion order as before: source bigrams, target bigrams, rule id.
  (mkrbf(src, "S") + mkrbf(tgt, "T")).each { |feature| seen[feature] = true }
  seen[mkrf(src, tgt)] = true
end

seen.each_key { |feature| puts feature }