From 546f3936d4bf51b1f575b2b4adf4e20330db22f2 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 11 Dec 2015 16:25:38 +0100 Subject: DerivationToJson module --- derivation_to_json/README | 7 ------- derivation_to_json/README.md | 9 +++++++++ derivation_to_json/derivation_to_json.rb | 12 ++++++++---- server.rb | 2 +- 4 files changed, 18 insertions(+), 12 deletions(-) delete mode 100644 derivation_to_json/README create mode 100644 derivation_to_json/README.md diff --git a/derivation_to_json/README b/derivation_to_json/README deleted file mode 100644 index 0f83d5a..0000000 --- a/derivation_to_json/README +++ /dev/null @@ -1,7 +0,0 @@ -This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into data structures and tries to align "groups" in source and target sides -of rules in a smart, presentable way. The result resembles a phrase-based -system, given that the word alignment gives enough hints. - -To run: - ./derivation_to_json.rb < - diff --git a/derivation_to_json/README.md b/derivation_to_json/README.md new file mode 100644 index 0000000..9667a80 --- /dev/null +++ b/derivation_to_json/README.md @@ -0,0 +1,9 @@ +This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into +data structures and tries to align "groups" in source and target sides +of rules in a smart, presentable way. The result resembles a phrase-based +system, given that the word alignment gives enough hints. + +To run: + ./derivation_to_json.rb < +(first line of stdout is json data, source and target strings follow after that) + diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb index b14b0b5..65a26db 100755 --- a/derivation_to_json/derivation_to_json.rb +++ b/derivation_to_json/derivation_to_json.rb @@ -3,6 +3,8 @@ require 'zipf' require 'stringio' +module DerivationToJson + class RuleAndSpan attr_accessor :span, :symbol, :source, :target, :subspans, :done, :id, :trule @@ -121,7 +123,7 @@ class Rule end end -def conv_cdec_show_deriv s +def DerivationToJson.conv_cdec_show_deriv s rules = [] xx = StringIO.new s d_s = xx.gets @@ -140,7 +142,7 @@ def conv_cdec_show_deriv s return a, rules end -def derive span, by_span, o, groups, source +def DerivationToJson.derive span, by_span, o, groups, source if groups.size==0 || groups.last.size>0 groups << [] end @@ -168,7 +170,7 @@ def derive span, by_span, o, groups, source span.done = true end -def proc_deriv s +def DerivationToJson.proc_deriv s a, rules = conv_cdec_show_deriv s by_span = {} @@ -303,12 +305,14 @@ def proc_deriv s return h.to_json end +end # module + if __FILE__ == $0 s = "" while line = STDIN.gets s += line end - json = proc_deriv(s) + json = DerivationToJson.proc_deriv(s) obj = JSON.parse(json) STDERR.write "#{json}\n" puts obj["source_groups"].join " " diff --git a/server.rb b/server.rb index f976ee6..88f8c9a 100755 --- a/server.rb +++ b/server.rb @@ -375,7 +375,7 @@ post '/next' do # (receive post-edit, update models), send next translation # 3. translation msg = "act:translate ||| #{source} " derivation_str = send_recv :dtrain, msg - obj_str = proc_deriv derivation_str + obj_str = DerivationToJson.proc_deriv derivation_str obj = JSON.parse obj_str obj["transl"] = obj["target_groups"].join " " # 4. detokenizer -- cgit v1.2.3