diff options
author | Patrick Simianer <p@simianer.de> | 2015-12-11 16:25:38 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-12-11 16:25:38 +0100 |
commit | 546f3936d4bf51b1f575b2b4adf4e20330db22f2 (patch) | |
tree | 799356f7583bb19cc463132e9a634b45e9b0cd0b /derivation_to_json | |
parent | 6cafb943db279faa4fc0ddb400ad1326d129c5e6 (diff) |
DerivationToJson module
Diffstat (limited to 'derivation_to_json')
-rw-r--r-- | derivation_to_json/README.md (renamed from derivation_to_json/README) | 4 | ||||
-rwxr-xr-x | derivation_to_json/derivation_to_json.rb | 12 |
2 files changed, 11 insertions, 5 deletions
```diff
diff --git a/derivation_to_json/README b/derivation_to_json/README.md
index 0f83d5a..9667a80 100644
--- a/derivation_to_json/README
+++ b/derivation_to_json/README.md
@@ -1,7 +1,9 @@
-This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into data structures and tries to align "groups" in source and target sides
+This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into
+data structures and tries to align "groups" in source and target sides
 of rules in a smart, presentable way. The result resembles a phrase-based
 system, given that the word alignment gives enough hints.
 
 To run:
  ./derivation_to_json.rb < <one of the .raw files>
 
+(first line of stdout is json data, source and target strings follow after that)
diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb
index b14b0b5..65a26db 100755
--- a/derivation_to_json/derivation_to_json.rb
+++ b/derivation_to_json/derivation_to_json.rb
@@ -3,6 +3,8 @@
 require 'zipf'
 require 'stringio'
 
+module DerivationToJson
+
 class RuleAndSpan
   attr_accessor :span, :symbol, :source, :target, :subspans, :done, :id, :trule
 
@@ -121,7 +123,7 @@ class Rule
   end
 end
 
-def conv_cdec_show_deriv s
+def DerivationToJson.conv_cdec_show_deriv s
   rules = []
   xx = StringIO.new s
   d_s = xx.gets
@@ -140,7 +142,7 @@ def conv_cdec_show_deriv s
   return a, rules
 end
 
-def derive span, by_span, o, groups, source
+def DerivationToJson.derive span, by_span, o, groups, source
   if groups.size==0 || groups.last.size>0
     groups << []
   end
@@ -168,7 +170,7 @@ def derive span, by_span, o, groups, source
   span.done = true
 end
 
-def proc_deriv s
+def DerivationToJson.proc_deriv s
   a, rules = conv_cdec_show_deriv s
 
   by_span = {}
@@ -303,12 +305,14 @@ def proc_deriv s
   return h.to_json
 end
 
+end # module
+
 if __FILE__ == $0
   s = ""
   while line = STDIN.gets
     s += line
   end
-  json = proc_deriv(s)
+  json = DerivationToJson.proc_deriv(s)
   obj = JSON.parse(json)
   STDERR.write "#{json}\n"
   puts obj["source_groups"].join " "
```