summary | refs | log | tree | commit | diff
path: root/derivation_to_json
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de>, 2015-12-11 16:25:38 +0100
committer: Patrick Simianer <p@simianer.de>, 2015-12-11 16:25:38 +0100
commit: 546f3936d4bf51b1f575b2b4adf4e20330db22f2 (patch)
tree: 799356f7583bb19cc463132e9a634b45e9b0cd0b /derivation_to_json
parent: 6cafb943db279faa4fc0ddb400ad1326d129c5e6 (diff)
DerivationToJson module
Diffstat (limited to 'derivation_to_json')
-rw-r--r-- derivation_to_json/README.md (renamed from derivation_to_json/README) 4
-rwxr-xr-x derivation_to_json/derivation_to_json.rb 12
2 files changed, 11 insertions, 5 deletions
diff --git a/derivation_to_json/README b/derivation_to_json/README.md
index 0f83d5a..9667a80 100644
--- a/derivation_to_json/README
+++ b/derivation_to_json/README.md
@@ -1,7 +1,9 @@
-This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into data structures and tries to align "groups" in source and target sides
+This (horrid) hack reads cdec's "--show_derivations" and "--extract_rules" into
+data structures and tries to align "groups" in source and target sides
of rules in a smart, presentable way. The result resembles a phrase-based
system, given that the word alignment gives enough hints.
To run:
./derivation_to_json.rb < <one of the .raw files>
+(first line of stdout is json data, source and target strings follow after that)
diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb
index b14b0b5..65a26db 100755
--- a/derivation_to_json/derivation_to_json.rb
+++ b/derivation_to_json/derivation_to_json.rb
@@ -3,6 +3,8 @@
require 'zipf'
require 'stringio'
+module DerivationToJson
+
class RuleAndSpan
attr_accessor :span, :symbol, :source, :target, :subspans, :done, :id, :trule
@@ -121,7 +123,7 @@ class Rule
end
end
-def conv_cdec_show_deriv s
+def DerivationToJson.conv_cdec_show_deriv s
rules = []
xx = StringIO.new s
d_s = xx.gets
@@ -140,7 +142,7 @@ def conv_cdec_show_deriv s
return a, rules
end
-def derive span, by_span, o, groups, source
+def DerivationToJson.derive span, by_span, o, groups, source
if groups.size==0 || groups.last.size>0
groups << []
end
@@ -168,7 +170,7 @@ def derive span, by_span, o, groups, source
span.done = true
end
-def proc_deriv s
+def DerivationToJson.proc_deriv s
a, rules = conv_cdec_show_deriv s
by_span = {}
@@ -303,12 +305,14 @@ def proc_deriv s
return h.to_json
end
+end # module
+
if __FILE__ == $0
s = ""
while line = STDIN.gets
s += line
end
- json = proc_deriv(s)
+ json = DerivationToJson.proc_deriv(s)
obj = JSON.parse(json)
STDERR.write "#{json}\n"
puts obj["source_groups"].join " "