summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/.config.py.swpbin0 -> 12288 bytes
-rw-r--r--src/.nl_reweighter.py.swpbin0 -> 24576 bytes
-rw-r--r--src/__init__.py0
-rw-r--r--src/__init__.pycbin0 -> 131 bytes
-rw-r--r--src/__pycache__/__init__.cpython-33.pycbin0 -> 139 bytes
-rw-r--r--src/__pycache__/smt_semparse_config.cpython-33.pycbin0 -> 1337 bytes
-rw-r--r--src/bleu_scorer.py17
-rw-r--r--src/bleu_scorer.pycbin0 -> 1086 bytes
-rw-r--r--src/config.py39
-rw-r--r--src/config.pycbin0 -> 2082 bytes
-rw-r--r--src/eval_bevan/clean280
-rw-r--r--src/eval_bevan/errlog0
-rwxr-xr-xsrc/eval_bevan/eval_bevan.sh10
-rwxr-xr-xsrc/eval_bevan/format_prolog.py30
-rw-r--r--src/eval_bevan/test.out280
-rw-r--r--src/eval_bevan/test.pl560
-rw-r--r--src/evaluator.py47
-rw-r--r--src/evaluator.pycbin0 -> 1763 bytes
-rw-r--r--src/extractor.py418
-rw-r--r--src/extractor.pycbin0 -> 12435 bytes
-rw-r--r--src/functionalizer.py112
-rw-r--r--src/functionalizer.pycbin0 -> 2687 bytes
-rw-r--r--src/geo_world.py108
-rw-r--r--src/geo_world.pycbin0 -> 3286 bytes
-rw-r--r--src/moses.py141
-rw-r--r--src/moses.pycbin0 -> 5366 bytes
-rw-r--r--src/nl_reweighter.py227
-rw-r--r--src/nl_reweighter.pycbin0 -> 8662 bytes
-rw-r--r--src/query_comparer.py31
-rw-r--r--src/query_comparer.pycbin0 -> 1280 bytes
-rw-r--r--src/slot_checker.pycbin0 -> 1643 bytes
-rw-r--r--src/smt_semparse_config.py31
-rw-r--r--src/smt_semparse_config.pycbin0 -> 1523 bytes
-rw-r--r--src/smt_semparse_experiment.py87
-rw-r--r--src/smt_semparse_experiment.pycbin0 -> 3062 bytes
-rw-r--r--src/srilm.py20
-rw-r--r--src/srilm.pycbin0 -> 1102 bytes
-rw-r--r--src/util.py67
-rw-r--r--src/util.pycbin0 -> 2356 bytes
39 files changed, 2505 insertions, 0 deletions
diff --git a/src/.config.py.swp b/src/.config.py.swp
new file mode 100644
index 0000000..706de6a
--- /dev/null
+++ b/src/.config.py.swp
Binary files differ
diff --git a/src/.nl_reweighter.py.swp b/src/.nl_reweighter.py.swp
new file mode 100644
index 0000000..2180681
--- /dev/null
+++ b/src/.nl_reweighter.py.swp
Binary files differ
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/__init__.py
diff --git a/src/__init__.pyc b/src/__init__.pyc
new file mode 100644
index 0000000..cf88e29
--- /dev/null
+++ b/src/__init__.pyc
Binary files differ
diff --git a/src/__pycache__/__init__.cpython-33.pyc b/src/__pycache__/__init__.cpython-33.pyc
new file mode 100644
index 0000000..65e20ff
--- /dev/null
+++ b/src/__pycache__/__init__.cpython-33.pyc
Binary files differ
diff --git a/src/__pycache__/smt_semparse_config.cpython-33.pyc b/src/__pycache__/smt_semparse_config.cpython-33.pyc
new file mode 100644
index 0000000..c8d19ff
--- /dev/null
+++ b/src/__pycache__/smt_semparse_config.cpython-33.pyc
Binary files differ
diff --git a/src/bleu_scorer.py b/src/bleu_scorer.py
new file mode 100644
index 0000000..0b0da55
--- /dev/null
+++ b/src/bleu_scorer.py
@@ -0,0 +1,17 @@
+import os
+import subprocess
+import sys
+
+class BLEUScorer:
+
+ def __init__(self, config):
+ self.config = config
+
+ def run(self):
+ args = [self.config.bleu_eval, '%s/test.nl' % self.config.experiment_dir]
+ infile = open('%s/hyp.nl' % self.config.experiment_dir)
+ nullfile = open(os.devnull, 'w')
+ p = subprocess.Popen(args, stdin=infile, stdout=sys.stdout, stderr=nullfile)
+ p.wait()
+ infile.close()
+ nullfile.close()
diff --git a/src/bleu_scorer.pyc b/src/bleu_scorer.pyc
new file mode 100644
index 0000000..05d95dc
--- /dev/null
+++ b/src/bleu_scorer.pyc
Binary files differ
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..138a28d
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,39 @@
+import yaml
+import logging
+
+class Config:
+
+ def __init__(self, settings_path, dependencies_path):
+ with open(settings_path) as settings_file:
+ settings = yaml.load(settings_file)
+ with open(dependencies_path) as dependencies_file:
+ dependencies = yaml.load(dependencies_file)
+
+ self.entries = {}
+
+ for config in (settings, dependencies):
+ for key, value in config.items():
+ self.put(key, value)
+
+ def __hasattr__(self, key):
+ return key in self.entries
+
+ def __getattr__(self, key):
+ if key not in self.entries:
+ raise Exception('No such key: %s' % key)
+ return self.entries[key]
+
+ def put(self, key, value):
+ if key in self.entries:
+ logging.warn('changing value of %s' % key)
+ self.entries[key] = value
+
+ def __repr__(self):
+ return '%s(%d items)' % (self.__class__, len(self.keys))
+
+ def __str__(self):
+ s = []
+ s.append('%s:' % self.__class__.__name__)
+ for key in sorted(self.entries.keys()):
+ s.append(' %s: %s' % (key, getattr(self, key)))
+ return '\n'.join(s)
diff --git a/src/config.pyc b/src/config.pyc
new file mode 100644
index 0000000..b0b5fa0
--- /dev/null
+++ b/src/config.pyc
Binary files differ
diff --git a/src/eval_bevan/clean b/src/eval_bevan/clean
new file mode 100644
index 0000000..0d9c412
--- /dev/null
+++ b/src/eval_bevan/clean
@@ -0,0 +1,280 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/src/eval_bevan/errlog b/src/eval_bevan/errlog
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/eval_bevan/errlog
diff --git a/src/eval_bevan/eval_bevan.sh b/src/eval_bevan/eval_bevan.sh
new file mode 100755
index 0000000..31e7941
--- /dev/null
+++ b/src/eval_bevan/eval_bevan.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+in=$1
+cat $in | sed 's/W//g' | sed 's/( /(/g' | sed 's/ )/)/g' | sed 's/#.*//g' | sed 's/^0$/0/' | sed 's/()//g' | sed 's/ /,/g' > clean
+./format_prolog.py clean ~/src/semparse-old/work_psmt/test.fun > test.pl
+
+swipl -l "/home/jacob/src/3p/wasp-1.0/data/geo-funql/eval/eval.pl" \
+ < test.pl \
+ > test.out
+ 2>> errlog
diff --git a/src/eval_bevan/format_prolog.py b/src/eval_bevan/format_prolog.py
new file mode 100755
index 0000000..aa60fd7
--- /dev/null
+++ b/src/eval_bevan/format_prolog.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python2
+
+import sys
+
+def main():
+ HYP_PATH = sys.argv[1]
+ REF_PATH = sys.argv[2]
+
+ hyp_file = open(HYP_PATH)
+ ref_file = open(REF_PATH)
+
+ hyps = []
+ for hyp_line in hyp_file.readlines():
+ hyp = hyp_line.strip()
+ hyps.append(hyp)
+
+ refs = []
+ for r_line in ref_file.readlines():
+ ref = r_line.strip()
+ refs.append(ref)
+
+ i = 0
+ for ref, hyp in zip(refs, hyps):
+ print \
+ 'catch(call_with_time_limit(1,eval([%d,%f,%s,%s])),E,writeln(\'error\')).\n' \
+ % (i, 0, ref, hyp)
+ i += 1
+
+if __name__ == '__main__':
+ main()
diff --git a/src/eval_bevan/test.out b/src/eval_bevan/test.out
new file mode 100644
index 0000000..9e8a334
--- /dev/null
+++ b/src/eval_bevan/test.out
@@ -0,0 +1,280 @@
+0 0.0 n
+1 0.0 n
+2 0.0 n
+3 0.0 n
+4 0.0 n
+5 0.0 n
+6 0.0 n
+7 0.0 n
+8 0.0 n
+9 0.0 n
+10 0.0 n
+11 0.0 n
+12 0.0 n
+13 0.0 n
+14 0.0 n
+15 0.0 n
+16 0.0 n
+17 0.0 n
+18 0.0 n
+19 0.0 n
+20 0.0 n
+21 0.0 n
+22 0.0 n
+23 0.0 n
+24 0.0 n
+25 0.0 n
+26 0.0 y
+27 0.0 n
+28 0.0 n
+29 0.0 n
+30 0.0 n
+31 0.0 n
+32 0.0 n
+33 0.0 n
+34 0.0 n
+35 0.0 n
+36 0.0 n
+37 0.0 n
+38 0.0 n
+39 0.0 n
+40 0.0 n
+41 0.0 n
+42 0.0 n
+43 0.0 n
+44 0.0 n
+45 0.0 n
+46 0.0 y
+47 0.0 n
+48 0.0 n
+49 0.0 n
+50 0.0 n
+51 0.0 n
+52 0.0 n
+53 0.0 n
+54 0.0 n
+55 0.0 n
+56 0.0 n
+57 0.0 n
+58 0.0 n
+59 0.0 n
+60 0.0 n
+61 0.0 n
+62 0.0 n
+63 0.0 n
+64 0.0 n
+65 0.0 y
+66 0.0 n
+67 0.0 n
+68 0.0 n
+69 0.0 n
+70 0.0 n
+71 0.0 n
+72 0.0 n
+73 0.0 y
+74 0.0 n
+75 0.0 y
+76 0.0 n
+77 0.0 n
+78 0.0 n
+79 0.0 n
+80 0.0 n
+81 0.0 n
+82 0.0 n
+83 0.0 n
+84 0.0 n
+85 0.0 n
+86 0.0 n
+87 0.0 n
+88 0.0 n
+89 0.0 n
+90 0.0 n
+91 0.0 n
+92 0.0 y
+93 0.0 n
+94 0.0 n
+95 0.0 n
+96 0.0 n
+97 0.0 n
+98 0.0 n
+99 0.0 n
+100 0.0 n
+101 0.0 n
+102 0.0 n
+103 0.0 n
+104 0.0 n
+105 0.0 n
+106 0.0 n
+107 0.0 n
+108 0.0 n
+109 0.0 n
+110 0.0 n
+111 0.0 n
+112 0.0 n
+113 0.0 n
+114 0.0 n
+115 0.0 n
+116 0.0 n
+117 0.0 n
+118 0.0 n
+119 0.0 n
+120 0.0 n
+121 0.0 n
+122 0.0 n
+123 0.0 n
+124 0.0 n
+125 0.0 n
+126 0.0 n
+127 0.0 n
+128 0.0 n
+129 0.0 n
+130 0.0 n
+131 0.0 n
+132 0.0 y
+133 0.0 y
+134 0.0 n
+135 0.0 y
+136 0.0 y
+137 0.0 n
+138 0.0 n
+139 0.0 n
+140 0.0 n
+141 0.0 n
+142 0.0 n
+143 0.0 n
+144 0.0 n
+145 0.0 n
+146 0.0 y
+147 0.0 n
+148 0.0 n
+149 0.0 n
+150 0.0 n
+151 0.0 n
+152 0.0 n
+153 0.0 n
+154 0.0 n
+155 0.0 n
+156 0.0 n
+157 0.0 n
+158 0.0 n
+159 0.0 n
+160 0.0 n
+161 0.0 n
+162 0.0 n
+163 0.0 n
+164 0.0 n
+165 0.0 n
+166 0.0 n
+167 0.0 n
+168 0.0 n
+169 0.0 n
+170 0.0 n
+171 0.0 n
+172 0.0 n
+173 0.0 n
+174 0.0 n
+175 0.0 n
+176 0.0 n
+177 0.0 n
+178 0.0 n
+179 0.0 n
+180 0.0 n
+181 0.0 y
+182 0.0 n
+183 0.0 n
+184 0.0 n
+185 0.0 n
+186 0.0 n
+187 0.0 n
+188 0.0 n
+189 0.0 n
+190 0.0 n
+191 0.0 n
+192 0.0 n
+193 0.0 n
+194 0.0 n
+195 0.0 n
+196 0.0 n
+197 0.0 n
+198 0.0 n
+199 0.0 n
+200 0.0 n
+201 0.0 n
+202 0.0 n
+203 0.0 n
+204 0.0 n
+205 0.0 n
+206 0.0 n
+207 0.0 n
+208 0.0 n
+209 0.0 n
+210 0.0 n
+211 0.0 n
+212 0.0 n
+213 0.0 y
+214 0.0 y
+215 0.0 n
+216 0.0 n
+217 0.0 n
+218 0.0 n
+219 0.0 n
+220 0.0 n
+221 0.0 n
+222 0.0 n
+223 0.0 n
+224 0.0 n
+225 0.0 n
+226 0.0 n
+227 0.0 n
+228 0.0 n
+229 0.0 n
+230 0.0 n
+231 0.0 n
+232 0.0 n
+233 0.0 n
+234 0.0 n
+235 0.0 n
+236 0.0 n
+237 0.0 n
+238 0.0 n
+239 0.0 n
+240 0.0 n
+241 0.0 n
+242 0.0 n
+243 0.0 n
+244 0.0 n
+245 0.0 n
+246 0.0 n
+247 0.0 n
+248 0.0 y
+249 0.0 y
+250 0.0 n
+251 0.0 n
+252 0.0 n
+253 0.0 n
+254 0.0 n
+255 0.0 n
+256 0.0 n
+257 0.0 n
+258 0.0 n
+259 0.0 n
+260 0.0 n
+261 0.0 n
+262 0.0 n
+263 0.0 n
+264 0.0 n
+265 0.0 n
+266 0.0 n
+267 0.0 n
+268 0.0 n
+269 0.0 n
+270 0.0 n
+271 0.0 y
+272 0.0 n
+273 0.0 n
+274 0.0 n
+275 0.0 n
+276 0.0 n
+277 0.0 n
+278 0.0 n
+279 0.0 y
diff --git a/src/eval_bevan/test.pl b/src/eval_bevan/test.pl
new file mode 100644
index 0000000..837a66d
--- /dev/null
+++ b/src/eval_bevan/test.pl
@@ -0,0 +1,560 @@
+catch(call_with_time_limit(1,eval([0,0.000000,answer(river(loc_2(stateid('colorado')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([1,0.000000,answer(count(state(low_point_2(lower_2(low_point_1(stateid('alabama'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([2,0.000000,answer(count(river(loc_2(stateid('california'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([3,0.000000,answer(state(next_to_2(stateid('utah')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([4,0.000000,answer(elevation_1(placeid('mount mckinley'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([5,0.000000,answer(elevation_1(highest(place(loc_2(countryid('usa')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([6,0.000000,answer(elevation_1(highest(place(loc_2(stateid('alabama')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([7,0.000000,answer(size(stateid('alaska'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([8,0.000000,answer(size(stateid('texas'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([9,0.000000,answer(len(river(riverid('colorado')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([10,0.000000,answer(len(river(riverid('delaware')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([11,0.000000,answer(len(longest(river(loc_2(stateid('california')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([12,0.000000,answer(len(longest(river(loc_2(countryid('usa')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([13,0.000000,answer(len(river(riverid('north platte')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([14,0.000000,answer(len(river(riverid('ohio')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([15,0.000000,answer(count(capital(loc_2(stateid('rhode island'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([16,0.000000,answer(count(city(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([17,0.000000,answer(population_1(largest(city(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([18,0.000000,answer(count(river(riverid('colorado')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([19,0.000000,answer(population_1(cityid('detroit', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([20,0.000000,answer(population_1(cityid('houston', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([21,0.000000,answer(population_1(cityid('minneapolis', 'mn'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([22,0.000000,answer(population_1(stateid('mississippi'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([23,0.000000,answer(population_1(stateid('rhode island'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([24,0.000000,answer(population_1(largest(city(loc_2(state(stateid('new york'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([25,0.000000,answer(population_1(capital(loc_2(stateid('texas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([26,0.000000,answer(population_1(countryid('usa'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([27,0.000000,answer(population_1(cityid('austin', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([28,0.000000,answer(population_1(stateid('utah'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([29,0.000000,answer(population_1(stateid('texas'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([30,0.000000,answer(count(river(loc_2(stateid('iowa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([31,0.000000,answer(count(river(loc_2(most(state(loc_1(river(all)))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([32,0.000000,answer(count(river(loc_2(stateid('colorado'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([33,0.000000,answer(count(state(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([34,0.000000,answer(count(state(all))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([35,0.000000,answer(count(state(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([36,0.000000,answer(count(state(next_to_2(stateid('iowa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([37,0.000000,answer(count(state(next_to_2(largest_one(population_1(state(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([38,0.000000,answer(count(exclude(state(all), loc_1(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([39,0.000000,answer(count(state(next_to_1(stateid('tennessee'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([40,0.000000,answer(count(state(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([41,0.000000,answer(count(state(loc_1(place(higher_2(highest(place(loc_2(state(loc_1(largest(capital(city(loc_2(countryid('usa')))))))))))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([42,0.000000,answer(population_1(stateid('texas'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([43,0.000000,answer(elevation_1(placeid('mount mckinley'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([44,0.000000,answer(elevation_1(highest(place(loc_2(stateid('montana')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([45,0.000000,answer(count(state(next_to_1(stateid('iowa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([46,0.000000,answer(major(river(loc_2(stateid('florida'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([47,0.000000,answer(population_1(cityid('boulder', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([48,0.000000,answer(count(state(next_to_2(stateid('iowa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([49,0.000000,answer(river(loc_2(stateid('new york')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([50,0.000000,answer(state(loc_1(cityid('san antonio', _)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([51,0.000000,answer(city(loc_2(stateid('texas')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([52,0.000000,answer(state(traverse_1(longest(river(loc_2(stateid('texas'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([53,0.000000,answer(river(loc_2(stateid('texas')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([54,0.000000,answer(capital(city(loc_2(stateid('texas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([55,0.000000,answer(capital(loc_2(state(next_to_2(stateid('missouri')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([56,0.000000,answer(city(loc_2(state(traverse_1(riverid('mississippi')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([57,0.000000,answer(city(loc_2(state(loc_1(highest(place(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([58,0.000000,answer(highest(place(loc_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([59,0.000000,answer(major(city(loc_2(stateid('alabama'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([60,0.000000,answer(major(city(loc_2(stateid('alaska'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([61,0.000000,answer(major(city(loc_2(stateid('new york'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([62,0.000000,answer(major(city(loc_2(state(traverse_1(riverid('mississippi'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([63,0.000000,answer(major(city(loc_2(state(stateid('california')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([64,0.000000,answer(major(city(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([65,0.000000,answer(major(city(loc_2(stateid('vermont'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([66,0.000000,answer(major(river(loc_2(stateid('ohio'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([67,0.000000,answer(density_1(state(all))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([68,0.000000,answer(population_1(stateid('mississippi'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([69,0.000000,answer(population_1(state(traverse_1(river(riverid('mississippi')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([70,0.000000,answer(population_1(state(traverse_1(riverid('mississippi'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([71,0.000000,answer(population_1(state(next_to_2(stateid('texas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([72,0.000000,answer(population_1(major(city(loc_2(stateid('texas')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([73,0.000000,answer(river(loc_2(stateid('alaska')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([74,0.000000,answer(largest_one(population_1(city(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([75,0.000000,answer(largest_one(density_1(city(loc_2(countryid('usa')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([76,0.000000,answer(capital(loc_2(state(loc_1(lowest(place(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([77,0.000000,answer(area_1(stateid('florida'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([78,0.000000,answer(area_1(stateid('ohio'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([79,0.000000,answer(area_1(state(stateid('texas')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([80,0.000000,answer(area_1(stateid('wisconsin'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([81,0.000000,answer(largest(city(loc_2(state(loc_1(river(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([82,0.000000,answer(largest(capital(city(loc_2(countryid('usa')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([83,0.000000,answer(largest(city(loc_2(stateid('kansas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([84,0.000000,answer(largest(city(loc_2(stateid('louisiana'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([85,0.000000,answer(capital(loc_2(stateid('california')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([86,0.000000,answer(capital(loc_2(stateid('colorado')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([87,0.000000,answer(capital(loc_2(stateid('illinois')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([88,0.000000,answer(capital(loc_2(stateid('iowa')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([89,0.000000,answer(capital(loc_2(stateid('massachusetts')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([90,0.000000,answer(capital(loc_2(stateid('new jersey')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([91,0.000000,answer(capital(loc_2(stateid('new york')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([92,0.000000,answer(capital(loc_2(stateid('north dakota')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([93,0.000000,answer(capital(loc_2(stateid('ohio')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([94,0.000000,answer(capital(loc_2(state(loc_1(city(cityid('durham', _))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([95,0.000000,answer(capital(loc_2(state(stateid('florida'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([96,0.000000,answer(capital(loc_2(smallest(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([97,0.000000,answer(capital(loc_2(largest_one(population_1(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([98,0.000000,answer(capital(loc_2(largest_one(density_1(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([99,0.000000,answer(capital(loc_2(state(loc_1(longest(river(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([100,0.000000,answer(capital(loc_2(largest_one(population_1(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([101,0.000000,answer(sum(area_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([102,0.000000,answer(density_1(stateid('new york'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([103,0.000000,answer(high_point_1(stateid('wyoming'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([104,0.000000,answer(highest(place(loc_2(stateid('texas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([105,0.000000,answer(highest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([106,0.000000,answer(highest(mountain(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([107,0.000000,answer(highest(mountain(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([108,0.000000,answer(highest(place(loc_2(stateid('delaware'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([109,0.000000,answer(highest(place(loc_2(stateid('iowa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([110,0.000000,answer(highest(place(loc_2(stateid('maine'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([111,0.000000,answer(highest(place(loc_2(stateid('montana'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([112,0.000000,answer(highest(place(loc_2(stateid('nevada'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([113,0.000000,answer(highest(place(loc_2(state(next_to_2(stateid('georgia'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([114,0.000000,answer(highest(place(loc_2(state(loc_1(capital(cityid('austin', _)))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([115,0.000000,answer(highest(place(loc_2(state(next_to_2(stateid('colorado'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([116,0.000000,answer(highest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([117,0.000000,answer(highest(place(loc_2(stateid('virginia'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([118,0.000000,answer(highest(place(loc_2(smallest_one(density_1(state(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([119,0.000000,answer(highest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([120,0.000000,answer(largest(capital(city(loc_2(countryid('usa')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([121,0.000000,answer(largest(city(loc_2(stateid('california'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([122,0.000000,answer(largest(city(loc_2(stateid('rhode island'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([123,0.000000,answer(largest(city(loc_2(smallest(state(traverse_1(riverid('mississippi')))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([124,0.000000,answer(largest(city(loc_2(smallest(state(loc_2(countryid('usa')))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([125,0.000000,answer(longest(river(loc_2(state(stateid('washington')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([126,0.000000,answer(largest(state(next_to_2(stateid('arkansas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([127,0.000000,answer(largest(state(next_to_2(stateid('texas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([128,0.000000,answer(smallest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([129,0.000000,answer(len(river(riverid('colorado')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([130,0.000000,answer(len(longest(river(traverse_2(stateid('texas')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([131,0.000000,answer(len(river(riverid('mississippi')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([132,0.000000,answer(len(most(river(traverse_2(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([133,0.000000,answer(len(most(river(traverse_2(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([134,0.000000,answer(longest(river(loc_2(stateid('florida'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([135,0.000000,answer(longest(river(loc_2(largest(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([136,0.000000,answer(longest(river(loc_2(most(state(loc_1(major(city(all))))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([137,0.000000,answer(longest(river(loc_2(state(next_to_2(stateid('nebraska'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([138,0.000000,answer(longest(river(traverse_2(state(next_to_2(stateid('indiana'))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([139,0.000000,answer(lowest(place(loc_2(stateid('arkansas'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([140,0.000000,answer(lowest(place(loc_2(stateid('massachusetts'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([141,0.000000,answer(lowest(place(loc_2(stateid('mississippi'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([142,0.000000,answer(lowest(place(loc_2(stateid('nebraska'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([143,0.000000,answer(lowest(place(loc_2(state(stateid('california')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([144,0.000000,answer(lowest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([145,0.000000,answer(lowest(place(loc_2(state(traverse_1(river(riverid('colorado')))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([146,0.000000,answer(highest(place(loc_2(cityid('san francisco', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([147,0.000000,answer(largest_one(density_1(state(loc_2(countryid('usa')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([148,0.000000,answer(largest_one(population_1(city(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([149,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([150,0.000000,answer(largest_one(population_1(state(traverse_1(riverid('mississippi')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([151,0.000000,answer(density_1(stateid('maine'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([152,0.000000,answer(density_1(largest(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([153,0.000000,answer(population_1(stateid('alaska'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([154,0.000000,answer(population_1(cityid('boulder', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([155,0.000000,answer(population_1(cityid('erie', 'pa'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([156,0.000000,answer(population_1(stateid('hawaii'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([157,0.000000,answer(population_1(cityid('houston', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([158,0.000000,answer(population_1(stateid('maryland'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([159,0.000000,answer(population_1(stateid('new mexico'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([160,0.000000,answer(population_1(city(cityid('new york', _)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([161,0.000000,answer(population_1(cityid('san antonio', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([162,0.000000,answer(population_1(cityid('tempe', 'az'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([163,0.000000,answer(population_1(largest(city(loc_2(largest_one(area_1(state(all)))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([164,0.000000,answer(smallest(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([165,0.000000,answer(population_1(most(state(next_to_2(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([166,0.000000,answer(population_1(largest_one(density_1(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([167,0.000000,answer(population_1(cityid('tucson', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([168,0.000000,answer(population_1(stateid('utah'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([169,0.000000,answer(population_1(stateid('washington'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([170,0.000000,answer(smallest(city(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([171,0.000000,answer(smallest(city(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([172,0.000000,answer(smallest(state(next_to_2(stateid('wyoming'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([173,0.000000,answer(smallest(most(state(next_to_2(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([174,0.000000,answer(smallest(state(traverse_1(river(riverid('mississippi')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([175,0.000000,answer(largest_one(area_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([176,0.000000,answer(smallest_one(area_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([177,0.000000,answer(sum(len(river(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([178,0.000000,answer(len(riverid('mississippi'))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([179,0.000000,answer(major(city(loc_2(stateid('pennsylvania'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([180,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([181,0.000000,answer(river(traverse_2(most(state(loc_1(city(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([182,0.000000,answer(river(traverse_2(most(state(next_to_2(state(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([183,0.000000,answer(river(loc_2(state(next_to_2(stateid('texas')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([184,0.000000,answer(river(loc_2(stateid('texas')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([185,0.000000,answer(river(loc_2(stateid('texas')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([186,0.000000,answer(river(traverse_2(stateid('new york')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([187,0.000000,answer(river(traverse_2(most(state(next_to_2(state(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([188,0.000000,answer(largest_one(population_1(state(next_to_2(stateid('nevada')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([189,0.000000,answer(state(next_to_2(stateid('new york')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([190,0.000000,answer(most(state(next_to_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([191,0.000000,answer(state(loc_1(highest(place(loc_2(state(traverse_1(river(riverid('colorado')))))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([192,0.000000,answer(largest_one(area_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([193,0.000000,answer(state(loc_1(largest(capital(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([194,0.000000,answer(state(loc_1(longest(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([195,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([196,0.000000,answer(most(state(loc_1(major(city(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([197,0.000000,answer(most(state(loc_1(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([198,0.000000,answer(smallest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([199,0.000000,answer(state(loc_1(cityid('austin', _)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([200,0.000000,answer(state(loc_1(cityid('miami', _)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([201,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([202,0.000000,answer(state(next_to_2(stateid('arizona')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([203,0.000000,answer(state(next_to_2(stateid('florida')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([204,0.000000,answer(state(next_to_2(stateid('indiana')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([205,0.000000,answer(state(next_to_2(stateid('michigan')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([206,0.000000,answer(state(next_to_2(stateid('montana')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([207,0.000000,answer(state(next_to_2(stateid('new jersey')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([208,0.000000,answer(state(next_to_2(state(next_to_2(stateid('mississippi')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([209,0.000000,answer(state(next_to_2(state(traverse_1(riverid('ohio')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([210,0.000000,answer(intersection(state(next_to_2(stateid('texas'))), loc_1(major(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([211,0.000000,answer(state(next_to_2(largest_one(population_1(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([212,0.000000,answer(state(next_to_2(most(state(next_to_2(state(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([213,0.000000,answer(state(next_to_2(most(state(loc_1(city(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([214,0.000000,answer(state(next_to_2(most(state(loc_1(major(city(all)))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([215,0.000000,answer(state(next_to_2(smallest_one(area_1(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([216,0.000000,answer(state(loc_1(major(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([217,0.000000,answer(state(traverse_1(river(riverid('delaware'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([218,0.000000,answer(state(traverse_1(river(riverid('mississippi'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([219,0.000000,answer(state(traverse_1(riverid('missouri')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([220,0.000000,answer(state(traverse_1(river(riverid('ohio'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([221,0.000000,answer(state(loc_1(city(cityid('dallas', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([222,0.000000,answer(state(loc_1(city(cityid('plano', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([223,0.000000,answer(state(loc_1(city(cityid('portland', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([224,0.000000,answer(state(loc_1(city(cityid('rochester', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([225,0.000000,answer(state(loc_1(city(cityid('salt lake city', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([226,0.000000,answer(state(next_to_2(stateid('kentucky')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([227,0.000000,answer(loc_1(mountain(all))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([228,0.000000,answer(loc_1(cityid('dallas', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([229,0.000000,answer(loc_1(cityid('portland', _))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([230,0.000000,answer(loc_1(river(riverid('chattahoochee')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([231,0.000000,answer(highest(mountain(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([232,0.000000,answer(highest(place(loc_2(stateid('hawaii'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([233,0.000000,answer(lowest(place(loc_2(stateid('maryland'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([234,0.000000,answer(largest_one(population_1(city(loc_2(stateid('new mexico')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([235,0.000000,answer(loc_1(smallest(city(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([236,0.000000,answer(largest_one(population_1(city(loc_2(stateid('california')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([237,0.000000,answer(density_1(state(traverse_1(longest(river(loc_2(countryid('usa')))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([238,0.000000,answer(highest(exclude(mountain(all), loc_2(stateid('alaska'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([239,0.000000,answer(longest(river(loc_2(countryid('usa'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([240,0.000000,answer(smallest(state(all))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([241,0.000000,answer(largest_one(population_1(state(next_to_2(stateid('pennsylvania')))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([242,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([243,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([244,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([245,0.000000,answer(exclude(river(all), traverse_2(stateid('texas')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([246,0.000000,answer(exclude(river(all), traverse_2(countryid('usa')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([247,0.000000,answer(river(traverse_2(state(next_to_2(state(loc_1(capital(cityid('austin', _))))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([248,0.000000,answer(river(traverse_2(fewest(state(loc_1(city(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([249,0.000000,answer(state(next_to_2(stateid('hawaii')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([250,0.000000,answer(most(state(next_to_2(state(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([251,0.000000,answer(smallest_one(population_1(capital(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([252,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([253,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([254,0.000000,answer(state(loc_1(highest(place(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([255,0.000000,answer(state(loc_1(highest(place(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([256,0.000000,answer(largest_one(density_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([257,0.000000,answer(state(loc_1(lowest(place(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([258,0.000000,answer(state(loc_1(lowest(place(loc_2(next_to_2(stateid('idaho')))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([259,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([260,0.000000,answer(most(state(loc_1(major(city(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([261,0.000000,answer(most(state(loc_1(major(river(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([262,0.000000,answer(most(state(traverse_1(major(river(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([263,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([264,0.000000,answer(most(state(loc_1(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([265,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([266,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([267,0.000000,answer(state(loc_1(placeid('mount mckinley')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([268,0.000000,answer(smallest(state(all))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([269,0.000000,answer(state(next_to_2(stateid('illinois')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([270,0.000000,answer(state(next_to_2(stateid('kentucky')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([271,0.000000,answer(state(next_to_2(river(riverid('missouri'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([272,0.000000,answer(state(next_to_2(smallest_one(area_1(state(all)))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([273,0.000000,answer(state(loc_1(largest(city(capital_1(state(all))))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([274,0.000000,answer(state(traverse_1(river(riverid('chattahoochee'))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([275,0.000000,answer(state(traverse_1(longest(river(all))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([276,0.000000,answer(state(traverse_1(riverid('mississippi')))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([277,0.000000,answer(state(loc_1(river(all)))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([278,0.000000,answer(state(loc_1(city(cityid('austin', _))))),0])),E,writeln('error')).
+
+catch(call_with_time_limit(1,eval([279,0.000000,answer(largest_one(density_1(city(all)))),0])),E,writeln('error')).
+
diff --git a/src/evaluator.py b/src/evaluator.py
new file mode 100644
index 0000000..16c2536
--- /dev/null
+++ b/src/evaluator.py
@@ -0,0 +1,47 @@
class Evaluator:
  """Aggregates precision/recall/F1 from the scored evaluation output
  (eval.scored) written by the evaluation pipeline, and prints the result."""

  def __init__(self, config):
    # config supplies .run ('debug' | 'dev' | 'test') and .work_dir
    self.config = config

  def run(self):
    """Score the experiment selected by config.run and print p/r/f.

    debug: scores work_dir/1 only
    dev:   averages scores over the ten fold directories work_dir/0..9
    test:  scores work_dir itself
    """
    if self.config.run == 'debug':
      s_p, s_r, s_f = self.score('%s/1' % self.config.work_dir)
    elif self.config.run == 'dev':
      s_p = 0
      s_r = 0
      s_f = 0
      for i in range(10):
        p, r, f = self.score('%s/%d' % (self.config.work_dir, i))
        s_p += p
        s_r += r
        s_f += f
      s_p /= 10
      s_r /= 10
      s_f /= 10
    elif self.config.run == 'test':
      s_p, s_r, s_f = self.score(self.config.work_dir)

    # NOTE(review): if config.run is none of the three values above,
    # s_p/s_r/s_f are unbound here and this print raises NameError.
    print 'p: %f\nr: %f\nf: %f' % (s_p, s_r, s_f)

  def score(self, experiment_dir):
    """Read <experiment_dir>/eval.scored and return (precision, recall, f1).

    Each line is either 'empty' (no hypothesis produced) or '<yes|no> <score>'.
    precision = yes / (yes + no); recall = yes / total line count.
    """
    result_file = open('%s/eval.scored' % (experiment_dir))
    tp = 0
    fp = 0
    count = 0
    for line in result_file.readlines():
      count += 1
      tag = line.strip()
      if tag == 'empty':
        # counts toward recall's denominator but is neither tp nor fp
        continue
      tag, score = tag.split()
      score = float(score)  # parsed for validation; value otherwise unused
      if tag == 'yes':
        tp += 1
      elif tag == 'no':
        fp += 1

    # NOTE(review): these divisions raise ZeroDivisionError when the file
    # is empty, contains only 'empty' lines, or tp == 0 (p + r == 0);
    # result_file is also never closed.
    p = 1.0 * tp / (tp + fp)
    r = 1.0 * tp / count
    f = 2.0 * p * r / (p + r)

    return (p, r, f)
diff --git a/src/evaluator.pyc b/src/evaluator.pyc
new file mode 100644
index 0000000..b13bae1
--- /dev/null
+++ b/src/evaluator.pyc
Binary files differ
diff --git a/src/extractor.py b/src/extractor.py
new file mode 100644
index 0000000..ff2abdb
--- /dev/null
+++ b/src/extractor.py
@@ -0,0 +1,418 @@
+from nltk.stem.porter import PorterStemmer
+from nltk.stem.snowball import GermanStemmer
+import os
+import re
+import util
+import xml.etree.ElementTree as ET
+
class IdStemmer:
  """Identity stemmer: a no-op stand-in for PorterStemmer / GermanStemmer.

  Used when the configured language has no stemmer support, so callers can
  invoke `self.stemmer.stem(tok)` uniformly regardless of language.
  """

  def stem(self, word):
    """Return *word* unchanged (identity mapping)."""
    return word
+
class Extractor:
  """Extracts parallel NL/MRL train, tune and test files for the geo, robo
  and atis corpora into config.experiment_dir, in the formats consumed by
  the downstream translation/LM pipeline (train.nl/.mrl, *.lm, *.np.*)."""

  # Each noun-phrase (NP) pair is replicated this many times in the NP
  # training data to boost its weight.
  NP_WEIGHT = 50

  def __init__(self, config):
    self.config = config
    if config.stem:
      if config.lang == 'en':
        self.stemmer = PorterStemmer()
      elif config.lang == 'de':
        self.stemmer = GermanStemmer()
      else:
        # no stemmer for this language: fall back to the identity stemmer
        self.stemmer = IdStemmer()

  def run(self):
    """Dispatch to the corpus-specific extraction routine."""
    if self.config.corpus == 'geo':
      self.run_geo()
    elif self.config.corpus == 'robo':
      self.run_robo()
    elif self.config.corpus == 'atis':
      self.run_atis()
    else:
      assert False

  def run_atis(self):
    """Extract the ATIS corpus: each line is 'nl <=> slots'; slots are
    converted to functional form and then to the token-level MRL."""

    train_nl = open('%s/train.nl' % self.config.experiment_dir, 'w')
    train_nl_lm = open('%s/train.nl.lm' % self.config.experiment_dir, 'w')
    train_nl_np = open('%s/train.np.nl' % self.config.experiment_dir, 'w')
    train_mrl = open('%s/train.mrl' % self.config.experiment_dir, 'w')
    train_mrl_lm = open('%s/train.mrl.lm' % self.config.experiment_dir, 'w')
    train_mrl_np = open('%s/train.np.mrl' % self.config.experiment_dir, 'w')
    train_fun = open('%s/train.fun' % self.config.experiment_dir, 'w')
    tune_nl = open('%s/tune.nl' % self.config.experiment_dir, 'w')
    tune_mrl = open('%s/tune.mrl' % self.config.experiment_dir, 'w')
    test_nl = open('%s/test.nl' % self.config.experiment_dir, 'w')
    test_mrl = open('%s/test.mrl' % self.config.experiment_dir, 'w')
    test_fun = open('%s/test.fun' % self.config.experiment_dir, 'w')

    if self.config.run == 'debug':
      # debug: 4-way round-robin split of the training file
      # (lines 0,1 mod 4 -> train; 2 -> tune; 3 -> test)
      with open('%s/atis-train.sem' % self.config.data_dir) as data_file:
        counter = 0
        for line in data_file:
          nl, slot = line.split('<=>', 1)
          nl = self.preprocess_nl(nl)
          slot = self.replace_specials(slot)
          fun = self.slot_to_fun(slot)
          mrl = util.fun_to_mrl(fun, True)
          if counter % 4 in (0,1):
            print >>train_nl, nl
            print >>train_mrl, mrl
            print >>train_fun, fun
            print >>train_nl_np, nl
            print >>train_mrl_np, mrl
            print >>train_nl_lm, '<s>', nl, '</s>'
            print >>train_mrl_lm, '<s>', mrl, '</s>'
          elif counter % 4 == 2:
            print >>tune_nl, nl
            print >>tune_mrl, mrl
          else:
            print >>test_nl, nl
            print >>test_mrl, mrl
            print >>test_fun, fun
          counter += 1

    else:
      # dev: tune on the training data itself, test on atis-dev;
      # test: tune on atis-dev, test on atis-test
      train_path = '%s/atis-train.sem' % self.config.data_dir
      if self.config.run == 'dev':
        tune_path = train_path
        test_path = '%s/atis-dev.sem' % self.config.data_dir
      elif self.config.run == 'test':
        tune_path = '%s/atis-dev.sem' % self.config.data_dir
        test_path = '%s/atis-test.sem' % self.config.data_dir

      with open(train_path) as train_file:
        for line in train_file:
          nl, slot = line.split('<=>', 1)
          nl = self.preprocess_nl(nl)
          slot = self.replace_specials(slot)
          fun = self.slot_to_fun(slot)
          mrl = util.fun_to_mrl(fun, True)
          print >>train_nl, nl
          print >>train_mrl, mrl
          print >>train_fun, fun
          print >>train_nl_np, nl
          print >>train_mrl_np, mrl
          print >>train_nl_lm, '<s>', nl, '</s>'
          print >>train_mrl_lm, '<s>', mrl, '</s>'

      with open(tune_path) as tune_file:
        for line in tune_file:
          nl, slot = line.split('<=>', 1)
          nl = self.preprocess_nl(nl)
          slot = self.replace_specials(slot)
          fun = self.slot_to_fun(slot)
          mrl = util.fun_to_mrl(fun, True)
          print >>tune_nl, nl
          print >>tune_mrl, mrl

      with open(test_path) as test_file:
        for line in test_file:
          nl, slot = line.split('<=>', 1)
          nl = self.preprocess_nl(nl)
          slot = self.replace_specials(slot)
          fun = self.slot_to_fun(slot)
          mrl = util.fun_to_mrl(fun, True)
          print >>test_nl, nl
          print >>test_mrl, mrl
          print >>test_fun, fun

    # Append the entity gazetteer from the relational db/ files as extra
    # NP/LM pairs: each quoted string becomes a single '<name>@s' MRL token.
    # NOTE(review): indentation in this dump is ambiguous; this loop is
    # placed at method level so it runs in every mode -- confirm upstream.
    for np_name in os.listdir('%s/db' % self.config.data_dir):
      np_path = '%s/db/%s' % (self.config.data_dir, np_name)
      with open(np_path) as np_file:
        for line in np_file:
          names = re.findall(r'"([^"]+)"', line)
          for name in names:
            nl = name
            mrl = "%s" % self.replace_specials(name)
            mrl = mrl.replace(' ', '_')
            mrl = mrl + '@s'
            print >>train_nl_np, nl
            print >>train_mrl_np, mrl
            print >>train_nl_lm, nl
            print >>train_mrl_lm, mrl

    train_nl.close()
    train_nl_lm.close()
    train_mrl.close()
    train_mrl_lm.close()
    train_fun.close()
    test_nl.close()
    test_mrl.close()
    test_fun.close()
    tune_nl.close()
    tune_mrl.close()

  def run_robo(self):
    """Extract the RoboCup CLang corpus from corpus.xml using the
    precomputed 300-example folds; the tune split reuses the test fold."""

    train_ids, tune_ids, test_ids = self.get_folds()
    # robo has no separate tune split: tune on the test fold
    tune_ids = test_ids

    train_nl = open('%s/train.nl' % self.config.experiment_dir, 'w')
    train_nl_lm = open('%s/train.nl.lm' % self.config.experiment_dir, 'w')
    train_nl_np = open('%s/train.np.nl' % self.config.experiment_dir, 'w')
    train_mrl = open('%s/train.mrl' % self.config.experiment_dir, 'w')
    train_mrl_lm = open('%s/train.mrl.lm' % self.config.experiment_dir, 'w')
    train_mrl_np = open('%s/train.np.mrl' % self.config.experiment_dir, 'w')
    train_fun = open('%s/train.fun' % self.config.experiment_dir, 'w')
    tune_nl = open('%s/tune.nl' % self.config.experiment_dir, 'w')
    tune_mrl = open('%s/tune.mrl' % self.config.experiment_dir, 'w')
    test_nl = open('%s/test.nl' % self.config.experiment_dir, 'w')
    test_mrl = open('%s/test.mrl' % self.config.experiment_dir, 'w')
    test_fun = open('%s/test.fun' % self.config.experiment_dir, 'w')

    corpus = ET.parse('%s/corpus.xml' % self.config.data_dir)
    corpus_root = corpus.getroot()

    for node in corpus_root.findall('example'):
      nl = node.find("nl[@lang='%s']" % self.config.lang).text
      nl = self.preprocess_nl(nl)
      clang = node.find("mrl[@lang='robocup-clang']").text
      clang = self.replace_specials(clang)
      fun = self.clang_to_fun(clang)
      #print fun
      mrl = util.fun_to_mrl(fun)
      eid = int(node.attrib['id'])

      if eid in tune_ids:
        print >>tune_nl, nl
        print >>tune_mrl, mrl
      elif eid in train_ids:
        print >>train_nl, nl
        print >>train_mrl, mrl
        print >>train_fun, fun
        print >>train_nl_np, nl
        print >>train_mrl_np, mrl
        print >>train_nl_lm, '<s>', nl, '</s>'
        print >>train_mrl_lm, '<s>', mrl, '</s>'
      # deliberately 'if', not 'elif': tune_ids == test_ids, so test
      # examples are captured by the first branch above but must still be
      # written to the test files as well
      if eid in test_ids:
        #elif eid in test_ids:
        print >>test_nl, nl
        print >>test_mrl, mrl
        print >>test_fun, fun

    # Parse the 'names' gazetteer: per record, a header line, an NL line
    # (name after a 3-character prefix), two skipped lines, then
    # '*n:<kind> -> ({ <token> })' entries terminated by a blank line.
    nps_file = open('%s/names' % self.config.data_dir)
    while True:
      line = nps_file.readline()
      if not line:
        break
      nl = nps_file.readline().strip()[3:]
      nl = self.preprocess_nl(nl)
      nps_file.readline()
      nps_file.readline()
      while True:
        line = nps_file.readline().strip()
        if line == '':
          break
        m = re.match('^\*n:(Num|Unum|Ident) -> \(\{ (\S+) \}\)$', line)
        mrl = m.group(2) + '@0'
        # NOTE(review): the extent of this weighting loop is ambiguous in
        # this dump; all four writes are replicated here -- confirm.
        for i in range(self.NP_WEIGHT):
          print >>train_nl_np, nl
          print >>train_mrl_np, mrl
          print >>train_nl_lm, nl
          print >>train_mrl_lm, mrl

    train_nl.close()
    train_nl_lm.close()
    train_mrl.close()
    train_mrl_lm.close()
    train_fun.close()
    test_nl.close()
    test_mrl.close()
    test_fun.close()
    tune_nl.close()
    tune_mrl.close()

  def run_geo(self):
    """Extract the Geoquery corpus (geo-funql) and its NP list; a fraction
    of the training sentences can be stripped of labels (unlabeled.nl) for
    semi-supervised use, controlled by config.lfrac."""
    train_ids, tune_ids, test_ids = self.get_folds()

    train_nl = open('%s/train.nl' % self.config.experiment_dir, 'w')
    train_nl_lm = open('%s/train.nl.lm' % self.config.experiment_dir, 'w')
    train_nl_np = open('%s/train.np.nl' % self.config.experiment_dir, 'w')
    train_mrl = open('%s/train.mrl' % self.config.experiment_dir, 'w')
    train_mrl_lm = open('%s/train.mrl.lm' % self.config.experiment_dir, 'w')
    train_mrl_np = open('%s/train.np.mrl' % self.config.experiment_dir, 'w')
    train_fun = open('%s/train.fun' % self.config.experiment_dir, 'w')
    unlabeled_nl = open('%s/unlabeled.nl' % self.config.experiment_dir, 'w')
    tune_nl = open('%s/tune.nl' % self.config.experiment_dir, 'w')
    tune_mrl = open('%s/tune.mrl' % self.config.experiment_dir, 'w')
    test_nl = open('%s/test.nl' % self.config.experiment_dir, 'w')
    test_mrl = open('%s/test.mrl' % self.config.experiment_dir, 'w')
    test_fun = open('%s/test.fun' % self.config.experiment_dir, 'w')

    corpus = ET.parse('%s/corpus-true.xml' % self.config.data_dir)
    corpus_root = corpus.getroot()

    counter = 0
    #stop_labeling = False
    for node in corpus_root.findall('example'):
      nl = node.find("nl[@lang='%s']" % self.config.lang).text
      nl = self.preprocess_nl(nl)
      fun = node.find("mrl[@lang='geo-funql']").text
      fun = self.preprocess_fun(fun)
      #fun = self.replace_specials(fun)
      mrl = util.fun_to_mrl(fun)
      eid = int(node.attrib['id'])

      # keep the first 10*lfrac of every 10 examples labeled; the rest of
      # the training examples contribute only their NL to unlabeled.nl
      unlabel_this = (counter >= 10 * self.config.lfrac)
      counter += 1
      counter %= 10

      if eid in tune_ids:
        print >>tune_nl, nl
        print >>tune_mrl, mrl
      elif eid in train_ids and not unlabel_this:
        print >>train_nl, nl
        print >>train_mrl, mrl
        print >>train_fun, fun
        print >>train_nl_np, nl
        print >>train_mrl_np, mrl
        print >>train_nl_lm, '<s>', nl, '</s>'
        print >>train_mrl_lm, '<s>', mrl, '</s>'
      elif eid in train_ids and unlabel_this:
        print >>unlabeled_nl, nl
      elif eid in test_ids:
        print >>test_nl, nl
        print >>test_mrl, mrl
        print >>test_fun, fun

    # NP list: each entry may have several NL realizations; config.np_type
    # ('big'/'small') filters multi-token vs single-token MRLs
    nplist = ET.parse('%s/nps-true.xml' % self.config.data_dir)
    nplist_root = nplist.getroot()
    for node in nplist_root.findall('example'):
      fun = node.find("mrl[@lang='geo-funql']").text
      fun = self.preprocess_fun(fun)
      #fun = self.replace_specials(fun)
      mrl = util.fun_to_mrl(fun)
      big_np = len(mrl.split()) > 1
      if (self.config.np_type == 'big' and not big_np) or \
         (self.config.np_type == 'small' and big_np):
        continue
      for nl_node in node.findall("nl[@lang='%s']" % self.config.lang):
        nl = nl_node.text
        nl = self.preprocess_nl(nl)
        # NOTE(review): loop extent ambiguous in this dump; all four
        # writes are replicated NP_WEIGHT times here -- confirm.
        for i in range(self.NP_WEIGHT):
          print >>train_nl_np, nl
          print >>train_mrl_np, mrl
          print >>train_nl_lm, nl
          print >>train_mrl_lm, mrl

    # NOTE(review): unlabeled_nl is never closed here
    train_nl.close()
    train_nl_lm.close()
    train_mrl.close()
    train_mrl_lm.close()
    train_fun.close()
    test_nl.close()
    test_mrl.close()
    test_fun.close()
    tune_nl.close()
    tune_mrl.close()

  def get_folds(self):
    """Return (train_ids, tune_ids, test_ids) as sets of example ids read
    from the corpus' precomputed split files.

    tune_ids is empty when the selected split has no tune file (geo/robo
    debug and dev runs). robo with run == 'test' is unsupported: the
    *_ids_file names would be unbound (NameError).
    """

    if self.config.corpus == 'geo':
      if self.config.run in ('debug', 'dev'):
        train_ids_file = '%s/folds600/fold-%d-train.ids' \
            % (self.config.data_dir, self.config.fold)
        tune_ids_file = None
        test_ids_file = '%s/folds600/fold-%d-test.ids' \
            % (self.config.data_dir, self.config.fold)
      elif self.config.run == 'test':
        train_ids_file = '%s/split880/fold-0-train.ids' % self.config.data_dir
        tune_ids_file = '%s/split880/fold-0-tune.ids' % self.config.data_dir
        test_ids_file = '%s/split880/fold-0-test.ids' % self.config.data_dir

    elif self.config.corpus == 'robo':
      if self.config.run in ('debug', 'dev'):
        train_ids_file = '%s/split-300/run-0/fold-%d/train-N270' \
            % (self.config.data_dir, self.config.fold)
        tune_ids_file = None
        test_ids_file = '%s/split-300/run-0/fold-%d/test' \
            % (self.config.data_dir, self.config.fold)
    else:
      assert False

    train_ids = set()
    tune_ids = set()
    test_ids = set()
    with open(train_ids_file) as fold_file:
      for line in fold_file.readlines():
        train_ids.add(int(line))
    if tune_ids_file:
      with open(tune_ids_file) as fold_file:
        for line in fold_file.readlines():
          tune_ids.add(int(line))
    with open(test_ids_file) as fold_file:
      for line in fold_file.readlines():
        test_ids.add(int(line))

    return train_ids, tune_ids, test_ids

  def preprocess_nl(self, nl):
    """Normalize an NL sentence: lowercase, byte-encode, strip a trailing
    ' .' or ' ?', and stem each token when config.stem is set."""
    nl = nl.strip().lower()
    if self.config.stem and self.config.lang == 'de':
      # German stemmer can't handle UTF-8
      nl = nl.encode('ascii', 'ignore')
    else:
      nl = nl.encode('utf-8', 'ignore')
    if nl[-2:] == ' .' or nl[-2:] == ' ?':
      nl = nl[:-2]
    if self.config.stem:
      nl = ' '.join([self.stemmer.stem(tok) for tok in nl.split()])
    return nl

  def preprocess_fun(self, fun):
    """Normalize a functional MRL string (currently just whitespace)."""
    return fun.strip()

  def replace_specials(self, mrl):
    """Escape characters that are special downstream: '.'->xxd, '\\''->xxq,
    '/'->xxs (applied to the whole string)."""
    mrl = mrl.replace('.', 'xxd')
    mrl = mrl.replace("'", 'xxq')
    mrl = mrl.replace('/', 'xxs')
    #mrl = re.sub(r"(' *[^'()]*)\'([^'()]* *')", r'\1_q_\2', mrl)
    #mrl = re.sub(r"(' *[^'()]*)\.([^'()]* *')", r'\1_dot_\2', mrl)
    #mrl = re.sub(r"(' *[^'()]*)\/([^'()]* *')", r'\1_slash_\2', mrl)
    return mrl

  def clang_to_fun(self, clang):
    """Convert a CLang s-expression into nested functional notation:
    braces become (set ...) terms, each head symbol is moved in front of
    its parenthesis, and definerule's leading arguments are dropped."""
    clang = clang.strip()
    clang = re.sub(r'\s+', ' ', clang)
    # '{1 2 X}' -> '(set 1 2 X)'
    clang = re.sub(r'\{([\d|X]+( [\d|X]+)*)\}', r'(set \1)', clang)
    # '(head arg ...' -> 'head(arg ...'
    clang = re.sub(r'\(([\w.-]+) ?', r'\1(', clang)
    clang = self.strip_bare_parens(clang)
    clang = clang.replace('()', '')
    clang = clang.replace(' ', ',')
    clang = clang.replace('"', '')

    # drop the first two arguments of definerule
    clang = re.sub(r'definerule\([^,]+,[^,]+,', r'definerule(', clang)

    return clang

  def strip_bare_parens(self, clang):
    """Remove the first parenthesis group that has no head symbol (one
    introduced by ' ('), keeping its contents; returns the input unchanged
    if no such group exists."""
    try:
      start = clang.index(' (')+1
    except ValueError:
      return clang

    # scan to the matching close paren
    end = start+1
    pcounter = 0
    while pcounter >= 0:
      c = clang[end:end+1]
      if c == '(':
        pcounter += 1
      elif c == ')':
        pcounter -= 1
      end += 1
    end -= 1

    # splice out the '(' at start and the ')' at end
    r = clang[:start] + clang[start+1:end] + clang[end+1:]
    return r

  def slot_to_fun(self, slot):
    """Rewrite an ATIS slot expression into function-call syntax via
    string substitution: attribute='v' values become quoted arguments,
    while the 'value' wrapper itself is protected from quoting."""
    slot = slot.strip()
    slot = slot.replace('value', '"value"')
    slot = slot.replace('="', "('")
    slot = slot.replace('",', "'),")
    slot = slot.replace('")', "'))")
    slot = slot.replace("'value'", 'value')
    return slot
diff --git a/src/extractor.pyc b/src/extractor.pyc
new file mode 100644
index 0000000..212156c
--- /dev/null
+++ b/src/extractor.pyc
Binary files differ
diff --git a/src/functionalizer.py b/src/functionalizer.py
new file mode 100644
index 0000000..66325a0
--- /dev/null
+++ b/src/functionalizer.py
@@ -0,0 +1,112 @@
+import logging
+import util
+import sys
+
class Functionalizer:
  """Converts the decoder's n-best output (hyp.mrl.nbest, Moses
  'id ||| hyp ||| scores ||| total' format) back into bracketed functional
  MRL expressions (hyp.fun), keeping for each example the highest-ranked
  hypothesis that functionalizes successfully."""

  def __init__(self, config):
    self.config = config

  def run(self):
    """Group n-best lines by example id, then emit one functionalized
    hypothesis per example (or nothing if none converts)."""
    hyp_file = open('%s/hyp.mrl.nbest' % self.config.experiment_dir)
    fun_file = open('%s/hyp.fun' % self.config.experiment_dir, 'w')

    # group consecutive lines by example id
    # NOTE(review): assumes ids are consecutive from 0; a gap in ids would
    # merge two examples' hypothesis lists
    hypsets = []
    hypset = []
    last_eid = 0
    for line in hyp_file:
      parts = line.split('|||')
      eid = int(parts[0])
      if eid != last_eid:
        hypsets.append(hypset)
        hypset = []
        last_eid = eid
      # keep both the feature scores and the total score for the output
      score = parts[2] + ' ||| ' + parts[3].strip()
      hyp = parts[1].strip()
      hypset.append((hyp,score))
    hypsets.append(hypset)

    counter = 0
    for hypset in hypsets:
      # pop from the front (best hypothesis first) until one converts
      hypset = list(reversed(hypset))
      while hypset:
        hyp, score = hypset.pop()
        fun = self.functionalize(hyp)
        if fun:
          print >>fun_file, counter, '|||', fun, '|||', score
          break
      counter += 1
    # NOTE(review): hyp_file and fun_file are never closed

  #xc = 0
  def functionalize(self, mrl):
    """Rebuild a bracketed expression from a 'token@arity' sequence.

    Each token carries its arity after util.ARITY_SEP: a number, the
    string marker (util.ARITY_STR, emitted as a quoted literal with '_'
    restored to spaces) or the variadic marker (util.ARITY_ANY). Returns
    the bracketed string, or None when the sequence is malformed.
    """

    #if '_@0' in mrl and 'cityid@2' in mrl:
    #  #print '==='
    #  #print mrl
    #  self.xc += 1
    #  if self.xc > 5:
    #    exit()

    stack = []  # pending argument counts for currently-open functors
    r = []      # output fragments, joined at the end
    tokens = list(reversed(mrl.split()))

    #print tokens

    while tokens:
      it = tokens.pop()
      #print it
      if util.ARITY_SEP not in it:
        # tolerate a bare token by treating it as a string literal
        token = it
        arity = util.ARITY_STR
        logging.warn('unrecognized token: %s', it)
      else:
        # NOTE(review): rsplit with no maxsplit raises ValueError if the
        # separator occurs more than once in the token -- confirm input
        token, arity = it.rsplit(util.ARITY_SEP)
      if arity == util.ARITY_STR:
        arity = 0
        arity_str = True
      elif not (arity == util.ARITY_ANY):
        arity = int(arity)
        arity_str = False

      if arity == util.ARITY_ANY or arity > 0:
        # open a new functor and remember how many arguments it expects
        r.append(token)
        r.append('(')
        stack.append(arity)
      else:
        assert arity == 0
        if arity_str:
          r.append("'%s'" % token.replace('_', ' '))
        else:
          r.append(token)
        #print r
        # a complete argument was emitted: either advance to the next
        # argument slot (',') or close finished functors (')')
        while stack:
          top = stack.pop()
          if top == util.ARITY_ANY and tokens:
            r.append(',')
            stack.append(util.ARITY_ANY)
            break
          elif top != util.ARITY_ANY and top > 1:
            r.append(',')
            stack.append(top - 1)
            break
          else:
            r.append(')')

      # expression closed but input tokens remain: malformed
      if not stack and tokens:
        return None

    # input exhausted with functors still open: malformed
    if stack:
      return None

    r = ''.join(r)

    # nasty hacks to fix misplaced _
    if '(_' in r:
      return None
    if ',_' in r and not ('cityid' in r):
      return None
    if '_),_)' in r:
      return None

    return r
diff --git a/src/functionalizer.pyc b/src/functionalizer.pyc
new file mode 100644
index 0000000..c8d3295
--- /dev/null
+++ b/src/functionalizer.pyc
Binary files differ
diff --git a/src/geo_world.py b/src/geo_world.py
new file mode 100644
index 0000000..e5cd58e
--- /dev/null
+++ b/src/geo_world.py
@@ -0,0 +1,108 @@
+import subprocess
+
class GeoWorld:
  """Scores GeoQuery hypotheses by executing them with a Prolog interpreter
  against the WASP geography evaluation database."""

  def __init__(self, config):
    self.config = config

  def run(self):
    """Generate Prolog goals, execute them, and score the answers."""
    self.write_queries()

    # feed eval.pl into the Prolog interpreter with the WASP eval script
    # preloaded; stdout goes to eval.out, stderr to prolog.log
    infile = open('%s/eval.pl' % self.config.experiment_dir)
    log = open('%s/prolog.log' % self.config.experiment_dir, 'w')
    outfile = open('%s/eval.out' % self.config.experiment_dir, 'w')
    p = subprocess.Popen([self.config.prolog,
                          '-l', self.config.wasp_eval],
                         stdin=infile,
                         stdout=outfile,
                         stderr=log)
    p.wait()
    infile.close()
    log.close()
    outfile.close()

    self.extract_results()

  def write_queries(self):
    """Write eval.pl: one eval/1 goal per (reference, hypothesis) pair.

    hyp.fun lines look like "idx ||| hyp ||| scoreparts ||| score"; they
    are grouped by example index, padding with empty lists for examples
    the decoder produced no hypothesis for.
    """

    hyp_file = open('%s/hyp.fun' % self.config.experiment_dir)
    ref_file = open('%s/test.fun' % self.config.experiment_dir)
    query_file = open('%s/eval.pl' % self.config.experiment_dir, 'w')

    examples = []
    hyp_list = []
    last_idx = 0
    for hyp_line in hyp_file.readlines():
      idx, hyp, scoreparts, score = hyp_line.split('|||')
      idx = int(idx)
      hyp = hyp.strip()
      if idx != last_idx:
        # flush the finished group, then pad one empty list per skipped
        # index (1 real + (idx-last_idx-1) empties advances by idx-last_idx)
        examples.append(hyp_list)
        for i in range(last_idx, idx-1):
          examples.append([])
        hyp_list = []
        last_idx = idx
      hyp_list.append((hyp,float(score)))
    examples.append(hyp_list)

    i = 0
    for ref, hyp_list in zip(ref_file.readlines(), examples):
      ref = ref.strip()
      for hyp, score in hyp_list:
        # 1-second time limit per query; any Prolog error prints 'error'
        print >>query_file, \
            'catch(call_with_time_limit(1,eval([%d,%f,%s,%s])),E,writeln(\'error\')).\n' \
            % (i, score, ref, hyp)
      i += 1

    hyp_file.close()
    ref_file.close()
    query_file.close()

  def extract_results(self):
    """Read eval.out and write eval.scored: one line per test example,
    'empty' (no hypothesis), or 'yes'/'no' plus the decoder score of the
    top hypothesis, depending on whether its answer matched the reference."""

    eval_file = open('%s/eval.out' % self.config.experiment_dir)
    result_file = open('%s/eval.scored' % self.config.experiment_dir, 'w')

    examples = []
    hyp_list = []
    last_idx = 0
    for line in eval_file.readlines():
      # queries that timed out or crashed emit a bare 'error' line
      if line == 'error\n':
        continue
      idx, score, result = line.split()
      idx = int(idx)
      score = float(score)
      if idx > last_idx:
        examples.append(hyp_list)
        last_idx += 1
        # pad empty lists for examples with no surviving output
        while idx > last_idx:
          examples.append([])
          last_idx += 1
        hyp_list = []
      hyp_list.append((result,score))
    examples.append(hyp_list)
    last_idx += 1

    # pad to the known size of the evaluation set
    # NOTE(review): 60/280 appear to be the geo dev/test set sizes — confirm
    if self.config.corpus == 'geo' and self.config.run in ('debug', 'dev'):
      top = 60
    elif self.config.corpus == 'geo' and self.config.run == 'test':
      top = 280
    else:
      assert False
    while top > last_idx:
      examples.append([])
      last_idx += 1

    for hyp_list in examples:
      if len(hyp_list) == 0:
        print >>result_file, 'empty'
        continue

      # only the highest-ranked surviving hypothesis counts
      choice, score = hyp_list[0]
      if choice == 'y':
        print >>result_file, 'yes', score
      else:
        print >>result_file, 'no', score

    eval_file.close()
    result_file.close()
diff --git a/src/geo_world.pyc b/src/geo_world.pyc
new file mode 100644
index 0000000..9cb2720
--- /dev/null
+++ b/src/geo_world.pyc
Binary files differ
diff --git a/src/moses.py b/src/moses.py
new file mode 100644
index 0000000..857ddbf
--- /dev/null
+++ b/src/moses.py
@@ -0,0 +1,141 @@
+import logging
+import os
+import subprocess
+import gzip
+
class Moses:
  """Wrapper around the Moses toolkit: model training, tuning, decoding,
  retraining on tune data, and rule-table filtering."""

  def __init__(self, config):
    self.config = config

  def run_train(self):
    """Run train-model.perl over <experiment_dir>/<train_name>.{src,tgt}
    with the ARPA LM built by SRILM; hierarchical models also get a glue
    grammar.  Output and errors go to train.log."""
    args = [self.config.moses_train,
            '--root-dir', self.config.experiment_dir,
            '--corpus', '%s/%s' % (self.config.experiment_dir,
                                   self.config.train_name),
            '--f', self.config.src,
            '--e', self.config.tgt,
            # LM spec: factor 0, order 3, path to ARPA file
            '--lm', '0:3:%s/%s.arpa' % (self.config.experiment_dir, self.config.tgt),
            #'-score-options', "'--OnlyDirect --NoPhraseCount'"
            '--alignment', self.config.symm]
    if self.config.model == 'hier':
      args += ['-hierarchical', '-glue-grammar']

    logging.info(' '.join(args))

    log = open('%s/train.log' % self.config.experiment_dir, 'w')
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=log)
    p.wait()
    log.close()

  def run_retrain(self):
    """Fold the tuning data into the training data and retrain.

    The original training files are preserved with a .notune suffix; the
    extracted alignments and phrase/rule tables are removed so that
    run_train rebuilds them from the enlarged corpus.
    """
    old_train_nl = '%s/%s.nl' % (self.config.experiment_dir,
                                 self.config.train_name)
    old_train_mrl = '%s/%s.mrl' % (self.config.experiment_dir,
                                   self.config.train_name)
    moved_train_nl = '%s.notune' % old_train_nl
    moved_train_mrl = '%s.notune' % old_train_mrl
    tune_nl = '%s/tune.nl' % self.config.experiment_dir
    tune_mrl = '%s/tune.mrl' % self.config.experiment_dir
    os.rename(old_train_nl, moved_train_nl)
    os.rename(old_train_mrl, moved_train_mrl)
    # concatenate original training data with the tune split
    with open(old_train_nl, 'w') as rt_train_nl:
      subprocess.call(['cat', moved_train_nl, tune_nl], stdout=rt_train_nl)
    with open(old_train_mrl, 'w') as rt_train_mrl:
      subprocess.call(['cat', moved_train_mrl, tune_mrl], stdout=rt_train_mrl)

    os.remove('%s/model/extract.inv.gz' % self.config.experiment_dir)
    os.remove('%s/model/extract.gz' % self.config.experiment_dir)
    if self.config.model == 'hier':
      os.remove('%s/model/rule-table.gz' % self.config.experiment_dir)
    else:
      os.remove('%s/model/phrase-table.gz' % self.config.experiment_dir)

    self.run_train()

  def parens_ok(self, line):
    """Check that the MRL side of a phrase-table line is well-bracketed:
    no token stream exhausts all open argument slots while tokens remain.

    NOTE(review): t[-2] == '@' only recognizes single-character arity
    tags; a token with arity >= 10 would be skipped — confirm arities
    stay below 10 in this grammar.
    """
    mrl_part = line.split(' ||| ')[1]
    tokens = [t[-1] for t in mrl_part.split() if t[-2] == '@']
    tokens.reverse()
    stack = []
    while tokens:
      t = tokens.pop()
      assert t != '*'
      if t == 's':
        t = 0          # string terminals take no arguments
      t = int(t)
      if t > 0:
        stack.append(t)
      else:
        # terminal: consume one pending argument slot
        while stack:
          top = stack.pop()
          if top > 1:
            stack.append(top - 1)
            break
      if tokens and not stack:
        return False   # leftover tokens with nothing to attach them to
    return True

  def filter_phrase_table(self):
    """Drop phrase/rule-table entries whose MRL side is not well-bracketed
    (see parens_ok), rewriting the gzipped table in place."""
    table_name = 'phrase' if self.config.model == 'phrase' else 'rule'
    oldname = '%s/model/%s-table.gz' % (self.config.experiment_dir, table_name)
    newname = '%s/model/%s-table.old.gz' % (self.config.experiment_dir, table_name)
    os.rename(oldname, newname)

    with gzip.open(oldname, 'w') as filtered_table_f:
      with gzip.open(newname, 'r') as old_table_f:
        for line in old_table_f:
          if self.parens_ok(line):
            print >>filtered_table_f, line,

  def run_tune(self):
    """Run MERT (mert-moses.pl) on the tune split; hierarchical models use
    the chart decoder and the hierarchical filter script.  Runs from
    inside experiment_dir because MERT writes into the working directory."""
    wd = os.getcwd()
    os.chdir(self.config.experiment_dir)
    args = [self.config.moses_tune,
            '%s/tune.%s' % (self.config.experiment_dir, self.config.src),
            '%s/tune.%s' % (self.config.experiment_dir, self.config.tgt)]
    if self.config.model == 'hier':
      args += [self.config.moses_decode_hier]
    else:
      args += [self.config.moses_decode_phrase]
    args += ['%s/model/moses.ini' % self.config.experiment_dir,
             '--mertdir', '%s/dist/bin' % self.config.moses]
    if self.config.model == 'hier':
      args += ['--filtercmd',
               '%s/scripts/training/filter-model-given-input.pl --Hierarchical'\
               % self.config.moses]

    log = open('%s/tune.log' % self.config.experiment_dir, 'w')
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=log)
    p.wait()
    log.close()
    os.chdir(wd)

  def run_decode(self):
    """Decode test.<src> into hyp.<tgt>, also writing an n-best list.

    Test runs use the MERT-tuned weights (mert-work/moses.ini); other
    runs decode with the untuned model.
    """
    if self.config.model == 'phrase':
      args = [self.config.moses_decode_phrase]
    elif self.config.model == 'hier':
      args = [self.config.moses_decode_hier]
    else:
      assert False

    if self.config.run == 'test':
      args += ['-f', '%s/mert-work/moses.ini' % self.config.experiment_dir]
    else:
      args += ['-f', '%s/model/moses.ini' % self.config.experiment_dir]

    # drop unknown words; emit up to config.nbest distinct hypotheses each
    args += ['-drop-unknown',
             '-n-best-list', '%s/hyp.%s.nbest' % (self.config.experiment_dir, self.config.tgt),
             str(self.config.nbest), 'distinct',
             '-threads', '3']

    infile = open('%s/test.%s' % (self.config.experiment_dir, self.config.src))
    outfile = open('%s/hyp.%s' % (self.config.experiment_dir, self.config.tgt), 'w')
    log = open('%s/decode.log' % self.config.experiment_dir, 'w')
    p = subprocess.Popen(args, stdin=infile, stdout=outfile, stderr=log)
    p.wait()
    infile.close()
    log.close()
    outfile.close()
diff --git a/src/moses.pyc b/src/moses.pyc
new file mode 100644
index 0000000..d2cbf10
--- /dev/null
+++ b/src/moses.pyc
Binary files differ
diff --git a/src/nl_reweighter.py b/src/nl_reweighter.py
new file mode 100644
index 0000000..fcc8f85
--- /dev/null
+++ b/src/nl_reweighter.py
@@ -0,0 +1,227 @@
+import gzip
+import re
+from nlp_tools.hypergraph import Hypergraph
+import itertools
+import logging
+from collections import defaultdict
+import os
+
class Rule:
  """A synchronous grammar rule read from a Moses hierarchical rule table.

  Identity is determined solely by rule_id; degree is the number of
  coindexed nonterminal pairs (0, 1 or 2 for Moses rules).
  """

  MOSES_SYMBOL = '[X]'

  def __init__(self, rule_id, symbol, src, tgt, coindexing):
    self.rule_id = rule_id
    self.symbol = symbol
    self.src = src
    self.tgt = tgt
    self.coindexing = coindexing
    self.degree = len(self.coindexing)

  @classmethod
  def from_moses(cls, rule_id, rule_table_line):
    """Parse one "nl ||| mrl ||| scores ||| alignments ||| counts" line.

    The trailing LHS symbol on each side is dropped, and '[X][X]'
    placeholders are normalized to the plain nonterminal symbol.
    """
    nl, mrl, scores, alignments, counts = re.split(r'\ ?\|\|\|\ ?',
                                                   rule_table_line.strip())
    src_side = [cls.MOSES_SYMBOL if tok == '[X][X]' else tok
                for tok in nl.split()[:-1]]
    tgt_side = [cls.MOSES_SYMBOL if tok == '[X][X]' else tok
                for tok in mrl.split()[:-1]]
    links = []
    for pair in alignments.split():
      left, right = pair.split('-')
      links.append((int(left), int(right)))
    return Rule(rule_id, cls.MOSES_SYMBOL, src_side, tgt_side, links)

  @classmethod
  def glue(cls, rule_id):
    """The monotone binary glue rule [X] -> [X] [X] / [X] [X]."""
    sym = cls.MOSES_SYMBOL
    return Rule(rule_id, sym, [sym, sym], [sym, sym], [(0,0), (1,1)])

  def __eq__(self, other):
    return self.__class__ == other.__class__ and other.rule_id == self.rule_id

  def __hash__(self):
    return self.rule_id

  def __repr__(self):
    return 'Rule<(%d) %s -> %s : %s>' % (self.rule_id, self.symbol, self.src,
                                         self.tgt)
+
class NLReweighter:
  """Adds a feature to the Moses rule table counting how often each rule
  participates in a successful monolingual (NL-side) parse of unlabeled
  (and optionally labeled) training sentences."""

  def __init__(self, config):
    self.config = config

  def run(self):
    """Parse all sentences with the extracted grammar, count rule usage
    in complete parses, and rewrite the model with the new feature."""
    rules = self.load_rule_table()
    glue = Rule.glue(len(rules))
    all_counts = defaultdict(lambda: 0)
    successful_counts = defaultdict(lambda: 0)

    with open('%s/unlabeled.nl' % self.config.experiment_dir) as ul_f:
      for line in ul_f:
        toks = line.strip().split()
        chart = self.parse(toks, rules, glue)
        if not chart:
          continue
        self.collect_all_counts(all_counts, chart)
        self.collect_successful_counts(successful_counts, chart, toks)

    if not self.config.ul_only:
      with open('%s/train.nl' % self.config.experiment_dir) as t_f:
        for line in t_f:
          toks = line.strip().split()
          chart = self.parse(toks, rules, glue)
          # TODO is this an OOV issue?
          if not chart:
            continue
          self.collect_all_counts(all_counts, chart)
          self.collect_successful_counts(successful_counts, chart, toks)

    # only counts from complete parses are written into the model
    self.write_updated_model(successful_counts)

  def load_rule_table(self):
    """Read the gzipped Moses rule table into {rule_id: Rule}, assigning
    ids in file order (so ids match line numbers)."""
    rule_table_path = '%s/model/rule-table.gz' % self.config.experiment_dir
    rules = {}
    with gzip.open(rule_table_path) as rule_table_f:
      for line in rule_table_f.readlines():
        rule = Rule.from_moses(len(rules), line)
        rules[rule.rule_id] = rule
    return rules

  def write_updated_model(self, counts):
    """Append the per-rule count as an extra score column and patch
    moses.ini so the translation table declares one more feature."""
    old_rule_table_path = '%s/model/rule-table.gz' % self.config.experiment_dir
    new_rule_table_path = '%s/model/rule-table-new.gz' % self.config.experiment_dir
    counter = 0
    with gzip.open(old_rule_table_path) as old_rule_table_f:
      with gzip.open(new_rule_table_path, 'w') as new_rule_table_f:
        for line in old_rule_table_f:
          nl, mrl, scores, alignments, rule_counts = re.split(r'\ ?\|\|\|\ ?',
                                                              line.strip())
          # counter is the rule id assigned by load_rule_table
          scores = '%s %f' % (scores, counts[counter])
          newline = ' ||| '.join([nl, mrl, scores, alignments, rule_counts])
          newline = re.sub(r'\s+', ' ', newline)
          print >>new_rule_table_f, newline
          counter += 1

    old_config_path = '%s/model/moses.ini' % self.config.experiment_dir
    new_config_path = '%s/model/moses-new.ini' % self.config.experiment_dir
    with open(old_config_path) as old_config_f:
      with open(new_config_path, 'w') as new_config_f:
        for line in old_config_f:
          if line[-14:-1] == 'rule-table.gz':
            # NOTE(review): overwrites character 6 of the ttable line with
            # '6' — appears to bump the declared feature count; confirm
            # against the moses.ini [ttable-file] format
            line = line[:6] + '6' + line[7:]
          print >>new_config_f, line,
          if line == '[weight-t]\n':
            # initial weight for the new feature
            print >>new_config_f, '0.20'

    os.rename(new_rule_table_path, old_rule_table_path)
    os.rename(new_config_path, old_config_path)

  def parse(self, sent, grammar, glue):
    """CKY-style chart parse of sent.

    chart[start, span] holds Hypergraph nodes for all rules matching that
    span; the glue rule additionally combines any binary split of the
    whole sentence into a root node.  Returns the chart, or None when no
    complete parse exists.
    """
    chart = dict()

    for span in range(1, len(sent)+1):
      for start in range(len(sent)+1-span):
        chart[start,span] = list()
        for rule in grammar.values():
          matches = self.match(sent, rule, start, span, chart)
          chart[start,span] += matches

    # glue: [X] -> [X] [X] over every split point of the full sentence
    for i in range(1, len(sent)):
      if chart[0,i] and chart[i,len(sent)-i]:
        psets = [(c1, c2) for c1 in chart[0,i] for c2 in chart[i,len(sent)-i]]
        chart[0,len(sent)].append(Hypergraph(glue, psets))

    if not chart[0,len(sent)]:
      return None
    else:
      return chart

  def match(self, sent, rule, start, span, chart):
    """Return Hypergraph nodes for every way rule can cover
    sent[start:start+span], dispatching on the rule's degree
    (number of nonterminals: 0, 1 or 2)."""

    if rule.degree == 0:
      # purely lexical rule: the span must equal the source side exactly
      if span != len(rule.src):
        return []
      if sent[start:start+span] != rule.src:
        return []
      return [Hypergraph(rule, [])]

    elif rule.degree == 1:
      # one gap: its width is forced by the span and the terminal count
      nt_start = start + rule.coindexing[0][0]
      nt_span = span - len(rule.src) + 1
      if nt_span <= 0:
        return []
      # terminals before and after the gap must match the sentence
      if sent[start:nt_start] != rule.src[0:rule.coindexing[0][0]]:
        return []
      if sent[nt_start+nt_span:start+span] != rule.src[rule.coindexing[0][0]+1:]:
        return []

      pointer_sets = [i for i in chart[nt_start, nt_span] if i.label.symbol ==
                      rule.src[rule.coindexing[0][0]]]
      if not pointer_sets:
        return []
      return [Hypergraph(rule, [(i,) for i in pointer_sets])]

    elif rule.degree == 2:
      # two gaps: enumerate every way to split the free width between them
      matches = []
      before_dist = rule.coindexing[0][0]
      between_dist = rule.coindexing[1][0] - rule.coindexing[0][0] - 1
      before_2_dist = rule.coindexing[1][0]
      nt_total_span = span - len(rule.src) + 2
      if nt_total_span <= 0:
        return []
      nt1_start = start + before_dist
      for nt1_span in range(1,nt_total_span):
        nt2_start = nt1_start + nt1_span + between_dist
        nt2_span = nt_total_span - nt1_span

        if sent[start:nt1_start] != rule.src[0:before_dist]:
          continue
        if sent[nt1_start+nt1_span:nt2_start] != rule.src[before_dist+1:before_2_dist]:
          continue
        if sent[nt2_start+nt2_span:start+span] != rule.src[before_2_dist+1:]:
          continue

        pointer_sets_1 = [i for i in chart[nt1_start,nt1_span] if i.label.symbol ==
                          rule.src[rule.coindexing[0][0]]]
        pointer_sets_2 = [i for i in chart[nt2_start,nt2_span] if i.label.symbol ==
                          rule.src[rule.coindexing[1][0]]]

        if not (pointer_sets_1 and pointer_sets_2):
          continue

        matches.append(Hypergraph(rule, list(itertools.product(pointer_sets_1,
                                                               pointer_sets_2))))

      return matches

    assert False

  def collect_all_counts(self, counts, chart):
    """Increment counts[rule_id] for every node anywhere in the chart."""
    for cell in chart.values():
      for node in cell:
        counts[node.label.rule_id] += 1

  def collect_successful_counts(self, counts, chart, sent):
    """Increment counts[rule_id] only for nodes reachable from a root
    (full-sentence) node."""
    used = set()
    for cell in chart[0, len(sent)]:
      self.mark_used(used, cell)
    for cell in chart.values():
      for node in cell:
        if node in used:
          counts[node.label.rule_id] += 1

  def mark_used(self, used, cell):
    """Recursively add cell and all its hyperedge children to used."""
    for edge in cell.edges:
      for ccell in edge:
        if ccell not in used:
          self.mark_used(used, ccell)
    used.add(cell)
diff --git a/src/nl_reweighter.pyc b/src/nl_reweighter.pyc
new file mode 100644
index 0000000..e6aac7d
--- /dev/null
+++ b/src/nl_reweighter.pyc
Binary files differ
diff --git a/src/query_comparer.py b/src/query_comparer.py
new file mode 100644
index 0000000..79b9905
--- /dev/null
+++ b/src/query_comparer.py
@@ -0,0 +1,31 @@
class QueryComparer:
  """Scores hypotheses by exact string match against the reference MRLs
  (used for corpora with no executable evaluation)."""

  def __init__(self, config):
    self.config = config

  def run(self):
    """Write eval.scored: one line per reference — 'empty' when no
    hypothesis exists for that index, otherwise 'yes 0' / 'no 0'."""
    exp_dir = self.config.experiment_dir
    hyp_file = open('%s/hyp.fun' % exp_dir)
    ref_file = open('%s/test.fun' % exp_dir)
    out_file = open('%s/eval.scored' % exp_dir, 'w')

    # keep only the last hypothesis seen for each example index
    hyps = {}
    for line in hyp_file:
      idx, hyp, scores1, scores2 = line.split(' ||| ')
      hyps[int(idx)] = hyp

    for i, line in enumerate(ref_file):
      if i not in hyps:
        print >>out_file, 'empty'
        continue
      test = line.strip()
      if hyps[i] == test:
        print >>out_file, 'yes', 0
      else:
        print >>out_file, 'no', 0

    hyp_file.close()
    ref_file.close()
    out_file.close()
diff --git a/src/query_comparer.pyc b/src/query_comparer.pyc
new file mode 100644
index 0000000..0024b42
--- /dev/null
+++ b/src/query_comparer.pyc
Binary files differ
diff --git a/src/slot_checker.pyc b/src/slot_checker.pyc
new file mode 100644
index 0000000..baca7bf
--- /dev/null
+++ b/src/slot_checker.pyc
Binary files differ
diff --git a/src/smt_semparse_config.py b/src/smt_semparse_config.py
new file mode 100644
index 0000000..71eaf24
--- /dev/null
+++ b/src/smt_semparse_config.py
@@ -0,0 +1,31 @@
+from config import Config
+
+class SMTSemparseConfig(Config):
+
+ def __init__(self, settings_path, dependencies_path):
+ Config.__init__(self, settings_path, dependencies_path)
+
+ self.put('data_dir', '%s/data/%s' % (self.smt_semparse, self.corpus))
+
+ if self.np:
+ self.train_name = 'train.np'
+ else:
+ self.train_name = 'train'
+
+ self.put('srilm_ngram_count', '%s/bin/%s/ngram-count' % \
+ (self.srilm, self.srilm_arch))
+
+ self.put('moses_train', '%s/scripts/training/train-model.perl' % self.moses)
+ self.put('moses_tune', '%s/scripts/training/mert-moses.pl' % self.moses)
+ self.put('moses_decode_phrase', '%s/dist/bin/moses' % self.moses)
+ self.put('moses_decode_hier', '%s/dist/bin/moses_chart' % self.moses)
+ self.put('bleu_eval', '%s/scripts/generic/multi-bleu.perl' % self.moses)
+
+ self.put('wasp_eval', '%s/data/geo-funql/eval/eval.pl' % self.wasp)
+
+ if self.nlg:
+ self.put('src', 'mrl')
+ self.put('tgt', 'nl')
+ else:
+ self.put('src', 'nl')
+ self.put('tgt', 'mrl')
diff --git a/src/smt_semparse_config.pyc b/src/smt_semparse_config.pyc
new file mode 100644
index 0000000..3a27aad
--- /dev/null
+++ b/src/smt_semparse_config.pyc
Binary files differ
diff --git a/src/smt_semparse_experiment.py b/src/smt_semparse_experiment.py
new file mode 100644
index 0000000..222b890
--- /dev/null
+++ b/src/smt_semparse_experiment.py
@@ -0,0 +1,87 @@
+import logging
+import os
+from extractor import Extractor
+from functionalizer import Functionalizer
+from slot_checker import SlotChecker
+from srilm import SRILM
+from moses import Moses
+from nl_reweighter import NLReweighter
+from geo_world import GeoWorld
+from query_comparer import QueryComparer
+from bleu_scorer import BLEUScorer
+
class SMTSemparseExperiment:
  """End-to-end driver: data extraction, LM and TM training, optional
  reweighting/filtering/tuning, decoding, and evaluation."""

  def __init__(self, config):
    self.config = config

  def run_fold(self, fold):
    """Run the pipeline for one cross-validation fold in work_dir/<fold>."""
    logging.info('running fold %d', fold)
    self.config.put('fold', fold)
    fold_dir = os.path.join(self.config.work_dir, str(fold))
    self.config.put('experiment_dir', fold_dir)
    os.makedirs(fold_dir)
    self.run()

  def run_split(self):
    """Run the pipeline once on a fixed train/test split in work_dir."""
    logging.info('running split')
    self.config.put('experiment_dir', self.config.work_dir)
    self.run()

  def run(self):
    """Execute the full pipeline inside config.experiment_dir."""
    logging.info('working dir is %s', self.config.experiment_dir)

    # get data
    logging.info('extracting data')
    Extractor(self.config).run()

    # learn lm
    logging.info('learning LM')
    SRILM(self.config).run_ngram_count()

    # train moses
    moses = Moses(self.config)
    logging.info('training TM')
    moses.run_train()

    # reweight using monolingual data
    if self.config.monolingual:
      logging.info('learning from monolingual data')
      NLReweighter(self.config).run()

    # filter disconnected rules
    if self.config.filter:
      logging.info('filtering disconnected rules')
      moses.filter_phrase_table()

    # tune moses (only test runs are tuned)
    if self.config.run == 'test':
      logging.info('tuning TM')
      moses.run_tune()

      if self.config.retrain:
        logging.info('retraining TM')
        moses.run_retrain()

    # decode input
    logging.info('decoding')
    moses.run_decode()

    if self.config.nlg:
      # generation: score text output with BLEU
      logging.info('running BLEU')
      BLEUScorer(self.config).run()
      pass

    else:
      # functionalize
      logging.info('functionalizing')
      Functionalizer(self.config).run()

      # compare answers: execution for geo, slot matching for atis,
      # exact string match otherwise
      logging.info('executing queries')
      if self.config.corpus == 'geo':
        GeoWorld(self.config).run()
      elif self.config.corpus == 'atis':
        SlotChecker(self.config).run()
      else:
        QueryComparer(self.config).run()
diff --git a/src/smt_semparse_experiment.pyc b/src/smt_semparse_experiment.pyc
new file mode 100644
index 0000000..0a067b7
--- /dev/null
+++ b/src/smt_semparse_experiment.pyc
Binary files differ
diff --git a/src/srilm.py b/src/srilm.py
new file mode 100644
index 0000000..ef371cf
--- /dev/null
+++ b/src/srilm.py
@@ -0,0 +1,20 @@
+import logging
+import subprocess
+
class SRILM:
  """Thin wrapper around SRILM's ngram-count binary."""

  def __init__(self, config):
    self.config = config

  def run_ngram_count(self):
    """Train a trigram LM (with <unk>, no sentence markers) on the target
    side of the training data, writing <tgt>.arpa; stderr goes to lm.log."""
    exp_dir = self.config.experiment_dir
    tgt = self.config.tgt
    cmd = [self.config.srilm_ngram_count,
           '-text', '%s/train.%s.lm' % (exp_dir, tgt),
           '-order', '3',
           '-no-sos',
           '-no-eos',
           '-lm', '%s/%s.arpa' % (exp_dir, tgt),
           '-unk']
    log = open('%s/lm.log' % exp_dir, 'w')
    proc = subprocess.Popen(cmd, stderr=log)
    proc.wait()
    log.close()
diff --git a/src/srilm.pyc b/src/srilm.pyc
new file mode 100644
index 0000000..9a07944
--- /dev/null
+++ b/src/srilm.pyc
Binary files differ
diff --git a/src/util.py b/src/util.py
new file mode 100644
index 0000000..7ce1c7f
--- /dev/null
+++ b/src/util.py
@@ -0,0 +1,67 @@
+import re
+from collections import defaultdict
+
ARITY_SEP = '@'   # separates a token name from its arity tag, e.g. 'city@1'
ARITY_STR = 's'   # arity tag marking a quoted-string terminal
ARITY_ANY = '*'   # arity tag marking variable (unknown) arity
+
def after_nth(mrl, token, n):
  """Return the suffix of mrl starting at the LAST character of the n-th
  whole-word occurrence of token (n >= 1).

  The one-character overlap is intentional: callers pass the result to
  count_arguments, which skips the leading character before the '('.

  Fixes over the previous version: the token is regex-escaped (names
  containing metacharacters like '.' no longer corrupt the pattern), and
  the search resumes from the end of each match, so single-character
  tokens can no longer re-match the same occurrence forever.
  """
  pattern = re.compile(r'\b%s\b' % re.escape(token))
  pos = 0
  m = None
  for _ in range(n):
    m = pattern.search(mrl, pos)
    pos = m.end()
  return mrl[m.end()-1:]
+
def count_arguments(s):
  """Count the top-level arguments of the first function call in s.

  Scans until the first call's parentheses are balanced (or a top-level
  comma/end of string is hit).  Returns commas-at-depth-1 plus one when a
  '(' was seen, 0 for a bare (argumentless) token.
  """
  in_args = False   # have we entered the argument list yet?
  depth = 0         # current parenthesis nesting depth
  n_commas = 0      # commas seen at depth 1 (top-level argument separators)
  for ch in s:
    # stop once the first call's parens have closed again
    if not ((not in_args and depth == 0) or (in_args and depth > 0)):
      break
    if ch == '(':
      in_args = True
      depth += 1
    elif ch == ')':
      depth -= 1
    elif depth == 1 and ch == ',':
      n_commas += 1
    elif depth < 1 and ch == ',':
      # comma before any '(': the token itself is an argument, no args
      break
  if in_args:
    return n_commas + 1
  assert n_commas == 0
  return 0
+
def fun_to_mrl(mrl, star_top=False):
  """Flatten a functional MR (e.g. answer(city(...))) into a token stream
  where every token carries an arity tag: name@N for an N-ary function,
  name@s for a quoted-string terminal.  With star_top, the root token is
  relabeled with the variable-arity tag '@*'.
  """
  mrl = mrl.strip()

  # collapse quoted strings like 'new york' into one tagged token: new_york@s
  mrl = re.sub(r"' *([A-Za-z0-9_ ]+?) *'", lambda x: '%s%s%s' % (x.group(1).replace(' ', '_'), ARITY_SEP, ARITY_STR), mrl)
  mrl = re.sub(r'\s+', ' ', mrl)
  # strip parentheses and commas to get the bare token sequence
  mrl_noparens = re.sub(r'[\(\)]', ' ', mrl)
  mrl_noparens = re.sub(r'\s+', ' ', mrl_noparens)
  mrl_nocommas = re.sub(r',', ' ', mrl_noparens)
  mrl_nocommas = re.sub(r'\s+', ' ', mrl_nocommas)

  mrl_labeled_tokens = []
  seen = defaultdict(lambda:0)  # occurrences of each token so far
  for token in mrl_nocommas.split():
    seen[token] += 1
    # arity = argument count at this token's n-th occurrence in the
    # original parenthesized string
    args = count_arguments(after_nth(mrl, token, seen[token]))
    if token[-len(ARITY_SEP)-len(ARITY_STR):] == '%s%s' % (ARITY_SEP, ARITY_STR):
      # already tagged as a string terminal above
      mrl_labeled_tokens.append(token)
    else:
      mrl_labeled_tokens.append('%s%s%d' % (token, ARITY_SEP, args))

  if star_top:
    # relabel the root token with variable arity
    tok = mrl_labeled_tokens[0]
    sep = tok.rindex(ARITY_SEP)
    mrl_labeled_tokens[0] = tok[:sep] + ARITY_SEP + ARITY_ANY

  return ' '.join(mrl_labeled_tokens)
diff --git a/src/util.pyc b/src/util.pyc
new file mode 100644
index 0000000..edaf734
--- /dev/null
+++ b/src/util.pyc
Binary files differ