From f343459d6198352964dbb6779f15c352fe2d5794 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sun, 28 Jul 2013 09:54:54 +0100 Subject: init --- src/.config.py.swp | Bin 0 -> 12288 bytes src/.nl_reweighter.py.swp | Bin 0 -> 24576 bytes src/__init__.py | 0 src/__init__.pyc | Bin 0 -> 131 bytes src/__pycache__/__init__.cpython-33.pyc | Bin 0 -> 139 bytes src/__pycache__/smt_semparse_config.cpython-33.pyc | Bin 0 -> 1337 bytes src/bleu_scorer.py | 17 + src/bleu_scorer.pyc | Bin 0 -> 1086 bytes src/config.py | 39 ++ src/config.pyc | Bin 0 -> 2082 bytes src/eval_bevan/clean | 280 +++++++++++ src/eval_bevan/errlog | 0 src/eval_bevan/eval_bevan.sh | 10 + src/eval_bevan/format_prolog.py | 30 ++ src/eval_bevan/test.out | 280 +++++++++++ src/eval_bevan/test.pl | 560 +++++++++++++++++++++ src/evaluator.py | 47 ++ src/evaluator.pyc | Bin 0 -> 1763 bytes src/extractor.py | 418 +++++++++++++++ src/extractor.pyc | Bin 0 -> 12435 bytes src/functionalizer.py | 112 +++++ src/functionalizer.pyc | Bin 0 -> 2687 bytes src/geo_world.py | 108 ++++ src/geo_world.pyc | Bin 0 -> 3286 bytes src/moses.py | 141 ++++++ src/moses.pyc | Bin 0 -> 5366 bytes src/nl_reweighter.py | 227 +++++++++ src/nl_reweighter.pyc | Bin 0 -> 8662 bytes src/query_comparer.py | 31 ++ src/query_comparer.pyc | Bin 0 -> 1280 bytes src/slot_checker.pyc | Bin 0 -> 1643 bytes src/smt_semparse_config.py | 31 ++ src/smt_semparse_config.pyc | Bin 0 -> 1523 bytes src/smt_semparse_experiment.py | 87 ++++ src/smt_semparse_experiment.pyc | Bin 0 -> 3062 bytes src/srilm.py | 20 + src/srilm.pyc | Bin 0 -> 1102 bytes src/util.py | 67 +++ src/util.pyc | Bin 0 -> 2356 bytes 39 files changed, 2505 insertions(+) create mode 100644 src/.config.py.swp create mode 100644 src/.nl_reweighter.py.swp create mode 100644 src/__init__.py create mode 100644 src/__init__.pyc create mode 100644 src/__pycache__/__init__.cpython-33.pyc create mode 100644 src/__pycache__/smt_semparse_config.cpython-33.pyc create mode 100644 src/bleu_scorer.py create mode 
100644 src/bleu_scorer.pyc create mode 100644 src/config.py create mode 100644 src/config.pyc create mode 100644 src/eval_bevan/clean create mode 100644 src/eval_bevan/errlog create mode 100755 src/eval_bevan/eval_bevan.sh create mode 100755 src/eval_bevan/format_prolog.py create mode 100644 src/eval_bevan/test.out create mode 100644 src/eval_bevan/test.pl create mode 100644 src/evaluator.py create mode 100644 src/evaluator.pyc create mode 100644 src/extractor.py create mode 100644 src/extractor.pyc create mode 100644 src/functionalizer.py create mode 100644 src/functionalizer.pyc create mode 100644 src/geo_world.py create mode 100644 src/geo_world.pyc create mode 100644 src/moses.py create mode 100644 src/moses.pyc create mode 100644 src/nl_reweighter.py create mode 100644 src/nl_reweighter.pyc create mode 100644 src/query_comparer.py create mode 100644 src/query_comparer.pyc create mode 100644 src/slot_checker.pyc create mode 100644 src/smt_semparse_config.py create mode 100644 src/smt_semparse_config.pyc create mode 100644 src/smt_semparse_experiment.py create mode 100644 src/smt_semparse_experiment.pyc create mode 100644 src/srilm.py create mode 100644 src/srilm.pyc create mode 100644 src/util.py create mode 100644 src/util.pyc (limited to 'src') diff --git a/src/.config.py.swp b/src/.config.py.swp new file mode 100644 index 0000000..706de6a Binary files /dev/null and b/src/.config.py.swp differ diff --git a/src/.nl_reweighter.py.swp b/src/.nl_reweighter.py.swp new file mode 100644 index 0000000..2180681 Binary files /dev/null and b/src/.nl_reweighter.py.swp differ diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/__init__.pyc b/src/__init__.pyc new file mode 100644 index 0000000..cf88e29 Binary files /dev/null and b/src/__init__.pyc differ diff --git a/src/__pycache__/__init__.cpython-33.pyc b/src/__pycache__/__init__.cpython-33.pyc new file mode 100644 index 0000000..65e20ff Binary files /dev/null and 
class Config:
    """Flat key/value configuration merged from two YAML files.

    The settings file is read first and the dependencies file second; every
    top-level key of each document is stored via ``put``, so a key present in
    both files ends up with the value from the dependencies file (and a
    warning is logged).  Stored values are readable as attributes, e.g.
    ``config.experiment_dir``.
    """

    def __init__(self, settings_path, dependencies_path):
        """Load both YAML documents and merge their top-level keys.

        Args:
            settings_path: path of the YAML settings file.
            dependencies_path: path of the YAML dependencies file.
        """
        # safe_load instead of yaml.load: a bare yaml.load(stream) can build
        # arbitrary Python objects from the file and is rejected outright by
        # current PyYAML releases, which require an explicit Loader.
        with open(settings_path) as settings_file:
            settings = yaml.safe_load(settings_file)
        with open(dependencies_path) as dependencies_file:
            dependencies = yaml.safe_load(dependencies_file)

        self.entries = {}

        for config in (settings, dependencies):
            # An empty YAML document loads as None; treat it as "no keys".
            for key, value in (config or {}).items():
                self.put(key, value)

    def __hasattr__(self, key):
        """Return True if ``key`` is a stored entry.

        Python never calls this name implicitly; it is kept for callers that
        invoke it explicitly.  The builtin ``hasattr`` works through
        ``__getattr__`` below.
        """
        return key in self.__dict__.get('entries', ())

    def __getattr__(self, key):
        """Resolve attribute access that normal lookup could not satisfy.

        Raises:
            AttributeError: if ``key`` is not a stored entry.  AttributeError
                is still an ``Exception``, so existing ``except Exception``
                handlers keep working, while ``hasattr`` and three-argument
                ``getattr`` now behave as expected.
        """
        # Go through __dict__ so that a missing ``entries`` attribute (object
        # created without __init__) cannot re-enter __getattr__ forever.
        entries = self.__dict__.get('entries')
        if entries is None or key not in entries:
            raise AttributeError('No such key: %s' % key)
        return entries[key]

    def put(self, key, value):
        """Store ``value`` under ``key``, warning when a value is replaced."""
        if key in self.entries:
            logging.warning('changing value of %s', key)
        self.entries[key] = value

    def __repr__(self):
        """Return ``ClassName(N items)`` where N is the number of entries."""
        return '%s(%d items)' % (self.__class__.__name__, len(self.entries))

    def __str__(self):
        """Return the class name followed by one ``key: value`` line per
        entry, in sorted key order."""
        lines = ['%s:' % self.__class__.__name__]
        for key in sorted(self.entries):
            lines.append(' %s: %s' % (key, self.entries[key]))
        return '\n'.join(lines)
def main():
    """Write one Prolog evaluation goal per (reference, hypothesis) pair.

    Reads the hypothesis file named by ``sys.argv[1]`` and the reference file
    named by ``sys.argv[2]``, strips surrounding whitespace from every line,
    pairs the files line by line and writes, for pair number ``i`` (counting
    from 0), a goal of the form::

        catch(call_with_time_limit(1,eval([i,0.000000,REF,HYP])),E,writeln('error')).

    to standard output, each followed by an empty line.  The second list
    element is always the constant 0 formatted with ``%f``.
    """
    hyp_path = sys.argv[1]
    ref_path = sys.argv[2]

    # Context managers close both files even if reading fails.
    with open(hyp_path) as hyp_file:
        hyps = [line.strip() for line in hyp_file]
    with open(ref_path) as ref_file:
        refs = [line.strip() for line in ref_file]

    if len(refs) != len(hyps):
        # zip() below stops at the shorter input; say so instead of silently
        # emitting fewer goals.  stderr keeps the generated program clean.
        sys.stderr.write(
            'warning: %d references but %d hypotheses; extra lines ignored\n'
            % (len(refs), len(hyps)))

    goal = ("catch(call_with_time_limit(1,eval([%d,%f,%s,%s])),"
            "E,writeln('error')).\n")
    for i, (ref, hyp) in enumerate(zip(refs, hyps)):
        # The goal text already ends in a newline; the extra '\n' reproduces
        # the blank separator line the print statement used to add.
        # sys.stdout.write works on both Python 2 and Python 3.
        sys.stdout.write(goal % (i, 0, ref, hyp) + '\n')
+112 0.0 n +113 0.0 n +114 0.0 n +115 0.0 n +116 0.0 n +117 0.0 n +118 0.0 n +119 0.0 n +120 0.0 n +121 0.0 n +122 0.0 n +123 0.0 n +124 0.0 n +125 0.0 n +126 0.0 n +127 0.0 n +128 0.0 n +129 0.0 n +130 0.0 n +131 0.0 n +132 0.0 y +133 0.0 y +134 0.0 n +135 0.0 y +136 0.0 y +137 0.0 n +138 0.0 n +139 0.0 n +140 0.0 n +141 0.0 n +142 0.0 n +143 0.0 n +144 0.0 n +145 0.0 n +146 0.0 y +147 0.0 n +148 0.0 n +149 0.0 n +150 0.0 n +151 0.0 n +152 0.0 n +153 0.0 n +154 0.0 n +155 0.0 n +156 0.0 n +157 0.0 n +158 0.0 n +159 0.0 n +160 0.0 n +161 0.0 n +162 0.0 n +163 0.0 n +164 0.0 n +165 0.0 n +166 0.0 n +167 0.0 n +168 0.0 n +169 0.0 n +170 0.0 n +171 0.0 n +172 0.0 n +173 0.0 n +174 0.0 n +175 0.0 n +176 0.0 n +177 0.0 n +178 0.0 n +179 0.0 n +180 0.0 n +181 0.0 y +182 0.0 n +183 0.0 n +184 0.0 n +185 0.0 n +186 0.0 n +187 0.0 n +188 0.0 n +189 0.0 n +190 0.0 n +191 0.0 n +192 0.0 n +193 0.0 n +194 0.0 n +195 0.0 n +196 0.0 n +197 0.0 n +198 0.0 n +199 0.0 n +200 0.0 n +201 0.0 n +202 0.0 n +203 0.0 n +204 0.0 n +205 0.0 n +206 0.0 n +207 0.0 n +208 0.0 n +209 0.0 n +210 0.0 n +211 0.0 n +212 0.0 n +213 0.0 y +214 0.0 y +215 0.0 n +216 0.0 n +217 0.0 n +218 0.0 n +219 0.0 n +220 0.0 n +221 0.0 n +222 0.0 n +223 0.0 n +224 0.0 n +225 0.0 n +226 0.0 n +227 0.0 n +228 0.0 n +229 0.0 n +230 0.0 n +231 0.0 n +232 0.0 n +233 0.0 n +234 0.0 n +235 0.0 n +236 0.0 n +237 0.0 n +238 0.0 n +239 0.0 n +240 0.0 n +241 0.0 n +242 0.0 n +243 0.0 n +244 0.0 n +245 0.0 n +246 0.0 n +247 0.0 n +248 0.0 y +249 0.0 y +250 0.0 n +251 0.0 n +252 0.0 n +253 0.0 n +254 0.0 n +255 0.0 n +256 0.0 n +257 0.0 n +258 0.0 n +259 0.0 n +260 0.0 n +261 0.0 n +262 0.0 n +263 0.0 n +264 0.0 n +265 0.0 n +266 0.0 n +267 0.0 n +268 0.0 n +269 0.0 n +270 0.0 n +271 0.0 y +272 0.0 n +273 0.0 n +274 0.0 n +275 0.0 n +276 0.0 n +277 0.0 n +278 0.0 n +279 0.0 y diff --git a/src/eval_bevan/test.pl b/src/eval_bevan/test.pl new file mode 100644 index 0000000..837a66d --- /dev/null +++ b/src/eval_bevan/test.pl @@ 
-0,0 +1,560 @@ +catch(call_with_time_limit(1,eval([0,0.000000,answer(river(loc_2(stateid('colorado')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([1,0.000000,answer(count(state(low_point_2(lower_2(low_point_1(stateid('alabama'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([2,0.000000,answer(count(river(loc_2(stateid('california'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([3,0.000000,answer(state(next_to_2(stateid('utah')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([4,0.000000,answer(elevation_1(placeid('mount mckinley'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([5,0.000000,answer(elevation_1(highest(place(loc_2(countryid('usa')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([6,0.000000,answer(elevation_1(highest(place(loc_2(stateid('alabama')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([7,0.000000,answer(size(stateid('alaska'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([8,0.000000,answer(size(stateid('texas'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([9,0.000000,answer(len(river(riverid('colorado')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([10,0.000000,answer(len(river(riverid('delaware')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([11,0.000000,answer(len(longest(river(loc_2(stateid('california')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([12,0.000000,answer(len(longest(river(loc_2(countryid('usa')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([13,0.000000,answer(len(river(riverid('north platte')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([14,0.000000,answer(len(river(riverid('ohio')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([15,0.000000,answer(count(capital(loc_2(stateid('rhode island'))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([16,0.000000,answer(count(city(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([17,0.000000,answer(population_1(largest(city(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([18,0.000000,answer(count(river(riverid('colorado')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([19,0.000000,answer(population_1(cityid('detroit', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([20,0.000000,answer(population_1(cityid('houston', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([21,0.000000,answer(population_1(cityid('minneapolis', 'mn'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([22,0.000000,answer(population_1(stateid('mississippi'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([23,0.000000,answer(population_1(stateid('rhode island'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([24,0.000000,answer(population_1(largest(city(loc_2(state(stateid('new york'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([25,0.000000,answer(population_1(capital(loc_2(stateid('texas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([26,0.000000,answer(population_1(countryid('usa'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([27,0.000000,answer(population_1(cityid('austin', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([28,0.000000,answer(population_1(stateid('utah'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([29,0.000000,answer(population_1(stateid('texas'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([30,0.000000,answer(count(river(loc_2(stateid('iowa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([31,0.000000,answer(count(river(loc_2(most(state(loc_1(river(all)))))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([32,0.000000,answer(count(river(loc_2(stateid('colorado'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([33,0.000000,answer(count(state(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([34,0.000000,answer(count(state(all))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([35,0.000000,answer(count(state(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([36,0.000000,answer(count(state(next_to_2(stateid('iowa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([37,0.000000,answer(count(state(next_to_2(largest_one(population_1(state(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([38,0.000000,answer(count(exclude(state(all), loc_1(river(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([39,0.000000,answer(count(state(next_to_1(stateid('tennessee'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([40,0.000000,answer(count(state(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([41,0.000000,answer(count(state(loc_1(place(higher_2(highest(place(loc_2(state(loc_1(largest(capital(city(loc_2(countryid('usa')))))))))))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([42,0.000000,answer(population_1(stateid('texas'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([43,0.000000,answer(elevation_1(placeid('mount mckinley'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([44,0.000000,answer(elevation_1(highest(place(loc_2(stateid('montana')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([45,0.000000,answer(count(state(next_to_1(stateid('iowa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([46,0.000000,answer(major(river(loc_2(stateid('florida'))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([47,0.000000,answer(population_1(cityid('boulder', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([48,0.000000,answer(count(state(next_to_2(stateid('iowa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([49,0.000000,answer(river(loc_2(stateid('new york')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([50,0.000000,answer(state(loc_1(cityid('san antonio', _)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([51,0.000000,answer(city(loc_2(stateid('texas')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([52,0.000000,answer(state(traverse_1(longest(river(loc_2(stateid('texas'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([53,0.000000,answer(river(loc_2(stateid('texas')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([54,0.000000,answer(capital(city(loc_2(stateid('texas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([55,0.000000,answer(capital(loc_2(state(next_to_2(stateid('missouri')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([56,0.000000,answer(city(loc_2(state(traverse_1(riverid('mississippi')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([57,0.000000,answer(city(loc_2(state(loc_1(highest(place(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([58,0.000000,answer(highest(place(loc_2(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([59,0.000000,answer(major(city(loc_2(stateid('alabama'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([60,0.000000,answer(major(city(loc_2(stateid('alaska'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([61,0.000000,answer(major(city(loc_2(stateid('new york'))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([62,0.000000,answer(major(city(loc_2(state(traverse_1(riverid('mississippi'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([63,0.000000,answer(major(city(loc_2(state(stateid('california')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([64,0.000000,answer(major(city(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([65,0.000000,answer(major(city(loc_2(stateid('vermont'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([66,0.000000,answer(major(river(loc_2(stateid('ohio'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([67,0.000000,answer(density_1(state(all))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([68,0.000000,answer(population_1(stateid('mississippi'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([69,0.000000,answer(population_1(state(traverse_1(river(riverid('mississippi')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([70,0.000000,answer(population_1(state(traverse_1(riverid('mississippi'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([71,0.000000,answer(population_1(state(next_to_2(stateid('texas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([72,0.000000,answer(population_1(major(city(loc_2(stateid('texas')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([73,0.000000,answer(river(loc_2(stateid('alaska')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([74,0.000000,answer(largest_one(population_1(city(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([75,0.000000,answer(largest_one(density_1(city(loc_2(countryid('usa')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([76,0.000000,answer(capital(loc_2(state(loc_1(lowest(place(all))))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([77,0.000000,answer(area_1(stateid('florida'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([78,0.000000,answer(area_1(stateid('ohio'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([79,0.000000,answer(area_1(state(stateid('texas')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([80,0.000000,answer(area_1(stateid('wisconsin'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([81,0.000000,answer(largest(city(loc_2(state(loc_1(river(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([82,0.000000,answer(largest(capital(city(loc_2(countryid('usa')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([83,0.000000,answer(largest(city(loc_2(stateid('kansas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([84,0.000000,answer(largest(city(loc_2(stateid('louisiana'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([85,0.000000,answer(capital(loc_2(stateid('california')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([86,0.000000,answer(capital(loc_2(stateid('colorado')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([87,0.000000,answer(capital(loc_2(stateid('illinois')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([88,0.000000,answer(capital(loc_2(stateid('iowa')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([89,0.000000,answer(capital(loc_2(stateid('massachusetts')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([90,0.000000,answer(capital(loc_2(stateid('new jersey')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([91,0.000000,answer(capital(loc_2(stateid('new york')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([92,0.000000,answer(capital(loc_2(stateid('north dakota')))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([93,0.000000,answer(capital(loc_2(stateid('ohio')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([94,0.000000,answer(capital(loc_2(state(loc_1(city(cityid('durham', _))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([95,0.000000,answer(capital(loc_2(state(stateid('florida'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([96,0.000000,answer(capital(loc_2(smallest(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([97,0.000000,answer(capital(loc_2(largest_one(population_1(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([98,0.000000,answer(capital(loc_2(largest_one(density_1(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([99,0.000000,answer(capital(loc_2(state(loc_1(longest(river(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([100,0.000000,answer(capital(loc_2(largest_one(population_1(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([101,0.000000,answer(sum(area_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([102,0.000000,answer(density_1(stateid('new york'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([103,0.000000,answer(high_point_1(stateid('wyoming'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([104,0.000000,answer(highest(place(loc_2(stateid('texas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([105,0.000000,answer(highest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([106,0.000000,answer(highest(mountain(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([107,0.000000,answer(highest(mountain(loc_2(countryid('usa'))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([108,0.000000,answer(highest(place(loc_2(stateid('delaware'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([109,0.000000,answer(highest(place(loc_2(stateid('iowa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([110,0.000000,answer(highest(place(loc_2(stateid('maine'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([111,0.000000,answer(highest(place(loc_2(stateid('montana'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([112,0.000000,answer(highest(place(loc_2(stateid('nevada'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([113,0.000000,answer(highest(place(loc_2(state(next_to_2(stateid('georgia'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([114,0.000000,answer(highest(place(loc_2(state(loc_1(capital(cityid('austin', _)))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([115,0.000000,answer(highest(place(loc_2(state(next_to_2(stateid('colorado'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([116,0.000000,answer(highest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([117,0.000000,answer(highest(place(loc_2(stateid('virginia'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([118,0.000000,answer(highest(place(loc_2(smallest_one(density_1(state(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([119,0.000000,answer(highest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([120,0.000000,answer(largest(capital(city(loc_2(countryid('usa')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([121,0.000000,answer(largest(city(loc_2(stateid('california'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([122,0.000000,answer(largest(city(loc_2(stateid('rhode island'))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([123,0.000000,answer(largest(city(loc_2(smallest(state(traverse_1(riverid('mississippi')))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([124,0.000000,answer(largest(city(loc_2(smallest(state(loc_2(countryid('usa')))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([125,0.000000,answer(longest(river(loc_2(state(stateid('washington')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([126,0.000000,answer(largest(state(next_to_2(stateid('arkansas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([127,0.000000,answer(largest(state(next_to_2(stateid('texas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([128,0.000000,answer(smallest_one(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([129,0.000000,answer(len(river(riverid('colorado')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([130,0.000000,answer(len(longest(river(traverse_2(stateid('texas')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([131,0.000000,answer(len(river(riverid('mississippi')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([132,0.000000,answer(len(most(river(traverse_2(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([133,0.000000,answer(len(most(river(traverse_2(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([134,0.000000,answer(longest(river(loc_2(stateid('florida'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([135,0.000000,answer(longest(river(loc_2(largest(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([136,0.000000,answer(longest(river(loc_2(most(state(loc_1(major(city(all))))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([137,0.000000,answer(longest(river(loc_2(state(next_to_2(stateid('nebraska'))))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([138,0.000000,answer(longest(river(traverse_2(state(next_to_2(stateid('indiana'))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([139,0.000000,answer(lowest(place(loc_2(stateid('arkansas'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([140,0.000000,answer(lowest(place(loc_2(stateid('massachusetts'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([141,0.000000,answer(lowest(place(loc_2(stateid('mississippi'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([142,0.000000,answer(lowest(place(loc_2(stateid('nebraska'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([143,0.000000,answer(lowest(place(loc_2(state(stateid('california')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([144,0.000000,answer(lowest(place(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([145,0.000000,answer(lowest(place(loc_2(state(traverse_1(river(riverid('colorado')))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([146,0.000000,answer(highest(place(loc_2(cityid('san francisco', _))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([147,0.000000,answer(largest_one(density_1(state(loc_2(countryid('usa')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([148,0.000000,answer(largest_one(population_1(city(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([149,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([150,0.000000,answer(largest_one(population_1(state(traverse_1(riverid('mississippi')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([151,0.000000,answer(density_1(stateid('maine'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([152,0.000000,answer(density_1(largest(state(all)))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([153,0.000000,answer(population_1(stateid('alaska'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([154,0.000000,answer(population_1(cityid('boulder', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([155,0.000000,answer(population_1(cityid('erie', 'pa'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([156,0.000000,answer(population_1(stateid('hawaii'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([157,0.000000,answer(population_1(cityid('houston', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([158,0.000000,answer(population_1(stateid('maryland'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([159,0.000000,answer(population_1(stateid('new mexico'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([160,0.000000,answer(population_1(city(cityid('new york', _)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([161,0.000000,answer(population_1(cityid('san antonio', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([162,0.000000,answer(population_1(cityid('tempe', 'az'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([163,0.000000,answer(population_1(largest(city(loc_2(largest_one(area_1(state(all)))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([164,0.000000,answer(smallest(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([165,0.000000,answer(population_1(most(state(next_to_2(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([166,0.000000,answer(population_1(largest_one(density_1(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([167,0.000000,answer(population_1(cityid('tucson', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([168,0.000000,answer(population_1(stateid('utah'))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([169,0.000000,answer(population_1(stateid('washington'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([170,0.000000,answer(smallest(city(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([171,0.000000,answer(smallest(city(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([172,0.000000,answer(smallest(state(next_to_2(stateid('wyoming'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([173,0.000000,answer(smallest(most(state(next_to_2(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([174,0.000000,answer(smallest(state(traverse_1(river(riverid('mississippi')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([175,0.000000,answer(largest_one(area_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([176,0.000000,answer(smallest_one(area_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([177,0.000000,answer(sum(len(river(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([178,0.000000,answer(len(riverid('mississippi'))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([179,0.000000,answer(major(city(loc_2(stateid('pennsylvania'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([180,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([181,0.000000,answer(river(traverse_2(most(state(loc_1(city(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([182,0.000000,answer(river(traverse_2(most(state(next_to_2(state(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([183,0.000000,answer(river(loc_2(state(next_to_2(stateid('texas')))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([184,0.000000,answer(river(loc_2(stateid('texas')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([185,0.000000,answer(river(loc_2(stateid('texas')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([186,0.000000,answer(river(traverse_2(stateid('new york')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([187,0.000000,answer(river(traverse_2(most(state(next_to_2(state(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([188,0.000000,answer(largest_one(population_1(state(next_to_2(stateid('nevada')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([189,0.000000,answer(state(next_to_2(stateid('new york')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([190,0.000000,answer(most(state(next_to_2(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([191,0.000000,answer(state(loc_1(highest(place(loc_2(state(traverse_1(river(riverid('colorado')))))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([192,0.000000,answer(largest_one(area_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([193,0.000000,answer(state(loc_1(largest(capital(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([194,0.000000,answer(state(loc_1(longest(river(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([195,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([196,0.000000,answer(most(state(loc_1(major(city(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([197,0.000000,answer(most(state(loc_1(river(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([198,0.000000,answer(smallest_one(population_1(state(all)))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([199,0.000000,answer(state(loc_1(cityid('austin', _)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([200,0.000000,answer(state(loc_1(cityid('miami', _)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([201,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([202,0.000000,answer(state(next_to_2(stateid('arizona')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([203,0.000000,answer(state(next_to_2(stateid('florida')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([204,0.000000,answer(state(next_to_2(stateid('indiana')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([205,0.000000,answer(state(next_to_2(stateid('michigan')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([206,0.000000,answer(state(next_to_2(stateid('montana')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([207,0.000000,answer(state(next_to_2(stateid('new jersey')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([208,0.000000,answer(state(next_to_2(state(next_to_2(stateid('mississippi')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([209,0.000000,answer(state(next_to_2(state(traverse_1(riverid('ohio')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([210,0.000000,answer(intersection(state(next_to_2(stateid('texas'))), loc_1(major(river(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([211,0.000000,answer(state(next_to_2(largest_one(population_1(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([212,0.000000,answer(state(next_to_2(most(state(next_to_2(state(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([213,0.000000,answer(state(next_to_2(most(state(loc_1(city(all))))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([214,0.000000,answer(state(next_to_2(most(state(loc_1(major(city(all)))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([215,0.000000,answer(state(next_to_2(smallest_one(area_1(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([216,0.000000,answer(state(loc_1(major(river(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([217,0.000000,answer(state(traverse_1(river(riverid('delaware'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([218,0.000000,answer(state(traverse_1(river(riverid('mississippi'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([219,0.000000,answer(state(traverse_1(riverid('missouri')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([220,0.000000,answer(state(traverse_1(river(riverid('ohio'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([221,0.000000,answer(state(loc_1(city(cityid('dallas', _))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([222,0.000000,answer(state(loc_1(city(cityid('plano', _))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([223,0.000000,answer(state(loc_1(city(cityid('portland', _))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([224,0.000000,answer(state(loc_1(city(cityid('rochester', _))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([225,0.000000,answer(state(loc_1(city(cityid('salt lake city', _))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([226,0.000000,answer(state(next_to_2(stateid('kentucky')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([227,0.000000,answer(loc_1(mountain(all))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([228,0.000000,answer(loc_1(cityid('dallas', _))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([229,0.000000,answer(loc_1(cityid('portland', _))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([230,0.000000,answer(loc_1(river(riverid('chattahoochee')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([231,0.000000,answer(highest(mountain(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([232,0.000000,answer(highest(place(loc_2(stateid('hawaii'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([233,0.000000,answer(lowest(place(loc_2(stateid('maryland'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([234,0.000000,answer(largest_one(population_1(city(loc_2(stateid('new mexico')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([235,0.000000,answer(loc_1(smallest(city(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([236,0.000000,answer(largest_one(population_1(city(loc_2(stateid('california')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([237,0.000000,answer(density_1(state(traverse_1(longest(river(loc_2(countryid('usa')))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([238,0.000000,answer(highest(exclude(mountain(all), loc_2(stateid('alaska'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([239,0.000000,answer(longest(river(loc_2(countryid('usa'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([240,0.000000,answer(smallest(state(all))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([241,0.000000,answer(largest_one(population_1(state(next_to_2(stateid('pennsylvania')))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([242,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([243,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([244,0.000000,answer(most(river(traverse_2(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([245,0.000000,answer(exclude(river(all), traverse_2(stateid('texas')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([246,0.000000,answer(exclude(river(all), traverse_2(countryid('usa')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([247,0.000000,answer(river(traverse_2(state(next_to_2(state(loc_1(capital(cityid('austin', _))))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([248,0.000000,answer(river(traverse_2(fewest(state(loc_1(city(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([249,0.000000,answer(state(next_to_2(stateid('hawaii')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([250,0.000000,answer(most(state(next_to_2(state(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([251,0.000000,answer(smallest_one(population_1(capital(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([252,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([253,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([254,0.000000,answer(state(loc_1(highest(place(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([255,0.000000,answer(state(loc_1(highest(place(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([256,0.000000,answer(largest_one(density_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([257,0.000000,answer(state(loc_1(lowest(place(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([258,0.000000,answer(state(loc_1(lowest(place(loc_2(next_to_2(stateid('idaho')))))))),0])),E,writeln('error')). 
+ +catch(call_with_time_limit(1,eval([259,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([260,0.000000,answer(most(state(loc_1(major(city(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([261,0.000000,answer(most(state(loc_1(major(river(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([262,0.000000,answer(most(state(traverse_1(major(river(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([263,0.000000,answer(largest_one(population_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([264,0.000000,answer(most(state(loc_1(river(all))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([265,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([266,0.000000,answer(smallest_one(density_1(state(all)))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([267,0.000000,answer(state(loc_1(placeid('mount mckinley')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([268,0.000000,answer(smallest(state(all))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([269,0.000000,answer(state(next_to_2(stateid('illinois')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([270,0.000000,answer(state(next_to_2(stateid('kentucky')))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([271,0.000000,answer(state(next_to_2(river(riverid('missouri'))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([272,0.000000,answer(state(next_to_2(smallest_one(area_1(state(all)))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([273,0.000000,answer(state(loc_1(largest(city(capital_1(state(all))))))),0])),E,writeln('error')). + +catch(call_with_time_limit(1,eval([274,0.000000,answer(state(traverse_1(river(riverid('chattahoochee'))))),0])),E,writeln('error')). 
class Evaluator:
    """Compute precision, recall and F1 from ``eval.scored`` files.

    Each line of an ``eval.scored`` file is either the single word
    ``empty`` or ``<tag> <score>`` where ``<tag>`` is ``yes`` or ``no``.
    """

    # Number of fold directories (0 .. NUM_DEV_FOLDS - 1) averaged by
    # run() when config.run == 'dev'.
    NUM_DEV_FOLDS = 10

    def __init__(self, config):
        self.config = config

    def run(self):
        """Score the configured run and print precision, recall and F1.

        * ``debug``: scores ``<work_dir>/1`` only.
        * ``dev``:   averages the scores of ``<work_dir>/0`` .. ``/9``.
        * ``test``:  scores ``<work_dir>`` itself.

        Raises:
            ValueError: if ``config.run`` is none of the modes above
                (previously this surfaced as an unbound-local error).
        """
        mode = self.config.run
        if mode == 'debug':
            s_p, s_r, s_f = self.score('%s/1' % self.config.work_dir)
        elif mode == 'dev':
            totals = [0.0, 0.0, 0.0]
            for i in range(self.NUM_DEV_FOLDS):
                fold = self.score('%s/%d' % (self.config.work_dir, i))
                totals = [t + s for t, s in zip(totals, fold)]
            s_p, s_r, s_f = [t / self.NUM_DEV_FOLDS for t in totals]
        elif mode == 'test':
            s_p, s_r, s_f = self.score(self.config.work_dir)
        else:
            raise ValueError('unknown run mode: %r' % (mode,))

        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('p: %f\nr: %f\nf: %f' % (s_p, s_r, s_f))

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 if undefined."""
        if not denominator:
            return 0.0
        return 1.0 * numerator / denominator

    def score(self, experiment_dir):
        """Return ``(precision, recall, f1)`` for one experiment directory.

        Reads ``<experiment_dir>/eval.scored``.  Every line counts towards
        the recall denominator; ``yes`` lines are true positives and ``no``
        lines are false positives.  Ratios whose denominator is zero (empty
        file, no answered examples, no correct answers) are reported as 0.0
        instead of raising ZeroDivisionError.

        Raises:
            ValueError: if a non-``empty`` line is not ``<tag> <score>``
                with a numeric score.
        """
        tp = 0
        fp = 0
        count = 0
        with open('%s/eval.scored' % (experiment_dir)) as result_file:
            for line in result_file:
                count += 1
                entry = line.strip()
                if entry == 'empty':
                    continue
                tag, score = entry.split()
                # The score is not used in the metrics; converting it only
                # validates the line format, as the original code did.
                float(score)
                if tag == 'yes':
                    tp += 1
                elif tag == 'no':
                    fp += 1

        p = self._ratio(tp, tp + fp)
        r = self._ratio(tp, count)
        f = self._ratio(2.0 * p * r, p + r)

        return (p, r, f)
class IdStemmer:
    """Identity stemmer: ``stem`` returns every word unchanged."""

    def stem(self, word):
        return word


class Extractor:
    """Split a corpus into the train/tune/test files of an experiment.

    ``run`` dispatches on ``config.corpus`` ('geo', 'robo' or 'atis') and
    writes, into ``config.experiment_dir``, parallel natural-language
    (``.nl``), linearised meaning-representation (``.mrl``) and functional
    (``.fun``) files.  Linearisation is done by ``util.fun_to_mrl``, which
    is defined outside this class.
    """

    # Number of times each name/noun-phrase pair is repeated in the
    # training files by run_robo and run_geo.
    NP_WEIGHT = 50

    # Output files (relative to config.experiment_dir) every corpus writes.
    _OUTPUT_NAMES = (
        'train.nl', 'train.nl.lm', 'train.np.nl',
        'train.mrl', 'train.mrl.lm', 'train.np.mrl',
        'train.fun',
        'tune.nl', 'tune.mrl',
        'test.nl', 'test.mrl', 'test.fun',
    )

    def __init__(self, config):
        self.config = config
        # Always define a stemmer so that preprocess_nl can never hit a
        # missing attribute; the language-specific stemmers are only
        # installed when stemming is enabled for a supported language.
        self.stemmer = IdStemmer()
        if config.stem:
            if config.lang == 'en':
                self.stemmer = PorterStemmer()
            elif config.lang == 'de':
                self.stemmer = GermanStemmer()

    def run(self):
        """Run the extraction routine matching ``config.corpus``.

        Raises:
            ValueError: for an unsupported corpus name.
        """
        corpus = self.config.corpus
        if corpus == 'geo':
            self.run_geo()
        elif corpus == 'robo':
            self.run_robo()
        elif corpus == 'atis':
            self.run_atis()
        else:
            raise ValueError('unsupported corpus: %r' % (corpus,))

    # ------------------------------------------------------------------
    # output helpers
    # ------------------------------------------------------------------

    def _open_outputs(self, extra=()):
        """Open all output files for writing; return them keyed by name."""
        handles = {}
        try:
            for name in self._OUTPUT_NAMES + tuple(extra):
                handles[name] = open(
                    '%s/%s' % (self.config.experiment_dir, name), 'w')
        except Exception:
            # Do not leak the files that were already opened.
            self._close_outputs(handles)
            raise
        return handles

    @staticmethod
    def _close_outputs(handles):
        """Close every file in *handles* (a name -> file mapping)."""
        for handle in handles.values():
            handle.close()

    @staticmethod
    def _emit(handle, text):
        """Write *text* plus a newline, like ``print >>handle, text``."""
        handle.write('%s\n' % (text,))

    def _write_train(self, out, nl, mrl, fun):
        """Write one labelled training example to all training files."""
        self._emit(out['train.nl'], nl)
        self._emit(out['train.mrl'], mrl)
        self._emit(out['train.fun'], fun)
        self._emit(out['train.np.nl'], nl)
        self._emit(out['train.np.mrl'], mrl)
        # Equivalent to the original ``print >>f, '', text, ''``: the text
        # surrounded by single spaces.
        # NOTE(review): the empty strings look like placeholders for
        # sentence-boundary markers that may have been lost from this copy
        # of the source -- confirm against the upstream file.
        self._emit(out['train.nl.lm'], ' %s ' % (nl,))
        self._emit(out['train.mrl.lm'], ' %s ' % (mrl,))

    def _write_tune(self, out, nl, mrl):
        """Write one tuning example."""
        self._emit(out['tune.nl'], nl)
        self._emit(out['tune.mrl'], mrl)

    def _write_test(self, out, nl, mrl, fun):
        """Write one test example."""
        self._emit(out['test.nl'], nl)
        self._emit(out['test.mrl'], mrl)
        self._emit(out['test.fun'], fun)

    def _write_np(self, out, nl, mrl, copies=1):
        """Write a name pair *copies* times to the np and lm files."""
        for _ in range(copies):
            self._emit(out['train.np.nl'], nl)
            self._emit(out['train.np.mrl'], mrl)
            self._emit(out['train.nl.lm'], nl)
            self._emit(out['train.mrl.lm'], mrl)

    # ------------------------------------------------------------------
    # ATIS
    # ------------------------------------------------------------------

    def _parse_atis_line(self, line):
        """Turn one ``<nl> <=> <slots>`` line into ``(nl, mrl, fun)``."""
        nl, slot = line.split('<=>', 1)
        nl = self.preprocess_nl(nl)
        slot = self.replace_specials(slot)
        fun = self.slot_to_fun(slot)
        mrl = util.fun_to_mrl(fun, True)
        return nl, mrl, fun

    def run_atis(self):
        """Extract the ATIS corpus.

        In 'debug' mode ``atis-train.sem`` is split by line number modulo 4
        (0,1 -> train, 2 -> tune, 3 -> test).  In 'dev' mode tuning re-uses
        the training file and testing uses ``atis-dev.sem``; in 'test' mode
        tuning uses ``atis-dev.sem`` and testing uses ``atis-test.sem``.
        Finally every double-quoted name found in the files under
        ``<data_dir>/db`` is added once as a name pair.

        Raises:
            ValueError: for a run mode other than debug/dev/test.
        """
        data_dir = self.config.data_dir
        run = self.config.run
        train_path = '%s/atis-train.sem' % data_dir

        if run == 'dev':
            tune_path = train_path
            test_path = '%s/atis-dev.sem' % data_dir
        elif run == 'test':
            tune_path = '%s/atis-dev.sem' % data_dir
            test_path = '%s/atis-test.sem' % data_dir
        elif run != 'debug':
            raise ValueError('unsupported run mode for atis: %r' % (run,))

        out = self._open_outputs()
        try:
            if run == 'debug':
                with open(train_path) as data_file:
                    for counter, line in enumerate(data_file):
                        nl, mrl, fun = self._parse_atis_line(line)
                        bucket = counter % 4
                        if bucket in (0, 1):
                            self._write_train(out, nl, mrl, fun)
                        elif bucket == 2:
                            self._write_tune(out, nl, mrl)
                        else:
                            self._write_test(out, nl, mrl, fun)
            else:
                with open(train_path) as train_file:
                    for line in train_file:
                        nl, mrl, fun = self._parse_atis_line(line)
                        self._write_train(out, nl, mrl, fun)

                with open(tune_path) as tune_file:
                    for line in tune_file:
                        nl, mrl, fun = self._parse_atis_line(line)
                        self._write_tune(out, nl, mrl)

                with open(test_path) as test_file:
                    for line in test_file:
                        nl, mrl, fun = self._parse_atis_line(line)
                        self._write_test(out, nl, mrl, fun)

            for np_name in os.listdir('%s/db' % data_dir):
                np_path = '%s/db/%s' % (data_dir, np_name)
                with open(np_path) as np_file:
                    for line in np_file:
                        for name in re.findall(r'"([^"]+)"', line):
                            mrl = "%s" % self.replace_specials(name)
                            mrl = mrl.replace(' ', '_') + '@s'
                            self._write_np(out, name, mrl)
        finally:
            self._close_outputs(out)

    # ------------------------------------------------------------------
    # RoboCup
    # ------------------------------------------------------------------

    def run_robo(self):
        """Extract the RoboCup corpus from ``corpus.xml`` and ``names``.

        The tune set is deliberately aliased to the test fold, so every
        test example is written to both the tune and the test files (and
        is therefore never written to the training files).

        Raises:
            ValueError: if a rule line in the ``names`` file does not match
                the expected ``*n:(Num|Unum|Ident) -> ({ X })`` shape.
        """
        train_ids, tune_ids, test_ids = self.get_folds()
        tune_ids = test_ids

        out = self._open_outputs()
        try:
            corpus = ET.parse('%s/corpus.xml' % self.config.data_dir)
            corpus_root = corpus.getroot()

            for node in corpus_root.findall('example'):
                nl = node.find("nl[@lang='%s']" % self.config.lang).text
                nl = self.preprocess_nl(nl)
                clang = node.find("mrl[@lang='robocup-clang']").text
                clang = self.replace_specials(clang)
                fun = self.clang_to_fun(clang)
                mrl = util.fun_to_mrl(fun)
                eid = int(node.attrib['id'])

                if eid in tune_ids:
                    self._write_tune(out, nl, mrl)
                elif eid in train_ids:
                    self._write_train(out, nl, mrl, fun)
                # Independent of the chain above: test examples are also
                # tune examples (see docstring).
                if eid in test_ids:
                    self._write_test(out, nl, mrl, fun)

            rule_re = re.compile(
                r'^\*n:(Num|Unum|Ident) -> \(\{ (\S+) \}\)$')
            with open('%s/names' % self.config.data_dir) as nps_file:
                while True:
                    # Each record: one header line, the NL line (its first
                    # three characters are dropped), two skipped lines,
                    # then rule lines up to a blank line.
                    line = nps_file.readline()
                    if not line:
                        break
                    nl = nps_file.readline().strip()[3:]
                    nl = self.preprocess_nl(nl)
                    nps_file.readline()
                    nps_file.readline()
                    while True:
                        line = nps_file.readline().strip()
                        if line == '':
                            break
                        m = rule_re.match(line)
                        if m is None:
                            raise ValueError(
                                'unexpected line in names file: %r' % (line,))
                        mrl = m.group(2) + '@0'
                        self._write_np(out, nl, mrl, self.NP_WEIGHT)
        finally:
            self._close_outputs(out)

    # ------------------------------------------------------------------
    # Geoquery
    # ------------------------------------------------------------------

    def run_geo(self):
        """Extract the Geoquery corpus (``corpus-true.xml``, ``nps-true.xml``).

        Examples are visited in file order with a counter cycling 0..9; an
        example whose counter value is >= ``10 * config.lfrac`` and that
        belongs to the training fold is written to ``unlabeled.nl`` only.
        Noun phrases from ``nps-true.xml`` are filtered by
        ``config.np_type`` ('big' keeps multi-token MRLs, 'small' keeps
        single-token ones) and each is added NP_WEIGHT times.
        """
        train_ids, tune_ids, test_ids = self.get_folds()

        out = self._open_outputs(extra=('unlabeled.nl',))
        try:
            corpus = ET.parse('%s/corpus-true.xml' % self.config.data_dir)
            corpus_root = corpus.getroot()

            counter = 0
            for node in corpus_root.findall('example'):
                nl = node.find("nl[@lang='%s']" % self.config.lang).text
                nl = self.preprocess_nl(nl)
                fun = node.find("mrl[@lang='geo-funql']").text
                fun = self.preprocess_fun(fun)
                mrl = util.fun_to_mrl(fun)
                eid = int(node.attrib['id'])

                unlabel_this = (counter >= 10 * self.config.lfrac)
                counter = (counter + 1) % 10

                if eid in tune_ids:
                    self._write_tune(out, nl, mrl)
                elif eid in train_ids and not unlabel_this:
                    self._write_train(out, nl, mrl, fun)
                elif eid in train_ids and unlabel_this:
                    self._emit(out['unlabeled.nl'], nl)
                elif eid in test_ids:
                    self._write_test(out, nl, mrl, fun)

            nplist = ET.parse('%s/nps-true.xml' % self.config.data_dir)
            nplist_root = nplist.getroot()
            for node in nplist_root.findall('example'):
                fun = node.find("mrl[@lang='geo-funql']").text
                fun = self.preprocess_fun(fun)
                mrl = util.fun_to_mrl(fun)
                big_np = len(mrl.split()) > 1
                if (self.config.np_type == 'big' and not big_np) or \
                   (self.config.np_type == 'small' and big_np):
                    continue
                for nl_node in node.findall(
                        "nl[@lang='%s']" % self.config.lang):
                    nl = self.preprocess_nl(nl_node.text)
                    self._write_np(out, nl, mrl, self.NP_WEIGHT)
        finally:
            self._close_outputs(out)

    # ------------------------------------------------------------------
    # folds
    # ------------------------------------------------------------------

    @staticmethod
    def _read_ids(path):
        """Return the set of integers found one per line in *path*."""
        with open(path) as fold_file:
            return set(int(line) for line in fold_file)

    def get_folds(self):
        """Return ``(train_ids, tune_ids, test_ids)`` as sets of ints.

        ``tune_ids`` is empty when the configuration has no tune file
        (every debug/dev configuration).

        Raises:
            ValueError: for a corpus/run combination without fold files
                (previously an assertion or an unbound-name error).
        """
        corpus = self.config.corpus
        run = self.config.run
        data_dir = self.config.data_dir
        tune_ids_file = None

        if corpus == 'geo' and run in ('debug', 'dev'):
            train_ids_file = '%s/folds600/fold-%d-train.ids' \
                % (data_dir, self.config.fold)
            test_ids_file = '%s/folds600/fold-%d-test.ids' \
                % (data_dir, self.config.fold)
        elif corpus == 'geo' and run == 'test':
            train_ids_file = '%s/split880/fold-0-train.ids' % data_dir
            tune_ids_file = '%s/split880/fold-0-tune.ids' % data_dir
            test_ids_file = '%s/split880/fold-0-test.ids' % data_dir
        elif corpus == 'robo' and run in ('debug', 'dev'):
            train_ids_file = '%s/split-300/run-0/fold-%d/train-N270' \
                % (data_dir, self.config.fold)
            test_ids_file = '%s/split-300/run-0/fold-%d/test' \
                % (data_dir, self.config.fold)
        else:
            raise ValueError(
                'no folds defined for corpus %r with run mode %r'
                % (corpus, run))

        train_ids = self._read_ids(train_ids_file)
        tune_ids = self._read_ids(tune_ids_file) if tune_ids_file else set()
        test_ids = self._read_ids(test_ids_file)

        return train_ids, tune_ids, test_ids

    # ------------------------------------------------------------------
    # string preprocessing
    # ------------------------------------------------------------------

    def preprocess_nl(self, nl):
        """Normalise a natural-language sentence.

        Strips and lower-cases it, encodes it (ASCII when stemming German,
        UTF-8 otherwise; unencodable characters are dropped), removes a
        trailing ' .' or ' ?', and stems each token when ``config.stem``
        is set.
        """
        nl = nl.strip().lower()
        if self.config.stem and self.config.lang == 'de':
            # German stemmer can't handle UTF-8
            nl = nl.encode('ascii', 'ignore')
        else:
            nl = nl.encode('utf-8', 'ignore')
        if nl[-2:] in (' .', ' ?'):
            nl = nl[:-2]
        if self.config.stem:
            nl = ' '.join([self.stemmer.stem(tok) for tok in nl.split()])
        return nl

    def preprocess_fun(self, fun):
        """Return *fun* without leading/trailing whitespace."""
        return fun.strip()

    def replace_specials(self, mrl):
        """Replace '.', "'" and '/' by the tokens xxd, xxq and xxs."""
        mrl = mrl.replace('.', 'xxd')
        mrl = mrl.replace("'", 'xxq')
        mrl = mrl.replace('/', 'xxs')
        return mrl

    def clang_to_fun(self, clang):
        """Rewrite a parenthesised CLang expression in functional form.

        ``(name a b)`` becomes ``name(a,b)``, ``{1 2}`` becomes
        ``set(1,2)``, double quotes are removed and the first two
        arguments of ``definerule`` are dropped.
        """
        clang = clang.strip()
        clang = re.sub(r'\s+', ' ', clang)
        clang = re.sub(r'\{([\d|X]+( [\d|X]+)*)\}', r'(set \1)', clang)
        clang = re.sub(r'\(([\w.-]+) ?', r'\1(', clang)
        clang = self.strip_bare_parens(clang)
        clang = clang.replace('()', '')
        clang = clang.replace(' ', ',')
        clang = clang.replace('"', '')

        clang = re.sub(r'definerule\([^,]+,[^,]+,', r'definerule(', clang)

        return clang

    def strip_bare_parens(self, clang):
        """Remove the first parenthesis pair that opens right after a space.

        Only the first occurrence of ' (' is handled; the text between the
        opening parenthesis and its matching close is kept.  A string
        without ' (' is returned unchanged.

        Raises:
            ValueError: if that opening parenthesis is never closed (the
                original implementation looped forever in this case).
        """
        try:
            start = clang.index(' (') + 1
        except ValueError:
            return clang

        depth = 0
        end = None
        for pos in range(start + 1, len(clang)):
            ch = clang[pos]
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth < 0:
                    # This ')' closes the parenthesis at `start`.
                    end = pos
                    break
        if end is None:
            raise ValueError('unbalanced parentheses in %r' % (clang,))

        return clang[:start] + clang[start + 1:end] + clang[end + 1:]

    def slot_to_fun(self, slot):
        """Rewrite ``key="text"`` slot pairs as ``key('text')`` calls."""
        slot = slot.strip()
        slot = slot.replace('value', '"value"')
        slot = slot.replace('="', "('")
        slot = slot.replace('",', "'),")
        slot = slot.replace('")', "'))")
        slot = slot.replace("'value'", 'value')
        return slot
class Functionalizer:
    """Convert n-best linearised MRL hypotheses back to functional form.

    Tokens have the shape ``<name><util.ARITY_SEP><arity>``; the arity is
    a non-negative integer, ``util.ARITY_STR`` (a quoted string leaf) or
    ``util.ARITY_ANY`` (any number of arguments).  Those constants are
    defined in the ``util`` module, outside this class.
    """

    def __init__(self, config):
        self.config = config

    def run(self):
        """Write the first well-formed hypothesis of each example.

        Reads ``hyp.mrl.nbest`` (fields separated by '|||': example id,
        hypothesis, two score fields) and writes ``hyp.fun`` lines of the
        form ``<n> ||| <fun> ||| <scores>`` where ``<n>`` is the running
        number of the hypothesis group.  A group without any well-formed
        hypothesis produces no line, but still advances ``<n>``.
        """
        hypsets = []
        hypset = []
        last_eid = 0
        with open('%s/hyp.mrl.nbest' % self.config.experiment_dir) as hyp_file:
            for line in hyp_file:
                parts = line.split('|||')
                eid = int(parts[0])
                if eid != last_eid:
                    hypsets.append(hypset)
                    hypset = []
                    last_eid = eid
                score = parts[2] + ' ||| ' + parts[3].strip()
                hyp = parts[1].strip()
                hypset.append((hyp, score))
        hypsets.append(hypset)

        with open('%s/hyp.fun' % self.config.experiment_dir, 'w') as fun_file:
            for counter, hypset in enumerate(hypsets):
                # Hypotheses are tried in file order; only the first one
                # that functionalizes is kept.
                for hyp, score in hypset:
                    fun = self.functionalize(hyp)
                    if fun:
                        fun_file.write(
                            '%s ||| %s ||| %s\n' % (counter, fun, score))
                        break

    def functionalize(self, mrl):
        """Return the functional form of *mrl*, or None if ill-formed.

        Args:
            mrl: whitespace-separated tokens in prefix order.

        Returns:
            A string such as ``f(a,'some name')``, or None when the token
            sequence does not describe exactly one complete tree, when a
            token carries an invalid arity, or when one of the
            underscore heuristics at the end rejects the result.
        """
        stack = []
        r = []
        tokens = list(reversed(mrl.split()))

        while tokens:
            it = tokens.pop()
            arity_str = False
            if util.ARITY_SEP not in it:
                token = it
                arity = util.ARITY_STR
                logging.warning('unrecognized token: %s', it)
            else:
                # Split on the LAST separator only, so a name that itself
                # contains the separator no longer breaks the unpacking.
                token, arity = it.rsplit(util.ARITY_SEP, 1)

            if arity == util.ARITY_STR:
                arity = 0
                arity_str = True
            elif arity != util.ARITY_ANY:
                try:
                    arity = int(arity)
                except ValueError:
                    # Not a usable arity: reject the whole hypothesis.
                    return None

            if arity == util.ARITY_ANY or arity > 0:
                r.append(token)
                r.append('(')
                stack.append(arity)
            else:
                if arity != 0:
                    # Negative arity: reject instead of asserting.
                    return None
                if arity_str:
                    r.append("'%s'" % token.replace('_', ' '))
                else:
                    r.append(token)
                # A leaf was emitted: close every enclosing call that has
                # received its last argument, or emit a comma for the
                # innermost call that still expects more.
                while stack:
                    top = stack.pop()
                    if top == util.ARITY_ANY and tokens:
                        r.append(',')
                        stack.append(util.ARITY_ANY)
                        break
                    elif top != util.ARITY_ANY and top > 1:
                        r.append(',')
                        stack.append(top - 1)
                        break
                    else:
                        r.append(')')

                # The tree is complete but tokens remain.
                if not stack and tokens:
                    return None

        # Tokens ran out while some call still expects arguments.
        if stack:
            return None

        r = ''.join(r)

        # nasty hacks to fix misplaced _
        if '(_' in r:
            return None
        if ',_' in r and not ('cityid' in r):
            return None
        if '_),_)' in r:
            return None

        return r
in r): + return None + if '_),_)' in r: + return None + + return r diff --git a/src/functionalizer.pyc b/src/functionalizer.pyc new file mode 100644 index 0000000..c8d3295 Binary files /dev/null and b/src/functionalizer.pyc differ diff --git a/src/geo_world.py b/src/geo_world.py new file mode 100644 index 0000000..e5cd58e --- /dev/null +++ b/src/geo_world.py @@ -0,0 +1,108 @@ +import subprocess + +class GeoWorld: + + def __init__(self, config): + self.config = config + + def run(self): + self.write_queries() + + infile = open('%s/eval.pl' % self.config.experiment_dir) + log = open('%s/prolog.log' % self.config.experiment_dir, 'w') + outfile = open('%s/eval.out' % self.config.experiment_dir, 'w') + p = subprocess.Popen([self.config.prolog, + '-l', self.config.wasp_eval], + stdin=infile, + stdout=outfile, + stderr=log) + p.wait() + infile.close() + log.close() + outfile.close() + + self.extract_results() + + def write_queries(self): + + hyp_file = open('%s/hyp.fun' % self.config.experiment_dir) + ref_file = open('%s/test.fun' % self.config.experiment_dir) + query_file = open('%s/eval.pl' % self.config.experiment_dir, 'w') + + examples = [] + hyp_list = [] + last_idx = 0 + for hyp_line in hyp_file.readlines(): + idx, hyp, scoreparts, score = hyp_line.split('|||') + idx = int(idx) + hyp = hyp.strip() + if idx != last_idx: + examples.append(hyp_list) + for i in range(last_idx, idx-1): + examples.append([]) + hyp_list = [] + last_idx = idx + hyp_list.append((hyp,float(score))) + examples.append(hyp_list) + + i = 0 + for ref, hyp_list in zip(ref_file.readlines(), examples): + ref = ref.strip() + for hyp, score in hyp_list: + print >>query_file, \ + 'catch(call_with_time_limit(1,eval([%d,%f,%s,%s])),E,writeln(\'error\')).\n' \ + % (i, score, ref, hyp) + i += 1 + + hyp_file.close() + ref_file.close() + query_file.close() + + def extract_results(self): + + eval_file = open('%s/eval.out' % self.config.experiment_dir) + result_file = open('%s/eval.scored' % 
class Moses:
    """Drives the Moses training, tuning and decoding tools for one experiment.

    Tool paths and settings come from ``config``; all data lives under
    ``config.experiment_dir``.
    """

    def __init__(self, config):
        self.config = config

    def _run_logged(self, args, log_name):
        """Run ``args``, discarding stdout and sending stderr to ``log_name``."""
        log_path = '%s/%s' % (self.config.experiment_dir, log_name)
        # stdout goes to devnull rather than an unread PIPE: a child that
        # fills the pipe buffer would otherwise block forever under wait().
        with open(log_path, 'w') as log, open(os.devnull, 'w') as devnull:
            p = subprocess.Popen(args, stdout=devnull, stderr=log)
            p.wait()

    @staticmethod
    def _concatenate(sources, destination):
        """Write the bytes of each file in ``sources``, in order, to ``destination``."""
        with open(destination, 'wb') as out:
            for source in sources:
                with open(source, 'rb') as part:
                    for chunk in iter(lambda: part.read(65536), b''):
                        out.write(chunk)

    def run_train(self):
        """Train the translation model; stderr goes to ``train.log``."""
        exp_dir = self.config.experiment_dir
        args = [self.config.moses_train,
                '--root-dir', exp_dir,
                '--corpus', '%s/%s' % (exp_dir, self.config.train_name),
                '--f', self.config.src,
                '--e', self.config.tgt,
                '--lm', '0:3:%s/%s.arpa' % (exp_dir, self.config.tgt),
                '--alignment', self.config.symm]
        if self.config.model == 'hier':
            args += ['-hierarchical', '-glue-grammar']

        logging.info(' '.join(args))
        self._run_logged(args, 'train.log')

    def run_retrain(self):
        """Append the tuning data to the training data and train again.

        The original training files are kept with a ``.notune`` suffix, and
        the extraction files and table of the previous run are removed before
        training is re-run.
        """
        exp_dir = self.config.experiment_dir
        for ext in ('nl', 'mrl'):
            train = '%s/%s.%s' % (exp_dir, self.config.train_name, ext)
            moved = '%s.notune' % train
            tune = '%s/tune.%s' % (exp_dir, ext)
            os.rename(train, moved)
            self._concatenate([moved, tune], train)

        os.remove('%s/model/extract.inv.gz' % exp_dir)
        os.remove('%s/model/extract.gz' % exp_dir)
        if self.config.model == 'hier':
            os.remove('%s/model/rule-table.gz' % exp_dir)
        else:
            os.remove('%s/model/phrase-table.gz' % exp_dir)

        self.run_train()

    def parens_ok(self, line):
        """Return False if the MRL side of a table line completes a tree too early.

        The MRL side is the second `` ||| ``-separated field.  Only tokens
        whose second-to-last character is ``@`` are considered; their last
        character is the arity (``s`` counts as 0).  The line is rejected when
        a subtree is completed with nothing left open while labelled tokens
        still follow.  Sequences that merely end with open subtrees are
        accepted.
        """
        mrl_part = line.split(' ||| ')[1]
        # The length guard keeps one-character tokens from raising IndexError.
        arities = [t[-1] for t in mrl_part.split()
                   if len(t) >= 2 and t[-2] == '@']
        last = len(arities) - 1
        stack = []
        for pos, arity in enumerate(arities):
            assert arity != '*'
            count = 0 if arity == 's' else int(arity)
            if count > 0:
                stack.append(count)
                continue
            # A leaf fills one argument slot; unwind every call it completes.
            while stack:
                top = stack.pop()
                if top > 1:
                    stack.append(top - 1)
                    break
            if not stack and pos < last:
                return False
        return True

    def filter_phrase_table(self):
        """Rewrite the phrase/rule table keeping only lines ``parens_ok`` accepts.

        The unfiltered table is preserved as ``<name>-table.old.gz``.
        """
        table_name = 'phrase' if self.config.model == 'phrase' else 'rule'
        oldname = '%s/model/%s-table.gz' % (self.config.experiment_dir, table_name)
        newname = '%s/model/%s-table.old.gz' % (self.config.experiment_dir, table_name)
        os.rename(oldname, newname)

        with gzip.open(oldname, 'w') as filtered_table_f:
            with gzip.open(newname, 'r') as old_table_f:
                for line in old_table_f:
                    if self.parens_ok(line):
                        filtered_table_f.write(line)

    def run_tune(self):
        """Run the tuning script from inside the experiment directory; stderr goes to ``tune.log``."""
        exp_dir = self.config.experiment_dir
        args = [self.config.moses_tune,
                '%s/tune.%s' % (exp_dir, self.config.src),
                '%s/tune.%s' % (exp_dir, self.config.tgt)]
        if self.config.model == 'hier':
            args += [self.config.moses_decode_hier]
        else:
            args += [self.config.moses_decode_phrase]
        args += ['%s/model/moses.ini' % exp_dir,
                 '--mertdir', '%s/dist/bin' % self.config.moses]
        if self.config.model == 'hier':
            args += ['--filtercmd',
                     '%s/scripts/training/filter-model-given-input.pl --Hierarchical'
                     % self.config.moses]

        wd = os.getcwd()
        os.chdir(exp_dir)
        try:
            self._run_logged(args, 'tune.log')
        finally:
            # Restore the caller's working directory even if tuning fails.
            os.chdir(wd)

    def run_decode(self):
        """Decode ``test.<src>`` into ``hyp.<tgt>`` plus a distinct n-best list.

        Test runs use the tuned ``mert-work/moses.ini``; all other runs use
        the untuned ``model/moses.ini``.

        Raises:
            AssertionError: if ``config.model`` is neither 'phrase' nor 'hier'.
        """
        exp_dir = self.config.experiment_dir
        if self.config.model == 'phrase':
            args = [self.config.moses_decode_phrase]
        elif self.config.model == 'hier':
            args = [self.config.moses_decode_hier]
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError('unknown model type: %r' % (self.config.model,))

        if self.config.run == 'test':
            args += ['-f', '%s/mert-work/moses.ini' % exp_dir]
        else:
            args += ['-f', '%s/model/moses.ini' % exp_dir]

        args += ['-drop-unknown',
                 '-n-best-list', '%s/hyp.%s.nbest' % (exp_dir, self.config.tgt),
                 str(self.config.nbest), 'distinct',
                 '-threads', '3']

        with open('%s/test.%s' % (exp_dir, self.config.src)) as infile, \
                open('%s/hyp.%s' % (exp_dir, self.config.tgt), 'w') as outfile, \
                open('%s/decode.log' % exp_dir, 'w') as log:
            p = subprocess.Popen(args, stdin=infile, stdout=outfile, stderr=log)
            p.wait()
class Rule:
    """One synchronous grammar rule, identified by its position in the rule table."""

    MOSES_SYMBOL = '[X]'

    def __init__(self, rule_id, symbol, src, tgt, coindexing):
        self.rule_id = rule_id
        self.symbol = symbol
        self.src = src                  # source-side token list
        self.tgt = tgt                  # target-side token list
        self.coindexing = coindexing    # list of (source index, target index)
        self.degree = len(self.coindexing)

    @classmethod
    def from_moses(cls, rule_id, rule_table_line):
        """Build a rule from one five-field ``|||``-separated rule-table line.

        The last token of each side is dropped, ``[X][X]`` tokens are
        normalised to ``[X]``, and every ``i-j`` pair of the alignment field
        becomes a coindexing entry.
        """
        nl, mrl, scores, alignments, counts = re.split(r'\ ?\|\|\|\ ?',
                                                       rule_table_line.strip())
        nl = [cls.MOSES_SYMBOL if t == '[X][X]' else t for t in nl.split()[:-1]]
        mrl = [cls.MOSES_SYMBOL if t == '[X][X]' else t for t in mrl.split()[:-1]]
        coindexing = []
        for pair in alignments.split():
            i_s, i_t = pair.split('-')
            coindexing.append((int(i_s), int(i_t)))
        return cls(rule_id, cls.MOSES_SYMBOL, nl, mrl, coindexing)

    @classmethod
    def glue(cls, rule_id):
        """Return the binary glue rule ``[X] -> [X] [X]`` with the given id."""
        return cls(rule_id, cls.MOSES_SYMBOL,
                   [cls.MOSES_SYMBOL, cls.MOSES_SYMBOL],
                   [cls.MOSES_SYMBOL, cls.MOSES_SYMBOL],
                   [(0, 0), (1, 1)])

    def __eq__(self, other):
        return other.__class__ == self.__class__ and self.rule_id == other.rule_id

    def __ne__(self, other):
        # Python 2 does not derive != from ==; keep the two consistent.
        return not self.__eq__(other)

    def __hash__(self):
        return self.rule_id

    def __repr__(self):
        return 'Rule<(%d) %s -> %s : %s>' % (self.rule_id, self.symbol, self.src,
                                             self.tgt)


class NLReweighter:
    """Adds a rule-table feature counting how often each rule takes part in a full parse."""

    def __init__(self, config):
        self.config = config

    def run(self):
        """Parse the monolingual data and write the successful-parse counts into the model.

        ``unlabeled.nl`` is always used; ``train.nl`` is added unless
        ``config.ul_only`` is set.  Counts over all chart entries are also
        collected but only the successful-parse counts are written.
        """
        rules = self.load_rule_table()
        glue = Rule.glue(len(rules))
        all_counts = defaultdict(lambda: 0)
        successful_counts = defaultdict(lambda: 0)

        corpora = ['%s/unlabeled.nl' % self.config.experiment_dir]
        if not self.config.ul_only:
            corpora.append('%s/train.nl' % self.config.experiment_dir)
        for path in corpora:
            self._count_corpus(path, rules, glue, all_counts, successful_counts)

        self.write_updated_model(successful_counts)

    def _count_corpus(self, path, rules, glue, all_counts, successful_counts):
        """Parse every line of ``path`` and accumulate rule counts; unparsable lines are skipped."""
        with open(path) as corpus_f:
            for line in corpus_f:
                toks = line.strip().split()
                chart = self.parse(toks, rules, glue)
                # TODO is this an OOV issue?
                if not chart:
                    continue
                self.collect_all_counts(all_counts, chart)
                self.collect_successful_counts(successful_counts, chart, toks)

    def load_rule_table(self):
        """Read ``model/rule-table.gz`` into a dict mapping rule id (line number) to Rule."""
        rule_table_path = '%s/model/rule-table.gz' % self.config.experiment_dir
        rules = {}
        with gzip.open(rule_table_path) as rule_table_f:
            for line in rule_table_f:
                rule = Rule.from_moses(len(rules), line)
                rules[rule.rule_id] = rule
        return rules

    def write_updated_model(self, counts):
        """Append ``counts`` as an extra score column and patch ``moses.ini`` to match.

        Both files are written under temporary names and then renamed over
        the originals.
        """
        old_rule_table_path = '%s/model/rule-table.gz' % self.config.experiment_dir
        new_rule_table_path = '%s/model/rule-table-new.gz' % self.config.experiment_dir
        with gzip.open(old_rule_table_path) as old_rule_table_f:
            with gzip.open(new_rule_table_path, 'w') as new_rule_table_f:
                # Line number == rule id, as assigned in load_rule_table.
                for counter, line in enumerate(old_rule_table_f):
                    nl, mrl, scores, alignments, rule_counts = re.split(
                        r'\ ?\|\|\|\ ?', line.strip())
                    scores = '%s %f' % (scores, counts[counter])
                    newline = ' ||| '.join([nl, mrl, scores, alignments, rule_counts])
                    newline = re.sub(r'\s+', ' ', newline)
                    new_rule_table_f.write(newline + '\n')

        old_config_path = '%s/model/moses.ini' % self.config.experiment_dir
        new_config_path = '%s/model/moses-new.ini' % self.config.experiment_dir
        with open(old_config_path) as old_config_f:
            with open(new_config_path, 'w') as new_config_f:
                for line in old_config_f:
                    if line[-14:-1] == 'rule-table.gz':
                        # Overwrites the character at offset 6 with '6';
                        # presumably the feature count of the table entry
                        # -- TODO confirm against the moses.ini format in use.
                        line = line[:6] + '6' + line[7:]
                    new_config_f.write(line)
                    if line == '[weight-t]\n':
                        # Initial weight for the newly added score column.
                        new_config_f.write('0.20\n')

        os.rename(new_rule_table_path, old_rule_table_path)
        os.rename(new_config_path, old_config_path)

    def parse(self, sent, grammar, glue):
        """Fill a chart keyed by ``(start, span)`` with every rule match over ``sent``.

        After the bottom-up pass, the glue rule joins any prefix/suffix pair
        that together cover the sentence.  Returns the chart, or ``None`` when
        nothing spans the whole sentence (including an empty sentence).
        """
        n = len(sent)
        if n == 0:
            # No cell (0, 0) is ever created, so there is nothing to look up.
            return None

        chart = dict()
        for span in range(1, n + 1):
            for start in range(n + 1 - span):
                chart[start, span] = list()
                for rule in grammar.values():
                    chart[start, span] += self.match(sent, rule, start, span, chart)

        for i in range(1, n):
            left, right = chart[0, i], chart[i, n - i]
            if left and right:
                psets = [(c1, c2) for c1 in left for c2 in right]
                chart[0, n].append(Hypergraph(glue, psets))

        if not chart[0, n]:
            return None
        return chart

    def match(self, sent, rule, start, span, chart):
        """Return the hypergraph nodes for ``rule`` applied to ``sent[start:start+span]``.

        Handles rules with 0, 1 or 2 coindexed positions; nonterminal gaps are
        filled from chart cells whose label symbol equals the rule's source
        symbol at that position.

        Raises:
            AssertionError: for rules with more than two coindexed positions.
        """
        end = start + span

        if rule.degree == 0:
            if span != len(rule.src):
                return []
            if sent[start:end] != rule.src:
                return []
            return [Hypergraph(rule, [])]

        if rule.degree == 1:
            nt_pos = rule.coindexing[0][0]
            nt_start = start + nt_pos
            nt_span = span - len(rule.src) + 1
            if nt_span <= 0:
                return []
            if sent[start:nt_start] != rule.src[0:nt_pos]:
                return []
            if sent[nt_start + nt_span:end] != rule.src[nt_pos + 1:]:
                return []

            pointer_sets = [i for i in chart[nt_start, nt_span]
                            if i.label.symbol == rule.src[nt_pos]]
            if not pointer_sets:
                return []
            return [Hypergraph(rule, [(i,) for i in pointer_sets])]

        if rule.degree == 2:
            matches = []
            before_dist = rule.coindexing[0][0]
            before_2_dist = rule.coindexing[1][0]
            between_dist = before_2_dist - before_dist - 1
            nt_total_span = span - len(rule.src) + 2
            if nt_total_span <= 0:
                return []
            nt1_start = start + before_dist
            # Try every split of the free width between the two nonterminals.
            for nt1_span in range(1, nt_total_span):
                nt2_start = nt1_start + nt1_span + between_dist
                nt2_span = nt_total_span - nt1_span

                if sent[start:nt1_start] != rule.src[0:before_dist]:
                    continue
                if sent[nt1_start + nt1_span:nt2_start] != \
                        rule.src[before_dist + 1:before_2_dist]:
                    continue
                if sent[nt2_start + nt2_span:end] != rule.src[before_2_dist + 1:]:
                    continue

                pointer_sets_1 = [i for i in chart[nt1_start, nt1_span]
                                  if i.label.symbol == rule.src[before_dist]]
                pointer_sets_2 = [i for i in chart[nt2_start, nt2_span]
                                  if i.label.symbol == rule.src[before_2_dist]]
                if not (pointer_sets_1 and pointer_sets_2):
                    continue

                matches.append(Hypergraph(rule, list(itertools.product(
                    pointer_sets_1, pointer_sets_2))))
            return matches

        # Raised explicitly so the check survives `python -O`.
        raise AssertionError('unsupported rule degree: %d' % rule.degree)

    def collect_all_counts(self, counts, chart):
        """Add one to ``counts[rule_id]`` for every node in the chart."""
        for cell in chart.values():
            for node in cell:
                counts[node.label.rule_id] += 1

    def collect_successful_counts(self, counts, chart, sent):
        """Count only the nodes reachable from a node spanning the whole sentence."""
        used = set()
        for cell in chart[0, len(sent)]:
            self.mark_used(used, cell)
        for cell in chart.values():
            for node in cell:
                if node in used:
                    counts[node.label.rule_id] += 1

    def mark_used(self, used, cell):
        """Recursively add ``cell`` and everything reachable through its edges to ``used``."""
        for edge in cell.edges:
            for ccell in edge:
                if ccell not in used:
                    self.mark_used(used, ccell)
        used.add(cell)
class QueryComparer:
    """Scores hypotheses by exact string match against the reference queries."""

    def __init__(self, config):
        self.config = config

    def run(self):
        """Compare ``hyp.fun`` with ``test.fun`` and write ``eval.scored``.

        ``hyp.fun`` lines have four `` ||| ``-separated fields, the first two
        being the example index and the hypothesis.  For every reference line
        the output holds ``yes 0`` on an exact match with the stripped
        reference, ``no 0`` on a mismatch, or ``empty`` when no hypothesis
        exists for that index.
        """
        exp_dir = self.config.experiment_dir

        hyps = {}
        with open('%s/hyp.fun' % exp_dir) as hyp_file:
            for line in hyp_file:
                idx, hyp, scores1, scores2 = line.split(' ||| ')
                hyps[int(idx)] = hyp

        # Context managers close both handles even if a line is malformed.
        with open('%s/test.fun' % exp_dir) as ref_file, \
                open('%s/eval.scored' % exp_dir, 'w') as out_file:
            for i, line in enumerate(ref_file):
                if i not in hyps:
                    out_file.write('empty\n')
                elif hyps[i] == line.strip():
                    out_file.write('yes 0\n')
                else:
                    out_file.write('no 0\n')
class SMTSemparseConfig(Config):
    """Config extended with the paths and names derived from the base settings."""

    def __init__(self, settings_path, dependencies_path):
        Config.__init__(self, settings_path, dependencies_path)

        self.put('data_dir', '%s/data/%s' % (self.smt_semparse, self.corpus))

        self.train_name = 'train.np' if self.np else 'train'

        self.put('srilm_ngram_count', '%s/bin/%s/ngram-count' %
                 (self.srilm, self.srilm_arch))

        # Tools located relative to the Moses installation root.
        moses_tools = (
            ('moses_train', '%s/scripts/training/train-model.perl'),
            ('moses_tune', '%s/scripts/training/mert-moses.pl'),
            ('moses_decode_phrase', '%s/dist/bin/moses'),
            ('moses_decode_hier', '%s/dist/bin/moses_chart'),
            ('bleu_eval', '%s/scripts/generic/multi-bleu.perl'),
        )
        for key, template in moses_tools:
            self.put(key, template % self.moses)

        self.put('wasp_eval', '%s/data/geo-funql/eval/eval.pl' % self.wasp)

        # Generation (nlg) translates MRL to NL; parsing goes the other way.
        source, target = ('mrl', 'nl') if self.nlg else ('nl', 'mrl')
        self.put('src', source)
        self.put('tgt', target)


class SMTSemparseExperiment:
    """Runs the full pipeline: extract, train, optionally tune, decode, evaluate."""

    def __init__(self, config):
        self.config = config

    def run_fold(self, fold):
        """Run the pipeline for one fold in a new ``<work_dir>/<fold>`` directory."""
        logging.info('running fold %d', fold)
        self.config.put('fold', fold)
        target_dir = os.path.join(self.config.work_dir, str(fold))
        self.config.put('experiment_dir', target_dir)
        os.makedirs(target_dir)
        self.run()

    def run_split(self):
        """Run the pipeline once, directly in ``work_dir``."""
        logging.info('running split')
        self.config.put('experiment_dir', self.config.work_dir)
        self.run()

    def run(self):
        """Execute every stage for the configured experiment directory."""
        cfg = self.config
        logging.info('working dir is %s', cfg.experiment_dir)

        # get data
        logging.info('extracting data')
        Extractor(cfg).run()

        # learn lm
        logging.info('learning LM')
        SRILM(cfg).run_ngram_count()

        # train moses
        decoder = Moses(cfg)
        logging.info('training TM')
        decoder.run_train()

        # reweight using monolingual data
        if cfg.monolingual:
            logging.info('learning from monolingual data')
            NLReweighter(cfg).run()

        # filter disconnected rules
        if cfg.filter:
            logging.info('filtering disconnected rules')
            decoder.filter_phrase_table()

        # tune moses
        if cfg.run == 'test':
            logging.info('tuning TM')
            decoder.run_tune()

        if cfg.retrain:
            logging.info('retraining TM')
            decoder.run_retrain()

        # decode input
        logging.info('decoding')
        decoder.run_decode()

        if cfg.nlg:
            logging.info('running BLEU')
            BLEUScorer(cfg).run()
            return

        # functionalize
        logging.info('functionalizing')
        Functionalizer(cfg).run()

        # compare answers
        logging.info('executing queries')
        if cfg.corpus == 'geo':
            evaluator = GeoWorld(cfg)
        elif cfg.corpus == 'atis':
            evaluator = SlotChecker(cfg)
        else:
            evaluator = QueryComparer(cfg)
        evaluator.run()
class SRILM:
    """Wrapper around SRILM's ``ngram-count`` for building the target language model."""

    def __init__(self, config):
        self.config = config

    def run_ngram_count(self):
        """Train a trigram LM on ``train.<tgt>.lm`` and write ``<tgt>.arpa``; stderr goes to ``lm.log``."""
        exp_dir = self.config.experiment_dir
        args = [self.config.srilm_ngram_count,
                '-text', '%s/train.%s.lm' % (exp_dir, self.config.tgt),
                '-order', '3',
                '-no-sos',
                '-no-eos',
                '-lm', '%s/%s.arpa' % (exp_dir, self.config.tgt),
                '-unk']
        # The context manager closes the log even if Popen raises.
        with open('%s/lm.log' % exp_dir, 'w') as log:
            p = subprocess.Popen(args, stderr=log)
            p.wait()


ARITY_SEP = '@'
ARITY_STR = 's'
ARITY_ANY = '*'


def after_nth(mrl, token, n):
    """Return ``mrl`` from the last character of the ``n``-th whole-word ``token`` onwards.

    Occurrences are counted left to right and never overlap.  ``token`` is
    matched literally.  For ``n <= 0`` the input is returned unchanged.

    Raises:
        ValueError: if ``token`` occurs fewer than ``n`` times.
    """
    if n <= 0:
        return mrl
    # Escape the token: it is data, not a pattern.
    pattern = re.compile(r'\b%s\b' % re.escape(token))
    pos = 0
    for _ in range(n):
        # Resume strictly after the previous match so that one-character
        # tokens advance to their next occurrence too.
        m = pattern.search(mrl, pos)
        if m is None:
            raise ValueError('token %r occurs fewer than %d times in %r'
                             % (token, n, mrl))
        pos = m.end()
    return mrl[pos - 1:]


def count_arguments(s):
    """Count the top-level arguments of the call that starts at the beginning of ``s``.

    Scanning stops at the matching close parenthesis, or, if no parenthesis
    has been opened yet, at the first comma or close parenthesis.  Returns 0
    when no argument list was opened.
    """
    args = False
    parens = 0
    commas = 0
    for c in s:
        # Keep scanning while still before the argument list or inside it.
        if not ((not args and parens == 0) or (args and parens > 0)):
            break
        if c == '(':
            args = True
            parens += 1
        elif c == ')':
            parens -= 1
        elif parens == 1 and c == ',':
            commas += 1
        elif parens < 1 and c == ',':
            break
    if args:
        return commas + 1
    assert commas == 0
    return 0


def fun_to_mrl(mrl, star_top=False):
    """Flatten a functional form into a sequence of arity-labelled tokens.

    Quoted strings become ``<words_joined_by_underscore>@s``; every other
    symbol becomes ``<symbol>@<number of arguments>``.  With ``star_top`` the
    arity of the first token is replaced by ``ARITY_ANY``.  Empty input yields
    an empty string.
    """
    mrl = mrl.strip()

    mrl = re.sub(r"' *([A-Za-z0-9_ ]+?) *'",
                 lambda x: '%s%s%s' % (x.group(1).replace(' ', '_'),
                                       ARITY_SEP, ARITY_STR),
                 mrl)
    mrl = re.sub(r'\s+', ' ', mrl)
    # Parentheses and commas only delimit tokens from here on.
    symbols = re.sub(r'[\(\),]', ' ', mrl).split()

    string_suffix = '%s%s' % (ARITY_SEP, ARITY_STR)
    mrl_labeled_tokens = []
    seen = defaultdict(int)
    for token in symbols:
        seen[token] += 1
        if token.endswith(string_suffix):
            mrl_labeled_tokens.append(token)
        else:
            # Look at the text after THIS occurrence to count its arguments.
            args = count_arguments(after_nth(mrl, token, seen[token]))
            mrl_labeled_tokens.append('%s%s%d' % (token, ARITY_SEP, args))

    if star_top and mrl_labeled_tokens:
        tok = mrl_labeled_tokens[0]
        sep = tok.rindex(ARITY_SEP)
        mrl_labeled_tokens[0] = tok[:sep] + ARITY_SEP + ARITY_ANY

    return ' '.join(mrl_labeled_tokens)