summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2014-04-25 18:40:18 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2014-04-25 18:40:18 +0200
commit7120911494711094c6dcd9dc0da741d686207aab (patch)
tree9eb85690b3550d810796ebed2328631001f4405b
parent145368964385a6e8d475d10e2b726bf18a33d2f1 (diff)
modificationsHEADmaster
-rw-r--r--decode_sentence.py37
-rw-r--r--dependencies.yaml9
-rw-r--r--settings.yaml6
-rw-r--r--smt-semparse-full-dataset.tar.gzbin0 -> 14324575 bytes
-rw-r--r--src/functionalizer.py31
-rw-r--r--src/moses.py38
-rw-r--r--src/smt_semparse_config.py4
7 files changed, 113 insertions, 12 deletions
diff --git a/decode_sentence.py b/decode_sentence.py
new file mode 100644
index 0000000..36f059b
--- /dev/null
+++ b/decode_sentence.py
@@ -0,0 +1,37 @@
+import sys
+import os
+import tempfile, shutil
+from src.extractor import Extractor
+from src.smt_semparse_config import SMTSemparseConfig
+from src.moses import Moses
+from src.functionalizer import Functionalizer
+
+# Decode a single English sentence with a trained experiment and print the
+# resulting bracket structure.
+#
+# usage: decode_sentence.py <experiment_dir> <sentence>
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        # Exit explicitly instead of `assert False`: asserts are stripped
+        # under -O, which would let the script continue with
+        # experiment_dir undefined.
+        sys.exit('usage: %s <experiment_dir> <sentence>' % sys.argv[0])
+    experiment_dir = sys.argv[1]
+    sentence = sys.argv[2]
+
+    # load config from the YAML files located next to this script
+    _dir = os.path.dirname(os.path.abspath(__file__))
+    config = SMTSemparseConfig(_dir+'/settings.yaml', _dir+'/dependencies.yaml')
+
+    # stem
+    sentence = Extractor(config).preprocess_nl(sentence)
+
+    # scratch directory shared by the decoder and the functionalizer
+    temp_dir = tempfile.mkdtemp()
+    try:
+        # decode
+        moses = Moses(config)
+        moses.decode_sentence(experiment_dir, sentence, temp_dir)
+
+        # convert to bracket structure; the parenthesised single-argument
+        # print behaves the same on Python 2 and Python 3
+        print(Functionalizer(config).run_sentence(experiment_dir, temp_dir))
+    finally:
+        # always delete tmp files, even when decoding or conversion fails
+        shutil.rmtree(temp_dir)
+
diff --git a/dependencies.yaml b/dependencies.yaml
index 37083df..f98ae6b 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -1,8 +1,9 @@
-smt_semparse: /home/jacob/src/smt-semparse
+smt_semparse: /workspace/grounded/test/smt-semparse
-moses: /home/jacob/src/3p/mosesdecoder
-srilm: /home/jacob/src/3p/srilm1.6.0
+moses: /workspace/grounded/test/mosesdecoder
+giza: /workspace/grounded/test/mosesdecoder/tools
+srilm: /workspace/grounded/test/srilm
prolog: /usr/bin/swipl
-wasp: /home/jacob/src/3p/wasp-1.0
+wasp: /workspace/grounded/test/wasp-1.0
srilm_arch: i686-m64
diff --git a/settings.yaml b/settings.yaml
index 3d0669d..40bb44f 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -1,12 +1,12 @@
nbest: 100 # how many entries in the nbest list?
corpus: geo # which corpus? [geo, robo]
lang: en # which language? [en, de, el, th]
-stem: false # run the stemmer?
-symm: srctotgt # which symmetrization? [e.g. srctotgt, tgttosrc, grow, ...]
+stem: true # run the stemmer?
+symm: tgttosrc # which symmetrization? [e.g. srctotgt, tgttosrc, grow, ...]
np: true # use NP list?
np_type: all # what version of NP list?
model: hier # which machine translation model? [phrase, hier]
-run: test # which experiment? [dev, test, debug]
+run: test # which experiment? [dev, test, debug, all]
workdir: work # where?
# experimental, and unrelated to published work
diff --git a/smt-semparse-full-dataset.tar.gz b/smt-semparse-full-dataset.tar.gz
new file mode 100644
index 0000000..08086eb
--- /dev/null
+++ b/smt-semparse-full-dataset.tar.gz
Binary files differ
diff --git a/src/functionalizer.py b/src/functionalizer.py
index 66325a0..782b4e5 100644
--- a/src/functionalizer.py
+++ b/src/functionalizer.py
@@ -37,6 +37,37 @@ class Functionalizer:
break
counter += 1
+ def run_sentence(self, experiment_dir, temp_dir):
+     """Return the first n-best hypothesis that functionalizes successfully.
+
+     Reads ``<temp_dir>/nbest.tmp``, whose lines are ``|||``-separated with
+     the hypothesis in the second field, and tries the hypotheses in file
+     order.
+
+     experiment_dir -- unused here; kept so the signature stays unchanged
+     temp_dir       -- directory containing ``nbest.tmp``
+
+     Returns the first truthy result of ``self.functionalize``, or ``""``
+     when no hypothesis yields one. A non-blank line without a ``|||``
+     separator raises IndexError.
+     """
+     # `with` closes the file even if functionalize() raises.
+     with open('%s/nbest.tmp' % temp_dir, 'r') as hyp_file:
+         for line in hyp_file:
+             if not line.strip():
+                 continue
+             # No grouping by sentence id: every group would be tried in
+             # file order anyway, so one streaming pass gives the same
+             # answer and can stop at the first success.
+             hyp = line.split('|||')[1].strip()
+             fun = self.functionalize(hyp)
+             if fun:
+                 return fun
+     return ""
+
#xc = 0
def functionalize(self, mrl):
diff --git a/src/moses.py b/src/moses.py
index 857ddbf..9a159c3 100644
--- a/src/moses.py
+++ b/src/moses.py
@@ -3,6 +3,8 @@ import os
import subprocess
import gzip
+from subprocess import Popen, PIPE, STDOUT
+
class Moses:
def __init__(self, config):
@@ -17,7 +19,8 @@ class Moses:
'--e', self.config.tgt,
'--lm', '0:3:%s/%s.arpa' % (self.config.experiment_dir, self.config.tgt),
#'-score-options', "'--OnlyDirect --NoPhraseCount'"
- '--alignment', self.config.symm]
+ '--alignment', self.config.symm,
+ '-external-bin-dir', self.config.giza]
if self.config.model == 'hier':
args += ['-hierarchical', '-glue-grammar']
@@ -99,7 +102,7 @@ class Moses:
else:
args += [self.config.moses_decode_phrase]
args += ['%s/model/moses.ini' % self.config.experiment_dir,
- '--mertdir', '%s/dist/bin' % self.config.moses]
+ '--mertdir', '%s/bin' % self.config.moses]
if self.config.model == 'hier':
args += ['--filtercmd',
'%s/scripts/training/filter-model-given-input.pl --Hierarchical'\
@@ -119,7 +122,7 @@ class Moses:
else:
assert False
- if self.config.run == 'test':
+ if self.config.run == 'test' or self.config.run == 'all':
args += ['-f', '%s/mert-work/moses.ini' % self.config.experiment_dir]
else:
args += ['-f', '%s/model/moses.ini' % self.config.experiment_dir]
@@ -139,3 +142,32 @@ class Moses:
infile.close()
log.close()
outfile.close()
+
+ def decode_sentence(self, experiment_dir, sentence, temp_dir):
+     """Run the Moses decoder on a single sentence.
+
+     Writes the sentence to ``<temp_dir>/sent.tmp``, feeds that file to the
+     decoder on stdin and waits for it to finish. The decoder is asked to
+     write a distinct n-best list of ``self.config.nbest`` entries to
+     ``<temp_dir>/nbest.tmp``; its stdout and stderr are discarded.
+
+     experiment_dir -- directory holding ``mert-work/moses.ini`` (used when
+                       ``config.run`` is 'test' or 'all') or
+                       ``model/moses.ini`` (any other run)
+     sentence       -- the input sentence
+     temp_dir       -- existing scratch directory for the two temp files
+
+     Returns None. The decoder's exit status is not checked.
+     """
+     if self.config.model == 'phrase':
+         args = [self.config.moses_decode_phrase]
+     elif self.config.model == 'hier':
+         args = [self.config.moses_decode_hier]
+     else:
+         assert False
+
+     if self.config.run in ('test', 'all'):
+         args += ['-f', '%s/mert-work/moses.ini' % experiment_dir]
+     else:
+         args += ['-f', '%s/model/moses.ini' % experiment_dir]
+
+     args += ['-drop-unknown',
+              '-n-best-list', '%s/nbest.tmp' % temp_dir,
+              str(self.config.nbest), 'distinct',
+              '-threads', '1']
+
+     sent_path = '%s/sent.tmp' % temp_dir
+     # Close (and thereby flush) the file before the decoder reads it; a
+     # bare `infile.close` without parentheses would not do that.
+     with open(sent_path, 'w') as infile:
+         infile.write('%s\n' % sentence)
+
+     # Nested `with` blocks release both handles even if Popen raises.
+     with open(sent_path, 'r') as infile:
+         with open(os.devnull, 'w') as nullfile:
+             p = subprocess.Popen(args, stdin=infile, stdout=nullfile,
+                                  stderr=nullfile)
+             p.wait()
+
diff --git a/src/smt_semparse_config.py b/src/smt_semparse_config.py
index 71eaf24..6bf50d7 100644
--- a/src/smt_semparse_config.py
+++ b/src/smt_semparse_config.py
@@ -17,8 +17,8 @@ class SMTSemparseConfig(Config):
self.put('moses_train', '%s/scripts/training/train-model.perl' % self.moses)
self.put('moses_tune', '%s/scripts/training/mert-moses.pl' % self.moses)
- self.put('moses_decode_phrase', '%s/dist/bin/moses' % self.moses)
- self.put('moses_decode_hier', '%s/dist/bin/moses_chart' % self.moses)
+ self.put('moses_decode_phrase', '%s/bin/moses' % self.moses)
+ self.put('moses_decode_hier', '%s/bin/moses_chart' % self.moses)
self.put('bleu_eval', '%s/scripts/generic/multi-bleu.perl' % self.moses)
self.put('wasp_eval', '%s/data/geo-funql/eval/eval.pl' % self.wasp)