diff options
| author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2014-04-25 18:40:18 +0200 | 
|---|---|---|
| committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2014-04-25 18:40:18 +0200 | 
| commit | 7120911494711094c6dcd9dc0da741d686207aab (patch) | |
| tree | 9eb85690b3550d810796ebed2328631001f4405b | |
| parent | 145368964385a6e8d475d10e2b726bf18a33d2f1 (diff) | |
| -rw-r--r-- | decode_sentence.py | 37 |
| -rw-r--r-- | dependencies.yaml | 9 |
| -rw-r--r-- | settings.yaml | 6 |
| -rw-r--r-- | smt-semparse-full-dataset.tar.gz | bin (0 -> 14324575 bytes) |
| -rw-r--r-- | src/functionalizer.py | 31 |
| -rw-r--r-- | src/moses.py | 38 |
| -rw-r--r-- | src/smt_semparse_config.py | 4 |
7 files changed, 113 insertions, 12 deletions
| diff --git a/decode_sentence.py b/decode_sentence.py new file mode 100644 index 0000000..36f059b --- /dev/null +++ b/decode_sentence.py @@ -0,0 +1,37 @@ +import sys
 +import os
 +import tempfile, shutil
 +from src.extractor import Extractor
 +from src.smt_semparse_config import SMTSemparseConfig
 +from src.moses import Moses
 +from src.functionalizer import Functionalizer
 +
 +#input: English sentence
 +if __name__ == '__main__':
 +  sentence = ''
 +  if len(sys.argv) == 3:
 +    experiment_dir = sys.argv[1]
 +    sentence = sys.argv[2]
 +  else:
 +    assert False
 +	
 +  # load config
 +  _dir = os.path.dirname(os.path.abspath(__file__))
 +  config = SMTSemparseConfig(_dir+'/settings.yaml', _dir+'/dependencies.yaml')
 +
 +  #stem
 +  sentence = Extractor(config).preprocess_nl(sentence)
 +
 +  # we need a temp dir!
 +  temp_dir = tempfile.mkdtemp()
 +
 +  #decode
 +  moses = Moses(config)
 +  moses.decode_sentence(experiment_dir, sentence, temp_dir)
 +
 +  #convert to bracket structure
 +  print Functionalizer(config).run_sentence(experiment_dir, temp_dir)
 +
 +  #delete tmp files
 +  shutil.rmtree(temp_dir)
 +
 diff --git a/dependencies.yaml b/dependencies.yaml index 37083df..f98ae6b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -1,8 +1,9 @@ -smt_semparse: /home/jacob/src/smt-semparse +smt_semparse: /workspace/grounded/test/smt-semparse -moses:  /home/jacob/src/3p/mosesdecoder -srilm:  /home/jacob/src/3p/srilm1.6.0 +moses:  /workspace/grounded/test/mosesdecoder +giza:  /workspace/grounded/test/mosesdecoder/tools +srilm:  /workspace/grounded/test/srilm  prolog: /usr/bin/swipl -wasp:   /home/jacob/src/3p/wasp-1.0 +wasp:   /workspace/grounded/test/wasp-1.0  srilm_arch: i686-m64 diff --git a/settings.yaml b/settings.yaml index 3d0669d..40bb44f 100644 --- a/settings.yaml +++ b/settings.yaml @@ -1,12 +1,12 @@  nbest:   100       # how many entries in the nbest list?  corpus:  geo       # which corpus? [geo, robo]  lang:    en        # which language? [en, de, el, th] -stem:    false     # run the stemmer? -symm:    srctotgt  # which symmetrization? [e.g. srctotgt, tgttosrc, grow, ...] +stem:    true     # run the stemmer? +symm:    tgttosrc  # which symmetrization? [e.g. srctotgt, tgttosrc, grow, ...]  np:      true      # use NP list?  np_type: all       # what version of NP list?  model:   hier      # which machine translation model? [phrase, hier] -run:     test      # which experiment? [dev, test, debug] +run:     test      # which experiment? [dev, test, debug, all]  workdir: work      # where?  
# experimental, and unrelated to published work diff --git a/smt-semparse-full-dataset.tar.gz b/smt-semparse-full-dataset.tar.gzBinary files differ new file mode 100644 index 0000000..08086eb --- /dev/null +++ b/smt-semparse-full-dataset.tar.gz diff --git a/src/functionalizer.py b/src/functionalizer.py index 66325a0..782b4e5 100644 --- a/src/functionalizer.py +++ b/src/functionalizer.py @@ -37,6 +37,37 @@ class Functionalizer:            break        counter += 1 +  def run_sentence(self, experiment_dir, temp_dir): +    hyp_file = open('%s/nbest.tmp' % temp_dir, 'r') + +    hypsets = [] +    hypset = [] +    last_eid = 0 +    for line in hyp_file: +      parts = line.split('|||') +      eid = int(parts[0]) +      if eid != last_eid: +        hypsets.append(hypset) +        hypset = [] +        last_eid = eid +      score = parts[2] + ' ||| ' + parts[3].strip() +      hyp = parts[1].strip() +      hypset.append((hyp,score)) +    hypsets.append(hypset) +    hyp_file.close() + +    counter = 0 +    for hypset in hypsets: +      hypset = list(reversed(hypset)) +      while hypset: +        hyp, score = hypset.pop() +        fun = self.functionalize(hyp) +        if fun: +          return fun +          break +      counter += 1 +    return "" +    #xc = 0    def functionalize(self, mrl): diff --git a/src/moses.py b/src/moses.py index 857ddbf..9a159c3 100644 --- a/src/moses.py +++ b/src/moses.py @@ -3,6 +3,8 @@ import os  import subprocess  import gzip +from subprocess import Popen, PIPE, STDOUT +  class Moses:    def __init__(self, config): @@ -17,7 +19,8 @@ class Moses:              '--e', self.config.tgt,              '--lm', '0:3:%s/%s.arpa' % (self.config.experiment_dir, self.config.tgt),              #'-score-options', "'--OnlyDirect --NoPhraseCount'" -            '--alignment', self.config.symm] +            '--alignment', self.config.symm, +			'-external-bin-dir', self.config.giza]      if self.config.model == 'hier':        args += ['-hierarchical', 
'-glue-grammar'] @@ -99,7 +102,7 @@ class Moses:      else:        args += [self.config.moses_decode_phrase]      args += ['%s/model/moses.ini' % self.config.experiment_dir, -             '--mertdir', '%s/dist/bin' % self.config.moses] +             '--mertdir', '%s/bin' % self.config.moses]      if self.config.model == 'hier':        args += ['--filtercmd',                  '%s/scripts/training/filter-model-given-input.pl --Hierarchical'\ @@ -119,7 +122,7 @@ class Moses:      else:        assert False -    if self.config.run == 'test': +    if self.config.run == 'test' or self.config.run == 'all':        args += ['-f', '%s/mert-work/moses.ini' % self.config.experiment_dir]      else:        args += ['-f', '%s/model/moses.ini' % self.config.experiment_dir] @@ -139,3 +142,32 @@ class Moses:      infile.close()      log.close()      outfile.close() + +  def decode_sentence(self, experiment_dir, sentence, temp_dir): +    if self.config.model == 'phrase': +      args = [self.config.moses_decode_phrase] +    elif self.config.model == 'hier': +      args = [self.config.moses_decode_hier] +    else: +      assert False + +    if self.config.run == 'test' or self.config.run == 'all': +      args += ['-f', '%s/mert-work/moses.ini' % experiment_dir] +    else: +      args += ['-f', '%s/model/moses.ini' % experiment_dir] + +    args += ['-drop-unknown', +             '-n-best-list', '%s/nbest.tmp' % temp_dir, +                             str(self.config.nbest), 'distinct', +             '-threads', '1'] + +    infile = open('%s/sent.tmp' % temp_dir, 'w') +    print >>infile, sentence +    infile.close +    infile = open('%s/sent.tmp' % temp_dir, 'r') +    nullfile = open(os.devnull, 'w') +    p = subprocess.Popen(args, stdin=infile, stdout=nullfile, stderr=nullfile) +    p.wait() +    infile.close() +    return + diff --git a/src/smt_semparse_config.py b/src/smt_semparse_config.py index 71eaf24..6bf50d7 100644 --- a/src/smt_semparse_config.py +++ b/src/smt_semparse_config.py 
@@ -17,8 +17,8 @@ class SMTSemparseConfig(Config):      self.put('moses_train', '%s/scripts/training/train-model.perl' % self.moses)      self.put('moses_tune', '%s/scripts/training/mert-moses.pl' % self.moses) -    self.put('moses_decode_phrase', '%s/dist/bin/moses' % self.moses) -    self.put('moses_decode_hier', '%s/dist/bin/moses_chart' % self.moses) +    self.put('moses_decode_phrase', '%s/bin/moses' % self.moses) +    self.put('moses_decode_hier', '%s/bin/moses_chart' % self.moses)      self.put('bleu_eval', '%s/scripts/generic/multi-bleu.perl' % self.moses)      self.put('wasp_eval', '%s/data/geo-funql/eval/eval.pl' % self.wasp) | 
