summary | refs | log | tree | commit | diff
path: root/data/geoquery/smt-semparse/moses.py
diff options
context:
space:
mode:
Diffstat (limited to 'data/geoquery/smt-semparse/moses.py')
-rw-r--r-- data/geoquery/smt-semparse/moses.py 173
1 files changed, 0 insertions, 173 deletions
diff --git a/data/geoquery/smt-semparse/moses.py b/data/geoquery/smt-semparse/moses.py
deleted file mode 100644
index 9a159c3..0000000
--- a/data/geoquery/smt-semparse/moses.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import logging
-import os
-import subprocess
-import gzip
-
-from subprocess import Popen, PIPE, STDOUT
-
class Moses:
    """Driver that launches the external Moses command-line tools.

    Every ``run_*`` method builds an argument list from attributes of the
    ``config`` object given at construction time, starts the program with
    ``subprocess.Popen`` and waits for it to exit. Log and output files are
    created under ``config.experiment_dir``. Exit codes of the launched
    programs are not inspected.
    """

    def __init__(self, config):
        """Remember the experiment configuration.

        Args:
            config: object exposing the attributes read by the other methods
                (``experiment_dir``, ``model``, ``src``, ``tgt``, ``run``,
                ``nbest``, the ``moses_*`` program paths, ...).
        """
        self.config = config

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _run_discarding_stdout(args, log_path):
        """Run ``args``, send stderr to ``log_path`` and throw stdout away.

        stdout goes to ``os.devnull`` rather than to an unread
        ``subprocess.PIPE``: a child that fills an unread pipe blocks
        forever, and ``wait()`` would then never return.
        """
        with open(log_path, 'w') as log:
            with open(os.devnull, 'w') as devnull:
                subprocess.Popen(args, stdout=devnull, stderr=log).wait()

    @staticmethod
    def _concat_files(sources, destination):
        """Write the bytes of every file in ``sources`` to ``destination``.

        Raises:
            IOError/OSError: if any source cannot be read or the destination
                cannot be written.
        """
        with open(destination, 'wb') as out:
            for path in sources:
                with open(path, 'rb') as src:
                    while True:
                        chunk = src.read(65536)
                        if not chunk:
                            break
                        out.write(chunk)

    def _decoder_args(self, experiment_dir):
        """Return ``[decoder, '-f', ini_path]`` for the configured model.

        The ``mert-work/moses.ini`` file is selected when ``config.run`` is
        ``'test'`` or ``'all'``; otherwise ``model/moses.ini`` is used.

        Raises:
            AssertionError: if ``config.model`` is neither ``'phrase'`` nor
                ``'hier'``. Raised explicitly so the check survives
                ``python -O``.
        """
        model = self.config.model
        if model == 'phrase':
            args = [self.config.moses_decode_phrase]
        elif model == 'hier':
            args = [self.config.moses_decode_hier]
        else:
            raise AssertionError('unsupported model type: %r' % (model,))

        if self.config.run in ('test', 'all'):
            ini_path = '%s/mert-work/moses.ini' % experiment_dir
        else:
            ini_path = '%s/model/moses.ini' % experiment_dir
        args += ['-f', ini_path]
        return args

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------

    def run_train(self):
        """Launch ``config.moses_train`` and wait for it to finish.

        The command line is logged at INFO level. stderr of the child is
        written to ``<experiment_dir>/train.log``; stdout is discarded.
        """
        cfg = self.config
        args = [cfg.moses_train,
                '--root-dir', cfg.experiment_dir,
                '--corpus', '%s/%s' % (cfg.experiment_dir, cfg.train_name),
                '--f', cfg.src,
                '--e', cfg.tgt,
                '--lm', '0:3:%s/%s.arpa' % (cfg.experiment_dir, cfg.tgt),
                '--alignment', cfg.symm,
                '-external-bin-dir', cfg.giza]
        if cfg.model == 'hier':
            args += ['-hierarchical', '-glue-grammar']

        logging.info(' '.join(args))
        self._run_discarding_stdout(args, '%s/train.log' % cfg.experiment_dir)

    def run_retrain(self):
        """Append the tuning data to the training data and train again.

        For each of the ``nl`` and ``mrl`` sides the current training file
        is renamed to ``<file>.notune`` and a new training file is written
        containing the old training data followed by ``tune.<side>``. The
        extraction files and the phrase/rule table of the previous run are
        removed before ``run_train`` is called.

        Raises:
            OSError/IOError: if a file to rename, read or remove is missing.
        """
        exp_dir = self.config.experiment_dir
        for side in ('nl', 'mrl'):
            train_path = '%s/%s.%s' % (exp_dir, self.config.train_name, side)
            backup_path = '%s.notune' % train_path
            tune_path = '%s/tune.%s' % (exp_dir, side)
            os.rename(train_path, backup_path)
            self._concat_files([backup_path, tune_path], train_path)

        os.remove('%s/model/extract.inv.gz' % exp_dir)
        os.remove('%s/model/extract.gz' % exp_dir)
        if self.config.model == 'hier':
            os.remove('%s/model/rule-table.gz' % exp_dir)
        else:
            os.remove('%s/model/phrase-table.gz' % exp_dir)

        self.run_train()

    def parens_ok(self, line):
        """Check the arity structure of the second field of a table line.

        ``line`` is split on ``' ||| '`` and the second field is tokenised on
        whitespace. Only tokens whose second-to-last character is ``'@'``
        are considered; their last character is the arity (``'s'`` counts as
        0). Tokens shorter than two characters are ignored.

        A positive arity opens that many argument slots. An arity-0 token
        fills one slot; slots that become full are closed in turn. The line
        is rejected when an arity-0 token leaves nothing open while further
        arity tokens are still to come.

        Args:
            line: text of one table entry containing at least one
                ``' ||| '`` separator.

        Returns:
            ``True`` if the field passes the check, ``False`` otherwise.

        Raises:
            IndexError: if ``line`` has no ``' ||| '`` separator.
            ValueError: if an arity character is not a digit or ``'s'``.
        """
        mrl_part = line.split(' ||| ')[1]
        # The length guard keeps one-character tokens from raising IndexError.
        arities = [tok[-1] for tok in mrl_part.split()
                   if len(tok) >= 2 and tok[-2] == '@']

        stack = []
        last = len(arities) - 1
        for position, symbol in enumerate(arities):
            assert symbol != '*'
            arity = 0 if symbol == 's' else int(symbol)
            if arity > 0:
                stack.append(arity)
                continue
            # A leaf fills one slot of the innermost open item; items whose
            # last slot was just filled are closed as well.
            while stack:
                remaining = stack.pop()
                if remaining > 1:
                    stack.append(remaining - 1)
                    break
            if position < last and not stack:
                return False
        return True

    def filter_phrase_table(self):
        """Rewrite the phrase/rule table keeping only ``parens_ok`` lines.

        The existing ``<kind>-table.gz`` (``phrase`` when ``config.model`` is
        ``'phrase'``, otherwise ``rule``) is renamed to
        ``<kind>-table.old.gz`` and a new table is written under the original
        name. Kept lines are copied through byte-for-byte.
        """
        table_name = 'phrase' if self.config.model == 'phrase' else 'rule'
        model_dir = '%s/model' % self.config.experiment_dir
        oldname = '%s/%s-table.gz' % (model_dir, table_name)
        newname = '%s/%s-table.old.gz' % (model_dir, table_name)
        os.rename(oldname, newname)

        with gzip.open(newname, 'rb') as old_table_f:
            with gzip.open(oldname, 'wb') as filtered_table_f:
                for line in old_table_f:
                    # gzip yields bytes; decode only for the check so the
                    # table contents themselves are written back unchanged.
                    if self.parens_ok(line.decode('utf-8', 'replace')):
                        filtered_table_f.write(line)

    def run_tune(self):
        """Launch ``config.moses_tune`` from inside the experiment directory.

        stderr of the child is written to ``<experiment_dir>/tune.log``;
        stdout is discarded. The previous working directory is restored even
        if starting the tuner fails.
        """
        cfg = self.config
        exp_dir = cfg.experiment_dir
        hierarchical = cfg.model == 'hier'

        args = [cfg.moses_tune,
                '%s/tune.%s' % (exp_dir, cfg.src),
                '%s/tune.%s' % (exp_dir, cfg.tgt)]
        if hierarchical:
            args.append(cfg.moses_decode_hier)
        else:
            args.append(cfg.moses_decode_phrase)
        args += ['%s/model/moses.ini' % exp_dir,
                 '--mertdir', '%s/bin' % cfg.moses]
        if hierarchical:
            args += ['--filtercmd',
                     '%s/scripts/training/filter-model-given-input.pl --Hierarchical'
                     % cfg.moses]

        previous_dir = os.getcwd()
        os.chdir(exp_dir)
        try:
            self._run_discarding_stdout(args, '%s/tune.log' % exp_dir)
        finally:
            os.chdir(previous_dir)

    def run_decode(self):
        """Decode ``<experiment_dir>/test.<src>`` with the configured decoder.

        The test file is fed to the decoder on stdin, stdout is written to
        ``<experiment_dir>/hyp.<tgt>`` and stderr to
        ``<experiment_dir>/decode.log``. ``<experiment_dir>/hyp.<tgt>.nbest``
        is passed as the n-best list path.

        Raises:
            AssertionError: if ``config.model`` is not ``'phrase'``/``'hier'``.
        """
        cfg = self.config
        exp_dir = cfg.experiment_dir
        args = self._decoder_args(exp_dir)
        args += ['-drop-unknown',
                 '-n-best-list', '%s/hyp.%s.nbest' % (exp_dir, cfg.tgt),
                 str(cfg.nbest), 'distinct',
                 '-threads', '3']

        with open('%s/test.%s' % (exp_dir, cfg.src)) as infile:
            with open('%s/hyp.%s' % (exp_dir, cfg.tgt), 'w') as outfile:
                with open('%s/decode.log' % exp_dir, 'w') as log:
                    subprocess.Popen(args, stdin=infile, stdout=outfile,
                                     stderr=log).wait()

    def decode_sentence(self, experiment_dir, sentence, temp_dir):
        """Decode a single sentence, leaving results in ``temp_dir``.

        ``sentence`` plus a newline is written to ``<temp_dir>/sent.tmp``,
        which is then fed to the decoder on stdin. ``<temp_dir>/nbest.tmp``
        is passed as the n-best list path. stdout and stderr of the decoder
        are discarded.

        Args:
            experiment_dir: directory holding the ``moses.ini`` to use.
            sentence: the input sentence (without trailing newline).
            temp_dir: existing directory for the temporary files.

        Raises:
            AssertionError: if ``config.model`` is not ``'phrase'``/``'hier'``.
        """
        args = self._decoder_args(experiment_dir)
        args += ['-drop-unknown',
                 '-n-best-list', '%s/nbest.tmp' % temp_dir,
                 str(self.config.nbest), 'distinct',
                 '-threads', '1']

        sentence_path = '%s/sent.tmp' % temp_dir
        # The file must be closed (flushed) before the decoder reads it.
        with open(sentence_path, 'w') as sentence_file:
            sentence_file.write('%s\n' % sentence)

        with open(sentence_path, 'r') as infile:
            with open(os.devnull, 'w') as nullfile:
                subprocess.Popen(args, stdin=infile, stdout=nullfile,
                                 stderr=nullfile).wait()
        return
-