From 5f04f17907ce37a8e8dd6cff9cb6dc026b30b8f6 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 25 Apr 2014 18:21:11 +0200 Subject: better readme, example and code --- data/geoquery/smt-semparse/moses.py | 173 ------------------------------------ 1 file changed, 173 deletions(-) delete mode 100644 data/geoquery/smt-semparse/moses.py (limited to 'data/geoquery/smt-semparse/moses.py') diff --git a/data/geoquery/smt-semparse/moses.py b/data/geoquery/smt-semparse/moses.py deleted file mode 100644 index 9a159c3..0000000 --- a/data/geoquery/smt-semparse/moses.py +++ /dev/null @@ -1,173 +0,0 @@ -import logging -import os -import subprocess -import gzip - -from subprocess import Popen, PIPE, STDOUT - -class Moses: - - def __init__(self, config): - self.config = config - - def run_train(self): - args = [self.config.moses_train, - '--root-dir', self.config.experiment_dir, - '--corpus', '%s/%s' % (self.config.experiment_dir, - self.config.train_name), - '--f', self.config.src, - '--e', self.config.tgt, - '--lm', '0:3:%s/%s.arpa' % (self.config.experiment_dir, self.config.tgt), - #'-score-options', "'--OnlyDirect --NoPhraseCount'" - '--alignment', self.config.symm, - '-external-bin-dir', self.config.giza] - if self.config.model == 'hier': - args += ['-hierarchical', '-glue-grammar'] - - logging.info(' '.join(args)) - - log = open('%s/train.log' % self.config.experiment_dir, 'w') - p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=log) - p.wait() - log.close() - - def run_retrain(self): - old_train_nl = '%s/%s.nl' % (self.config.experiment_dir, - self.config.train_name) - old_train_mrl = '%s/%s.mrl' % (self.config.experiment_dir, - self.config.train_name) - moved_train_nl = '%s.notune' % old_train_nl - moved_train_mrl = '%s.notune' % old_train_mrl - tune_nl = '%s/tune.nl' % self.config.experiment_dir - tune_mrl = '%s/tune.mrl' % self.config.experiment_dir - os.rename(old_train_nl, moved_train_nl) - os.rename(old_train_mrl, moved_train_mrl) - with open(old_train_nl, 'w') as rt_train_nl: - subprocess.call(['cat', moved_train_nl, tune_nl], stdout=rt_train_nl) - with open(old_train_mrl, 'w') as rt_train_mrl: - subprocess.call(['cat', moved_train_mrl, tune_mrl], stdout=rt_train_mrl) - - os.remove('%s/model/extract.inv.gz' % self.config.experiment_dir) - os.remove('%s/model/extract.gz' % self.config.experiment_dir) - if self.config.model == 'hier': - os.remove('%s/model/rule-table.gz' % self.config.experiment_dir) - else: - os.remove('%s/model/phrase-table.gz' % self.config.experiment_dir) - - self.run_train() - - def parens_ok(self, line): - mrl_part = line.split(' ||| ')[1] - tokens = [t[-1] for t in mrl_part.split() if t[-2] == '@'] - tokens.reverse() - stack = [] - while tokens: - t = tokens.pop() - assert t != '*' - if t == 's': - t = 0 - t = int(t) - if t > 0: - stack.append(t) - else: - while stack: - top = stack.pop() - if top > 1: - stack.append(top - 1) - break - if tokens and not stack: - return False - return True - - def filter_phrase_table(self): - table_name = 'phrase' if self.config.model == 'phrase' else 'rule' - oldname = '%s/model/%s-table.gz' % (self.config.experiment_dir, table_name) - newname = '%s/model/%s-table.old.gz' % (self.config.experiment_dir, table_name) - os.rename(oldname, newname) - - with gzip.open(oldname, 'w') as filtered_table_f: - with gzip.open(newname, 'r') as old_table_f: - for line in old_table_f: - if self.parens_ok(line): - print >>filtered_table_f, line, - - def run_tune(self): - wd = os.getcwd() - os.chdir(self.config.experiment_dir) - args = [self.config.moses_tune, - '%s/tune.%s' % (self.config.experiment_dir, self.config.src), - '%s/tune.%s' % (self.config.experiment_dir, self.config.tgt)] - if self.config.model == 'hier': - args += [self.config.moses_decode_hier] - else: - args += [self.config.moses_decode_phrase] - args += ['%s/model/moses.ini' % self.config.experiment_dir, - '--mertdir', '%s/bin' % self.config.moses] - if self.config.model == 'hier': - args += ['--filtercmd', - '%s/scripts/training/filter-model-given-input.pl --Hierarchical'\ - % self.config.moses] - - log = open('%s/tune.log' % self.config.experiment_dir, 'w') - p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=log) - p.wait() - log.close() - os.chdir(wd) - - def run_decode(self): - if self.config.model == 'phrase': - args = [self.config.moses_decode_phrase] - elif self.config.model == 'hier': - args = [self.config.moses_decode_hier] - else: - assert False - - if self.config.run == 'test' or self.config.run == 'all': - args += ['-f', '%s/mert-work/moses.ini' % self.config.experiment_dir] - else: - args += ['-f', '%s/model/moses.ini' % self.config.experiment_dir] - #args += ['-f', '%s/model/moses.ini' % self.config.experiment_dir] - - args += ['-drop-unknown', - '-n-best-list', '%s/hyp.%s.nbest' % (self.config.experiment_dir, self.config.tgt), - str(self.config.nbest), 'distinct', - '-threads', '3'] - - #nullfile = open(os.devnull, 'w') - infile = open('%s/test.%s' % (self.config.experiment_dir, self.config.src)) - outfile = open('%s/hyp.%s' % (self.config.experiment_dir, self.config.tgt), 'w') - log = open('%s/decode.log' % self.config.experiment_dir, 'w') - p = subprocess.Popen(args, stdin=infile, stdout=outfile, stderr=log) - p.wait() - infile.close() - log.close() - outfile.close() - - def decode_sentence(self, experiment_dir, sentence, temp_dir): - if self.config.model == 'phrase': - args = [self.config.moses_decode_phrase] - elif self.config.model == 'hier': - args = [self.config.moses_decode_hier] - else: - assert False - - if self.config.run == 'test' or self.config.run == 'all': - args += ['-f', '%s/mert-work/moses.ini' % experiment_dir] - else: - args += ['-f', '%s/model/moses.ini' % experiment_dir] - - args += ['-drop-unknown', - '-n-best-list', '%s/nbest.tmp' % temp_dir, - str(self.config.nbest), 'distinct', - '-threads', '1'] - - infile = open('%s/sent.tmp' % temp_dir, 'w') - print >>infile, sentence - infile.close - infile = open('%s/sent.tmp' % temp_dir, 'r') - nullfile = open(os.devnull, 'w') - p = subprocess.Popen(args, stdin=infile, stdout=nullfile, stderr=nullfile) - p.wait() - infile.close() - return - -- cgit v1.2.3