summary | refs | log | tree | commit | diff
path: root/training/mira/mira.py
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2015-04-02 00:50:04 -0400
committer Chris Dyer <redpony@gmail.com> 2015-04-02 00:50:04 -0400
commit 5ee02ce1602f2fce6d5af5db93c2278fe6c9ede5 (patch)
tree 7ebad8dd99e38d190c579f425c3eb959363e96e5 /training/mira/mira.py
parent e7d77de8a9b9929b22fc6562f88f3668900f9662 (diff)
parent 737ed7a7f932b1a7e40d2755bcdee6bc0aa2de63 (diff)
Merge pull request #70 from redpony/cmake
Cmake
Diffstat (limited to 'training/mira/mira.py')
-rwxr-xr-x training/mira/mira.py 45
1 files changed, 14 insertions, 31 deletions
diff --git a/training/mira/mira.py b/training/mira/mira.py
index b84eafad..4c87c664 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -5,12 +5,6 @@ import argparse
import logging
import random, time
import gzip, itertools
-try:
- import cdec.score
-except ImportError:
- sys.stderr.write('Could not import pycdec, see cdec/python/README.md for details\n')
- sys.exit(1)
-have_mpl = True
try:
import matplotlib
matplotlib.use('Agg')
@@ -19,26 +13,19 @@ except ImportError:
have_mpl = False
#mira run script
-#requires pycdec to be built, since it is used for scoring hypothesis
-#translations.
#matplotlib must be installed for graphing to work
#email option requires mail
+script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
+fast_score_binary = script_dir+'/../../mteval/fast_score'
+dlog = None
+
#scoring function using pycdec scoring
def fast_score(hyps, refs, metric):
- scorer = cdec.score.Scorer(metric)
- logging.info('loaded {0} references for scoring with {1}'.format(
- len(refs), metric))
- if metric=='BLEU':
- logging.warning('BLEU is ambiguous, assuming IBM_BLEU\n')
- metric = 'IBM_BLEU'
- elif metric=='COMBI':
- logging.warning('COMBI metric is no longer supported, switching to '
- 'COMB:TER=-0.5;BLEU=0.5')
- metric = 'COMB:TER=-0.5;BLEU=0.5'
- stats = sum(scorer(r).evaluate(h) for h,r in itertools.izip(hyps,refs))
- logging.info('Score={} ({})'.format(stats.score, stats.detail))
- return stats.score
+ cmd = ('{0} -r{1} -i {2} -m {3}').format(fast_score_binary, refs, hyps, metric)
+ proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
+ o = proc.stdout.readline().strip()
+ return float(o)
#create new parallel input file in output directory in sgml format
def enseg(devfile, newfile, gprefix):
@@ -81,7 +68,6 @@ def enseg(devfile, newfile, gprefix):
def main():
#set logging to write all info messages to stderr
logging.basicConfig(level=logging.INFO)
- script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
if not have_mpl:
logging.warning('Failed to import matplotlib, graphs will not be generated.')
@@ -376,7 +362,8 @@ def optimize(args, script_dir, dev_size, score_sign):
cmd = parallel_cmd + ' ' + decoder_cmd
logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))
-
+
+ global dlog
dlog = open(decoderlog,'w')
runf = open(runfile,'w')
retries = 0
@@ -423,7 +410,7 @@ def optimize(args, script_dir, dev_size, score_sign):
bests = []
fears = []
for line in run:
- hope, best, fear = line.split(' ||| ')
+ hope, best, fear = line.strip().split(' ||| ')
hopes.append(hope)
bests.append(best)
fears.append(fear)
@@ -439,14 +426,10 @@ def optimize(args, script_dir, dev_size, score_sign):
gzip_file(runfile)
gzip_file(decoderlog)
- ref_file = open(refs)
- references = [line.split(' ||| ') for line in
- ref_file.read().strip().split('\n')]
- ref_file.close()
#get score for best hypothesis translations, hope and fear translations
- dec_score = fast_score(bests, references, args.metric)
- dec_score_h = fast_score(hopes, references, args.metric)
- dec_score_f = fast_score(fears, references, args.metric)
+ dec_score = fast_score(runfile+'.B', refs, args.metric)
+ dec_score_h = fast_score(runfile+'.H', refs, args.metric)
+ dec_score_f = fast_score(runfile+'.F', refs, args.metric)
hope_best_fear['hope'].append(dec_score)
hope_best_fear['best'].append(dec_score_h)