summary | refs | log | tree | commit | diff
path: root/training/mira
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2015-03-04 21:36:51 -0500
committer Chris Dyer <redpony@gmail.com> 2015-03-04 21:36:51 -0500
commit 06f1b6aedbd96d652d5337cf1b93b51e4dd9a620 (patch)
tree a736ff77bbe61810c71b421e9ff0e0ecd7638446 /training/mira
parent 95183b5760d7f168ae093ae8f9b29740628a278f (diff)
remove perfect hash function stuff, add zip option to extract.cc
Diffstat (limited to 'training/mira')
-rwxr-xr-x training/mira/mira.py 59
1 files changed, 28 insertions, 31 deletions
diff --git a/training/mira/mira.py b/training/mira/mira.py
index ec9c2d64..ccecb10e 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -5,12 +5,6 @@ import argparse
import logging
import random, time
import gzip, itertools
-try:
- import cdec.score
-except ImportError:
- sys.stderr.write('Could not import pycdec, see cdec/python/README.md for details\n')
- sys.exit(1)
-have_mpl = True
try:
import matplotlib
matplotlib.use('Agg')
@@ -19,26 +13,33 @@ except ImportError:
have_mpl = False
#mira run script
-#requires pycdec to be built, since it is used for scoring hypothesis
-#translations.
#matplotlib must be installed for graphing to work
#email option requires mail
+script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
+fast_score_binary = script_dir+'/../../mteval/fast_score'
+dlog = None
+
#scoring function using pycdec scoring
def fast_score(hyps, refs, metric):
- scorer = cdec.score.Scorer(metric)
- logging.info('loaded {0} references for scoring with {1}'.format(
- len(refs), metric))
- if metric=='BLEU':
- logging.warning('BLEU is ambiguous, assuming IBM_BLEU\n')
- metric = 'IBM_BLEU'
- elif metric=='COMBI':
- logging.warning('COMBI metric is no longer supported, switching to '
- 'COMB:TER=-0.5;BLEU=0.5')
- metric = 'COMB:TER=-0.5;BLEU=0.5'
- stats = sum(scorer(r).evaluate(h) for h,r in itertools.izip(hyps,refs))
- logging.info('Score={} ({})'.format(stats.score, stats.detail))
- return stats.score
+ #scorer = cdec.score.Scorer(metric)
+ #logging.info('loaded {0} references for scoring with {1}'.format(
+ # len(refs), metric))
+ #if metric=='BLEU':
+ # logging.warning('BLEU is ambiguous, assuming IBM_BLEU\n')
+ # metric = 'IBM_BLEU'
+ #elif metric=='COMBI':
+ # logging.warning('COMBI metric is no longer supported, switching to '
+ # 'COMB:TER=-0.5;BLEU=0.5')
+ # metric = 'COMB:TER=-0.5;BLEU=0.5'
+ #stats = sum(scorer(r).evaluate(h) for h,r in itertools.izip(hyps,refs))
+ #logging.info('Score={} ({})'.format(stats.score, stats.detail))
+ #return stats.score
+ cmd = ('{0} -r{1} -i {2} -m {3}').format(fast_score_binary, refs, hyps, metric)
+ proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
+ o = proc.stdout.readline().strip()
+ print 'res: ', o
+ return float(o)
#create new parallel input file in output directory in sgml format
def enseg(devfile, newfile, gprefix):
@@ -81,7 +82,6 @@ def enseg(devfile, newfile, gprefix):
def main():
#set logging to write all info messages to stderr
logging.basicConfig(level=logging.INFO)
- script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
if not have_mpl:
logging.warning('Failed to import matplotlib, graphs will not be generated.')
@@ -373,7 +373,8 @@ def optimize(args, script_dir, dev_size):
cmd = parallel_cmd + ' ' + decoder_cmd
logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))
-
+
+ global dlog
dlog = open(decoderlog,'w')
runf = open(runfile,'w')
retries = 0
@@ -420,7 +421,7 @@ def optimize(args, script_dir, dev_size):
bests = []
fears = []
for line in run:
- hope, best, fear = line.split(' ||| ')
+ hope, best, fear = line.strip().split(' ||| ')
hopes.append(hope)
bests.append(best)
fears.append(fear)
@@ -436,14 +437,10 @@ def optimize(args, script_dir, dev_size):
gzip_file(runfile)
gzip_file(decoderlog)
- ref_file = open(refs)
- references = [line.split(' ||| ') for line in
- ref_file.read().strip().split('\n')]
- ref_file.close()
#get score for best hypothesis translations, hope and fear translations
- dec_score = fast_score(bests, references, args.metric)
- dec_score_h = fast_score(hopes, references, args.metric)
- dec_score_f = fast_score(fears, references, args.metric)
+ dec_score = fast_score(runfile+'.B', refs, args.metric)
+ dec_score_h = fast_score(runfile+'.H', refs, args.metric)
+ dec_score_f = fast_score(runfile+'.F', refs, args.metric)
hope_best_fear['hope'].append(dec_score)
hope_best_fear['best'].append(dec_score_h)