summary | refs | log | tree | commit | diff
path: root/training/mira
diff options
context:
space:
mode:
author: Chris Dyer <redpony@gmail.com> 2015-04-02 00:50:04 -0400
committer: Chris Dyer <redpony@gmail.com> 2015-04-02 00:50:04 -0400
commit: 5ee02ce1602f2fce6d5af5db93c2278fe6c9ede5 (patch)
tree: 7ebad8dd99e38d190c579f425c3eb959363e96e5 /training/mira
parent: e7d77de8a9b9929b22fc6562f88f3668900f9662 (diff)
parent: 737ed7a7f932b1a7e40d2755bcdee6bc0aa2de63 (diff)
Merge pull request #70 from redpony/cmake
Cmake
Diffstat (limited to 'training/mira')
-rw-r--r-- training/mira/CMakeLists.txt 16
-rw-r--r-- training/mira/Makefile.am 20
-rwxr-xr-x training/mira/mira.py 45
3 files changed, 30 insertions, 51 deletions
diff --git a/training/mira/CMakeLists.txt b/training/mira/CMakeLists.txt
new file mode 100644
index 00000000..bba9ef5f
--- /dev/null
+++ b/training/mira/CMakeLists.txt
@@ -0,0 +1,16 @@
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../utils)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../utils)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../mteval)
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../decoder)
+
+set(kbest_mira_SRCS kbest_mira.cc)
+add_executable(kbest_mira ${kbest_mira_SRCS})
+target_link_libraries(kbest_mira libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} z ${LIBDL_LIBRARIES})
+
+set(kbest_cut_mira_SRCS kbest_cut_mira.cc)
+add_executable(kbest_cut_mira ${kbest_cut_mira_SRCS})
+target_link_libraries(kbest_cut_mira libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} z ${LIBDL_LIBRARIES})
+
+set(ada_opt_sm_SRCS ada_opt_sm.cc)
+add_executable(ada_opt_sm ${ada_opt_sm_SRCS})
+target_link_libraries(ada_opt_sm training_utils libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} z ${LIBDL_LIBRARIES})
diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am
deleted file mode 100644
index a318cf6e..00000000
--- a/training/mira/Makefile.am
+++ /dev/null
@@ -1,20 +0,0 @@
-bin_PROGRAMS = \
- kbest_mira \
- kbest_cut_mira \
- ada_opt_sm
-
-EXTRA_DIST = mira.py
-
-ada_opt_sm_SOURCES = ada_opt_sm.cc
-ada_opt_sm_LDFLAGS= -rdynamic
-ada_opt_sm_LDADD = ../utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
-
-kbest_mira_SOURCES = kbest_mira.cc
-kbest_mira_LDFLAGS= -rdynamic
-kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
-
-kbest_cut_mira_SOURCES = kbest_cut_mira.cc
-kbest_cut_mira_LDFLAGS= -rdynamic
-kbest_cut_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils
diff --git a/training/mira/mira.py b/training/mira/mira.py
index b84eafad..4c87c664 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -5,12 +5,6 @@ import argparse
import logging
import random, time
import gzip, itertools
-try:
- import cdec.score
-except ImportError:
- sys.stderr.write('Could not import pycdec, see cdec/python/README.md for details\n')
- sys.exit(1)
-have_mpl = True
try:
import matplotlib
matplotlib.use('Agg')
@@ -19,26 +13,19 @@ except ImportError:
have_mpl = False
#mira run script
-#requires pycdec to be built, since it is used for scoring hypothesis
-#translations.
#matplotlib must be installed for graphing to work
#email option requires mail
+script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
+fast_score_binary = script_dir+'/../../mteval/fast_score'
+dlog = None
+
#scoring function using pycdec scoring
def fast_score(hyps, refs, metric):
- scorer = cdec.score.Scorer(metric)
- logging.info('loaded {0} references for scoring with {1}'.format(
- len(refs), metric))
- if metric=='BLEU':
- logging.warning('BLEU is ambiguous, assuming IBM_BLEU\n')
- metric = 'IBM_BLEU'
- elif metric=='COMBI':
- logging.warning('COMBI metric is no longer supported, switching to '
- 'COMB:TER=-0.5;BLEU=0.5')
- metric = 'COMB:TER=-0.5;BLEU=0.5'
- stats = sum(scorer(r).evaluate(h) for h,r in itertools.izip(hyps,refs))
- logging.info('Score={} ({})'.format(stats.score, stats.detail))
- return stats.score
+ cmd = ('{0} -r{1} -i {2} -m {3}').format(fast_score_binary, refs, hyps, metric)
+ proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
+ o = proc.stdout.readline().strip()
+ return float(o)
#create new parallel input file in output directory in sgml format
def enseg(devfile, newfile, gprefix):
@@ -81,7 +68,6 @@ def enseg(devfile, newfile, gprefix):
def main():
#set logging to write all info messages to stderr
logging.basicConfig(level=logging.INFO)
- script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
if not have_mpl:
logging.warning('Failed to import matplotlib, graphs will not be generated.')
@@ -376,7 +362,8 @@ def optimize(args, script_dir, dev_size, score_sign):
cmd = parallel_cmd + ' ' + decoder_cmd
logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))
-
+
+ global dlog
dlog = open(decoderlog,'w')
runf = open(runfile,'w')
retries = 0
@@ -423,7 +410,7 @@ def optimize(args, script_dir, dev_size, score_sign):
bests = []
fears = []
for line in run:
- hope, best, fear = line.split(' ||| ')
+ hope, best, fear = line.strip().split(' ||| ')
hopes.append(hope)
bests.append(best)
fears.append(fear)
@@ -439,14 +426,10 @@ def optimize(args, script_dir, dev_size, score_sign):
gzip_file(runfile)
gzip_file(decoderlog)
- ref_file = open(refs)
- references = [line.split(' ||| ') for line in
- ref_file.read().strip().split('\n')]
- ref_file.close()
#get score for best hypothesis translations, hope and fear translations
- dec_score = fast_score(bests, references, args.metric)
- dec_score_h = fast_score(hopes, references, args.metric)
- dec_score_f = fast_score(fears, references, args.metric)
+ dec_score = fast_score(runfile+'.B', refs, args.metric)
+ dec_score_h = fast_score(runfile+'.H', refs, args.metric)
+ dec_score_f = fast_score(runfile+'.F', refs, args.metric)
hope_best_fear['hope'].append(dec_score)
hope_best_fear['best'].append(dec_score_h)