diff options
author | Chris Dyer <redpony@gmail.com> | 2015-04-02 00:50:04 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2015-04-02 00:50:04 -0400 |
commit | 5ee02ce1602f2fce6d5af5db93c2278fe6c9ede5 (patch) | |
tree | 7ebad8dd99e38d190c579f425c3eb959363e96e5 /training/mira | |
parent | e7d77de8a9b9929b22fc6562f88f3668900f9662 (diff) | |
parent | 737ed7a7f932b1a7e40d2755bcdee6bc0aa2de63 (diff) |
Merge pull request #70 from redpony/cmake
Cmake
Diffstat (limited to 'training/mira')
-rw-r--r-- | training/mira/CMakeLists.txt | 16 | ||||
-rw-r--r-- | training/mira/Makefile.am | 20 | ||||
-rwxr-xr-x | training/mira/mira.py | 45 |
3 files changed, 30 insertions, 51 deletions
diff --git a/training/mira/CMakeLists.txt b/training/mira/CMakeLists.txt new file mode 100644 index 00000000..bba9ef5f --- /dev/null +++ b/training/mira/CMakeLists.txt @@ -0,0 +1,16 @@ +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../utils) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../utils) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../mteval) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../decoder) + +set(kbest_mira_SRCS kbest_mira.cc) +add_executable(kbest_mira ${kbest_mira_SRCS}) +target_link_libraries(kbest_mira libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} z ${LIBDL_LIBRARIES}) + +set(kbest_cut_mira_SRCS kbest_cut_mira.cc) +add_executable(kbest_cut_mira ${kbest_cut_mira_SRCS}) +target_link_libraries(kbest_cut_mira libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} z ${LIBDL_LIBRARIES}) + +set(ada_opt_sm_SRCS ada_opt_sm.cc) +add_executable(ada_opt_sm ${ada_opt_sm_SRCS}) +target_link_libraries(ada_opt_sm training_utils libcdec ksearch mteval utils klm klm_util klm_util_double ${Boost_LIBRARIES} z ${LIBDL_LIBRARIES}) diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am deleted file mode 100644 index a318cf6e..00000000 --- a/training/mira/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -bin_PROGRAMS = \ - kbest_mira \ - kbest_cut_mira \ - ada_opt_sm - -EXTRA_DIST = mira.py - -ada_opt_sm_SOURCES = ada_opt_sm.cc -ada_opt_sm_LDFLAGS= -rdynamic -ada_opt_sm_LDADD = ../utils/libtraining_utils.a ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a - -kbest_mira_SOURCES = kbest_mira.cc -kbest_mira_LDFLAGS= -rdynamic -kbest_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a - -kbest_cut_mira_SOURCES = kbest_cut_mira.cc -kbest_cut_mira_LDFLAGS= -rdynamic -kbest_cut_mira_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils diff --git a/training/mira/mira.py b/training/mira/mira.py index b84eafad..4c87c664 100755 --- a/training/mira/mira.py +++ b/training/mira/mira.py @@ -5,12 +5,6 @@ import argparse import logging import random, time import gzip, itertools -try: - import cdec.score -except ImportError: - sys.stderr.write('Could not import pycdec, see cdec/python/README.md for details\n') - sys.exit(1) -have_mpl = True try: import matplotlib matplotlib.use('Agg') @@ -19,26 +13,19 @@ except ImportError: have_mpl = False #mira run script -#requires pycdec to be built, since it is used for scoring hypothesis -#translations. #matplotlib must be installed for graphing to work #email option requires mail +script_dir = os.path.dirname(os.path.abspath(sys.argv[0])) +fast_score_binary = script_dir+'/../../mteval/fast_score' +dlog = None + #scoring function using pycdec scoring def fast_score(hyps, refs, metric): - scorer = cdec.score.Scorer(metric) - logging.info('loaded {0} references for scoring with {1}'.format( - len(refs), metric)) - if metric=='BLEU': - logging.warning('BLEU is ambiguous, assuming IBM_BLEU\n') - metric = 'IBM_BLEU' - elif metric=='COMBI': - logging.warning('COMBI metric is no longer supported, switching to ' - 'COMB:TER=-0.5;BLEU=0.5') - metric = 'COMB:TER=-0.5;BLEU=0.5' - stats = sum(scorer(r).evaluate(h) for h,r in itertools.izip(hyps,refs)) - logging.info('Score={} ({})'.format(stats.score, stats.detail)) - return stats.score + cmd = ('{0} -r{1} -i {2} -m {3}').format(fast_score_binary, refs, hyps, metric) + proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE) + o = proc.stdout.readline().strip() + return float(o) #create new parallel input file in output directory in sgml format def enseg(devfile, newfile, gprefix): @@ -81,7 +68,6 @@ def enseg(devfile, newfile, gprefix): def main(): #set logging to write all info messages to stderr logging.basicConfig(level=logging.INFO) - script_dir = os.path.dirname(os.path.abspath(sys.argv[0])) if not have_mpl: logging.warning('Failed to import matplotlib, graphs will not be generated.') @@ -376,7 +362,8 @@ def optimize(args, script_dir, dev_size, score_sign): cmd = parallel_cmd + ' ' + decoder_cmd logging.info('OPTIMIZATION COMMAND: {}'.format(cmd)) - + + global dlog dlog = open(decoderlog,'w') runf = open(runfile,'w') retries = 0 @@ -423,7 +410,7 @@ def optimize(args, script_dir, dev_size, score_sign): bests = [] fears = [] for line in run: - hope, best, fear = line.split(' ||| ') + hope, best, fear = line.strip().split(' ||| ') hopes.append(hope) bests.append(best) fears.append(fear) @@ -439,14 +426,10 @@ def optimize(args, script_dir, dev_size, score_sign): gzip_file(runfile) gzip_file(decoderlog) - ref_file = open(refs) - references = [line.split(' ||| ') for line in - ref_file.read().strip().split('\n')] - ref_file.close() #get score for best hypothesis translations, hope and fear translations - dec_score = fast_score(bests, references, args.metric) - dec_score_h = fast_score(hopes, references, args.metric) - dec_score_f = fast_score(fears, references, args.metric) + dec_score = fast_score(runfile+'.B', refs, args.metric) + dec_score_h = fast_score(runfile+'.H', refs, args.metric) + dec_score_f = fast_score(runfile+'.F', refs, args.metric) hope_best_fear['hope'].append(dec_score) hope_best_fear['best'].append(dec_score_h) |