From 78bf1457f606dd3880c2bc912201c4945d3f85b4 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Thu, 8 Mar 2012 14:29:42 -0500
Subject: moar

---
 gi/pf/align-tl.cc | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'gi/pf/align-tl.cc')

diff --git a/gi/pf/align-tl.cc b/gi/pf/align-tl.cc
index fe8950b5..fc9b7ca5 100644
--- a/gi/pf/align-tl.cc
+++ b/gi/pf/align-tl.cc
@@ -30,6 +30,10 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("samples,s",po::value()->default_value(1000),"Number of samples")
         ("input,i",po::value(),"Read parallel data from")
+        ("max_src_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in source")
+        ("max_trg_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in target")
+        ("min_transliterated_src_length", po::value()->default_value(3), "Minimum length of source words considered for transliteration")
+        ("filter_ratio", po::value()->default_value(0.66), "Filter ratio: basically, if the lengths differ by less than this ratio, mark the pair as non-transliteratable")
         ("random_seed,S",po::value(), "Random seed");
   po::options_description clo("Command line options");
   clo.add_options()
@@ -306,12 +310,11 @@ int main(int argc, char** argv) {
   letters[TD::Convert("NULL")].clear();
 
   // TODO configure this
-  int max_src_chunk = 4;
-  int max_trg_chunk = 4;
-  Transliterations tl(max_src_chunk, max_trg_chunk);
-
-  // TODO CONFIGURE THIS
-  int min_trans_src = 4;
+  const int max_src_chunk = conf["max_src_chunk"].as();
+  const int max_trg_chunk = conf["max_trg_chunk"].as();
+  const double filter_rat = conf["filter_ratio"].as();
+  const int min_trans_src = conf["min_transliterated_src_length"].as();
+  Transliterations tl(max_src_chunk, max_trg_chunk, filter_rat);
 
   cerr << "Initializing transliteration graph structures ...\n";
   for (int i = 0; i < corpus.size(); ++i) {
--
cgit v1.2.3