diff options
author | Chris Dyer <redpony@gmail.com> | 2014-09-07 13:57:52 -0400 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-09-07 13:57:52 -0400 |
commit | ffd0096320770325a8925dd17453d1fdd9375bb9 (patch) | |
tree | 200b1a6d1b14853d8ed6acb649ab9add881cc99b /training/mira | |
parent | 49c105dfc1fc3a0334d03de4d361abf23a6f1898 (diff) | |
parent | e6f2dd6892e277d0a868c22f726c4a83c86da016 (diff) |
Merge pull request #50 from pks/master
alignment features, PassThroughN features, dtrain update, mira qsub, and pro fix
Diffstat (limited to 'training/mira')
-rw-r--r-- | training/mira/kbest_cut_mira.cc | 8 | ||||
-rwxr-xr-x | training/mira/mira.py | 19 |
2 files changed, 22 insertions, 5 deletions
```diff
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index 56206593..724b1853 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -95,7 +95,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
     ("stream,t", "Stream mode (used for realtime)")
     ("weights_output,O",po::value<string>(),"Directory to write weights to")
     ("output_dir,D",po::value<string>(),"Directory to place output in")
-    ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+    ("decoder_config,c",po::value<string>(),"Decoder configuration file")
+    ("verbose,v",po::value<bool>()->zero_tokens(),"verbose stderr output");
   po::options_description clo("Command line options");
   clo.add_options()
     ("config", po::value<string>(), "Configuration file")
@@ -621,6 +622,7 @@ int main(int argc, char** argv) {
   vector<string> corpus;
+  const bool VERBOSE = conf.count("verbose");
   const string metric_name = conf["mt_metric"].as<string>();
   optimizer = conf["optimizer"].as<int>();
   fear_select = conf["fear"].as<int>();
@@ -783,7 +785,8 @@ int main(int argc, char** argv) {
   double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
   double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
   const double loss = margin + mt_loss;
-  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
+  cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << endl;
+  if (VERBOSE) cerr << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) << endl;
   if (loss > 0.0 || !checkloss) {
     SparseVector<double> diff = cur_good.features;
     diff -= cur_bad.features;
@@ -920,6 +923,7 @@ int main(int argc, char** argv) {
   lambdas += (cur_pair[1]->features) * step_size;
   lambdas -= (cur_pair[0]->features) * step_size;
+  if (VERBOSE) cerr << " Lambdas " << lambdas << endl;
   //reload weights based on update
   dense_weights.clear();
diff --git a/training/mira/mira.py b/training/mira/mira.py
index 3e6aa2db..ec9c2d64 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -143,6 +143,12 @@ def main():
   parser.add_argument('--pass-suffix',
                       help='multipass decoding iteration. see documentation '
                       'at www.cdec-decoder.org for more information')
+  parser.add_argument('--qsub',
+                      help='use qsub', action='store_true')
+  parser.add_argument('--pmem',
+                      help='memory for qsub', type=str, default='5G')
+  parser.add_argument('-v', '--verbose',
+                      help='more verbose mira optimizers')
   args = parser.parse_args()
   args.metric = args.metric.upper()
@@ -315,6 +321,8 @@ def split_devset(dev, outdir):
 def optimize(args, script_dir, dev_size):
   parallelize = script_dir+'/../utils/parallelize.pl'
+  if args.qsub:
+    parallelize += " -p %s"%args.pmem
   decoder = script_dir+'/kbest_cut_mira'
   (source, refs) = split_devset(args.devset, args.output_dir)
   port = random.randint(15000,50000)
@@ -353,10 +361,15 @@ def optimize(args, script_dir, dev_size):
       decoder_cmd += ' -a'
     if not args.no_pseudo:
       decoder_cmd += ' -e'
+    if args.verbose:
+      decoder_cmd += ' -v'
-    #always use fork
-    parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
-                    parallelize, logdir, args.jobs)
+    if args.qsub:
+      parallel_cmd = '{0} -e {1} -j {2} --'.format(
+                      parallelize, logdir, args.jobs)
+    else:
+      parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
+                      parallelize, logdir, args.jobs)
     cmd = parallel_cmd + ' ' + decoder_cmd
     logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))
```