summary | refs | log | tree | commit | diff
path: root/training/mira
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2014-09-07 13:57:52 -0400
committer Chris Dyer <redpony@gmail.com> 2014-09-07 13:57:52 -0400
commit ffd0096320770325a8925dd17453d1fdd9375bb9 (patch)
tree 200b1a6d1b14853d8ed6acb649ab9add881cc99b /training/mira
parent 49c105dfc1fc3a0334d03de4d361abf23a6f1898 (diff)
parent e6f2dd6892e277d0a868c22f726c4a83c86da016 (diff)
Merge pull request #50 from pks/master
alignment features, PassThroughN features, dtrain update, mira qsub, and pro fix
Diffstat (limited to 'training/mira')
-rw-r--r-- training/mira/kbest_cut_mira.cc 8
-rwxr-xr-x training/mira/mira.py 19
2 files changed, 22 insertions, 5 deletions
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc
index 56206593..724b1853 100644
--- a/training/mira/kbest_cut_mira.cc
+++ b/training/mira/kbest_cut_mira.cc
@@ -95,7 +95,8 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("stream,t", "Stream mode (used for realtime)")
("weights_output,O",po::value<string>(),"Directory to write weights to")
("output_dir,D",po::value<string>(),"Directory to place output in")
- ("decoder_config,c",po::value<string>(),"Decoder configuration file");
+ ("decoder_config,c",po::value<string>(),"Decoder configuration file")
+ ("verbose,v",po::value<bool>()->zero_tokens(),"verbose stderr output");
po::options_description clo("Command line options");
clo.add_options()
("config", po::value<string>(), "Configuration file")
@@ -621,6 +622,7 @@ int main(int argc, char** argv) {
vector<string> corpus;
+ const bool VERBOSE = conf.count("verbose");
const string metric_name = conf["mt_metric"].as<string>();
optimizer = conf["optimizer"].as<int>();
fear_select = conf["fear"].as<int>();
@@ -783,7 +785,8 @@ int main(int argc, char** argv) {
double margin = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights);
double mt_loss = (cur_good.mt_metric - cur_bad.mt_metric);
const double loss = margin + mt_loss;
- cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << " " << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) <<endl;
+ cerr << "LOSS: " << loss << " Margin:" << margin << " BLEUL:" << mt_loss << endl;
+ if (VERBOSE) cerr << cur_bad.features.dot(dense_weights) << " " << cur_good.features.dot(dense_weights) << endl;
if (loss > 0.0 || !checkloss) {
SparseVector<double> diff = cur_good.features;
diff -= cur_bad.features;
@@ -920,6 +923,7 @@ int main(int argc, char** argv) {
lambdas += (cur_pair[1]->features) * step_size;
lambdas -= (cur_pair[0]->features) * step_size;
+ if (VERBOSE) cerr << " Lambdas " << lambdas << endl;
//reload weights based on update
dense_weights.clear();
diff --git a/training/mira/mira.py b/training/mira/mira.py
index 3e6aa2db..ec9c2d64 100755
--- a/training/mira/mira.py
+++ b/training/mira/mira.py
@@ -143,6 +143,12 @@ def main():
parser.add_argument('--pass-suffix',
help='multipass decoding iteration. see documentation '
'at www.cdec-decoder.org for more information')
+ parser.add_argument('--qsub',
+ help='use qsub', action='store_true')
+ parser.add_argument('--pmem',
+ help='memory for qsub', type=str, default='5G')
+ parser.add_argument('-v', '--verbose',
+ help='more verbose mira optimizers')
args = parser.parse_args()
args.metric = args.metric.upper()
@@ -315,6 +321,8 @@ def split_devset(dev, outdir):
def optimize(args, script_dir, dev_size):
parallelize = script_dir+'/../utils/parallelize.pl'
+ if args.qsub:
+ parallelize += " -p %s"%args.pmem
decoder = script_dir+'/kbest_cut_mira'
(source, refs) = split_devset(args.devset, args.output_dir)
port = random.randint(15000,50000)
@@ -353,10 +361,15 @@ def optimize(args, script_dir, dev_size):
decoder_cmd += ' -a'
if not args.no_pseudo:
decoder_cmd += ' -e'
+ if args.verbose:
+ decoder_cmd += ' -v'
- #always use fork
- parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
- parallelize, logdir, args.jobs)
+ if args.qsub:
+ parallel_cmd = '{0} -e {1} -j {2} --'.format(
+ parallelize, logdir, args.jobs)
+ else:
+ parallel_cmd = '{0} --use-fork -e {1} -j {2} --'.format(
+ parallelize, logdir, args.jobs)
cmd = parallel_cmd + ' ' + decoder_cmd
logging.info('OPTIMIZATION COMMAND: {}'.format(cmd))