diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-01 20:13:48 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-01 20:13:48 +0000 |
commit | de3b20fd379a62f8f381990f4d819a732b57a814 (patch) | |
tree | f4ee81303ad8d595224087ff6b753680b354ce88 /training/cllh_filter_grammar.cc | |
parent | 597db20b3b38fd0cbb3e3d9a7105b0c3c5c37e84 (diff) |
compute obj, fixes for grammar filter
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@668 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'training/cllh_filter_grammar.cc')
-rw-r--r-- | training/cllh_filter_grammar.cc | 38 |
1 files changed, 29 insertions, 9 deletions
diff --git a/training/cllh_filter_grammar.cc b/training/cllh_filter_grammar.cc index 90fe9fba..6998ec2b 100644 --- a/training/cllh_filter_grammar.cc +++ b/training/cllh_filter_grammar.cc @@ -22,7 +22,10 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { opts.add_options() ("training_data,t",po::value<string>(),"Training data corpus") ("decoder_config,c",po::value<string>(),"Decoder configuration file") - ("ncpus,n",po::value<unsigned>()->default_value(1),"Number of CPUs to use"); + ("shards,s",po::value<unsigned>()->default_value(1),"Number of shards") + ("starting_shard,S",po::value<unsigned>()->default_value(0), "In this invocation only process shards >= S") + ("work_limit,l",po::value<unsigned>()->default_value(9999), "Process maximially this many shards") + ("ncpus,C",po::value<unsigned>()->default_value(1),"Number of CPUs to use"); po::options_description clo("Command line options"); clo.add_options() ("config", po::value<string>(), "Configuration file") @@ -49,6 +52,8 @@ void ReadTrainingCorpus(const string& fname, int rank, int size, vector<string>* istream& in = *rf.stream(); string line; int lc = 0; + assert(size > 0); + assert(rank < size); while(in) { getline(in, line); if (!in) break; @@ -112,6 +117,7 @@ void work(const string& fname, int rank, int size, Decoder* decoder) { vector<int> ids; ReadTrainingCorpus(fname, rank, size, &corpus, &ids); assert(corpus.size() > 0); + assert(corpus.size() == ids.size()); cerr << " " << rank << '/' << size << ": has " << corpus.size() << " sentences to process\n"; ostringstream oc; oc << "corpus." << rank << "_of_" << size; WriteFile foc(oc.str()); @@ -121,13 +127,14 @@ void work(const string& fname, int rank, int size, Decoder* decoder) { set<const TRule*> all_used; TrainingObserver observer; for (int i = 0; i < corpus.size(); ++i) { - int ex_num = ids[i]; - decoder->SetId(ex_num); - decoder->Decode(corpus[ex_num], &observer); + const int sent_id = ids[i]; + const string& input = corpus[i]; + decoder->SetId(sent_id); + decoder->Decode(input, &observer); if (observer.failed) { - (*foc.stream()) << "*** id=" << ex_num << " is unreachable\n"; + // do nothing } else { - (*foc.stream()) << corpus[ex_num] << endl; + (*foc.stream()) << input << endl; for (set<const TRule*>::iterator it = observer.used.begin(); it != observer.used.end(); ++it) { if (all_used.insert(*it).second) (*fog.stream()) << **it << endl; @@ -143,6 +150,11 @@ int main(int argc, char** argv) { InitCommandLine(argc, argv, &conf); const string fname = conf["training_data"].as<string>(); const unsigned ncpus = conf["ncpus"].as<unsigned>(); + const unsigned shards = conf["shards"].as<unsigned>(); + const unsigned start = conf["starting_shard"].as<unsigned>(); + const unsigned work_limit = conf["work_limit"].as<unsigned>(); + const unsigned eff_shards = min(start + work_limit, shards); + cerr << "Processing shards " << start << "/" << shards << " to " << eff_shards << "/" << shards << endl; assert(ncpus > 0); ReadFile ini_rf(conf["decoder_config"].as<string>()); Decoder decoder(ini_rf.stream()); @@ -162,8 +174,13 @@ int main(int argc, char** argv) { if (pid > 0) { children.push_back(pid); } else { - work(fname, i, ncpus, &decoder); - cerr << " " << i << "/" << ncpus << " finished.\n"; + for (int j = start; j < eff_shards; ++j) { + if (j % ncpus == i) { + cerr << " CPU " << i << " processing shard " << j << endl; + work(fname, j, shards, &decoder); + cerr << " Shard " << j << "/" << shards << " finished.\n"; + } + } _exit(0); } } @@ -171,7 +188,10 @@ int main(int argc, char** argv) { int status; int w = waitpid(children[i], &status, 0); if (w < 0) { cerr << "Error while waiting for children!"; return 1; } - cerr << "Child " << i << ": status=" << status << " sig?=" << WIFSIGNALED(status) << " sig=" << WTERMSIG(status) << endl; + if (WIFSIGNALED(status)) { + cerr << "Child " << i << " received signal " << WTERMSIG(status) << endl; + if (WTERMSIG(status) == 11) { cerr << " this is a SEGV- you may be trying to print temporarily created rules\n"; } + } } return 0; } |