From cb9b2004c95a162b9a2e42b3fbe9677fc54693bb Mon Sep 17 00:00:00 2001 From: philblunsom Date: Fri, 2 Jul 2010 20:36:27 +0000 Subject: Updated pipeline defaults. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@121 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pipeline/local-gi-pipeline.pl | 2 +- gi/pyp-topics/src/train-contexts.cc | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl index 17f39a62..7c31be3b 100755 --- a/gi/pipeline/local-gi-pipeline.pl +++ b/gi/pipeline/local-gi-pipeline.pl @@ -10,7 +10,7 @@ my $BASE_PHRASE_MAX_SIZE = 10; my $COMPLETE_CACHE = 1; my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors my $NUM_TOPICS = 50; -my $NUM_SAMPLES = 100; +my $NUM_SAMPLES = 1000; my $CONTEXT_SIZE = 1; my $BIDIR = 1; diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc index de107c86..7e2100f8 100644 --- a/gi/pyp-topics/src/train-contexts.cc +++ b/gi/pyp-topics/src/train-contexts.cc @@ -96,7 +96,7 @@ int main(int argc, char **argv) ogzstream documents_out(vm["document-topics-out"].as().c_str()); int document_id=0; - map all_terms; + map all_terms; for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) { vector unique_terms; @@ -125,13 +125,15 @@ int main(int argc, char **argv) } documents_out.close(); - ofstream default_topics(vm["default-topics-out"].as().c_str()); - default_topics << model.max_topic() <::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) { - vector strings = contexts_corpus.context2string(termIt->first); - default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| "; - copy(strings.begin(), strings.end(),ostream_iterator(default_topics, " ")); - default_topics <().c_str()); + default_topics << model.max_topic() <::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) { + vector strings = contexts_corpus.context2string(termIt->first); + default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| "; + copy(strings.begin(), strings.end(),ostream_iterator(default_topics, " ")); + default_topics <