summary refs log tree commit diff
diff options
context:
space:
mode:
author philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-02 20:36:27 +0000
committer philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-02 20:36:27 +0000
commit cb9b2004c95a162b9a2e42b3fbe9677fc54693bb (patch)
tree 9efe3573ceb7a8d037575cd33488fe6bad1c84aa
parent 95f9a8f0a2bafcee8cac37276af047b7abd9bc45 (diff)
Updated pipeline defaults.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@121 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x gi/pipeline/local-gi-pipeline.pl 2
-rw-r--r-- gi/pyp-topics/src/train-contexts.cc 18
2 files changed, 11 insertions, 9 deletions
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 17f39a62..7c31be3b 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -10,7 +10,7 @@ my $BASE_PHRASE_MAX_SIZE = 10;
my $COMPLETE_CACHE = 1;
my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors
my $NUM_TOPICS = 50;
-my $NUM_SAMPLES = 100;
+my $NUM_SAMPLES = 1000;
my $CONTEXT_SIZE = 1;
my $BIDIR = 1;
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index de107c86..7e2100f8 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
int document_id=0;
- map<int,int> all_terms;
+ map<int,int> all_terms;
for (Corpus::const_iterator corpusIt=contexts_corpus.begin();
corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
vector<int> unique_terms;
@@ -125,13 +125,15 @@ int main(int argc, char **argv)
}
documents_out.close();
- ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
- default_topics << model.max_topic() <<endl;
- for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
- vector<std::string> strings = contexts_corpus.context2string(termIt->first);
- default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
- copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
- default_topics <<endl;
+ if (vm.count("default-topics-out")) {
+ ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
+ default_topics << model.max_topic() <<endl;
+ for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
+ vector<std::string> strings = contexts_corpus.context2string(termIt->first);
+ default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
+ copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
+ default_topics <<endl;
+ }
}
}