From a9c9f9f823cacb8e94d88838d08cf7daa8c4c82e Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 23 Jun 2010 03:54:29 +0000 Subject: clean up git-svn-id: https://ws10smt.googlecode.com/svn/trunk@8 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/clda/src/clda.cc | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'gi') diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc index 4dd4789c..976b020f 100644 --- a/gi/clda/src/clda.cc +++ b/gi/clda/src/clda.cc @@ -9,10 +9,11 @@ Dict TD::dict_; std::string TD::empty = ""; std::string TD::space = " "; +const size_t MAX_DOC_LEN_CHARS = 1000000; using namespace std; -void ShowTopWords(const map& counts) { +void ShowTopWordsForTopic(const map& counts) { multimap ms; for (map::const_iterator it = counts.begin(); it != counts.end(); ++it) ms.insert(make_pair(it->second, it->first)); @@ -42,12 +43,12 @@ int main(int argc, char** argv) { return 1; } cerr << "CLASSES: " << num_classes << endl; - char* buf = new char[800000]; + char* buf = new char[MAX_DOC_LEN_CHARS]; vector > wji; // w[j][i] - observed word i of doc j vector > zji; // z[j][i] - topic assignment for word i of doc j cerr << "READING DOCUMENTS\n"; while(cin) { - cin.getline(buf, 800000); + cin.getline(buf, MAX_DOC_LEN_CHARS); if (buf[0] == 0) continue; wji.push_back(vector()); TD::ConvertSentence(buf, &wji.back()); @@ -76,16 +77,15 @@ int main(int argc, char** argv) { } cerr << "SAMPLING\n"; vector > t2w(num_classes); - bool needline = false; Timer timer; SampleSet ss; ss.resize(num_classes); double total_time = 0; for (int iter = 0; iter < num_iterations; ++iter) { + cerr << '.'; if (iter && iter % 10 == 0) { total_time += timer.Elapsed(); timer.Reset(); - cerr << '.'; needline=true; prob_t lh = prob_t::One(); for (int j = 0; j < zji.size(); ++j) { const size_t num_words = wji[j].size(); @@ -101,7 +101,7 @@ int main(int argc, char** argv) { } } } - if (iter && iter % 200 == 0) { cerr << " [ITER=" << iter << " SEC/SAMPLE=" << (total_time / 200) << " LLH=" << log(lh) << "]\n"; needline=false; total_time=0; } + if (iter && iter % 40 == 0) { cerr << " [ITER=" << iter << " SEC/SAMPLE=" << (total_time / 40) << " LLH=" << log(lh) << "]\n"; total_time=0; } //cerr << "ITERATION " << iter << " LOG LIKELIHOOD: " << log(lh) << endl; } for (int j = 0; j < zji.size(); ++j) { @@ -124,22 +124,9 @@ int main(int argc, char** argv) { } } } - if (needline) cerr << endl; -#if 0 - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - zj.resize(num_words); - for (int i = 0; i < num_words; ++i) { - cout << TD::Convert(wj[i]) << '(' << zj[i] << ") "; - } - cout << endl; - } -#endif for (int i = 0; i < num_classes; ++i) { cerr << "---------------------------------\n"; - ShowTopWords(t2w[i]); + ShowTopWordsForTopic(t2w[i]); } return 0; } -- cgit v1.2.3