summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gi/clda/src/clda.cc 27
1 files changed, 7 insertions, 20 deletions
diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc
index 4dd4789c..976b020f 100644
--- a/gi/clda/src/clda.cc
+++ b/gi/clda/src/clda.cc
@@ -9,10 +9,11 @@
Dict TD::dict_;
std::string TD::empty = "";
std::string TD::space = " ";
+const size_t MAX_DOC_LEN_CHARS = 1000000;
using namespace std;
-void ShowTopWords(const map<WordID, int>& counts) {
+void ShowTopWordsForTopic(const map<WordID, int>& counts) {
multimap<int, WordID> ms;
for (map<WordID,int>::const_iterator it = counts.begin(); it != counts.end(); ++it)
ms.insert(make_pair(it->second, it->first));
@@ -42,12 +43,12 @@ int main(int argc, char** argv) {
return 1;
}
cerr << "CLASSES: " << num_classes << endl;
- char* buf = new char[800000];
+ char* buf = new char[MAX_DOC_LEN_CHARS];
vector<vector<int> > wji; // w[j][i] - observed word i of doc j
vector<vector<int> > zji; // z[j][i] - topic assignment for word i of doc j
cerr << "READING DOCUMENTS\n";
while(cin) {
- cin.getline(buf, 800000);
+ cin.getline(buf, MAX_DOC_LEN_CHARS);
if (buf[0] == 0) continue;
wji.push_back(vector<WordID>());
TD::ConvertSentence(buf, &wji.back());
@@ -76,16 +77,15 @@ int main(int argc, char** argv) {
}
cerr << "SAMPLING\n";
vector<map<WordID, int> > t2w(num_classes);
- bool needline = false;
Timer timer;
SampleSet ss;
ss.resize(num_classes);
double total_time = 0;
for (int iter = 0; iter < num_iterations; ++iter) {
+ cerr << '.';
if (iter && iter % 10 == 0) {
total_time += timer.Elapsed();
timer.Reset();
- cerr << '.'; needline=true;
prob_t lh = prob_t::One();
for (int j = 0; j < zji.size(); ++j) {
const size_t num_words = wji[j].size();
@@ -101,7 +101,7 @@ int main(int argc, char** argv) {
}
}
}
- if (iter && iter % 200 == 0) { cerr << " [ITER=" << iter << " SEC/SAMPLE=" << (total_time / 200) << " LLH=" << log(lh) << "]\n"; needline=false; total_time=0; }
+ if (iter && iter % 40 == 0) { cerr << " [ITER=" << iter << " SEC/SAMPLE=" << (total_time / 40) << " LLH=" << log(lh) << "]\n"; total_time=0; }
//cerr << "ITERATION " << iter << " LOG LIKELIHOOD: " << log(lh) << endl;
}
for (int j = 0; j < zji.size(); ++j) {
@@ -124,22 +124,9 @@ int main(int argc, char** argv) {
}
}
}
- if (needline) cerr << endl;
-#if 0
- for (int j = 0; j < zji.size(); ++j) {
- const size_t num_words = wji[j].size();
- vector<int>& zj = zji[j];
- const vector<int>& wj = wji[j];
- zj.resize(num_words);
- for (int i = 0; i < num_words; ++i) {
- cout << TD::Convert(wj[i]) << '(' << zj[i] << ") ";
- }
- cout << endl;
- }
-#endif
for (int i = 0; i < num_classes; ++i) {
cerr << "---------------------------------\n";
- ShowTopWords(t2w[i]);
+ ShowTopWordsForTopic(t2w[i]);
}
return 0;
}