summary | refs | log | tree | commit | diff
path: root/gi/clda/src/clda.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gi/clda/src/clda.cc')
-rw-r--r-- gi/clda/src/clda.cc 24
1 files changed, 18 insertions, 6 deletions
diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc
index 482a1c4c..574fa038 100644
--- a/gi/clda/src/clda.cc
+++ b/gi/clda/src/clda.cc
@@ -6,9 +6,6 @@
#include "crp.h"
#include "sampler.h"
#include "tdict.h"
-Dict TD::dict_;
-std::string TD::empty = "";
-std::string TD::space = " ";
const size_t MAX_DOC_LEN_CHARS = 1000000;
using namespace std;
@@ -57,8 +54,8 @@ int main(int argc, char** argv) {
MT19937 rng;
cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n";
zji.resize(wji.size());
- double beta = 0.01;
- double alpha = 0.001;
+ double beta = 0.1;
+ double alpha = 50.0 / num_classes;
vector<CRP<int> > dr(zji.size(), CRP<int>(beta)); // dr[i] describes the probability of using a topic in document i
vector<CRP<int> > wr(num_classes, CRP<int>(alpha)); // wr[k] describes the probability of generating a word in topic k
int random_topic = rng.next() * num_classes;
@@ -79,9 +76,11 @@ int main(int argc, char** argv) {
vector<map<WordID, int> > t2w(num_classes);
Timer timer;
SampleSet ss;
- const int num_types = TD::dict_.max();
+ const int num_types = TD::NumWords();
const prob_t class_p0(1.0 / num_classes);
const prob_t word_p0(1.0 / num_types);
+ cerr << "CLASS PRIOR PROB: " << class_p0 << endl;
+ cerr << " WORD PRIOR LOGPROB: " << log(word_p0) << endl;
ss.resize(num_classes);
double total_time = 0;
for (int iter = 0; iter < num_iterations; ++iter) {
@@ -131,6 +130,19 @@ int main(int argc, char** argv) {
cerr << "---------------------------------\n";
ShowTopWordsForTopic(t2w[i]);
}
+ cerr << "-------------\n";
+#if 0
+ for (int j = 0; j < zji.size(); ++j) {
+ const size_t num_words = wji[j].size();
+ vector<int>& zj = zji[j];
+ const vector<int>& wj = wji[j];
+ zj.resize(num_words);
+ for (int i = 0; i < num_words; ++i) {
+ cerr << TD::Convert(wji[j][i]) << '(' << zj[i] << ") ";
+ }
+ cerr << endl;
+ }
+#endif
return 0;
}