summary refs log tree commit diff
path: root/gi/pyp-topics/src/contexts_corpus.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.cc')
-rw-r--r-- gi/pyp-topics/src/contexts_corpus.cc 22
1 files changed, 15 insertions, 7 deletions
diff --git a/gi/pyp-topics/src/contexts_corpus.cc b/gi/pyp-topics/src/contexts_corpus.cc
index 280b2976..26d5718a 100644
--- a/gi/pyp-topics/src/contexts_corpus.cc
+++ b/gi/pyp-topics/src/contexts_corpus.cc
@@ -28,9 +28,12 @@ void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void*
Document* doc(new Document());
//cout << "READ: " << new_contexts.phrase << "\t";
- for (int i=0; i < new_contexts.contexts.size(); ++i) {
+ for (int i=0; i < new_contexts.counts.size(); ++i) {
int cache_word_count = corpus_ptr->m_dict.max();
- string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[i]);
+
+ //string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[i]);
+ int context_index = new_contexts.counts.at(i).first;
+ string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[context_index]);
// filter out singleton contexts
//if (!counts->empty()) {
@@ -45,7 +48,8 @@ void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void*
corpus_ptr->m_num_types++;
}
- int count = new_contexts.counts[i];
+ //int count = new_contexts.counts[i];
+ int count = new_contexts.counts.at(i).second;
for (int j=0; j<count; ++j)
doc->push_back(id);
corpus_ptr->m_num_terms += count;
@@ -54,7 +58,8 @@ void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void*
if (backoff_gen) {
int order = 1;
WordID backoff_id = id;
- ContextsLexer::Context backedoff_context = new_contexts.contexts[i];
+ //ContextsLexer::Context backedoff_context = new_contexts.contexts[i];
+ ContextsLexer::Context backedoff_context = new_contexts.contexts[context_index];
while (true) {
if (!corpus_ptr->m_backoff->has_backoff(backoff_id)) {
//cerr << "Backing off from " << corpus_ptr->m_dict.Convert(backoff_id) << " to ";
@@ -96,10 +101,13 @@ void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void
map<string,int>* context_counts = (static_cast<map<string,int>*>(extra));
- for (int i=0; i < new_contexts.contexts.size(); ++i) {
- int count = new_contexts.counts[i];
+ for (int i=0; i < new_contexts.counts.size(); ++i) {
+ int context_index = new_contexts.counts.at(i).first;
+ int count = new_contexts.counts.at(i).second;
+ //int count = new_contexts.counts[i];
pair<map<string,int>::iterator,bool> result
- = context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[i]),count));
+ = context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[context_index]),count));
+ //= context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[i]),count));
if (!result.second)
result.first->second += count;
}