diff options
author | philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 02:11:28 +0000 |
---|---|---|
committer | philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 02:11:28 +0000 |
commit | abf2311a2665097de2fd27fb83e1acbbe2a26f59 (patch) | |
tree | 5680454bf719ccda38d3cb440b310d525ff70555 /gi/pyp-topics/src/contexts_corpus.cc | |
parent | 8760b7b41970bfbea6ba124a63633c139331b512 (diff) |
added missing file.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@402 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.cc')
-rw-r--r-- | gi/pyp-topics/src/contexts_corpus.cc | 18 |
1 files changed, 15 insertions, 3 deletions
```diff
diff --git a/gi/pyp-topics/src/contexts_corpus.cc b/gi/pyp-topics/src/contexts_corpus.cc
index 1cf69429..92b1b34c 100644
--- a/gi/pyp-topics/src/contexts_corpus.cc
+++ b/gi/pyp-topics/src/contexts_corpus.cc
@@ -15,6 +15,8 @@ using namespace std;
 // ContextsCorpus
 //////////////////////////////////////////////////
 
+bool read_callback_binary_contexts = false;
+
 void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void* extra) {
   assert(new_contexts.contexts.size() == new_contexts.counts.size());
 
@@ -50,9 +52,15 @@ void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void*
 
     //int count = new_contexts.counts[i];
     int count = new_contexts.counts.at(i).second;
-    for (int j=0; j<count; ++j)
+    if (read_callback_binary_contexts) {
       doc->push_back(id);
-    corpus_ptr->m_num_terms += count;
+      corpus_ptr->m_num_terms++;
+    }
+    else {
+      for (int j=0; j<count; ++j)
+        doc->push_back(id);
+      corpus_ptr->m_num_terms += count;
+    }
 
     // generate the backoff map
     if (backoff_gen) {
@@ -104,6 +112,7 @@ void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void
   for (int i=0; i < (int)new_contexts.counts.size(); ++i) {
     int context_index = new_contexts.counts.at(i).first;
     int count = new_contexts.counts.at(i).second;
+    //if (read_callback_binary_contexts) count = 1;
     //int count = new_contexts.counts[i];
     pair<map<string,int>::iterator,bool> result
       = context_counts->insert(make_pair(Dict::toString(new_contexts.contexts[context_index]),count));
@@ -116,7 +125,10 @@ void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void
 
 unsigned ContextsCorpus::read_contexts(const string &filename,
                                        BackoffGenerator* backoff_gen_ptr,
-                                       bool /*filter_singeltons*/) {
+                                       bool /*filter_singeltons*/,
+                                       bool binary_contexts) {
+  read_callback_binary_contexts = binary_contexts;
+
   map<string,int> counts;
   //if (filter_singeltons)
   {
```