summary refs log tree commit diff
path: root/gi/pyp-topics/src
diff options
context:
space:
mode:
author bothameister <bothameister@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-13 23:37:29 +0000
committer bothameister <bothameister@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-13 23:37:29 +0000
commit c29321deae3bc178e9ea0501f598a40894c6bc98 (patch)
tree f5f84e6554272c580eaef5a1f42643949809093f /gi/pyp-topics/src
parent 1975a182d76171fee56faf671bedcbf13b9dc9ba (diff)
added thresholding for span labelling
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@247 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/src')
-rw-r--r-- gi/pyp-topics/src/pyp-topics.cc 10
-rw-r--r-- gi/pyp-topics/src/pyp-topics.hh 4
-rw-r--r-- gi/pyp-topics/src/train-contexts.cc 7
-rw-r--r-- gi/pyp-topics/src/train.cc 4
4 files changed, 14 insertions, 11 deletions
diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc
index e528a923..3614fb22 100644
--- a/gi/pyp-topics/src/pyp-topics.cc
+++ b/gi/pyp-topics/src/pyp-topics.cc
@@ -344,7 +344,7 @@ int PYPTopics::max_topic() const {
return current_topic;
}
-int PYPTopics::max(const DocumentId& doc) const {
+std::pair<int,PYPTopics::F> PYPTopics::max(const DocumentId& doc) const {
//std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
// collect probs
F current_max=0.0;
@@ -366,10 +366,11 @@ int PYPTopics::max(const DocumentId& doc) const {
}
}
assert(current_topic >= 0);
- return current_topic;
+ assert(current_max >= 0);
+ return std::make_pair(current_topic, current_max);
}
-int PYPTopics::max(const DocumentId& doc, const Term& term) const {
+std::pair<int,PYPTopics::F> PYPTopics::max(const DocumentId& doc, const Term& term) const {
//std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
// collect probs
F current_max=0.0;
@@ -392,7 +393,8 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) const {
}
}
assert(current_topic >= 0);
- return current_topic;
+ assert(current_max >= 0);
+ return std::make_pair(current_topic,current_max);
}
std::ostream& PYPTopics::print_document_topics(std::ostream& out) const {
diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh
index 5e1fc6d6..32d2d939 100644
--- a/gi/pyp-topics/src/pyp-topics.hh
+++ b/gi/pyp-topics/src/pyp-topics.hh
@@ -33,8 +33,8 @@ public:
int freq_cutoff_interval=0);
int sample(const DocumentId& doc, const Term& term);
- int max(const DocumentId& doc, const Term& term) const;
- int max(const DocumentId& doc) const;
+ std::pair<int,F> max(const DocumentId& doc, const Term& term) const;
+ std::pair<int,F> max(const DocumentId& doc) const;
int max_topic() const;
void set_backoff(const std::string& filename) {
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index a673bf4e..0a48d3d9 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -131,14 +131,15 @@ int main(int argc, char **argv)
//insert_result.first++;
}
documents_out << contexts_corpus.key(document_id) << '\t';
- documents_out << model.max(document_id) << " " << corpusIt->size() << " ||| ";
+ documents_out << model.max(document_id).first << " " << corpusIt->size() << " ||| ";
for (std::vector<int>::const_iterator termIt=unique_terms.begin();
termIt != unique_terms.end(); ++termIt) {
if (termIt != unique_terms.begin())
documents_out << " ||| ";
vector<std::string> strings = contexts_corpus.context2string(*termIt);
copy(strings.begin(), strings.end(),ostream_iterator<std::string>(documents_out, " "));
- documents_out << "||| C=" << model.max(document_id, *termIt);
+ std::pair<int,PYPTopics::F> maxinfo = model.max(document_id, *termIt);
+ documents_out << "||| C=" << maxinfo.first << " P=" << maxinfo.second;
}
documents_out <<endl;
@@ -150,7 +151,7 @@ int main(int argc, char **argv)
default_topics << model.max_topic() <<endl;
for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
vector<std::string> strings = contexts_corpus.context2string(termIt->first);
- default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
+ default_topics << model.max(-1, termIt->first).first << " ||| " << termIt->second << " ||| ";
copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
default_topics <<endl;
}
diff --git a/gi/pyp-topics/src/train.cc b/gi/pyp-topics/src/train.cc
index 3462f26c..db7ca46e 100644
--- a/gi/pyp-topics/src/train.cc
+++ b/gi/pyp-topics/src/train.cc
@@ -99,7 +99,7 @@ int main(int argc, char **argv)
documents_out << unique_terms.size();
for (std::vector<int>::const_iterator termIt=unique_terms.begin();
termIt != unique_terms.end(); ++termIt)
- documents_out << " " << *termIt << ":" << model.max(document_id, *termIt);
+ documents_out << " " << *termIt << ":" << model.max(document_id, *termIt).first;
documents_out << std::endl;
}
documents_out.close();
@@ -121,7 +121,7 @@ int main(int argc, char **argv)
int index=0;
for (DocumentTerms::const_iterator instanceIt=corpusIt->begin();
instanceIt != corpusIt->end(); ++instanceIt, ++index) {
- int topic = model.max(instanceIt->doc, instanceIt->term);
+ int topic = model.max(instanceIt->doc, instanceIt->term).first;
if (index != 0) topics_out << " ";
topics_out << topic;
}