From 595dd3f0e577f522d32318acb2ad0fe288e0b00f Mon Sep 17 00:00:00 2001
From: "philblunsom@gmail.com"
 <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Fri, 2 Jul 2010 14:31:13 +0000
Subject: Changed timer to be mac compatible.

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@103 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 gi/pyp-topics/src/pyp-topics.cc     | 57 ++++++++++++++++++++++++++++++-------
 gi/pyp-topics/src/pyp-topics.hh     |  1 +
 gi/pyp-topics/src/train-contexts.cc | 12 ++++++++
 3 files changed, 60 insertions(+), 10 deletions(-)

(limited to 'gi')

diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc
index b727458b..796ab7af 100644
--- a/gi/pyp-topics/src/pyp-topics.cc
+++ b/gi/pyp-topics/src/pyp-topics.cc
@@ -3,24 +3,40 @@
 
 #include <boost/date_time/posix_time/posix_time_types.hpp>
 #include <sys/time.h>
+#include <mach/mach_time.h>  
+
 
 struct Timer {
+  // Stopwatch built on mach_absolute_time(); timing starts at construction.
+  // NOTE(review): macOS-only as written; other platforms need a fallback.
   Timer() { Reset(); }
   void Reset() 
   { 
-   clock_gettime(CLOCK_MONOTONIC, &start_t); 
+    // Restart timing from the current tick count.
+    start_t = mach_absolute_time();
   }
   double Elapsed() const {
-    timespec end_t;
-  
-    clock_gettime(CLOCK_MONOTONIC, &end_t); 
-  
-    const double elapsed = (end_t.tv_sec - start_t.tv_sec) 
-                + (end_t.tv_nsec - start_t.tv_nsec) / 1000000000.0;
+    // Seconds elapsed since the last Reset().
+    timespec tp;
+    const uint64_t end_t = mach_absolute_time();
+    mach_absolute_difference(end_t, start_t, &tp);
+    const double elapsed = tp.tv_sec + tp.tv_nsec / 1000000000.0;
     return elapsed;
   }
  private:
-  timespec start_t;
+  // Converts the tick interval (end - start) from mach_absolute_time() into a
+  // timespec, scaling ticks by the numer/denom ratio from mach_timebase_info().
+  void mach_absolute_difference(uint64_t end, uint64_t start, struct timespec *tp) const {
+    static mach_timebase_info_data_t info = {0,0};
+    if (info.denom == 0)  // timebase has not been queried yet
+      mach_timebase_info(&info);
+    // Scale in floating point: dividing numer by denom as integers truncates
+    // (to 0 when numer < denom), and ticks * numer can overflow 64 bits.
+    const double nanos = static_cast<double>(end - start) * info.numer / info.denom;
+    tp->tv_sec = static_cast<time_t>(nanos / 1e9);
+    tp->tv_nsec = static_cast<long>(nanos - tp->tv_sec * 1e9);
+  }
+  uint64_t start_t;  // mach_absolute_time() value captured by Reset()
 };
 
 void PYPTopics::sample(const Corpus& corpus, int samples) {
@@ -265,6 +281,23 @@ PYPTopics::F PYPTopics::prob(const Term& term, int topic, int level) const {
   return p_w_k;
 }
 
+// Index of the topic maximising m_topic_pyp.prob(k, m_topic_p0) (first one on
+// ties); returns -1 when m_use_topic_pyp is false.
+int PYPTopics::max_topic() const {
+  if (!m_use_topic_pyp) return -1;
+
+  int best_topic = -1;
+  F best_prob = 0.0;
+  for (int topic = 0; topic < m_num_topics; ++topic) {
+    const F topic_prob = m_topic_pyp.prob(topic, m_topic_p0);
+    if (!(topic_prob > best_prob)) continue;  // strict '>' keeps the first max
+    best_prob = topic_prob;
+    best_topic = topic;
+  }
+  assert(best_topic >= 0);  // some topic must have had probability > 0
+  return best_topic;
+}
+
 int PYPTopics::max(const DocumentId& doc, const Term& term) {
   //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
   // collect probs
@@ -274,8 +307,12 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) {
     F p_w_k = prob(term, k);
 
     F topic_prob = m_topic_p0;
-    if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0);
-    F p_k_d = m_document_pyps[doc].prob(k, topic_prob);
+    if (m_use_topic_pyp) 
+      topic_prob = m_topic_pyp.prob(k, m_topic_p0);
+
+    F p_k_d = 0;
+    if (doc < 0) p_k_d = topic_prob;
+    else         p_k_d = m_document_pyps[doc].prob(k, topic_prob);
 
     F prob = (p_w_k*p_k_d);
     if (prob > current_max) {
diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh
index db0f7468..7e003228 100644
--- a/gi/pyp-topics/src/pyp-topics.hh
+++ b/gi/pyp-topics/src/pyp-topics.hh
@@ -22,6 +22,7 @@ public:
   void sample(const Corpus& corpus, int samples);
   int sample(const DocumentId& doc, const Term& term);
   int max(const DocumentId& doc, const Term& term);
+  int max_topic() const;
 
   void set_backoff(const std::string& filename) {
     m_backoff.reset(new TermBackoff);
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index 02bb7b76..c58474da 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -40,6 +40,7 @@ int main(int argc, char **argv)
       ("data,d", value<string>(), "file containing the documents and context terms")
       ("topics,t", value<int>()->default_value(50), "number of topics")
       ("document-topics-out,o", value<string>(), "file to write the document topics to")
+      ("default-topics-out", value<string>(), "file to write default term topic assignments.")
       ("topic-words-out,w", value<string>(), "file to write the topic word distribution to")
       ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data")
       ("backoff-type", value<string>(), "backoff type: none|simple")
@@ -95,6 +96,7 @@ int main(int argc, char **argv)
     ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
 
     int document_id=0;
+    std::set<int> all_terms;
     for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); 
          corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
       std::vector<int> unique_terms;
@@ -111,10 +113,26 @@ int main(int argc, char **argv)
         std::vector<std::string> strings = contexts_corpus.context2string(*termIt);
         std::copy(strings.begin(), strings.end(), std::ostream_iterator<std::string>(documents_out, " "));
         documents_out << "||| C=" << model.max(document_id, *termIt);
+
+        all_terms.insert(*termIt);
       }
       documents_out << std::endl;
     }
     documents_out.close();
+
+    // Optionally write the default topics: model.max_topic() on the first line,
+    // then one line per term collected in all_terms holding the term's context
+    // strings followed by its best topic with no document (doc id -1).
+    if (vm.count("default-topics-out")) {
+      std::ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
+      default_topics << model.max_topic() << std::endl;
+      for (std::set<int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
+        std::vector<std::string> strings = contexts_corpus.context2string(*termIt);
+        // Write to default_topics: documents_out has already been closed above.
+        std::copy(strings.begin(), strings.end(), std::ostream_iterator<std::string>(default_topics, " "));
+        default_topics << model.max(-1, *termIt) << std::endl;
+      }
+    }
   }
 
   if (vm.count("topic-words-out")) {
-- 
cgit v1.2.3