Merge branch 'const_reorder' into softsyn

author: Wu, Ke <wuke@cs.umd.edu> 2014-12-06 10:37:56 -0500
committer: Wu, Ke <wuke@cs.umd.edu> 2014-12-06 10:37:56 -0500
commit: 41bf2308139d08c992a3342154d1c8b96b44f681 (patch)
tree: a4e3bd820b4923151299588d74ed256d4e65472c /utils/argument_reorder_model.cc
parent: 34b7c1e7c3aa5f9ee780be65effc40726d849303 (diff)
parent: a21959213f9b1cc15befae52dbb5091e848de7a1 (diff)
1 files changed, 52 insertions, 74 deletions
diff --git a/utils/argument_reorder_model.cc b/utils/argument_reorder_model.cc
index 5caf318f..c4e90cba 100644
--- a/utils/argument_reorder_model.cc
+++ b/utils/argument_reorder_model.cc
@@ -12,60 +12,49 @@
 #include <string>
 #include <vector>
 
+#include "filelib.h"
+
 #include "argument_reorder_model.h"
-#include "synutils.h"
 #include "tsuruoka_maxent.h"
 
 using namespace std;
 
 inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
                                     const char* pszNewFName) {
-  SFReader* pFReader = new STxtFileReader(pszFName);
-  char* pszLine = new char[100001];
-  int iLen;
   Map hashPredicate;
-  while (pFReader->fnReadNextLine(pszLine, &iLen)) {
-    if (iLen == 0) continue;
-
-    vector<string> vecTerms;
-    SplitOnWhitespace(string(pszLine), &vecTerms);
-
-    for (size_t i = 0; i < vecTerms.size() - 1; i++) {
-      Iterator iter = hashPredicate.find(vecTerms[i]);
-      if (iter == hashPredicate.end()) {
-        hashPredicate[vecTerms[i]] = 1;
-
-      } else {
-        iter->second++;
+  {  // Pass 1: count every token except the last one on each line.
+    ReadFile in(pszFName);
+    string line;
+    while (getline(*in.stream(), line)) {
+      if (!line.size()) continue;
+      vector<string> terms;
+      SplitOnWhitespace(line, &terms);
+      for (size_t i = 0; i + 1 < terms.size(); ++i) {
+        ++hashPredicate[terms[i]];
       }
     }
   }
-  delete pFReader;
-
-  pFReader = new STxtFileReader(pszFName);
-  FILE* fpOut = fopen(pszNewFName, "w");
-  while (pFReader->fnReadNextLine(pszLine, &iLen)) {
-    if (iLen == 0) continue;
-
-    vector<string> vecTerms;
-    SplitOnWhitespace(string(pszLine), &vecTerms);
-    ostringstream ostr;
-    for (size_t i = 0; i < vecTerms.size() - 1; i++) {
-      Iterator iter = hashPredicate.find(vecTerms[i]);
-      assert(iter != hashPredicate.end());
-      if (iter->second >= iCutoff) {
-        ostr << vecTerms[i] << " ";
+
+  {  // Pass 2: drop rare tokens; the last token is never filtered.
+    ReadFile in(pszFName);
+    WriteFile out(pszNewFName);
+    string line;
+    while (getline(*in.stream(), line)) {
+      if (!line.size()) continue;
+      vector<string> terms;
+      SplitOnWhitespace(line, &terms);
+      bool written = false;
+      for (size_t i = 0; i + 1 < terms.size(); ++i) {
+        if (hashPredicate[terms[i]] >= iCutoff) {
+          (*out.stream()) << terms[i] << " ";
+          written = true;
+        }
+      }
+      if (written) {
+        (*out.stream()) << terms.back() << "\n";
       }
-    }
-    if (ostr.str().length() > 0) {
-      ostr << vecTerms[vecTerms.size() - 1];
-      fprintf(fpOut, "%s\n", ostr.str().c_str());
     }
   }
-  fclose(fpOut);
-  delete pFReader;
-
-  delete[] pszLine;
 }
 
 struct SArgumentReorderTrainer {
@@ -127,8 +116,8 @@ struct SArgumentReorderTrainer {
       ) {
     SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
     SSrlSentenceReader* pSRLReader = new SSrlSentenceReader(pszSRLFname);
-    STxtFileReader* pTxtSReader = new STxtFileReader(pszSourceFname);
-    STxtFileReader* pTxtTReader = new STxtFileReader(pszTargetFname);
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
 
     Map* pMapPredicate;
     if (pszTopPredicateFname != NULL)
@@ -136,13 +125,10 @@ struct SArgumentReorderTrainer {
     else
       pMapPredicate = NULL;
 
-    char* pszLine = new char[50001];
+    string line;
 
-    FILE* fpLeftOut, *fpRightOut;
-    sprintf(pszLine, "%s.left", pszInstanceFname);
-    fpLeftOut = fopen(pszLine, "w");
-    sprintf(pszLine, "%s.right", pszInstanceFname);
-    fpRightOut = fopen(pszLine, "w");
+    WriteFile left_file(pszInstanceFname + string(".left"));
+    WriteFile right_file(pszInstanceFname + string(".right"));
 
     // read sentence by sentence
     SAlignment* pAlign;
@@ -153,12 +139,12 @@ struct SArgumentReorderTrainer {
       pSRL = pSRLReader->fnReadNextSrlSentence();
       assert(pSRL != NULL);
       pTree = pSRL->m_pTree;
-      assert(pTxtSReader->fnReadNextLine(pszLine, NULL));
+      assert(getline(*source_file.stream(), line));
       vector<string> vecSTerms;
-      SplitOnWhitespace(string(pszLine), &vecSTerms);
-      assert(pTxtTReader->fnReadNextLine(pszLine, NULL));
+      SplitOnWhitespace(line, &vecSTerms);
+      assert(getline(*target_file.stream(), line));
       vector<string> vecTTerms;
-      SplitOnWhitespace(string(pszLine), &vecTTerms);
+      SplitOnWhitespace(line, &vecTTerms);
       // vecTPOSTerms.size() == 0, given the case when an english sentence fails
       // parsing
 
@@ -204,10 +190,10 @@ struct SArgumentReorderTrainer {
             // strOutcome.c_str());
             // fprintf(fpOut, "sentid=%d %s %s\n", iSentNum, ostr.str().c_str(),
             // strOutcome.c_str());
-            fprintf(fpLeftOut, "%s %s\n", ostr.str().c_str(),
-                    strLeftOutcome.c_str());
-            fprintf(fpRightOut, "%s %s\n", ostr.str().c_str(),
-                    strRightOutcome.c_str());
+            (*left_file.stream()) << ostr.str() << " " << strLeftOutcome
+                                  << "\n";
+            (*right_file.stream()) << ostr.str() << " " << strRightOutcome
+                                   << "\n";
           }
         }
       }
@@ -218,36 +204,28 @@ struct SArgumentReorderTrainer {
 
       if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
     }
-    delete[] pszLine;
-
-    fclose(fpLeftOut);
-    fclose(fpRightOut);
 
     delete pAlignReader;
     delete pSRLReader;
-    delete pTxtSReader;
-    delete pTxtTReader;
   }
 
   Map* fnLoadTopPredicates(const char* pszTopPredicateFname) {
     if (pszTopPredicateFname == NULL) return NULL;
 
     Map* pMapPredicate = new Map();
-    STxtFileReader* pReader = new STxtFileReader(pszTopPredicateFname);
-    char* pszLine = new char[50001];
+    // Each line is "<key> <count>"; lines starting with '#' are skipped.
+    ReadFile in(pszTopPredicateFname);
+    // Reading stops at the first count below 100; key = text before the space.
+    string line;
     int iNumCount = 0;
-    while (pReader->fnReadNextLine(pszLine, NULL)) {
-      if (pszLine[0] == '#') continue;
-      char* p = strchr(pszLine, ' ');
-      assert(p != NULL);
-      p[0] = '\0';
-      p++;
-      int iCount = atoi(p);
+    while (getline(*in.stream(), line)) {
+      if (line.size() && line[0] == '#') continue;
+      auto p = line.find(' ');
+      assert(p != string::npos);
+      int iCount = atoi(line.substr(p + 1).c_str());
       if (iCount < 100) break;
-      (*pMapPredicate)[string(pszLine)] = iNumCount++;
+      (*pMapPredicate)[line.substr(0, p)] = iNumCount++;
     }
-    delete pReader;
-    delete[] pszLine;
     return pMapPredicate;
   }
 };
author	Wu, Ke <wuke@cs.umd.edu>	2014-12-06 10:37:56 -0500
committer	Wu, Ke <wuke@cs.umd.edu>	2014-12-06 10:37:56 -0500
commit	41bf2308139d08c992a3342154d1c8b96b44f681 (patch)
tree	a4e3bd820b4923151299588d74ed256d4e65472c /utils/argument_reorder_model.cc
parent	34b7c1e7c3aa5f9ee780be65effc40726d849303 (diff)
parent	a21959213f9b1cc15befae52dbb5091e848de7a1 (diff)