4 files changed, 112 insertions, 276 deletions
diff --git a/utils/argument_reorder_model.cc b/utils/argument_reorder_model.cc
index 5caf318f..c4e90cba 100644
--- a/utils/argument_reorder_model.cc
+++ b/utils/argument_reorder_model.cc
@@ -12,60 +12,49 @@
 #include <string>
 #include <vector>
 
+#include "filelib.h"
+
 #include "argument_reorder_model.h"
-#include "synutils.h"
 #include "tsuruoka_maxent.h"
 
 using namespace std;
 
 inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
                                     const char* pszNewFName) {
-  SFReader* pFReader = new STxtFileReader(pszFName);
-  char* pszLine = new char[100001];
-  int iLen;
   Map hashPredicate;
-  while (pFReader->fnReadNextLine(pszLine, &iLen)) {
-    if (iLen == 0) continue;
-
-    vector<string> vecTerms;
-    SplitOnWhitespace(string(pszLine), &vecTerms);
-
-    for (size_t i = 0; i < vecTerms.size() - 1; i++) {
-      Iterator iter = hashPredicate.find(vecTerms[i]);
-      if (iter == hashPredicate.end()) {
-        hashPredicate[vecTerms[i]] = 1;
-
-      } else {
-        iter->second++;
+  {  // Pass 1: count every token except the last one on each line.
+    ReadFile in(pszFName);
+    string line;
+    while (getline(*in.stream(), line)) {
+      if (!line.size()) continue;
+      vector<string> terms;
+      SplitOnWhitespace(line, &terms);
+      for (size_t i = 0; i + 1 < terms.size(); ++i) {  // safe when empty
+        ++hashPredicate[terms[i]];  // last token stays uncounted, as before
       }
     }
   }
-  delete pFReader;
-
-  pFReader = new STxtFileReader(pszFName);
-  FILE* fpOut = fopen(pszNewFName, "w");
-  while (pFReader->fnReadNextLine(pszLine, &iLen)) {
-    if (iLen == 0) continue;
-
-    vector<string> vecTerms;
-    SplitOnWhitespace(string(pszLine), &vecTerms);
-    ostringstream ostr;
-    for (size_t i = 0; i < vecTerms.size() - 1; i++) {
-      Iterator iter = hashPredicate.find(vecTerms[i]);
-      assert(iter != hashPredicate.end());
-      if (iter->second >= iCutoff) {
-        ostr << vecTerms[i] << " ";
+
+  {  // Pass 2: keep tokens seen >= iCutoff times; re-append the last token.
+    ReadFile in(pszFName);
+    WriteFile out(pszNewFName);
+    string line;
+    while (getline(*in.stream(), line)) {
+      if (!line.size()) continue;
+      vector<string> terms;
+      SplitOnWhitespace(line, &terms);
+      bool written = false;
+      for (size_t i = 0; i + 1 < terms.size(); ++i) {  // last token exempt
+        if (hashPredicate[terms[i]] >= iCutoff) {
+          (*out.stream()) << terms[i] << " ";
+          written = true;
+        }
+      }
+      if (written) {  // lines with no surviving token are dropped entirely
+        (*out.stream()) << terms.back() << "\n";  // presumably the outcome
       }
-    }
-    if (ostr.str().length() > 0) {
-      ostr << vecTerms[vecTerms.size() - 1];
-      fprintf(fpOut, "%s\n", ostr.str().c_str());
     }
   }
-  fclose(fpOut);
-  delete pFReader;
-
-  delete[] pszLine;
 }
 
 struct SArgumentReorderTrainer {
@@ -127,8 +116,8 @@ struct SArgumentReorderTrainer {
       ) {
     SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
     SSrlSentenceReader* pSRLReader = new SSrlSentenceReader(pszSRLFname);
-    STxtFileReader* pTxtSReader = new STxtFileReader(pszSourceFname);
-    STxtFileReader* pTxtTReader = new STxtFileReader(pszTargetFname);
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
 
     Map* pMapPredicate;
     if (pszTopPredicateFname != NULL)
@@ -136,13 +125,10 @@ struct SArgumentReorderTrainer {
     else
       pMapPredicate = NULL;
 
-    char* pszLine = new char[50001];
+    string line;
 
-    FILE* fpLeftOut, *fpRightOut;
-    sprintf(pszLine, "%s.left", pszInstanceFname);
-    fpLeftOut = fopen(pszLine, "w");
-    sprintf(pszLine, "%s.right", pszInstanceFname);
-    fpRightOut = fopen(pszLine, "w");
+    WriteFile left_file(pszInstanceFname + string(".left"));
+    WriteFile right_file(pszInstanceFname + string(".right"));
 
     // read sentence by sentence
     SAlignment* pAlign;
@@ -153,12 +139,12 @@ struct SArgumentReorderTrainer {
       pSRL = pSRLReader->fnReadNextSrlSentence();
       assert(pSRL != NULL);
       pTree = pSRL->m_pTree;
-      assert(pTxtSReader->fnReadNextLine(pszLine, NULL));
+      assert(getline(*source_file.stream(), line));
       vector<string> vecSTerms;
-      SplitOnWhitespace(string(pszLine), &vecSTerms);
-      assert(pTxtTReader->fnReadNextLine(pszLine, NULL));
+      SplitOnWhitespace(line, &vecSTerms);
+      assert(getline(*target_file.stream(), line));
       vector<string> vecTTerms;
-      SplitOnWhitespace(string(pszLine), &vecTTerms);
+      SplitOnWhitespace(line, &vecTTerms);
       // vecTPOSTerms.size() == 0, given the case when an english sentence fails
       // parsing
 
@@ -204,10 +190,10 @@ struct SArgumentReorderTrainer {
             // strOutcome.c_str());
             // fprintf(fpOut, "sentid=%d %s %s\n", iSentNum, ostr.str().c_str(),
             // strOutcome.c_str());
-            fprintf(fpLeftOut, "%s %s\n", ostr.str().c_str(),
-                    strLeftOutcome.c_str());
-            fprintf(fpRightOut, "%s %s\n", ostr.str().c_str(),
-                    strRightOutcome.c_str());
+            (*left_file.stream()) << ostr.str() << " " << strLeftOutcome
+                                  << "\n";
+            (*right_file.stream()) << ostr.str() << " " << strRightOutcome
+                                   << "\n";
           }
         }
       }
@@ -218,36 +204,28 @@ struct SArgumentReorderTrainer {
 
       if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
     }
-    delete[] pszLine;
-
-    fclose(fpLeftOut);
-    fclose(fpRightOut);
 
     delete pAlignReader;
     delete pSRLReader;
-    delete pTxtSReader;
-    delete pTxtTReader;
   }
 
   Map* fnLoadTopPredicates(const char* pszTopPredicateFname) {
     if (pszTopPredicateFname == NULL) return NULL;
 
     Map* pMapPredicate = new Map();
-    STxtFileReader* pReader = new STxtFileReader(pszTopPredicateFname);
-    char* pszLine = new char[50001];
+    // Lines starting with '#' are skipped; others are read as "<key> <count>".
+    ReadFile in(pszTopPredicateFname);
+    // Keys get indices in file order; loading stops at the first count < 100.
+    string line;
     int iNumCount = 0;
-    while (pReader->fnReadNextLine(pszLine, NULL)) {
-      if (pszLine[0] == '#') continue;
-      char* p = strchr(pszLine, ' ');
-      assert(p != NULL);
-      p[0] = '\0';
-      p++;
-      int iCount = atoi(p);
+    while (getline(*in.stream(), line)) {
+      if (line.size() && line[0] == '#') continue;
+      auto p = line.find(' ');  // separator between key and count
+      assert(p != string::npos);
+      int iCount = atoi(line.substr(p + 1).c_str());
       if (iCount < 100) break;
-      (*pMapPredicate)[string(pszLine)] = iNumCount++;
+      (*pMapPredicate)[line.substr(0, p)] = iNumCount++;  // key excludes count
     }
-    delete pReader;
-    delete[] pszLine;
     return pMapPredicate;
   }
 };
diff --git a/utils/constituent_reorder_model.cc b/utils/constituent_reorder_model.cc
index df75a1a0..bdb7c5d1 100644
--- a/utils/constituent_reorder_model.cc
+++ b/utils/constituent_reorder_model.cc
@@ -5,15 +5,17 @@
  *      Author: junhuili
  */
 
+#include <string>
+#include <unordered_map>
+
 #include <boost/program_options.hpp>
 
+#include "filelib.h"
+
 #include "alignment.h"
 #include "tree.h"
-#include "synutils.h"
 #include "tsuruoka_maxent.h"
 
-#include <unordered_map>
-
 using namespace std;
 
 typedef std::unordered_map<std::string, int> Map;
@@ -23,52 +25,40 @@ namespace po = boost::program_options;
 
 inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
                                     const char* pszNewFName) {
-  SFReader* pFReader = new STxtFileReader(pszFName);
-  char* pszLine = new char[100001];
-  int iLen;
   Map hashPredicate;
-  while (pFReader->fnReadNextLine(pszLine, &iLen)) {
-    if (iLen == 0) continue;
-
-    vector<string> vecTerms;
-    SplitOnWhitespace(string(pszLine), &vecTerms);
-
-    for (size_t i = 0; i < vecTerms.size() - 1; i++) {
-      Iterator iter = hashPredicate.find(vecTerms[i]);
-      if (iter == hashPredicate.end()) {
-        hashPredicate[vecTerms[i]] = 1;
-
-      } else {
-        iter->second++;
+  {  // Pass 1: count every token except the last one on each line.
+    ReadFile f(pszFName);
+    string line;
+    while (getline(*f.stream(), line)) {
+      if (!line.size()) continue;
+      vector<string> terms;
+      SplitOnWhitespace(line, &terms);
+      for (size_t i = 0; i + 1 < terms.size(); ++i) {  // safe when empty
+        ++hashPredicate[terms[i]];  // last token stays uncounted, as before
       }
     }
   }
-  delete pFReader;
-
-  pFReader = new STxtFileReader(pszFName);
-  FILE* fpOut = fopen(pszNewFName, "w");
-  while (pFReader->fnReadNextLine(pszLine, &iLen)) {
-    if (iLen == 0) continue;
-
-    vector<string> vecTerms;
-    SplitOnWhitespace(string(pszLine), &vecTerms);
-    ostringstream ostr;
-    for (size_t i = 0; i < vecTerms.size() - 1; i++) {
-      Iterator iter = hashPredicate.find(vecTerms[i]);
-      assert(iter != hashPredicate.end());
-      if (iter->second >= iCutoff) {
-        ostr << vecTerms[i] << " ";
+
+  {  // Pass 2: keep tokens seen >= iCutoff times; re-append the last token.
+    ReadFile in(pszFName);
+    WriteFile out(pszNewFName);
+    string line;
+    while (getline(*in.stream(), line)) {
+      if (!line.size()) continue;
+      vector<string> terms;
+      SplitOnWhitespace(line, &terms);
+      bool written = false;
+      for (size_t i = 0; i + 1 < terms.size(); ++i) {  // last token exempt
+        if (hashPredicate[terms[i]] >= iCutoff) {
+          (*out.stream()) << terms[i] << " ";
+          written = true;
+        }
+      }
+      if (written) {  // lines with no surviving token are dropped entirely
+        (*out.stream()) << terms.back() << "\n";  // presumably the outcome
       }
-    }
-    if (ostr.str().length() > 0) {
-      ostr << vecTerms[vecTerms.size() - 1];
-      fprintf(fpOut, "%s\n", ostr.str().c_str());
     }
   }
-  fclose(fpOut);
-  delete pFReader;
-
-  delete[] pszLine;
 }
 
 struct SConstReorderTrainer {
@@ -408,31 +398,29 @@ delete pZhangleMaxent;*/
       ) {
     SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
     SParseReader* pParseReader = new SParseReader(pszSynFname, false);
-    STxtFileReader* pTxtSReader = new STxtFileReader(pszSourceFname);
-    STxtFileReader* pTxtTReader = new STxtFileReader(pszTargetFname);
 
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
     string strInstanceLeftFname = string(pszInstanceFname) + string(".left");
     string strInstanceRightFname = string(pszInstanceFname) + string(".right");
-
-    FILE* fpLeftOut = fopen(strInstanceLeftFname.c_str(), "w");
-    assert(fpLeftOut != NULL);
-
-    FILE* fpRightOut = fopen(strInstanceRightFname.c_str(), "w");
-    assert(fpRightOut != NULL);
+    WriteFile left_file(strInstanceLeftFname);
+    WriteFile right_file(strInstanceRightFname);
 
     // read sentence by sentence
     SAlignment* pAlign;
     SParsedTree* pTree;
-    char* pszLine = new char[50001];
+    string line;
     int iSentNum = 0;
     while ((pAlign = pAlignReader->fnReadNextAlignment()) != NULL) {
       pTree = pParseReader->fnReadNextParseTree();
-      assert(pTxtSReader->fnReadNextLine(pszLine, NULL));
+
+      assert(getline(*source_file.stream(), line));
       vector<string> vecSTerms;
-      SplitOnWhitespace(string(pszLine), &vecSTerms);
-      assert(pTxtTReader->fnReadNextLine(pszLine, NULL));
+      SplitOnWhitespace(line, &vecSTerms);
+
+      assert(getline(*target_file.stream(), line));
       vector<string> vecTTerms;
-      SplitOnWhitespace(string(pszLine), &vecTTerms);
+      SplitOnWhitespace(line, &vecTTerms);
 
       if (pTree != NULL) {
 
@@ -475,16 +463,18 @@ delete pZhangleMaxent;*/
                                vecLeftPosition, vecSTerms, vecTTerms,
                                strLeftOutcome, ostr);
 
+            string ostr_str = ostr.str();
+
             // fprintf(stderr, "%s %s\n", ostr.str().c_str(),
             // strLeftOutcome.c_str());
-            fprintf(fpLeftOut, "%s %s\n", ostr.str().c_str(),
-                    strLeftOutcome.c_str());
+            (*left_file.stream()) << ostr_str << " " << strLeftOutcome << "\n";
 
             string strRightOutcome;
             fnGetOutcome(vecRightPosition[j - 1], vecRightPosition[j],
                          strRightOutcome);
-            fprintf(fpRightOut, "%s LeftOrder=%s %s\n", ostr.str().c_str(),
-                    strLeftOutcome.c_str(), strRightOutcome.c_str());
+            (*right_file.stream()) << ostr_str
+                                   << " LeftOrder=" << strLeftOutcome << " "
+                                   << strRightOutcome << "\n";
           }
         }
         delete pTree;
@@ -496,13 +486,8 @@ delete pZhangleMaxent;*/
       if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
     }
 
-    fclose(fpLeftOut);
-    fclose(fpRightOut);
     delete pAlignReader;
     delete pParseReader;
-    delete pTxtSReader;
-    delete pTxtTReader;
-    delete[] pszLine;
   }
 
   void fnGenerateInstanceFile2(
@@ -514,25 +499,26 @@ delete pZhangleMaxent;*/
       ) {
     SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
     SParseReader* pParseReader = new SParseReader(pszSynFname, false);
-    STxtFileReader* pTxtSReader = new STxtFileReader(pszSourceFname);
-    STxtFileReader* pTxtTReader = new STxtFileReader(pszTargetFname);
 
-    FILE* fpOut = fopen(pszInstanceFname, "w");
-    assert(fpOut != NULL);
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
+
+    WriteFile output_file(pszInstanceFname);
 
     // read sentence by sentence
     SAlignment* pAlign;
     SParsedTree* pTree;
-    char* pszLine = new char[50001];
+    string line;
     int iSentNum = 0;
     while ((pAlign = pAlignReader->fnReadNextAlignment()) != NULL) {
       pTree = pParseReader->fnReadNextParseTree();
-      assert(pTxtSReader->fnReadNextLine(pszLine, NULL));
+      assert(getline(*source_file.stream(), line));
       vector<string> vecSTerms;
-      SplitOnWhitespace(string(pszLine), &vecSTerms);
-      assert(pTxtTReader->fnReadNextLine(pszLine, NULL));
+      SplitOnWhitespace(line, &vecSTerms);
+
+      assert(getline(*target_file.stream(), line));
       vector<string> vecTTerms;
-      SplitOnWhitespace(string(pszLine), &vecTTerms);
+      SplitOnWhitespace(line, &vecTTerms);
 
       if (pTree != NULL) {
 
@@ -556,7 +542,7 @@ delete pZhangleMaxent;*/
 
             // fprintf(stderr, "%s %s\n", ostr.str().c_str(),
             // strOutcome.c_str());
-            fprintf(fpOut, "%s %s\n", ostr.str().c_str(), strOutcome.c_str());
+            (*output_file.stream()) << ostr.str() << " " << strOutcome << "\n";
           }
         }
         delete pTree;
@@ -568,12 +554,8 @@ delete pZhangleMaxent;*/
       if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
     }
 
-    fclose(fpOut);
     delete pAlignReader;
     delete pParseReader;
-    delete pTxtSReader;
-    delete pTxtTReader;
-    delete[] pszLine;
   }
 };
 
diff --git a/utils/synutils.h b/utils/synutils.h
deleted file mode 100644
index f611553e..00000000
--- a/utils/synutils.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * utility.h
- *
- *  Created on: Jun 24, 2013
- *      Author: lijunhui
- */
-
-#ifndef UTILITY_H_
-#define UTILITY_H_
-
-#include <zlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <string>
-#include <unordered_map>
-
-typedef std::unordered_map<std::string, int> MapString2Int;
-typedef std::unordered_map<std::string, float> MapString2Float;
-typedef std::unordered_map<std::string, float>::iterator
-    MapString2FloatIterator;
-
-struct SFReader {
-  SFReader() {}
-  virtual ~SFReader() {}
-
-  virtual bool fnReadNextLine(char* pszLine, int* piLength) = 0;
-  virtual bool fnReadNextLine(std::string& strLine) = 0;
-};
-
-struct STxtFileReader : public SFReader {
-  STxtFileReader(const char* pszFname) {
-    m_fpIn = fopen(pszFname, "r");
-    assert(m_fpIn != NULL);
-  }
-  ~STxtFileReader() {
-    if (m_fpIn != NULL) fclose(m_fpIn);
-  }
-
-  bool fnReadNextLine(char* pszLine, int* piLength) {
-    if (feof(m_fpIn) == true) return false;
-
-    int iLen;
-
-    pszLine[0] = '\0';
-
-    fgets(pszLine, 10001, m_fpIn);
-    iLen = strlen(pszLine);
-    if (iLen == 0) return false;
-    while (iLen > 0 && pszLine[iLen - 1] > 0 && pszLine[iLen - 1] < 33) {
-      pszLine[iLen - 1] = '\0';
-      iLen--;
-    }
-
-    if (piLength != NULL) (*piLength) = iLen;
-
-    return true;
-  }
-
-  bool fnReadNextLine(std::string& strLine) {
-    char* pszLine = new char[10001];
-    bool bOut = fnReadNextLine(pszLine, NULL);
-    if (bOut)
-      strLine = std::string(pszLine);
-    else
-      strLine = std::string("");
-    delete[] pszLine;
-
-    return bOut;
-  }
-
- private:
-  FILE* m_fpIn;
-};
-
-struct SGZFileReader : public SFReader {
-  SGZFileReader(const char* pszFname) {
-    m_fpIn = gzopen(pszFname, "r");
-    assert(m_fpIn != NULL);
-  }
-  ~SGZFileReader() {
-    if (m_fpIn != NULL) gzclose(m_fpIn);
-  }
-
-  bool fnReadNextLine(char* pszLine, int* piLength) {
-    if (m_fpIn == NULL) exit(0);
-    if (gzeof(m_fpIn) == true) return false;
-
-    int iLen;
-
-    pszLine[0] = '\0';
-
-    gzgets(m_fpIn, pszLine, 10001);
-    iLen = strlen(pszLine);
-    while (iLen > 0 && pszLine[iLen - 1] > 0 && pszLine[iLen - 1] < 33) {
-      pszLine[iLen - 1] = '\0';
-      iLen--;
-    }
-
-    if (piLength != NULL) (*piLength) = iLen;
-
-    return true;
-  }
-
-  bool fnReadNextLine(std::string& strLine) {
-    char* pszLine = new char[10001];
-    bool bOut = fnReadNextLine(pszLine, NULL);
-    if (bOut)
-      strLine = std::string(pszLine);
-    else
-      strLine = std::string("");
-    delete[] pszLine;
-
-    return bOut;
-  }
-
- private:
-  gzFile m_fpIn;
-};
-
-#endif /* UTILITY_H_ */
diff --git a/utils/tsuruoka_maxent.h b/utils/tsuruoka_maxent.h
index 550a4b7f..82da44ff 100644
--- a/utils/tsuruoka_maxent.h
+++ b/utils/tsuruoka_maxent.h
@@ -13,7 +13,6 @@
 #include <utility>
 #include <vector>
 
-#include "synutils.h"
 #include "stringlib.h"
 #include "maxent.h"