major refactor, break bad circular deps

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-11 02:37:10 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-08-11 02:37:10 +0000
commit: 80686d4e567bae579ea39e009826a2de92cd4ace (patch)
tree: c3c35fcba57dde423a248f38aa121ad197c79734 /decoder/stringlib.h
parent: 3c85c407c333899f6b4bc26632d312b8e568b638 (diff)
1 files changed, 0 insertions, 267 deletions
diff --git a/decoder/stringlib.h b/decoder/stringlib.h
deleted file mode 100644
index 84e95d44..00000000
--- a/decoder/stringlib.h
+++ /dev/null
@@ -1,267 +0,0 @@
-#ifndef CDEC_STRINGLIB_H_
-#define CDEC_STRINGLIB_H_
-
-//usage: string s=MAKESTRE(1<<" "<<c);
-#define MAKESTR(expr) ((dynamic_cast<ostringstream &>(ostringstream()<<std::dec<<expr)).str())
-// std::dec (or seekp, or another manip) is needed to convert to std::ostream reference.
-
-#ifdef STRINGLIB_DEBUG
-#include <iostream>
-#define SLIBDBG(x) do { std::cerr<<"DBG(stringlib): "<<x<<std::endl; } while(0)
-#else
-#define SLIBDBG(x)
-#endif
-
-#include <map>
-#include <vector>
-#include <cctype>
-#include <cstring>
-#include <string>
-#include <sstream>
-#include <algorithm>
-
-inline std::size_t skip_ws(std::string const& s,std::size_t starting=0,char const* ws=" \t\n\r") {
-  return s.find_first_not_of(ws,starting);
-}
-
-// returns position of end of all non-ws chars before ending, i.e. string(s.begin()+skip_ws(s),s.begin()+trailing_ws(s)) strips both ends
-inline std::size_t trailing_ws(std::string const& s,std::size_t ending=std::string::npos,char const* ws=" \t\n\r") {
-  std::size_t n=s.find_last_not_of(ws,ending);
-  if (n==std::string::npos) return n;
-  else return n+1;
-}
-
-//TEST: if string is all whitespace, make sure that string(a+npos,a+npos) can't segfault (i.e. won't access any memory because begin==end)
-inline std::string strip_ws(std::string const& s) {
-  return std::string(s.begin()+skip_ws(s),s.begin()+trailing_ws(s));
-}
-
-
-inline bool is_single_line(std::string const& line) {
-  return std::count(line.begin(),line.end(),'\n')==0; // but we want to allow terminal newlines/blanks
-}
-
-// is_single_line(strip_ws(line))
-inline bool is_single_line_stripped(std::string const& line) {
-  std::size_t b=skip_ws(line),e=trailing_ws(line);
-  std::size_t n=line.find('\n',b);
-  return n==std::string::npos || n>=e;
-}
-
-struct toupperc {
-  inline char operator()(char c) const {
-    return std::toupper(c);
-  }
-};
-
-inline std::string toupper(std::string s) {
-  std::transform(s.begin(),s.end(),s.begin(),toupperc());
-  return s;
-}
-
-template <class Istr, class Isubstr> inline
-bool match_begin(Istr bstr,Istr estr,Isubstr bsub,Isubstr esub)
-{
-  while (bsub != esub) {
-    if (bstr == estr)
-      return false;
-    if (*bsub++ != *bstr++)
-      return false;
-  }
-  return true;
-}
-
-template <class Istr, class Prefix> inline
-bool match_begin(Istr bstr,Istr estr,Prefix prefix)
-{
-  return match_begin(bstr,estr,prefix.begin(),prefix.end());
-}
-
-template <class Str, class Prefix> inline
-bool match_begin(Str const& str,Prefix const& prefix)
-{
-  return match_begin(str.begin(),str.end(),prefix.begin(),prefix.end());
-}
-
-
-// read line in the form of either:
-//   source
-//   source ||| target
-// source will be returned as a string, target must be a sentence or
-// a lattice (in PLF format) and will be returned as a Lattice object
-void ParseTranslatorInput(const std::string& line, std::string* input, std::string* ref);
-struct Lattice;
-void ParseTranslatorInputLattice(const std::string& line, std::string* input, Lattice* ref);
-
-inline std::string Trim(const std::string& str, const std::string& dropChars = " \t") {
-  std::string res = str;
-  res.erase(str.find_last_not_of(dropChars)+1);
-  return res.erase(0, res.find_first_not_of(dropChars));
-}
-
-inline void Tokenize(const std::string& str, char delimiter, std::vector<std::string>* res) {
-  std::string s = str;
-  int last = 0;
-  res->clear();
-  for (int i=0; i < s.size(); ++i)
-    if (s[i] == delimiter) {
-      s[i]=0;
-      if (last != i) {
-        res->push_back(&s[last]);
-      }
-      last = i + 1;
-    }
-  if (last != s.size())
-    res->push_back(&s[last]);
-}
-
-inline unsigned NTokens(const std::string& str, char delimiter)
-{
-  std::vector<std::string> r;
-  Tokenize(str,delimiter,&r);
-  return r.size();
-}
-
-inline std::string LowercaseString(const std::string& in) {
-  std::string res(in.size(),' ');
-  for (int i = 0; i < in.size(); ++i)
-    res[i] = tolower(in[i]);
-  return res;
-}
-
-inline int CountSubstrings(const std::string& str, const std::string& sub) {
-  size_t p = 0;
-  int res = 0;
-  while (p < str.size()) {
-    p = str.find(sub, p);
-    if (p == std::string::npos) break;
-    ++res;
-    p += sub.size();
-  }
-  return res;
-}
-
-inline int SplitOnWhitespace(const std::string& in, std::vector<std::string>* out) {
-  out->clear();
-  int i = 0;
-  int start = 0;
-  std::string cur;
-  while(i < in.size()) {
-    if (in[i] == ' ' || in[i] == '\t') {
-      if (i - start > 0)
-        out->push_back(in.substr(start, i - start));
-      start = i + 1;
-    }
-    ++i;
-  }
-  if (i > start)
-    out->push_back(in.substr(start, i - start));
-  return out->size();
-}
-
-inline std::vector<std::string> SplitOnWhitespace(std::string const& in)
-{
-  std::vector<std::string> r;
-  SplitOnWhitespace(in,&r);
-  return r;
-}
-
-
-struct mutable_c_str {
-  // because making a copy of a string might not copy its storage, so modifying a c_str() could screw up original (nobody uses cow nowadays because it needs locking under threading)
-  char *p;
-  mutable_c_str(std::string const& s) : p((char *)::operator new(s.size()+1)) {
-    std::memcpy(p,s.data(),s.size());
-    p[s.size()]=0;
-  }
-  ~mutable_c_str() { ::operator delete(p); }
-private:
-  mutable_c_str(mutable_c_str const&);
-};
-
-// ' ' '\t' tokens hardcoded
-//NOTE: you should have stripped endline chars out first.
-inline bool IsWordSep(char c) {
-  return c==' '||c=='\t';
-}
-
-
-template <class F>
-// *end must be 0 (i.e. [p,end] is valid storage, which will be written to with 0 to separate c string tokens
-void VisitTokens(char *p,char *const end,F f) {
-  SLIBDBG("VisitTokens. p="<<p<<" Nleft="<<end-p);
-  if (p==end) return;
-  char *last; // 0 terminated already.  this is ok to mutilate because s is a copy of the string passed in.  well, barring copy on write i guess.
-  while(IsWordSep(*p)) { ++p;if (p==end) return; } // skip init whitespace
-  last=p; // first non-ws char
-  for(;;) {
-    SLIBDBG("Start of word. last="<<last<<" *p="<<*p<<" Nleft="<<end-p);
-    // last==p, pointing at first non-ws char not yet translated into f(word) call
-    for(;;) {// p to end of word
-      ++p;
-      if (p==end) {
-        f(last);
-        SLIBDBG("Returning. word="<<last<<" *p="<<*p<<" Nleft="<<end-p);
-        return;
-      }
-      if (IsWordSep(*p)) break;
-    }
-    *p=0;
-    f(last);
-    SLIBDBG("End of word. word="<<last<<" rest="<<p+1<<" Nleft="<<end-p);
-    for(;;) { // again skip extra whitespace
-      ++p;
-      if (p==end) return;
-      if (!IsWordSep(*p)) break;
-    }
-    last=p;
-  }
-}
-
-template <class F>
-void VisitTokens(char *p,F f) {
-  VisitTokens(p,p+std::strlen(p),f);
-}
-
-
-template <class F>
-void VisitTokens(std::string const& s,F f) {
-  if (0) {
-  std::vector<std::string> ss=SplitOnWhitespace(s);
-  for (int i=0;i<ss.size();++i)
-    f(ss[i]);
-  return;
-  }
-  //FIXME:
-  if (s.empty()) return;
-  mutable_c_str mp(s);
-  SLIBDBG("mp="<<mp.p);
-  VisitTokens(mp.p,mp.p+s.size(),f);
-}
-
-inline void SplitCommandAndParam(const std::string& in, std::string* cmd, std::string* param) {
-  cmd->clear();
-  param->clear();
-  std::vector<std::string> x;
-  SplitOnWhitespace(in, &x);
-  if (x.size() == 0) return;
-  *cmd = x[0];
-  for (int i = 1; i < x.size(); ++i) {
-    if (i > 1) { *param += " "; }
-    *param += x[i];
-  }
-}
-
-void ProcessAndStripSGML(std::string* line, std::map<std::string, std::string>* out);
-
-// given the first character of a UTF8 block, find out how wide it is
-// see http://en.wikipedia.org/wiki/UTF-8 for more info
-inline unsigned int UTF8Len(unsigned char x) {
-  if (x < 0x80) return 1;
-  else if ((x >> 5) == 0x06) return 2;
-  else if ((x >> 4) == 0x0e) return 3;
-  else if ((x >> 3) == 0x1e) return 4;
-  else return 0;
-}
-
-#endif
author	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-08-11 02:37:10 +0000
committer	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-08-11 02:37:10 +0000
commit	80686d4e567bae579ea39e009826a2de92cd4ace (patch)
tree	c3c35fcba57dde423a248f38aa121ad197c79734 /decoder/stringlib.h
parent	3c85c407c333899f6b4bc26632d312b8e568b638 (diff)