diff options
author | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 |
commit | 851e389dffdd6996ea32d70defb8906de80b9edc (patch) | |
tree | 8c68ee77205badc056b8ab5b332e67e3e98017df /src/stringlib.cc | |
parent | dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff) |
few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec
Diffstat (limited to 'src/stringlib.cc')
-rw-r--r-- | src/stringlib.cc | 97 |
1 files changed, 0 insertions, 97 deletions
diff --git a/src/stringlib.cc b/src/stringlib.cc deleted file mode 100644 index 3ed74bef..00000000 --- a/src/stringlib.cc +++ /dev/null @@ -1,97 +0,0 @@ -#include "stringlib.h" - -#include <cstdlib> -#include <cassert> -#include <iostream> -#include <map> - -#include "lattice.h" - -using namespace std; - -void ParseTranslatorInput(const string& line, string* input, string* ref) { - size_t hint = 0; - if (line.find("{\"rules\":") == 0) { - hint = line.find("}}"); - if (hint == string::npos) { - cerr << "Syntax error: " << line << endl; - abort(); - } - hint += 2; - } - size_t pos = line.find("|||", hint); - if (pos == string::npos) { *input = line; return; } - ref->clear(); - *input = line.substr(0, pos - 1); - string rline = line.substr(pos + 4); - if (rline.size() > 0) { - assert(ref); - *ref = rline; - } -} - -void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) { - string sref; - ParseTranslatorInput(line, input, &sref); - if (sref.size() > 0) { - assert(ref); - LatticeTools::ConvertTextOrPLF(sref, ref); - } -} - -void ProcessAndStripSGML(string* pline, map<string, string>* out) { - map<string, string>& meta = *out; - string& line = *pline; - string lline = LowercaseString(line); - if (lline.find("<seg")!=0) return; - size_t close = lline.find(">"); - if (close == string::npos) return; // error - size_t end = lline.find("</seg>"); - string seg = Trim(lline.substr(4, close-4)); - string text = line.substr(close+1, end - close - 1); - for (size_t i = 1; i < seg.size(); i++) { - if (seg[i] == '=' && seg[i-1] == ' ') { - string less = seg.substr(0, i-1) + seg.substr(i); - seg = less; i = 0; continue; - } - if (seg[i] == '=' && seg[i+1] == ' ') { - string less = seg.substr(0, i+1); - if (i+2 < seg.size()) less += seg.substr(i+2); - seg = less; i = 0; continue; - } - } - line = Trim(text); - if (seg == "") return; - for (size_t i = 1; i < seg.size(); i++) { - if (seg[i] == '=') { - string label = seg.substr(0, i); - string val = seg.substr(i+1); - if (val[0] == '"') { - val = val.substr(1); - size_t close = val.find('"'); - if (close == string::npos) { - cerr << "SGML parse error: missing \"\n"; - seg = ""; - i = 0; - } else { - seg = val.substr(close+1); - val = val.substr(0, close); - i = 0; - } - } else { - size_t close = val.find(' '); - if (close == string::npos) { - seg = ""; - i = 0; - } else { - seg = val.substr(close+1); - val = val.substr(0, close); - } - } - label = Trim(label); - seg = Trim(seg); - meta[label] = val; - } - } -} - |