From 851e389dffdd6996ea32d70defb8906de80b9edc Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 14 Dec 2009 20:35:11 -0500 Subject: few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec --- src/stringlib.cc | 97 -------------------------------------------------------- 1 file changed, 97 deletions(-) delete mode 100644 src/stringlib.cc (limited to 'src/stringlib.cc') diff --git a/src/stringlib.cc b/src/stringlib.cc deleted file mode 100644 index 3ed74bef..00000000 --- a/src/stringlib.cc +++ /dev/null @@ -1,97 +0,0 @@ -#include "stringlib.h" - -#include -#include -#include -#include - -#include "lattice.h" - -using namespace std; - -void ParseTranslatorInput(const string& line, string* input, string* ref) { - size_t hint = 0; - if (line.find("{\"rules\":") == 0) { - hint = line.find("}}"); - if (hint == string::npos) { - cerr << "Syntax error: " << line << endl; - abort(); - } - hint += 2; - } - size_t pos = line.find("|||", hint); - if (pos == string::npos) { *input = line; return; } - ref->clear(); - *input = line.substr(0, pos - 1); - string rline = line.substr(pos + 4); - if (rline.size() > 0) { - assert(ref); - *ref = rline; - } -} - -void ParseTranslatorInputLattice(const string& line, string* input, Lattice* ref) { - string sref; - ParseTranslatorInput(line, input, &sref); - if (sref.size() > 0) { - assert(ref); - LatticeTools::ConvertTextOrPLF(sref, ref); - } -} - -void ProcessAndStripSGML(string* pline, map* out) { - map& meta = *out; - string& line = *pline; - string lline = LowercaseString(line); - if (lline.find(""); - if (close == string::npos) return; // error - size_t end = lline.find(""); - string seg = Trim(lline.substr(4, close-4)); - string text = line.substr(close+1, end - close - 1); - for (size_t i = 1; i < seg.size(); i++) { - if (seg[i] == '=' && seg[i-1] == ' ') { - string less = seg.substr(0, i-1) + seg.substr(i); - seg = less; i = 0; continue; - } - if (seg[i] == '=' && seg[i+1] == ' ') { - string less = seg.substr(0, i+1); - if (i+2 < seg.size()) less += seg.substr(i+2); - seg = less; i = 0; continue; - } - } - line = Trim(text); - if (seg == "") return; - for (size_t i = 1; i < seg.size(); i++) { - if (seg[i] == '=') { - string label = seg.substr(0, i); - string val = seg.substr(i+1); - if (val[0] == '"') { - val = val.substr(1); - size_t close = val.find('"'); - if (close == string::npos) { - cerr << "SGML parse error: missing \"\n"; - seg = ""; - i = 0; - } else { - seg = val.substr(close+1); - val = val.substr(0, close); - i = 0; - } - } else { - size_t close = val.find(' '); - if (close == string::npos) { - seg = ""; - i = 0; - } else { - seg = val.substr(close+1); - val = val.substr(0, close); - } - } - label = Trim(label); - seg = Trim(seg); - meta[label] = val; - } - } -} - -- cgit v1.2.3