summary refs log tree commit diff
path: root/utils
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-09-12 19:22:59 +0100
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-09-12 19:22:59 +0100
commit f826e90538dc15fbde4152b0e2f7d30fd6e56784 (patch)
tree 3bca97b634b637c79241466a39abcfea2ded22ea /utils
parent 4b6222733c4f9dcb3516bcc64394fa8b4716ce48 (diff)
source syntax features ~ blunsom emnlp 2008
Diffstat (limited to 'utils')
-rw-r--r-- utils/stringlib.cc 7
1 files changed, 6 insertions, 1 deletions
diff --git a/utils/stringlib.cc b/utils/stringlib.cc
index 7aaee9f0..ade02ca9 100644
--- a/utils/stringlib.cc
+++ b/utils/stringlib.cc
@@ -32,7 +32,12 @@ void ParseTranslatorInput(const string& line, string* input, string* ref) {
void ProcessAndStripSGML(string* pline, map<string, string>* out) {
map<string, string>& meta = *out;
string& line = *pline;
- string lline = LowercaseString(line);
+ string lline = *pline;
+ if (lline.find("<SEG")==0 || lline.find("<Seg")==0) {
+ cerr << "Segment tags <seg> must be lowercase!\n";
+ cerr << " " << *pline << endl;
+ abort();
+ }
if (lline.find("<seg")!=0) return;
size_t close = lline.find(">");
if (close == string::npos) return; // error