summary refs log tree commit diff
path: root/utils
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-09-12 19:22:59 +0100
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-09-12 19:22:59 +0100
commit 700b2abf48bf0a455064d6cf08754cbfd4e3a383 (patch)
tree ea78740d0c4f58b45a47f7668e767c3627fd8a81 /utils
parent 9f7a0765905e2906c43fbb5359d00ccdac38ca7f (diff)
source syntax features ~ blunsom emnlp 2008
Diffstat (limited to 'utils')
-rw-r--r-- utils/stringlib.cc 7
1 files changed, 6 insertions, 1 deletions
diff --git a/utils/stringlib.cc b/utils/stringlib.cc
index 7aaee9f0..ade02ca9 100644
--- a/utils/stringlib.cc
+++ b/utils/stringlib.cc
@@ -32,7 +32,12 @@ void ParseTranslatorInput(const string& line, string* input, string* ref) {
void ProcessAndStripSGML(string* pline, map<string, string>* out) {
map<string, string>& meta = *out;
string& line = *pline;
- string lline = LowercaseString(line);
+ string lline = *pline;
+ if (lline.find("<SEG")==0 || lline.find("<Seg")==0) {
+ cerr << "Segment tags <seg> must be lowercase!\n";
+ cerr << " " << *pline << endl;
+ abort();
+ }
if (lline.find("<seg")!=0) return;
size_t close = lline.find(">");
if (close == string::npos) return; // error