summaryrefslogtreecommitdiff
path: root/nonbreaking_prefixes/nonbreaking_prefix.sv
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-14 14:43:14 +0200
committerPatrick Simianer <p@simianer.de>2014-06-14 14:43:14 +0200
commit2783f837303ae07c4a1d676302bca779abbb1296 (patch)
treee388dda12d6d31285b32663b937a8d55ecc909c5 /nonbreaking_prefixes/nonbreaking_prefix.sv
parent85ea0fc5e3ae7ea646cc6e843d01939b4d8e4dbf (diff)
steal tokenizer from moses' scripts
Diffstat (limited to 'nonbreaking_prefixes/nonbreaking_prefix.sv')
-rw-r--r--nonbreaking_prefixes/nonbreaking_prefix.sv46
1 files changed, 46 insertions, 0 deletions
diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sv b/nonbreaking_prefixes/nonbreaking_prefix.sv
new file mode 100644
index 0000000..df5ef29
--- /dev/null
+++ b/nonbreaking_prefixes/nonbreaking_prefix.sv
@@ -0,0 +1,46 @@
+#single upper case letters are usually initials
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+#misc abbreviations
+AB
+G
+VG
+dvs
+etc
+from
+iaf
+jfr
+kl
+kr
+mao
+mfl
+mm
+osv
+pga
+tex
+tom
+vs