summaryrefslogtreecommitdiff
path: root/external/nonbreaking_prefixes/nonbreaking_prefix.pt
diff options
context:
space:
mode:
Diffstat (limited to 'external/nonbreaking_prefixes/nonbreaking_prefix.pt')
-rw-r--r--external/nonbreaking_prefixes/nonbreaking_prefix.pt210
1 files changed, 210 insertions, 0 deletions
diff --git a/external/nonbreaking_prefixes/nonbreaking_prefix.pt b/external/nonbreaking_prefixes/nonbreaking_prefix.pt
new file mode 100644
index 0000000..5d65bf2
--- /dev/null
+++ b/external/nonbreaking_prefixes/nonbreaking_prefix.pt
@@ -0,0 +1,210 @@
+#File adapted for PT by H. Leal Fontes from the EN & DE versions published with moses-2009-04-13. Last update: 10.11.2009.
+#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
+#Special cases are included for prefixes that ONLY appear before 0-9 numbers.
+
+#Any single letter followed by a period is not a sentence ender (the pronoun "I" in the English source list is an occasional exception, but we leave it in).
+#Upper-case letters are usually initials in a name; the lower-case letters are listed as well.
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+
+
+#Roman Numerals. A dot after one of these is not a sentence break in Portuguese.
+I
+II
+III
+IV
+V
+VI
+VII
+VIII
+IX
+X
+XI
+XII
+XIII
+XIV
+XV
+XVI
+XVII
+XVIII
+XIX
+XX
+i
+ii
+iii
+iv
+v
+vi
+vii
+viii
+ix
+x
+xi
+xii
+xiii
+xiv
+xv
+xvi
+xvii
+xviii
+xix
+xx
+
+#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
+Adj
+Adm
+Adv
+Art
+Ca
+Capt
+Cmdr
+Col
+Comdr
+Con
+Corp
+Cpl
+DR
+DRA
+Dr
+Dra
+Dras
+Drs
+Eng
+Enga
+Engas
+Engos
+Ex
+Exo
+Exmo
+Fig
+Gen
+Hosp
+Insp
+Lda
+MM
+MR
+MRS
+MS
+Maj
+Mrs
+Ms
+Msgr
+Op
+Ord
+Pfc
+Ph
+Prof
+Pvt
+Rep
+Reps
+Res
+Rev
+Rt
+Sen
+Sens
+Sfc
+Sgt
+Sr
+Sra
+Sras
+Srs
+Sto
+Supt
+Surg
+adj
+adm
+adv
+art
+cit
+col
+con
+corp
+cpl
+dr
+dra
+dras
+drs
+eng
+enga
+engas
+engos
+ex
+exo
+exmo
+fig
+op
+prof
+sr
+sra
+sras
+srs
+sto
+
+#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence)
+v
+vs
+i.e
+rev
+e.g
+
+#Numbers only. These act as non-breaking prefixes ONLY when followed by a numeric sequence;
+#otherwise the period is treated as a possible sentence break.
+# Append #NUMERIC_ONLY# after the word to enable this behaviour.
+#This case is mostly for the English "No." which can either be a sentence of its own, or,
+#if followed by a number, a non-breaking prefix.
+No #NUMERIC_ONLY#
+Nos
+Art #NUMERIC_ONLY#
+Nr
+p #NUMERIC_ONLY#
+pp #NUMERIC_ONLY#
+