diff options
| author | Patrick Simianer <patrick@lilt.com> | 2026-02-26 10:05:59 +0000 |
|---|---|---|
| committer | Patrick Simianer <patrick@lilt.com> | 2026-02-26 10:05:59 +0000 |
| commit | b31ace79ea5f6b3f279c544cd3a443d6fbf2a24d (patch) | |
| tree | 31f2b599fa5f6996aeb134390d58deb63eefe04a /nonbreaking_prefixes | |
| parent | 8805e95ae94d798c6441f7e1b72c90e049563f17 (diff) | |
Diffstat (limited to 'nonbreaking_prefixes')
| -rw-r--r-- | nonbreaking_prefixes/README.txt | 5 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.ca | 75 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.cs | 390 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.de | 325 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.el | 2 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.en | 107 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.es | 118 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.fr | 153 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.is | 251 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.it | 180 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.nl | 115 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.pl | 283 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.pt | 210 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.ro | 38 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.ru | 259 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.sk | 474 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.sl | 78 | ||||
| -rw-r--r-- | nonbreaking_prefixes/nonbreaking_prefix.sv | 46 |
18 files changed, 0 insertions, 3109 deletions
diff --git a/nonbreaking_prefixes/README.txt b/nonbreaking_prefixes/README.txt deleted file mode 100644 index 02cdfcc..0000000 --- a/nonbreaking_prefixes/README.txt +++ /dev/null @@ -1,5 +0,0 @@ -The language suffix can be found here: - -http://www.loc.gov/standards/iso639-2/php/code_list.php - - diff --git a/nonbreaking_prefixes/nonbreaking_prefix.ca b/nonbreaking_prefixes/nonbreaking_prefix.ca deleted file mode 100644 index 2f4fdfc..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.ca +++ /dev/null @@ -1,75 +0,0 @@ -Dr
-Dra
-pàg
-p
-c
-av
-Sr
-Sra
-adm
-esq
-Prof
-S.A
-S.L
-p.e
-ptes
-Sta
-St
-pl
-màx
-cast
-dir
-nre
-fra
-admdora
-Emm
-Excma
-espf
-dc
-admdor
-tel
-angl
-aprox
-ca
-dept
-dj
-dl
-dt
-ds
-dg
-dv
-ed
-entl
-al
-i.e
-maj
-smin
-n
-núm
-pta
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
diff --git a/nonbreaking_prefixes/nonbreaking_prefix.cs b/nonbreaking_prefixes/nonbreaking_prefix.cs deleted file mode 100644 index dce6167..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.cs +++ /dev/null @@ -1,390 +0,0 @@ -Bc -BcA -Ing -Ing.arch -MUDr -MVDr -MgA -Mgr -JUDr -PhDr -RNDr -PharmDr -ThLic -ThDr -Ph.D -Th.D -prof -doc -CSc -DrSc -dr. h. c -PaedDr -Dr -PhMr -DiS -abt -ad -a.i -aj -angl -anon -apod -atd -atp -aut -bd -biogr -b.m -b.p -b.r -cca -cit -cizojaz -c.k -col -čes -čín -čj -ed -facs -fasc -fol -fot -franc -h.c -hist -hl -hrsg -ibid -il -ind -inv.č -jap -jhdt -jv -koed -kol -korej -kl -krit -lat -lit -m.a -maď -mj -mp -násl -např -nepubl -něm -no -nr -n.s -okr -odd -odp -obr -opr -orig -phil -pl -pokrač -pol -port -pozn -př.kr -př.n.l -přel -přeprac -příl -pseud -pt -red -repr -resp -revid -rkp -roč -roz -rozš -samost -sect -sest -seš -sign -sl -srv -stol -sv -šk -šk.ro -špan -tab -t.č -tis -tj -tř -tzv -univ -uspoř -vol -vl.jm -vs -vyd -vyobr -zal -zejm -zkr -zprac -zvl -n.p -např -než -MUDr -abl -absol -adj -adv -ak -ak. sl -akt -alch -amer -anat -angl -anglosas -arab -arch -archit -arg -astr -astrol -att -bás -belg -bibl -biol -boh -bot -bulh -círk -csl -č -čas -čes -dat -děj -dep -dět -dial -dór -dopr -dosl -ekon -epic -etnonym -eufem -f -fam -fem -fil -film -form -fot -fr -fut -fyz -gen -geogr -geol -geom -germ -gram -hebr -herald -hist -hl -hovor -hud -hut -chcsl -chem -ie -imp -impf -ind -indoevr -inf -instr -interj -ión -iron -it -kanad -katalán -klas -kniž -komp -konj - -konkr -kř -kuch -lat -lék -les -lid -lit -liturg -lok -log -m -mat -meteor -metr -mod -ms -mysl -n -náb -námoř -neklas -něm -nesklon -nom -ob -obch -obyč -ojed -opt -part -pas -pejor -pers -pf -pl -plpf - -práv -prep -předl -přivl -r -rcsl -refl -reg -rkp -ř -řec -s -samohl -sg -sl -souhl -spec -srov -stfr -střv -stsl -subj -subst -superl -sv -sz -táz -tech -telev -teol -trans -typogr -var -vedl -verb -vl. jm -voj -vok -vůb -vulg -výtv -vztaž -zahr -zájm -zast -zejm - -zeměd -zkr -zř -mj -dl -atp -sport -Mgr -horn -MVDr -JUDr -RSDr -Bc -PhDr -ThDr -Ing -aj -apod -PharmDr -pomn -ev -slang -nprap -odp -dop -pol -st -stol -p. n. l -před n. l -n. l -př. Kr -po Kr -př. n. l -odd -RNDr -tzv -atd -tzn -resp -tj -p -br -č. j -čj -č. p -čp -a. s -s. r. o -spol. s r. o -p. o -s. p -v. o. s -k. s -o. p. s -o. s -v. r -v z -ml -vč -kr -mld -hod -popř -ap -event -rus -slov -rum -švýc -P. T -zvl -hor -dol -S.O.S
\ No newline at end of file diff --git a/nonbreaking_prefixes/nonbreaking_prefix.de b/nonbreaking_prefixes/nonbreaking_prefix.de deleted file mode 100644 index 35fdf5e..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.de +++ /dev/null @@ -1,325 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -#no german words end in single lower-case letters, so we throw those in too. -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - - -#Roman Numerals. A dot after one of these is not a sentence break in German. -I -II -III -IV -V -VI -VII -VIII -IX -X -XI -XII -XIII -XIV -XV -XVI -XVII -XVIII -XIX -XX -i -ii -iii -iv -v -vi -vii -viii -ix -x -xi -xii -xiii -xiv -xv -xvi -xvii -xviii -xix -xx - -#Titles and Honorifics -Adj -Adm -Adv -Asst -Bart -Bldg -Brig -Bros -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -Dr -Ens -Gen -Gov -Hon -Hosp -Insp -Lt -MM -MR -MRS -MS -Maj -Messrs -Mlle -Mme -Mr -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -St -Supt -Surg - -#Misc symbols -Mio -Mrd -bzw -v -vs -usw -d.h -z.B -u.a -etc -Mrd -MwSt -ggf -d.J -D.h -m.E -vgl -I.F -z.T -sogen -ff -u.E -g.U -g.g.A -c.-à-d -Buchst -u.s.w -sog -u.ä -Std -evtl -Zt -Chr -u.U -o.ä -Ltd -b.A -z.Zt -spp -sen -SA -k.o -jun -i.H.v -dgl -dergl -Co -zzt -usf -s.p.a -Dkr -Corp -bzgl -BSE - -#Number indicators -# add #NUMERIC_ONLY# after the word if it should ONLY be non-breaking when a 0-9 digit follows it -No -Nos -Art -Nr -pp -ca -Ca - -#Ordinals are done with . in German - "1." = "1st" in English -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 -21 -22 -23 -24 -25 -26 -27 -28 -29 -30 -31 -32 -33 -34 -35 -36 -37 -38 -39 -40 -41 -42 -43 -44 -45 -46 -47 -48 -49 -50 -51 -52 -53 -54 -55 -56 -57 -58 -59 -60 -61 -62 -63 -64 -65 -66 -67 -68 -69 -70 -71 -72 -73 -74 -75 -76 -77 -78 -79 -80 -81 -82 -83 -84 -85 -86 -87 -88 -89 -90 -91 -92 -93 -94 -95 -96 -97 -98 -99 diff --git a/nonbreaking_prefixes/nonbreaking_prefix.el b/nonbreaking_prefixes/nonbreaking_prefix.el deleted file mode 100644 index 0470f91..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.el +++ /dev/null @@ -1,2 +0,0 @@ -# for now, just include the Greek equivalent of "Mr." -κ diff --git a/nonbreaking_prefixes/nonbreaking_prefix.en b/nonbreaking_prefixes/nonbreaking_prefix.en deleted file mode 100644 index e1a3733..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.en +++ /dev/null @@ -1,107 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Asst -Bart -Bldg -Brig -Bros -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -Dr -Drs -Ens -Gen -Gov -Hon -Hr -Hosp -Insp -Lt -MM -MR -MRS -MS -Maj -Messrs -Mlle -Mme -Mr -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -St -Supt -Surg - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -pp #NUMERIC_ONLY# diff --git a/nonbreaking_prefixes/nonbreaking_prefix.es b/nonbreaking_prefixes/nonbreaking_prefix.es deleted file mode 100644 index d8b2755..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.es +++ /dev/null @@ -1,118 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -# Period-final abbreviation list from http://www.ctspanish.com/words/abbreviations.htm - -A.C -Apdo -Av -Bco -CC.AA -Da -Dep -Dn -Dr -Dra -EE.UU -Excmo -FF.CC -Fil -Gral -J.C -Let -Lic -N.B -P.D -P.V.P -Prof -Pts -Rte -S.A -S.A.R -S.E -S.L -S.R.C -Sr -Sra -Srta -Sta -Sto -T.V.E -Tel -Ud -Uds -V.B -V.E -Vd -Vds -a/c -adj -admón -afmo -apdo -av -c -c.f -c.g -cap -cm -cta -dcha -doc -ej -entlo -esq -etc -f.c -gr -grs -izq -kg -km -mg -mm -núm -núm -p -p.a -p.ej -ptas -pág -págs -pág -págs -q.e.g.e -q.e.s.m -s -s.s.s -vid -vol diff --git a/nonbreaking_prefixes/nonbreaking_prefix.fr b/nonbreaking_prefixes/nonbreaking_prefix.fr deleted file mode 100644 index 28126fa..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.fr +++ /dev/null @@ -1,153 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. -# -#any single upper case letter followed by a period is not a sentence ender -#usually upper case letters are initials in a name -#no French words end in single lower-case letters, so we throw those in too? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - -# Period-final abbreviation list for French -A.C.N -A.M -art -ann -apr -av -auj -lib -B.P -boul -ca -c.-à-d -cf -ch.-l -chap -contr -C.P.I -C.Q.F.D -C.N -C.N.S -C.S -dir -éd -e.g -env -al -etc -E.V -ex -fasc -fém -fig -fr -hab -ibid -id -i.e -inf -LL.AA -LL.AA.II -LL.AA.RR -LL.AA.SS -L.D -LL.EE -LL.MM -LL.MM.II.RR -loc.cit -masc -MM -ms -N.B -N.D.A -N.D.L.R -N.D.T -n/réf -NN.SS -N.S -N.D -N.P.A.I -p.c.c -pl -pp -p.ex -p.j -P.S -R.A.S -R.-V -R.P -R.I.P -SS -S.S -S.A -S.A.I -S.A.R -S.A.S -S.E -sec -sect -sing -S.M -S.M.I.R -sq -sqq -suiv -sup -suppl -tél -T.S.V.P -vb -vol -vs -X.O -Z.I diff --git a/nonbreaking_prefixes/nonbreaking_prefix.is b/nonbreaking_prefixes/nonbreaking_prefix.is deleted file mode 100644 index 5b8a710..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.is +++ /dev/null @@ -1,251 +0,0 @@ -no #NUMERIC_ONLY# -No #NUMERIC_ONLY# -nr #NUMERIC_ONLY# -Nr #NUMERIC_ONLY# -nR #NUMERIC_ONLY# -NR #NUMERIC_ONLY# -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -^ -í -á -ó -æ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -ab.fn -a.fn -afs -al -alm -alg -andh -ath -aths -atr -ao -au -aukaf -áfn -áhrl.s -áhrs -ákv.gr -ákv -bh -bls -dr -e.Kr -et -ef -efn -ennfr -eink -end -e.st -erl -fél -fskj -fh -f.hl -físl -fl -fn -fo -forl -frb -frl -frh -frt -fsl -fsh -fs -fsk -fst -f.Kr -ft -fv -fyrrn -fyrrv -germ -gm -gr -hdl -hdr -hf -hl -hlsk -hljsk -hljv -hljóðv -hr -hv -hvk -holl -Hos -höf -hk -hrl -ísl -kaf -kap -Khöfn -kk -kg -kk -km -kl -klst -kr -kt -kgúrsk -kvk -leturbr -lh -lh.nt -lh.þt -lo -ltr -mlja -mljó -millj -mm -mms -m.fl -miðm -mgr -mst -mín -nf -nh -nhm -nl -nk -nmgr -no -núv -nt -o.áfr -o.m.fl -ohf -o.fl -o.s.frv -ófn -ób -óákv.gr -óákv -pfn -PR -pr -Ritstj -Rvík -Rvk -samb -samhlj -samn -samn -sbr -sek -sérn -sf -sfn -sh -sfn -sh -s.hl -sk -skv -sl -sn -so -ss.us -s.st -samþ -sbr -shlj -sign -skál -st -st.s -stk -sþ -teg -tbl -tfn -tl -tvíhlj -tvt -till -to -umr -uh -us -uppl -útg -vb -Vf -vh -vkf -Vl -vl -vlf -vmf -8vo -vsk -vth -þt -þf -þjs -þgf -þlt -þolm -þm -þml -þýð diff --git a/nonbreaking_prefixes/nonbreaking_prefix.it b/nonbreaking_prefixes/nonbreaking_prefix.it deleted file mode 100644 index 992b9ec..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.it +++ /dev/null @@ -1,180 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Amn -Arch -Asst -Avv -Bart -Bcc -Bldg -Brig -Bros -C.A.P -C.P -Capt -Cc -Cmdr -Co -Col -Comdr -Con -Corp -Cpl -DR -Dott -Dr -Drs -Egr -Ens -Gen -Geom -Gov -Hon -Hosp -Hr -Id -Ing -Insp -Lt -MM -MR -MRS -MS -Maj -Messrs -Mlle -Mme -Mo -Mons -Mr -Mrs -Ms -Msgr -N.B -Op -Ord -P.S -P.T -Pfc -Ph -Prof -Pvt -RP -RSVP -Rag -Rep -Reps -Res -Rev -Rif -Rt -S.A -S.B.F -S.P.M -S.p.A -S.r.l -Sen -Sens -Sfc -Sgt -Sig -Sigg -Soc -Spett -Sr -St -Supt -Surg -V.P - -# other -a.c -acc -all -banc -c.a -c.c.p -c.m -c.p -c.s -c.v -corr -dott -e.p.c -ecc -es -fatt -gg -int -lett -ogg -on -p.c -p.c.c -p.es -p.f -p.r -p.v -post -pp -racc -ric -s.n.c -seg -sgg -ss -tel -u.s -v.r -v.s - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -pp #NUMERIC_ONLY# diff --git a/nonbreaking_prefixes/nonbreaking_prefix.nl b/nonbreaking_prefixes/nonbreaking_prefix.nl deleted file mode 100644 index c80c417..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.nl +++ /dev/null @@ -1,115 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. -#Sources: http://nl.wikipedia.org/wiki/Lijst_van_afkortingen -# http://nl.wikipedia.org/wiki/Aanspreekvorm -# http://nl.wikipedia.org/wiki/Titulatuur_in_het_Nederlands_hoger_onderwijs -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -bacc -bc -bgen -c.i -dhr -dr -dr.h.c -drs -drs -ds -eint -fa -Fa -fam -gen -genm -ing -ir -jhr -jkvr -jr -kand -kol -lgen -lkol -Lt -maj -Mej -mevr -Mme -mr -mr -Mw -o.b.s -plv -prof -ritm -tint -Vz -Z.D -Z.D.H -Z.E -Z.Em -Z.H -Z.K.H -Z.K.M -Z.M -z.v - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -#we seem to have a lot of these in dutch i.e.: i.p.v - in plaats van (in stead of) never ends a sentence -a.g.v -bijv -bijz -bv -d.w.z -e.c -e.g -e.k -ev -i.p.v -i.s.m -i.t.t -i.v.m -m.a.w -m.b.t -m.b.v -m.h.o -m.i -m.i.v -v.w.t - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -Nr #NUMERIC_ONLY# -Nrs -nrs -nr #NUMERIC_ONLY# diff --git a/nonbreaking_prefixes/nonbreaking_prefix.pl b/nonbreaking_prefixes/nonbreaking_prefix.pl deleted file mode 100644 index 6b7c106..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.pl +++ /dev/null @@ -1,283 +0,0 @@ -adw -afr -akad -al -Al -am -amer -arch -art -Art -artyst -astr -austr -bałt -bdb -bł -bm -br -bryg -bryt -centr -ces -chem -chiń -chir -c.k -c.o -cyg -cyw -cyt -czes -czw -cd -Cd -czyt -ćw -ćwicz -daw -dcn -dekl -demokr -det -diec -dł -dn -dot -dol -dop -dost -dosł -h.c -ds -dst -duszp -dypl -egz -ekol -ekon -elektr -em -ew -fab -farm -fot -fr -gat -gastr -geogr -geol -gimn -głęb -gm -godz -górn -gosp -gr -gram -hist -hiszp -hr -Hr -hot -id -in -im -iron -jn -kard -kat -katol -k.k -kk -kol -kl -k.p.a -kpc -k.p.c -kpt -kr -k.r -krak -k.r.o -kryt -kult -laic -łac -niem -woj -nb -np -Nb -Np -pol -pow -m.in -pt -ps -Pt -Ps -cdn -jw -ryc -rys -Ryc -Rys -tj -tzw -Tzw -tzn -zob -ang -ub -ul -pw -pn -pl -al -k -n -nr #NUMERIC_ONLY# -Nr #NUMERIC_ONLY# -ww -wł -ur -zm -żyd -żarg -żyw -wył -bp -bp -wyst -tow -Tow -o -sp -Sp -st -spółdz -Spółdz -społ -spółgł -stoł -stow -Stoł -Stow -zn -zew -zewn -zdr -zazw -zast -zaw -zał -zal -zam -zak -zakł -zagr -zach -adw -Adw -lek -Lek -med -mec -Mec -doc -Doc -dyw -dyr -Dyw -Dyr -inż -Inż -mgr -Mgr -dh -dr -Dh -Dr -p -P -red -Red -prof -prok -Prof -Prok -hab -płk -Płk -nadkom -Nadkom -podkom -Podkom -ks -Ks -gen -Gen -por -Por -reż -Reż -przyp -Przyp -śp -św -śW -Śp -Św -ŚW -szer -Szer -pkt #NUMERIC_ONLY# -str #NUMERIC_ONLY# -tab #NUMERIC_ONLY# -Tab #NUMERIC_ONLY# -tel -ust #NUMERIC_ONLY# -par #NUMERIC_ONLY# -poz -pok -oo -oO -Oo -OO -r #NUMERIC_ONLY# -l #NUMERIC_ONLY# -s #NUMERIC_ONLY# -najśw -Najśw -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -Ś -Ć -Ż -Ź -Dz diff --git a/nonbreaking_prefixes/nonbreaking_prefix.pt b/nonbreaking_prefixes/nonbreaking_prefix.pt deleted file mode 100644 index 5d65bf2..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.pt +++ /dev/null @@ -1,210 +0,0 @@ -#File adapted for PT by H. Leal Fontes from the EN & DE versions published with moses-2009-04-13. Last update: 10.11.2009. -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - - -#Roman Numerals. A dot after one of these is not a sentence break in Portuguese. -I -II -III -IV -V -VI -VII -VIII -IX -X -XI -XII -XIII -XIV -XV -XVI -XVII -XVIII -XIX -XX -i -ii -iii -iv -v -vi -vii -viii -ix -x -xi -xii -xiii -xiv -xv -xvi -xvii -xviii -xix -xx - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Art -Ca -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -DRA -Dr -Dra -Dras -Drs -Eng -Enga -Engas -Engos -Ex -Exo -Exmo -Fig -Gen -Hosp -Insp -Lda -MM -MR -MRS -MS -Maj -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -Sra -Sras -Srs -Sto -Supt -Surg -adj -adm -adv -art -cit -col -con -corp -cpl -dr -dra -dras -drs -eng -enga -engas -engos -ex -exo -exmo -fig -op -prof -sr -sra -sras -srs -sto - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -p #NUMERIC_ONLY# -pp #NUMERIC_ONLY# - diff --git a/nonbreaking_prefixes/nonbreaking_prefix.ro b/nonbreaking_prefixes/nonbreaking_prefix.ro deleted file mode 100644 index d489f46..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.ro +++ /dev/null @@ -1,38 +0,0 @@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -dpdv -etc -șamd -M.Ap.N -dl -Dl -d-na -D-na -dvs -Dvs -pt -Pt diff --git a/nonbreaking_prefixes/nonbreaking_prefix.ru b/nonbreaking_prefixes/nonbreaking_prefix.ru deleted file mode 100644 index 444465b..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.ru +++ /dev/null @@ -1,259 +0,0 @@ -TBD: Russian uppercase alphabet [А-Я]
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-0гг
-1гг
-2гг
-3гг
-4гг
-5гг
-6гг
-7гг
-8гг
-9гг
-0г
-1г
-2г
-3г
-4г
-5г
-6г
-7г
-8г
-9г
-Xвв
-Vвв
-Iвв
-Lвв
-Mвв
-Cвв
-Xв
-Vв
-Iв
-Lв
-Mв
-Cв
-0м
-1м
-2м
-3м
-4м
-5м
-6м
-7м
-8м
-9м
-0мм
-1мм
-2мм
-3мм
-4мм
-5мм
-6мм
-7мм
-8мм
-9мм
-0см
-1см
-2см
-3см
-4см
-5см
-6см
-7см
-8см
-9см
-0дм
-1дм
-2дм
-3дм
-4дм
-5дм
-6дм
-7дм
-8дм
-9дм
-0л
-1л
-2л
-3л
-4л
-5л
-6л
-7л
-8л
-9л
-0км
-1км
-2км
-3км
-4км
-5км
-6км
-7км
-8км
-9км
-0га
-1га
-2га
-3га
-4га
-5га
-6га
-7га
-8га
-9га
-0кг
-1кг
-2кг
-3кг
-4кг
-5кг
-6кг
-7кг
-8кг
-9кг
-0т
-1т
-2т
-3т
-4т
-5т
-6т
-7т
-8т
-9т
-0г
-1г
-2г
-3г
-4г
-5г
-6г
-7г
-8г
-9г
-0мг
-1мг
-2мг
-3мг
-4мг
-5мг
-6мг
-7мг
-8мг
-9мг
-бульв
-в
-вв
-г
-га
-гг
-гл
-гос
-д
-дм
-доп
-др
-е
-ед
-ед
-зам
-и
-инд
-исп
-Исп
-к
-кап
-кг
-кв
-кл
-км
-кол
-комн
-коп
-куб
-л
-лиц
-лл
-м
-макс
-мг
-мин
-мл
-млн
-млрд
-мм
-н
-наб
-нач
-неуд
-ном
-о
-обл
-обр
-общ
-ок
-ост
-отл
-п
-пер
-перераб
-пл
-пос
-пр
-просп
-проф
-р
-ред
-руб
-с
-сб
-св
-см
-соч
-ср
-ст
-стр
-т
-тел
-Тел
-тех
-тт
-туп
-тыс
-уд
-ул
-уч
-физ
-х
-хор
-ч
-чел
-шт
-экз
-э
diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sk b/nonbreaking_prefixes/nonbreaking_prefix.sk deleted file mode 100644 index 1198d48..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.sk +++ /dev/null @@ -1,474 +0,0 @@ -Bc -Mgr -RNDr -PharmDr -PhDr -JUDr -PaedDr -ThDr -Ing -MUDr -MDDr -MVDr -Dr -ThLic -PhD -ArtD -ThDr -Dr -DrSc -CSs -prof -obr -Obr -Č -č -absol -adj -admin -adr -Adr -adv -advok -afr -ak -akad -akc -akuz -et -al -alch -amer -anat -angl -Angl -anglosas -anorg -ap -apod -arch -archeol -archit -arg -art -astr -astrol -astron -atp -atď -austr -Austr -aut -belg -Belg -bibl -Bibl -biol -bot -bud -bás -býv -cest -chem -cirk -csl -čs -Čs -dat -dep -det -dial -diaľ -dipl -distrib -dokl -dosl -dopr -dram -duš -dv -dvojčl -dór -ekol -ekon -el -elektr -elektrotech -energet -epic -est -etc -etonym -eufem -európ -Európ -ev -evid -expr -fa -fam -farm -fem -feud -fil -filat -filoz -fi -fon -form -fot -fr -Fr -franc -Franc -fraz -fut -fyz -fyziol -garb -gen -genet -genpor -geod -geogr -geol -geom -germ -gr -Gr -gréc -Gréc -gréckokat -hebr -herald -hist -hlav -hosp -hromad -hud -hypok -ident -i.e -ident -imp -impf -indoeur -inf -inform -instr -int -interj -inšt -inštr -iron -jap -Jap -jaz -jedn -juhoamer -juhových -juhozáp -juž -kanad -Kanad -kanc -kapit -kpt -kart -katastr -knih -kniž -komp -konj -konkr -kozmet -krajč -kresť -kt -kuch -lat -latinskoamer -lek -lex -lingv -lit -litur -log -lok -max -Max -maď -Maď -medzinár -mest -metr -mil -Mil -min -Min -miner -ml -mld -mn -mod -mytol -napr -nar -Nar -nasl -nedok -neg -negat -neklas -nem -Nem -neodb -neos -neskl -nesklon -nespis -nespráv -neved -než -niekt -niž -nom -náb -nákl -námor -nár -obch -obj -obv -obyč -obč -občian -odb -odd -ods -ojed -okr -Okr -opt -opyt -org -os -osob -ot -ovoc -par -part -pejor -pers -pf -Pf -P.f -p.f -pl -Plk -pod -podst -pokl -polit -politol -polygr -pomn -popl -por -porad -porov -posch -potrav -použ -poz -pozit -poľ -poľno -poľnohosp -poľov -pošt -pož -prac -predl -pren -prep -preuk -priezv -Priezv -privl -prof -práv -príd -príj -prík -príp -prír -prísl -príslov -príč -psych -publ -pís -písm -pôv -refl -reg -rep -resp -rozk -rozlič -rozpráv -roč -Roč -ryb -rádiotech -rím -samohl -semest -sev -severoamer -severových -severozáp -sg -skr -skup -sl -Sloven -soc -soch -sociol -sp -spol -Spol -spoloč -spoluhl -správ -spôs -st -star -starogréc -starorím -s.r.o -stol -stor -str -stredoamer -stredoškol -subj -subst -superl -sv -sz -súkr -súp -súvzť -tal -Tal -tech -tel -Tel -telef -teles -telev -teol -trans -turist -tuzem -typogr -tzn -tzv -ukaz -ul -Ul -umel -univ -ust -ved -vedľ -verb -veter -vin -viď -vl -vod -vodohosp -pnl -vulg -vyj -vys -vysokoškol -vzťaž -vôb -vých -výd -výrob -výsk -výsl -výtv -výtvar -význ -včel -vš -všeob -zahr -zar -zariad -zast -zastar -zastaráv -zb -zdravot -združ -zjemn -zlat -zn -Zn -zool -zr -zried -zv -záhr -zák -zákl -zám -záp -západoeur -zázn -územ -účt -čast -čes -Čes -čl -čísl -živ -pr -fak -Kr -p.n.l -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sl b/nonbreaking_prefixes/nonbreaking_prefix.sl deleted file mode 100644 index 230062c..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.sl +++ /dev/null @@ -1,78 +0,0 @@ -dr
-Dr
-itd
-itn
-št #NUMERIC_ONLY#
-Št #NUMERIC_ONLY#
-d
-jan
-Jan
-feb
-Feb
-mar
-Mar
-apr
-Apr
-jun
-Jun
-jul
-Jul
-avg
-Avg
-sept
-Sept
-sep
-Sep
-okt
-Okt
-nov
-Nov
-dec
-Dec
-tj
-Tj
-npr
-Npr
-sl
-Sl
-op
-Op
-gl
-Gl
-oz
-Oz
-prev
-dipl
-ing
-prim
-Prim
-cf
-Cf
-gl
-Gl
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sv b/nonbreaking_prefixes/nonbreaking_prefix.sv deleted file mode 100644 index df5ef29..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.sv +++ /dev/null @@ -1,46 +0,0 @@ -#single upper case letter are usually initials -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -#misc abbreviations -AB -G -VG -dvs -etc -from -iaf -jfr -kl -kr -mao -mfl -mm -osv -pga -tex -tom -vs |
