From b31ace79ea5f6b3f279c544cd3a443d6fbf2a24d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 26 Feb 2026 10:05:59 +0000 Subject: overhaul --- nonbreaking_prefixes/README.txt | 5 - nonbreaking_prefixes/nonbreaking_prefix.ca | 75 ----- nonbreaking_prefixes/nonbreaking_prefix.cs | 390 ------------------------ nonbreaking_prefixes/nonbreaking_prefix.de | 325 -------------------- nonbreaking_prefixes/nonbreaking_prefix.el | 2 - nonbreaking_prefixes/nonbreaking_prefix.en | 107 ------- nonbreaking_prefixes/nonbreaking_prefix.es | 118 ------- nonbreaking_prefixes/nonbreaking_prefix.fr | 153 ---------- nonbreaking_prefixes/nonbreaking_prefix.is | 251 --------------- nonbreaking_prefixes/nonbreaking_prefix.it | 180 ----------- nonbreaking_prefixes/nonbreaking_prefix.nl | 115 ------- nonbreaking_prefixes/nonbreaking_prefix.pl | 283 ----------------- nonbreaking_prefixes/nonbreaking_prefix.pt | 210 ------------- nonbreaking_prefixes/nonbreaking_prefix.ro | 38 --- nonbreaking_prefixes/nonbreaking_prefix.ru | 259 ---------------- nonbreaking_prefixes/nonbreaking_prefix.sk | 474 ----------------------------- nonbreaking_prefixes/nonbreaking_prefix.sl | 78 ----- nonbreaking_prefixes/nonbreaking_prefix.sv | 46 --- 18 files changed, 3109 deletions(-) delete mode 100644 nonbreaking_prefixes/README.txt delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.ca delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.cs delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.de delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.el delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.en delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.es delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.fr delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.is delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.it delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.nl delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.pl delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.pt delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.ro delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.ru delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.sk delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.sl delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.sv (limited to 'nonbreaking_prefixes') diff --git a/nonbreaking_prefixes/README.txt b/nonbreaking_prefixes/README.txt deleted file mode 100644 index 02cdfcc..0000000 --- a/nonbreaking_prefixes/README.txt +++ /dev/null @@ -1,5 +0,0 @@ -The language suffix can be found here: - -http://www.loc.gov/standards/iso639-2/php/code_list.php - - diff --git a/nonbreaking_prefixes/nonbreaking_prefix.ca b/nonbreaking_prefixes/nonbreaking_prefix.ca deleted file mode 100644 index 2f4fdfc..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.ca +++ /dev/null @@ -1,75 +0,0 @@ -Dr -Dra -pàg -p -c -av -Sr -Sra -adm -esq -Prof -S.A -S.L -p.e -ptes -Sta -St -pl -màx -cast -dir -nre -fra -admdora -Emm -Excma -espf -dc -admdor -tel -angl -aprox -ca -dept -dj -dl -dt -ds -dg -dv -ed -entl -al -i.e -maj -smin -n -núm -pta -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z diff --git a/nonbreaking_prefixes/nonbreaking_prefix.cs b/nonbreaking_prefixes/nonbreaking_prefix.cs deleted file mode 100644 index dce6167..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.cs +++ /dev/null @@ -1,390 +0,0 @@ -Bc -BcA -Ing -Ing.arch -MUDr -MVDr -MgA -Mgr -JUDr -PhDr -RNDr -PharmDr -ThLic -ThDr -Ph.D -Th.D -prof -doc -CSc -DrSc -dr. h. c -PaedDr -Dr -PhMr -DiS -abt -ad -a.i -aj -angl -anon -apod -atd -atp -aut -bd -biogr -b.m -b.p -b.r -cca -cit -cizojaz -c.k -col -čes -čín -čj -ed -facs -fasc -fol -fot -franc -h.c -hist -hl -hrsg -ibid -il -ind -inv.č -jap -jhdt -jv -koed -kol -korej -kl -krit -lat -lit -m.a -maď -mj -mp -násl -např -nepubl -něm -no -nr -n.s -okr -odd -odp -obr -opr -orig -phil -pl -pokrač -pol -port -pozn -př.kr -př.n.l -přel -přeprac -příl -pseud -pt -red -repr -resp -revid -rkp -roč -roz -rozš -samost -sect -sest -seš -sign -sl -srv -stol -sv -šk -šk.ro -špan -tab -t.č -tis -tj -tř -tzv -univ -uspoř -vol -vl.jm -vs -vyd -vyobr -zal -zejm -zkr -zprac -zvl -n.p -např -než -MUDr -abl -absol -adj -adv -ak -ak. sl -akt -alch -amer -anat -angl -anglosas -arab -arch -archit -arg -astr -astrol -att -bás -belg -bibl -biol -boh -bot -bulh -círk -csl -č -čas -čes -dat -děj -dep -dět -dial -dór -dopr -dosl -ekon -epic -etnonym -eufem -f -fam -fem -fil -film -form -fot -fr -fut -fyz -gen -geogr -geol -geom -germ -gram -hebr -herald -hist -hl -hovor -hud -hut -chcsl -chem -ie -imp -impf -ind -indoevr -inf -instr -interj -ión -iron -it -kanad -katalán -klas -kniž -komp -konj - -konkr -kř -kuch -lat -lék -les -lid -lit -liturg -lok -log -m -mat -meteor -metr -mod -ms -mysl -n -náb -námoř -neklas -něm -nesklon -nom -ob -obch -obyč -ojed -opt -part -pas -pejor -pers -pf -pl -plpf - -práv -prep -předl -přivl -r -rcsl -refl -reg -rkp -ř -řec -s -samohl -sg -sl -souhl -spec -srov -stfr -střv -stsl -subj -subst -superl -sv -sz -táz -tech -telev -teol -trans -typogr -var -vedl -verb -vl. jm -voj -vok -vůb -vulg -výtv -vztaž -zahr -zájm -zast -zejm - -zeměd -zkr -zř -mj -dl -atp -sport -Mgr -horn -MVDr -JUDr -RSDr -Bc -PhDr -ThDr -Ing -aj -apod -PharmDr -pomn -ev -slang -nprap -odp -dop -pol -st -stol -p. n. l -před n. l -n. l -př. Kr -po Kr -př. n. l -odd -RNDr -tzv -atd -tzn -resp -tj -p -br -č. j -čj -č. p -čp -a. s -s. r. o -spol. s r. o -p. o -s. p -v. o. s -k. s -o. p. s -o. s -v. r -v z -ml -vč -kr -mld -hod -popř -ap -event -rus -slov -rum -švýc -P. T -zvl -hor -dol -S.O.S \ No newline at end of file diff --git a/nonbreaking_prefixes/nonbreaking_prefix.de b/nonbreaking_prefixes/nonbreaking_prefix.de deleted file mode 100644 index 35fdf5e..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.de +++ /dev/null @@ -1,325 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -#no german words end in single lower-case letters, so we throw those in too. -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - - -#Roman Numerals. A dot after one of these is not a sentence break in German. -I -II -III -IV -V -VI -VII -VIII -IX -X -XI -XII -XIII -XIV -XV -XVI -XVII -XVIII -XIX -XX -i -ii -iii -iv -v -vi -vii -viii -ix -x -xi -xii -xiii -xiv -xv -xvi -xvii -xviii -xix -xx - -#Titles and Honorifics -Adj -Adm -Adv -Asst -Bart -Bldg -Brig -Bros -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -Dr -Ens -Gen -Gov -Hon -Hosp -Insp -Lt -MM -MR -MRS -MS -Maj -Messrs -Mlle -Mme -Mr -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -St -Supt -Surg - -#Misc symbols -Mio -Mrd -bzw -v -vs -usw -d.h -z.B -u.a -etc -Mrd -MwSt -ggf -d.J -D.h -m.E -vgl -I.F -z.T -sogen -ff -u.E -g.U -g.g.A -c.-à-d -Buchst -u.s.w -sog -u.ä -Std -evtl -Zt -Chr -u.U -o.ä -Ltd -b.A -z.Zt -spp -sen -SA -k.o -jun -i.H.v -dgl -dergl -Co -zzt -usf -s.p.a -Dkr -Corp -bzgl -BSE - -#Number indicators -# add #NUMERIC_ONLY# after the word if it should ONLY be non-breaking when a 0-9 digit follows it -No -Nos -Art -Nr -pp -ca -Ca - -#Ordinals are done with . in German - "1." = "1st" in English -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 -21 -22 -23 -24 -25 -26 -27 -28 -29 -30 -31 -32 -33 -34 -35 -36 -37 -38 -39 -40 -41 -42 -43 -44 -45 -46 -47 -48 -49 -50 -51 -52 -53 -54 -55 -56 -57 -58 -59 -60 -61 -62 -63 -64 -65 -66 -67 -68 -69 -70 -71 -72 -73 -74 -75 -76 -77 -78 -79 -80 -81 -82 -83 -84 -85 -86 -87 -88 -89 -90 -91 -92 -93 -94 -95 -96 -97 -98 -99 diff --git a/nonbreaking_prefixes/nonbreaking_prefix.el b/nonbreaking_prefixes/nonbreaking_prefix.el deleted file mode 100644 index 0470f91..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.el +++ /dev/null @@ -1,2 +0,0 @@ -# for now, just include the Greek equivalent of "Mr." -κ diff --git a/nonbreaking_prefixes/nonbreaking_prefix.en b/nonbreaking_prefixes/nonbreaking_prefix.en deleted file mode 100644 index e1a3733..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.en +++ /dev/null @@ -1,107 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Asst -Bart -Bldg -Brig -Bros -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -Dr -Drs -Ens -Gen -Gov -Hon -Hr -Hosp -Insp -Lt -MM -MR -MRS -MS -Maj -Messrs -Mlle -Mme -Mr -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -St -Supt -Surg - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -pp #NUMERIC_ONLY# diff --git a/nonbreaking_prefixes/nonbreaking_prefix.es b/nonbreaking_prefixes/nonbreaking_prefix.es deleted file mode 100644 index d8b2755..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.es +++ /dev/null @@ -1,118 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -# Period-final abbreviation list from http://www.ctspanish.com/words/abbreviations.htm - -A.C -Apdo -Av -Bco -CC.AA -Da -Dep -Dn -Dr -Dra -EE.UU -Excmo -FF.CC -Fil -Gral -J.C -Let -Lic -N.B -P.D -P.V.P -Prof -Pts -Rte -S.A -S.A.R -S.E -S.L -S.R.C -Sr -Sra -Srta -Sta -Sto -T.V.E -Tel -Ud -Uds -V.B -V.E -Vd -Vds -a/c -adj -admón -afmo -apdo -av -c -c.f -c.g -cap -cm -cta -dcha -doc -ej -entlo -esq -etc -f.c -gr -grs -izq -kg -km -mg -mm -núm -núm -p -p.a -p.ej -ptas -pág -págs -pág -págs -q.e.g.e -q.e.s.m -s -s.s.s -vid -vol diff --git a/nonbreaking_prefixes/nonbreaking_prefix.fr b/nonbreaking_prefixes/nonbreaking_prefix.fr deleted file mode 100644 index 28126fa..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.fr +++ /dev/null @@ -1,153 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. -# -#any single upper case letter followed by a period is not a sentence ender -#usually upper case letters are initials in a name -#no French words end in single lower-case letters, so we throw those in too? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - -# Period-final abbreviation list for French -A.C.N -A.M -art -ann -apr -av -auj -lib -B.P -boul -ca -c.-à-d -cf -ch.-l -chap -contr -C.P.I -C.Q.F.D -C.N -C.N.S -C.S -dir -éd -e.g -env -al -etc -E.V -ex -fasc -fém -fig -fr -hab -ibid -id -i.e -inf -LL.AA -LL.AA.II -LL.AA.RR -LL.AA.SS -L.D -LL.EE -LL.MM -LL.MM.II.RR -loc.cit -masc -MM -ms -N.B -N.D.A -N.D.L.R -N.D.T -n/réf -NN.SS -N.S -N.D -N.P.A.I -p.c.c -pl -pp -p.ex -p.j -P.S -R.A.S -R.-V -R.P -R.I.P -SS -S.S -S.A -S.A.I -S.A.R -S.A.S -S.E -sec -sect -sing -S.M -S.M.I.R -sq -sqq -suiv -sup -suppl -tél -T.S.V.P -vb -vol -vs -X.O -Z.I diff --git a/nonbreaking_prefixes/nonbreaking_prefix.is b/nonbreaking_prefixes/nonbreaking_prefix.is deleted file mode 100644 index 5b8a710..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.is +++ /dev/null @@ -1,251 +0,0 @@ -no #NUMERIC_ONLY# -No #NUMERIC_ONLY# -nr #NUMERIC_ONLY# -Nr #NUMERIC_ONLY# -nR #NUMERIC_ONLY# -NR #NUMERIC_ONLY# -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -^ -í -á -ó -æ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -ab.fn -a.fn -afs -al -alm -alg -andh -ath -aths -atr -ao -au -aukaf -áfn -áhrl.s -áhrs -ákv.gr -ákv -bh -bls -dr -e.Kr -et -ef -efn -ennfr -eink -end -e.st -erl -fél -fskj -fh -f.hl -físl -fl -fn -fo -forl -frb -frl -frh -frt -fsl -fsh -fs -fsk -fst -f.Kr -ft -fv -fyrrn -fyrrv -germ -gm -gr -hdl -hdr -hf -hl -hlsk -hljsk -hljv -hljóðv -hr -hv -hvk -holl -Hos -höf -hk -hrl -ísl -kaf -kap -Khöfn -kk -kg -kk -km -kl -klst -kr -kt -kgúrsk -kvk -leturbr -lh -lh.nt -lh.þt -lo -ltr -mlja -mljó -millj -mm -mms -m.fl -miðm -mgr -mst -mín -nf -nh -nhm -nl -nk -nmgr -no -núv -nt -o.áfr -o.m.fl -ohf -o.fl -o.s.frv -ófn -ób -óákv.gr -óákv -pfn -PR -pr -Ritstj -Rvík -Rvk -samb -samhlj -samn -samn -sbr -sek -sérn -sf -sfn -sh -sfn -sh -s.hl -sk -skv -sl -sn -so -ss.us -s.st -samþ -sbr -shlj -sign -skál -st -st.s -stk -sþ -teg -tbl -tfn -tl -tvíhlj -tvt -till -to -umr -uh -us -uppl -útg -vb -Vf -vh -vkf -Vl -vl -vlf -vmf -8vo -vsk -vth -þt -þf -þjs -þgf -þlt -þolm -þm -þml -þýð diff --git a/nonbreaking_prefixes/nonbreaking_prefix.it b/nonbreaking_prefixes/nonbreaking_prefix.it deleted file mode 100644 index 992b9ec..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.it +++ /dev/null @@ -1,180 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Amn -Arch -Asst -Avv -Bart -Bcc -Bldg -Brig -Bros -C.A.P -C.P -Capt -Cc -Cmdr -Co -Col -Comdr -Con -Corp -Cpl -DR -Dott -Dr -Drs -Egr -Ens -Gen -Geom -Gov -Hon -Hosp -Hr -Id -Ing -Insp -Lt -MM -MR -MRS -MS -Maj -Messrs -Mlle -Mme -Mo -Mons -Mr -Mrs -Ms -Msgr -N.B -Op -Ord -P.S -P.T -Pfc -Ph -Prof -Pvt -RP -RSVP -Rag -Rep -Reps -Res -Rev -Rif -Rt -S.A -S.B.F -S.P.M -S.p.A -S.r.l -Sen -Sens -Sfc -Sgt -Sig -Sigg -Soc -Spett -Sr -St -Supt -Surg -V.P - -# other -a.c -acc -all -banc -c.a -c.c.p -c.m -c.p -c.s -c.v -corr -dott -e.p.c -ecc -es -fatt -gg -int -lett -ogg -on -p.c -p.c.c -p.es -p.f -p.r -p.v -post -pp -racc -ric -s.n.c -seg -sgg -ss -tel -u.s -v.r -v.s - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -pp #NUMERIC_ONLY# diff --git a/nonbreaking_prefixes/nonbreaking_prefix.nl b/nonbreaking_prefixes/nonbreaking_prefix.nl deleted file mode 100644 index c80c417..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.nl +++ /dev/null @@ -1,115 +0,0 @@ -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. -#Sources: http://nl.wikipedia.org/wiki/Lijst_van_afkortingen -# http://nl.wikipedia.org/wiki/Aanspreekvorm -# http://nl.wikipedia.org/wiki/Titulatuur_in_het_Nederlands_hoger_onderwijs -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -bacc -bc -bgen -c.i -dhr -dr -dr.h.c -drs -drs -ds -eint -fa -Fa -fam -gen -genm -ing -ir -jhr -jkvr -jr -kand -kol -lgen -lkol -Lt -maj -Mej -mevr -Mme -mr -mr -Mw -o.b.s -plv -prof -ritm -tint -Vz -Z.D -Z.D.H -Z.E -Z.Em -Z.H -Z.K.H -Z.K.M -Z.M -z.v - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -#we seem to have a lot of these in dutch i.e.: i.p.v - in plaats van (in stead of) never ends a sentence -a.g.v -bijv -bijz -bv -d.w.z -e.c -e.g -e.k -ev -i.p.v -i.s.m -i.t.t -i.v.m -m.a.w -m.b.t -m.b.v -m.h.o -m.i -m.i.v -v.w.t - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -Nr #NUMERIC_ONLY# -Nrs -nrs -nr #NUMERIC_ONLY# diff --git a/nonbreaking_prefixes/nonbreaking_prefix.pl b/nonbreaking_prefixes/nonbreaking_prefix.pl deleted file mode 100644 index 6b7c106..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.pl +++ /dev/null @@ -1,283 +0,0 @@ -adw -afr -akad -al -Al -am -amer -arch -art -Art -artyst -astr -austr -bałt -bdb -bł -bm -br -bryg -bryt -centr -ces -chem -chiń -chir -c.k -c.o -cyg -cyw -cyt -czes -czw -cd -Cd -czyt -ćw -ćwicz -daw -dcn -dekl -demokr -det -diec -dł -dn -dot -dol -dop -dost -dosł -h.c -ds -dst -duszp -dypl -egz -ekol -ekon -elektr -em -ew -fab -farm -fot -fr -gat -gastr -geogr -geol -gimn -głęb -gm -godz -górn -gosp -gr -gram -hist -hiszp -hr -Hr -hot -id -in -im -iron -jn -kard -kat -katol -k.k -kk -kol -kl -k.p.a -kpc -k.p.c -kpt -kr -k.r -krak -k.r.o -kryt -kult -laic -łac -niem -woj -nb -np -Nb -Np -pol -pow -m.in -pt -ps -Pt -Ps -cdn -jw -ryc -rys -Ryc -Rys -tj -tzw -Tzw -tzn -zob -ang -ub -ul -pw -pn -pl -al -k -n -nr #NUMERIC_ONLY# -Nr #NUMERIC_ONLY# -ww -wł -ur -zm -żyd -żarg -żyw -wył -bp -bp -wyst -tow -Tow -o -sp -Sp -st -spółdz -Spółdz -społ -spółgł -stoł -stow -Stoł -Stow -zn -zew -zewn -zdr -zazw -zast -zaw -zał -zal -zam -zak -zakł -zagr -zach -adw -Adw -lek -Lek -med -mec -Mec -doc -Doc -dyw -dyr -Dyw -Dyr -inż -Inż -mgr -Mgr -dh -dr -Dh -Dr -p -P -red -Red -prof -prok -Prof -Prok -hab -płk -Płk -nadkom -Nadkom -podkom -Podkom -ks -Ks -gen -Gen -por -Por -reż -Reż -przyp -Przyp -śp -św -śW -Śp -Św -ŚW -szer -Szer -pkt #NUMERIC_ONLY# -str #NUMERIC_ONLY# -tab #NUMERIC_ONLY# -Tab #NUMERIC_ONLY# -tel -ust #NUMERIC_ONLY# -par #NUMERIC_ONLY# -poz -pok -oo -oO -Oo -OO -r #NUMERIC_ONLY# -l #NUMERIC_ONLY# -s #NUMERIC_ONLY# -najśw -Najśw -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -Ś -Ć -Ż -Ź -Dz diff --git a/nonbreaking_prefixes/nonbreaking_prefix.pt b/nonbreaking_prefixes/nonbreaking_prefix.pt deleted file mode 100644 index 5d65bf2..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.pt +++ /dev/null @@ -1,210 +0,0 @@ -#File adapted for PT by H. Leal Fontes from the EN & DE versions published with moses-2009-04-13. Last update: 10.11.2009. -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - - -#Roman Numerals. A dot after one of these is not a sentence break in Portuguese. -I -II -III -IV -V -VI -VII -VIII -IX -X -XI -XII -XIII -XIV -XV -XVI -XVII -XVIII -XIX -XX -i -ii -iii -iv -v -vi -vii -viii -ix -x -xi -xii -xiii -xiv -xv -xvi -xvii -xviii -xix -xx - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Art -Ca -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -DRA -Dr -Dra -Dras -Drs -Eng -Enga -Engas -Engos -Ex -Exo -Exmo -Fig -Gen -Hosp -Insp -Lda -MM -MR -MRS -MS -Maj -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -Sra -Sras -Srs -Sto -Supt -Surg -adj -adm -adv -art -cit -col -con -corp -cpl -dr -dra -dras -drs -eng -enga -engas -engos -ex -exo -exmo -fig -op -prof -sr -sra -sras -srs -sto - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -p #NUMERIC_ONLY# -pp #NUMERIC_ONLY# - diff --git a/nonbreaking_prefixes/nonbreaking_prefix.ro b/nonbreaking_prefixes/nonbreaking_prefix.ro deleted file mode 100644 index d489f46..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.ro +++ /dev/null @@ -1,38 +0,0 @@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -dpdv -etc -șamd -M.Ap.N -dl -Dl -d-na -D-na -dvs -Dvs -pt -Pt diff --git a/nonbreaking_prefixes/nonbreaking_prefix.ru b/nonbreaking_prefixes/nonbreaking_prefix.ru deleted file mode 100644 index 444465b..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.ru +++ /dev/null @@ -1,259 +0,0 @@ -TBD: Russian uppercase alphabet [А-Я] -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -0гг -1гг -2гг -3гг -4гг -5гг -6гг -7гг -8гг -9гг -0г -1г -2г -3г -4г -5г -6г -7г -8г -9г -Xвв -Vвв -Iвв -Lвв -Mвв -Cвв -Xв -Vв -Iв -Lв -Mв -Cв -0м -1м -2м -3м -4м -5м -6м -7м -8м -9м -0мм -1мм -2мм -3мм -4мм -5мм -6мм -7мм -8мм -9мм -0см -1см -2см -3см -4см -5см -6см -7см -8см -9см -0дм -1дм -2дм -3дм -4дм -5дм -6дм -7дм -8дм -9дм -0л -1л -2л -3л -4л -5л -6л -7л -8л -9л -0км -1км -2км -3км -4км -5км -6км -7км -8км -9км -0га -1га -2га -3га -4га -5га -6га -7га -8га -9га -0кг -1кг -2кг -3кг -4кг -5кг -6кг -7кг -8кг -9кг -0т -1т -2т -3т -4т -5т -6т -7т -8т -9т -0г -1г -2г -3г -4г -5г -6г -7г -8г -9г -0мг -1мг -2мг -3мг -4мг -5мг -6мг -7мг -8мг -9мг -бульв -в -вв -г -га -гг -гл -гос -д -дм -доп -др -е -ед -ед -зам -и -инд -исп -Исп -к -кап -кг -кв -кл -км -кол -комн -коп -куб -л -лиц -лл -м -макс -мг -мин -мл -млн -млрд -мм -н -наб -нач -неуд -ном -о -обл -обр -общ -ок -ост -отл -п -пер -перераб -пл -пос -пр -просп -проф -р -ред -руб -с -сб -св -см -соч -ср -ст -стр -т -тел -Тел -тех -тт -туп -тыс -уд -ул -уч -физ -х -хор -ч -чел -шт -экз -э diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sk b/nonbreaking_prefixes/nonbreaking_prefix.sk deleted file mode 100644 index 1198d48..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.sk +++ /dev/null @@ -1,474 +0,0 @@ -Bc -Mgr -RNDr -PharmDr -PhDr -JUDr -PaedDr -ThDr -Ing -MUDr -MDDr -MVDr -Dr -ThLic -PhD -ArtD -ThDr -Dr -DrSc -CSs -prof -obr -Obr -Č -č -absol -adj -admin -adr -Adr -adv -advok -afr -ak -akad -akc -akuz -et -al -alch -amer -anat -angl -Angl -anglosas -anorg -ap -apod -arch -archeol -archit -arg -art -astr -astrol -astron -atp -atď -austr -Austr -aut -belg -Belg -bibl -Bibl -biol -bot -bud -bás -býv -cest -chem -cirk -csl -čs -Čs -dat -dep -det -dial -diaľ -dipl -distrib -dokl -dosl -dopr -dram -duš -dv -dvojčl -dór -ekol -ekon -el -elektr -elektrotech -energet -epic -est -etc -etonym -eufem -európ -Európ -ev -evid -expr -fa -fam -farm -fem -feud -fil -filat -filoz -fi -fon -form -fot -fr -Fr -franc -Franc -fraz -fut -fyz -fyziol -garb -gen -genet -genpor -geod -geogr -geol -geom -germ -gr -Gr -gréc -Gréc -gréckokat -hebr -herald -hist -hlav -hosp -hromad -hud -hypok -ident -i.e -ident -imp -impf -indoeur -inf -inform -instr -int -interj -inšt -inštr -iron -jap -Jap -jaz -jedn -juhoamer -juhových -juhozáp -juž -kanad -Kanad -kanc -kapit -kpt -kart -katastr -knih -kniž -komp -konj -konkr -kozmet -krajč -kresť -kt -kuch -lat -latinskoamer -lek -lex -lingv -lit -litur -log -lok -max -Max -maď -Maď -medzinár -mest -metr -mil -Mil -min -Min -miner -ml -mld -mn -mod -mytol -napr -nar -Nar -nasl -nedok -neg -negat -neklas -nem -Nem -neodb -neos -neskl -nesklon -nespis -nespráv -neved -než -niekt -niž -nom -náb -nákl -námor -nár -obch -obj -obv -obyč -obč -občian -odb -odd -ods -ojed -okr -Okr -opt -opyt -org -os -osob -ot -ovoc -par -part -pejor -pers -pf -Pf -P.f -p.f -pl -Plk -pod -podst -pokl -polit -politol -polygr -pomn -popl -por -porad -porov -posch -potrav -použ -poz -pozit -poľ -poľno -poľnohosp -poľov -pošt -pož -prac -predl -pren -prep -preuk -priezv -Priezv -privl -prof -práv -príd -príj -prík -príp -prír -prísl -príslov -príč -psych -publ -pís -písm -pôv -refl -reg -rep -resp -rozk -rozlič -rozpráv -roč -Roč -ryb -rádiotech -rím -samohl -semest -sev -severoamer -severových -severozáp -sg -skr -skup -sl -Sloven -soc -soch -sociol -sp -spol -Spol -spoloč -spoluhl -správ -spôs -st -star -starogréc -starorím -s.r.o -stol -stor -str -stredoamer -stredoškol -subj -subst -superl -sv -sz -súkr -súp -súvzť -tal -Tal -tech -tel -Tel -telef -teles -telev -teol -trans -turist -tuzem -typogr -tzn -tzv -ukaz -ul -Ul -umel -univ -ust -ved -vedľ -verb -veter -vin -viď -vl -vod -vodohosp -pnl -vulg -vyj -vys -vysokoškol -vzťaž -vôb -vých -výd -výrob -výsk -výsl -výtv -výtvar -význ -včel -vš -všeob -zahr -zar -zariad -zast -zastar -zastaráv -zb -zdravot -združ -zjemn -zlat -zn -Zn -zool -zr -zried -zv -záhr -zák -zákl -zám -záp -západoeur -zázn -územ -účt -čast -čes -Čes -čl -čísl -živ -pr -fak -Kr -p.n.l -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sl b/nonbreaking_prefixes/nonbreaking_prefix.sl deleted file mode 100644 index 230062c..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.sl +++ /dev/null @@ -1,78 +0,0 @@ -dr -Dr -itd -itn -št #NUMERIC_ONLY# -Št #NUMERIC_ONLY# -d -jan -Jan -feb -Feb -mar -Mar -apr -Apr -jun -Jun -jul -Jul -avg -Avg -sept -Sept -sep -Sep -okt -Okt -nov -Nov -dec -Dec -tj -Tj -npr -Npr -sl -Sl -op -Op -gl -Gl -oz -Oz -prev -dipl -ing -prim -Prim -cf -Cf -gl -Gl -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z diff --git a/nonbreaking_prefixes/nonbreaking_prefix.sv b/nonbreaking_prefixes/nonbreaking_prefix.sv deleted file mode 100644 index df5ef29..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.sv +++ /dev/null @@ -1,46 +0,0 @@ -#single upper case letter are usually initials -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -#misc abbreviations -AB -G -VG -dvs -etc -from -iaf -jfr -kl -kr -mao -mfl -mm -osv -pga -tex -tom -vs -- cgit v1.2.3