From b31ace79ea5f6b3f279c544cd3a443d6fbf2a24d Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 26 Feb 2026 10:05:59 +0000 Subject: overhaul --- nonbreaking_prefixes/nonbreaking_prefix.pt | 210 ----------------------------- 1 file changed, 210 deletions(-) delete mode 100644 nonbreaking_prefixes/nonbreaking_prefix.pt (limited to 'nonbreaking_prefixes/nonbreaking_prefix.pt') diff --git a/nonbreaking_prefixes/nonbreaking_prefix.pt b/nonbreaking_prefixes/nonbreaking_prefix.pt deleted file mode 100644 index 5d65bf2..0000000 --- a/nonbreaking_prefixes/nonbreaking_prefix.pt +++ /dev/null @@ -1,210 +0,0 @@ -#File adapted for PT by H. Leal Fontes from the EN & DE versions published with moses-2009-04-13. Last update: 10.11.2009. -#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. -#Special cases are included for prefixes that ONLY appear before 0-9 numbers. - -#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) -#usually upper case letters are initials in a name -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z - - -#Roman Numerals. A dot after one of these is not a sentence break in Portuguese. -I -II -III -IV -V -VI -VII -VIII -IX -X -XI -XII -XIII -XIV -XV -XVI -XVII -XVIII -XIX -XX -i -ii -iii -iv -v -vi -vii -viii -ix -x -xi -xii -xiii -xiv -xv -xvi -xvii -xviii -xix -xx - -#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks -Adj -Adm -Adv -Art -Ca -Capt -Cmdr -Col -Comdr -Con -Corp -Cpl -DR -DRA -Dr -Dra -Dras -Drs -Eng -Enga -Engas -Engos -Ex -Exo -Exmo -Fig -Gen -Hosp -Insp -Lda -MM -MR -MRS -MS -Maj -Mrs -Ms -Msgr -Op -Ord -Pfc -Ph -Prof -Pvt -Rep -Reps -Res -Rev -Rt -Sen -Sens -Sfc -Sgt -Sr -Sra -Sras -Srs -Sto -Supt -Surg -adj -adm -adv -art -cit -col -con -corp -cpl -dr -dra -dras -drs -eng -enga -engas -engos -ex -exo -exmo -fig -op -prof -sr -sra -sras -srs -sto - -#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) -v -vs -i.e -rev -e.g - -#Numbers only. These should only induce breaks when followed by a numeric sequence -# add NUMERIC_ONLY after the word for this function -#This case is mostly for the english "No." which can either be a sentence of its own, or -#if followed by a number, a non-breaking prefix -No #NUMERIC_ONLY# -Nos -Art #NUMERIC_ONLY# -Nr -p #NUMERIC_ONLY# -pp #NUMERIC_ONLY# - -- cgit v1.2.3