index
:
nlp_scripts
master
Mirror of https://github.com/pks/nlp_scripts.git
summary
refs
log
tree
commit
diff
log msg
author
committer
range
Mode
Name
Size
-rw-r--r--
.gitmodules
82
log
plain
-rw-r--r--
LICENSE
1080
log
plain
-rwxr-xr-x
NFC
160
log
plain
-rw-r--r--
README.md
117
log
plain
-rwxr-xr-x
add-index
129
log
plain
-rwxr-xr-x
add-ln
90
log
plain
-rwxr-xr-x
add-seg
1073
log
plain
-rwxr-xr-x
add-start-end
139
log
plain
-rwxr-xr-x
avg
360
log
plain
-rwxr-xr-x
avg-seg-len
129
log
plain
-rwxr-xr-x
avg-weights
590
log
plain
-rwxr-xr-x
bishuf
398
log
plain
-rwxr-xr-x
bitext-filter-length
2163
log
plain
-rwxr-xr-x
biuniq
477
log
plain
-rwxr-xr-x
bleu-cmp
468
log
plain
-rwxr-xr-x
cdec-hg-to-json
2185
log
plain
-rwxr-xr-x
chars
110
log
plain
-rwxr-xr-x
cma
395
log
plain
-rwxr-xr-x
cumul
727
log
plain
-rwxr-xr-x
de-bpe
44
log
plain
-rwxr-xr-x
de-sgm
585
log
plain
-rwxr-xr-x
div
116
log
plain
-rwxr-xr-x
dot
135
log
plain
-rwxr-xr-x
even
141
log
plain
-rwxr-xr-x
exclude
226
log
plain
-rwxr-xr-x
feature-dict
414
log
plain
-rwxr-xr-x
filter-illegal
344
log
plain
-rwxr-xr-x
filter-len
458
log
plain
-rwxr-xr-x
filter-tokens
319
log
plain
-rwxr-xr-x
first-upper
119
log
plain
-rwxr-xr-x
fix-utf-8-pua
149
log
plain
-rwxr-xr-x
gigaword-collapse-tags
651
log
plain
-rwxr-xr-x
hadoop-uniq
247
log
plain
-rwxr-xr-x
hist-tok
290
log
plain
-rwxr-xr-x
htmlentities
185
log
plain
-rwxr-xr-x
inv
626
log
plain
-rwxr-xr-x
is-first-lower
143
log
plain
-rwxr-xr-x
joint-set
349
log
plain
-rwxr-xr-x
kbest-bleu-oracles
1462
log
plain
-rwxr-xr-x
kendalls-tau
1503
log
plain
-rwxr-xr-x
key-count
196
log
plain
-rwxr-xr-x
kmeans
3137
log
plain
-rwxr-xr-x
lang
1464
log
plain
-rwxr-xr-x
langid-polyglot
405
log
plain
-rwxr-xr-x
length-ratio
196
log
plain
-rwxr-xr-x
lin-reg
1873
log
plain
-rwxr-xr-x
log-reg
1516
log
plain
-rwxr-xr-x
ltok
137
log
plain
-rwxr-xr-x
make-rule-features
713
log
plain
-rwxr-xr-x
max
110
log
plain
-rwxr-xr-x
max-len
180
log
plain
-rwxr-xr-x
median
117
log
plain
-rwxr-xr-x
merge-files
452
log
plain
-rwxr-xr-x
merge-ttable
611
log
plain
-rwxr-xr-x
min
107
log
plain
-rwxr-xr-x
min-max
1256
log
plain
-rwxr-xr-x
mkidx
101
log
plain
-rwxr-xr-x
moses-1best
187
log
plain
-rwxr-xr-x
moving-sum
122
log
plain
-rwxr-xr-x
mult
98
log
plain
-rwxr-xr-x
ng
527
log
plain
-rwxr-xr-x
nn
72
log
plain
-rwxr-xr-x
no-empty
363
log
plain
-rwxr-xr-x
no-non-printables
88
log
plain
d---------
nonbreaking_prefixes
871
log
plain
-rwxr-xr-x
norm
84
log
plain
-rwxr-xr-x
norm-german
1867
log
plain
-rwxr-xr-x
norm-hyphens
50
log
plain
-rwxr-xr-x
normalize-punctuation
793
log
plain
-rwxr-xr-x
normchr
1435
log
plain
-rwxr-xr-x
num-tok
137
log
plain
-rwxr-xr-x
odd
141
log
plain
-rwxr-xr-x
overlap
318
log
plain
-rwxr-xr-x
paste-pairs
228
log
plain
-rwxr-xr-x
per-sentence-bleu
643
log
plain
-rwxr-xr-x
per-sentence-bleu-kbest
688
log
plain
-rwxr-xr-x
per-sentence-ter
797
log
plain
-rwxr-xr-x
percentile
271
log
plain
-rwxr-xr-x
pot
91
log
plain
-rwxr-xr-x
preprocess
356
log
plain
-rwxr-xr-x
preprocess-no-lower
314
log
plain
-rwxr-xr-x
pt-bloom
498
log
plain
-rwxr-xr-x
push-rules
353
log
plain
-rwxr-xr-x
repetition-rate
732
log
plain
-rwxr-xr-x
round
92
log
plain
-rwxr-xr-x
rule-shapes
453
log
plain
-rwxr-xr-x
sample
1250
log
plain
-rwxr-xr-x
select
436
log
plain
-rwxr-xr-x
select-from
736
log
plain
-rwxr-xr-x
sentencepiece-decode
122
log
plain
-rwxr-xr-x
shard
2430
log
plain
-rwxr-xr-x
sort-features
197
log
plain
-rwxr-xr-x
source-sides
89
log
plain
-rwxr-xr-x
split-kbest
347
log
plain
-rwxr-xr-x
split-lines
191
log
plain
-rwxr-xr-x
split-pipes
829
log
plain
-rwxr-xr-x
sqrt
77
log
plain
-rwxr-xr-x
stanford-parser-run
496
log
plain
-rwxr-xr-x
stddev
644
log
plain
-rwxr-xr-x
strips
68
log
plain
-rwxr-xr-x
substract
170
log
plain
-rwxr-xr-x
sum
89
log
plain
-rwxr-xr-x
tc
99
log
plain
d---------
test
272
log
plain
-rwxr-xr-x
tf-idf
1398
log
plain
-rwxr-xr-x
tmx-extract.py
2247
log
plain
-rwxr-xr-x
to-ascii
256
log
plain
-rwxr-xr-x
toks
147
log
plain
-rwxr-xr-x
toks-per-line
253
log
plain
-rwxr-xr-x
train-test-split
1474
log
plain
-rwxr-xr-x
tsv-joint-set
1114
log
plain
-rwxr-xr-x
tsv-uniq
1173
log
plain
-rwxr-xr-x
var
471
log
plain
-rwxr-xr-x
vocab
58
log
plain
-rwxr-xr-x
vocab2
140
log
plain
-rwxr-xr-x
zh-ko-or-ja
351
log
plain