index
:
nlp_scripts
master
Mirror of https://github.com/pks/nlp_scripts.git
summary
refs
log
tree
commit
diff
log msg
author
committer
range
Mode
Name
Size
-rw-r--r--
.gitmodules
82
log
plain
-rw-r--r--
LICENSE
1080
log
plain
-rw-r--r--
README.md
117
log
plain
-rwxr-xr-x
add-index
128
log
plain
-rwxr-xr-x
add-ln
87
log
plain
-rwxr-xr-x
add-seg
1072
log
plain
-rwxr-xr-x
add-start-end
138
log
plain
-rwxr-xr-x
avg
618
log
plain
-rwxr-xr-x
avg-seg-len
128
log
plain
-rwxr-xr-x
avg-weights
589
log
plain
-rwxr-xr-x
bishuf
397
log
plain
-rwxr-xr-x
bitext-filter-length
2161
log
plain
-rwxr-xr-x
bitext2tmx
1016
log
plain
-rwxr-xr-x
biuniq
512
log
plain
-rwxr-xr-x
bleu-cmp
467
log
plain
-rwxr-xr-x
cdec-hg-to-json
2184
log
plain
-rwxr-xr-x
chars
109
log
plain
-rwxr-xr-x
cma
394
log
plain
-rwxr-xr-x
cumul
777
log
plain
-rwxr-xr-x
de-bpe
44
log
plain
-rwxr-xr-x
de-sgm
559
log
plain
-rwxr-xr-x
div
115
log
plain
-rwxr-xr-x
dot
134
log
plain
-rwxr-xr-x
even
140
log
plain
-rwxr-xr-x
exclude
225
log
plain
-rwxr-xr-x
feature-dict
413
log
plain
-rwxr-xr-x
filter-illegal
342
log
plain
-rwxr-xr-x
filter-len
457
log
plain
-rwxr-xr-x
filter-tokens
317
log
plain
-rwxr-xr-x
first-upper
118
log
plain
-rwxr-xr-x
fix-utf-8-pua
148
log
plain
-rwxr-xr-x
gigaword-collapse-tags
650
log
plain
-rwxr-xr-x
hadoop-uniq
246
log
plain
-rwxr-xr-x
hist-tok
289
log
plain
-rwxr-xr-x
htmlentities
188
log
plain
-rwxr-xr-x
inv
625
log
plain
-rwxr-xr-x
is-first-lower
142
log
plain
-rwxr-xr-x
joint-set
348
log
plain
-rwxr-xr-x
kbest-bleu-oracles
1461
log
plain
-rwxr-xr-x
kendalls-tau
1497
log
plain
-rwxr-xr-x
key-count
195
log
plain
-rwxr-xr-x
kmeans
3148
log
plain
-rwxr-xr-x
lang
1461
log
plain
-rwxr-xr-x
langid-polyglot
405
log
plain
-rwxr-xr-x
length-ratio
195
log
plain
-rwxr-xr-x
lin-reg
1872
log
plain
-rwxr-xr-x
log-reg
1513
log
plain
-rwxr-xr-x
ltok
140
log
plain
-rwxr-xr-x
make-rule-features
711
log
plain
-rwxr-xr-x
max
119
log
plain
-rwxr-xr-x
max-len
178
log
plain
-rwxr-xr-x
median
116
log
plain
-rwxr-xr-x
merge-files
451
log
plain
-rwxr-xr-x
merge-ttable
601
log
plain
-rwxr-xr-x
min
116
log
plain
-rwxr-xr-x
min-max
1251
log
plain
-rwxr-xr-x
mkidx
100
log
plain
-rwxr-xr-x
moses-1best
186
log
plain
-rw-r--r--
moving-average
486
log
plain
-rwxr-xr-x
moving-sum
121
log
plain
-rwxr-xr-x
mult
97
log
plain
-rwxr-xr-x
nfc
160
log
plain
-rwxr-xr-x
ng
527
log
plain
-rwxr-xr-x
nn
71
log
plain
-rwxr-xr-x
no-empty
362
log
plain
-rwxr-xr-x
no-non-printables
86
log
plain
d---------
nonbreaking-prefixes
871
log
plain
-rwxr-xr-x
norm
83
log
plain
-rwxr-xr-x
norm-german
1866
log
plain
-rwxr-xr-x
norm-hyphens
46
log
plain
-rwxr-xr-x
normalize-punctuation
793
log
plain
-rwxr-xr-x
normchr
1432
log
plain
-rwxr-xr-x
num-tok
136
log
plain
-rwxr-xr-x
odd
140
log
plain
-rwxr-xr-x
overlap
315
log
plain
-rwxr-xr-x
paste-pairs
210
log
plain
-rwxr-xr-x
per-sentence-bleu
642
log
plain
-rwxr-xr-x
per-sentence-bleu-kbest
686
log
plain
-rwxr-xr-x
per-sentence-ter
794
log
plain
-rwxr-xr-x
percentile
270
log
plain
-rwxr-xr-x
pot
90
log
plain
-rwxr-xr-x
preprocess
363
log
plain
-rwxr-xr-x
preprocess-no-lower
321
log
plain
-rwxr-xr-x
pt-bloom
497
log
plain
-rwxr-xr-x
push-rules
352
log
plain
-rwxr-xr-x
remove-devtest
1094
log
plain
-rwxr-xr-x
remove-test-from-bitext
1190
log
plain
-rwxr-xr-x
repetition-rate
729
log
plain
-rwxr-xr-x
round
91
log
plain
-rwxr-xr-x
rule-shapes
452
log
plain
-rwxr-xr-x
sample
1193
log
plain
-rwxr-xr-x
select
435
log
plain
-rwxr-xr-x
select-from
722
log
plain
-rwxr-xr-x
sentencepiece-decode
121
log
plain
-rwxr-xr-x
shard
2429
log
plain
-rwxr-xr-x
sort-features
196
log
plain
-rwxr-xr-x
source-sides
85
log
plain
-rwxr-xr-x
split-kbest
346
log
plain
-rwxr-xr-x
split-lines
189
log
plain
-rwxr-xr-x
split-pipes
825
log
plain
-rwxr-xr-x
sqrt
76
log
plain
-rwxr-xr-x
stanford-parser-run
494
log
plain
-rwxr-xr-x
stddev
643
log
plain
-rwxr-xr-x
strips
67
log
plain
-rwxr-xr-x
subtract
169
log
plain
-rwxr-xr-x
sum
88
log
plain
-rwxr-xr-x
tc
98
log
plain
d---------
test
272
log
plain
-rwxr-xr-x
tf-idf
1397
log
plain
-rwxr-xr-x
tmx-extract
2248
log
plain
-rwxr-xr-x
tmx-extract-original-py2
2087
log
plain
-rwxr-xr-x
tmx-to-plain
3151
log
plain
-rwxr-xr-x
to-ascii
255
log
plain
-rwxr-xr-x
toks
150
log
plain
-rwxr-xr-x
toks-per-line
252
log
plain
-rwxr-xr-x
train-test-split
2201
log
plain
-rwxr-xr-x
tsv-exclude
365
log
plain
-rwxr-xr-x
tsv-joint-set
1113
log
plain
-rwxr-xr-x
tsv-uniq
1172
log
plain
-rwxr-xr-x
var
470
log
plain
-rwxr-xr-x
vocab
57
log
plain
-rwxr-xr-x
vocab-2
139
log
plain
-rwxr-xr-x
zh-ko-or-ja
350
log
plain