index
:
nlp_scripts
master
Mirror of https://github.com/pks/nlp_scripts.git
summary
refs
log
tree
commit
diff
log msg
author
committer
range
Mode
Name
Size
-rw-r--r--
.gitmodules
82
log
plain
-rw-r--r--
LICENSE
1080
log
plain
-rw-r--r--
README.md
266
log
plain
-rwxr-xr-x
add-ln
90
log
plain
-rwxr-xr-x
add-seg
1071
log
plain
-rwxr-xr-x
add-start-end
139
log
plain
-rwxr-xr-x
avg
358
log
plain
-rwxr-xr-x
avg-seg-len
129
log
plain
-rwxr-xr-x
avg-weights
588
log
plain
-rwxr-xr-x
bishuf
398
log
plain
-rwxr-xr-x
bitext-filter-length
1574
log
plain
-rwxr-xr-x
cdec-hg-to-json
2185
log
plain
-rwxr-xr-x
cmp
468
log
plain
-rwxr-xr-x
compound-splitter.perl
9044
log
plain
-rwxr-xr-x
cumul
727
log
plain
-rwxr-xr-x
de-bpe
40
log
plain
-rwxr-xr-x
de-sgm
282
log
plain
-rwxr-xr-x
detruecase.perl
2199
log
plain
-rwxr-xr-x
div
116
log
plain
-rwxr-xr-x
dot
135
log
plain
-rwxr-xr-x
even
141
log
plain
-rwxr-xr-x
feature-dict
414
log
plain
-rwxr-xr-x
filter-illegal
344
log
plain
-rwxr-xr-x
filter-len
382
log
plain
-rwxr-xr-x
filter-tokens
319
log
plain
-rwxr-xr-x
first-lower
143
log
plain
-rwxr-xr-x
fix-utf-8-pua
149
log
plain
-rwxr-xr-x
gigaword-collapse-tags
651
log
plain
-rwxr-xr-x
hadoop-uniq
247
log
plain
-rwxr-xr-x
hist-tok
290
log
plain
-rwxr-xr-x
htmlentities
185
log
plain
-rwxr-xr-x
kbest-bleu-oracles
1460
log
plain
-rwxr-xr-x
kendalls-tau
1503
log
plain
-rwxr-xr-x
key-count
196
log
plain
-rwxr-xr-x
kmeans
3135
log
plain
-rwxr-xr-x
lang
1464
log
plain
-rwxr-xr-x
length-ratio
196
log
plain
-rwxr-xr-x
lin-reg
1871
log
plain
-rwxr-xr-x
log-reg
1514
log
plain
-rwxr-xr-x
lowercase.perl
120
log
plain
-rwxr-xr-x
ltok
137
log
plain
-rwxr-xr-x
make-rule-features
713
log
plain
-rwxr-xr-x
max
110
log
plain
-rwxr-xr-x
max-len
180
log
plain
-rwxr-xr-x
median
117
log
plain
-rwxr-xr-x
merge-files
452
log
plain
-rwxr-xr-x
merge-ttable
609
log
plain
-rwxr-xr-x
min
107
log
plain
-rwxr-xr-x
min-max
1254
log
plain
-rwxr-xr-x
moses-1best
187
log
plain
-rwxr-xr-x
mult
98
log
plain
-rwxr-xr-x
multi-bleu.perl
4826
log
plain
-rwxr-xr-x
ng
525
log
plain
-rwxr-xr-x
nn
72
log
plain
-rwxr-xr-x
no-empty
363
log
plain
-rwxr-xr-x
no-non-printables
88
log
plain
d---------
nonbreaking_prefixes
871
log
plain
-rwxr-xr-x
norm
84
log
plain
-rwxr-xr-x
norm-german
1865
log
plain
-rwxr-xr-x
norm-hyphens
50
log
plain
-rwxr-xr-x
normalize-punctuation
793
log
plain
-rwxr-xr-x
normchr
1435
log
plain
-rwxr-xr-x
num-tok
137
log
plain
-rwxr-xr-x
odd
141
log
plain
-rwxr-xr-x
overlap
318
log
plain
-rwxr-xr-x
paste-pairs
228
log
plain
-rwxr-xr-x
per-sentence-bleu
641
log
plain
-rwxr-xr-x
per-sentence-bleu-kbest
686
log
plain
-rwxr-xr-x
per-sentence-ter
813
log
plain
-rwxr-xr-x
pot
91
log
plain
-rwxr-xr-x
preprocess
356
log
plain
-rwxr-xr-x
preprocess-no-lower
314
log
plain
-rwxr-xr-x
pt-bloom
496
log
plain
-rwxr-xr-x
push-rules
353
log
plain
-rwxr-xr-x
repetition-rate
732
log
plain
-rwxr-xr-x
round
92
log
plain
-rwxr-xr-x
rule-shapes
453
log
plain
-rwxr-xr-x
sample
1248
log
plain
-rwxr-xr-x
select
434
log
plain
-rwxr-xr-x
select-from
734
log
plain
-rwxr-xr-x
shard
2428
log
plain
-rwxr-xr-x
sort-features
197
log
plain
-rwxr-xr-x
source-sides
89
log
plain
-rwxr-xr-x
split-kbest
347
log
plain
-rwxr-xr-x
split-lines
191
log
plain
-rwxr-xr-x
split-pipes
827
log
plain
-rwxr-xr-x
sqrt
77
log
plain
-rwxr-xr-x
stanford-parser-run
496
log
plain
-rwxr-xr-x
stddev
642
log
plain
-rwxr-xr-x
strips
68
log
plain
-rwxr-xr-x
sum
89
log
plain
-rwxr-xr-x
tc
99
log
plain
d---------
test
272
log
plain
-rwxr-xr-x
tf-idf
1396
log
plain
-rwxr-xr-x
tmx-extract.py
1825
log
plain
-rwxr-xr-x
to-ascii
256
log
plain
-rwxr-xr-x
tokenizer-no-escape.perl
10147
log
plain
-rwxr-xr-x
toks
147
log
plain
-rwxr-xr-x
toks-per-line
156
log
plain
-rwxr-xr-x
train-test-split
1382
log
plain
-rwxr-xr-x
train-truecaser.perl
3886
log
plain
-rwxr-xr-x
truecase.perl
2527
log
plain
-rwxr-xr-x
var
469
log
plain
-rwxr-xr-x
vocab
125
log
plain
-rwxr-xr-x
vocab2
140
log
plain