index
:
nlp_scripts
master
Mirror of https://github.com/pks/nlp_scripts.git
summary
refs
log
tree
commit
diff
log msg
author
committer
range
Mode
Name
Size
-rw-r--r--
.gitmodules
82
log
plain
-rw-r--r--
LICENSE
1080
log
plain
-rw-r--r--
README.md
252
log
plain
-rwxr-xr-x
add_seg
757
log
plain
-rwxr-xr-x
add_start_end
139
log
plain
-rwxr-xr-x
avg
356
log
plain
-rwxr-xr-x
avg_weights
587
log
plain
-rwxr-xr-x
collapse_tags.rb
652
log
plain
-rwxr-xr-x
compound-splitter.perl
9044
log
plain
-rwxr-xr-x
de-sgm
277
log
plain
-rwxr-xr-x
even
141
log
plain
-rwxr-xr-x
filter_by_rule_shape
550
log
plain
-rwxr-xr-x
firstlower
144
log
plain
-rwxr-xr-x
hg2json.py
2186
log
plain
-rwxr-xr-x
htmlentities
185
log
plain
-rwxr-xr-x
kbest_bleu_oracles
1455
log
plain
-rwxr-xr-x
keycount
196
log
plain
-rwxr-xr-x
kmeans
3128
log
plain
-rwxr-xr-x
lin_reg
1866
log
plain
-rwxr-xr-x
log_reg
1513
log
plain
-rwxr-xr-x
lowercase.perl
120
log
plain
-rwxr-xr-x
max
111
log
plain
-rwxr-xr-x
median
118
log
plain
-rwxr-xr-x
memusg
195
log
plain
-rwxr-xr-x
merge_files
453
log
plain
-rwxr-xr-x
merge_ttable
608
log
plain
-rwxr-xr-x
min
108
log
plain
-rwxr-xr-x
min_max
1245
log
plain
-rwxr-xr-x
moses_1best
188
log
plain
-rwxr-xr-x
mult
99
log
plain
-rwxr-xr-x
ng
521
log
plain
-rwxr-xr-x
nn
72
log
plain
-rwxr-xr-x
no_empty
364
log
plain
-rwxr-xr-x
no_non_printables
88
log
plain
d---------
nonbreaking_prefixes
871
log
plain
-rwxr-xr-x
norm_german
1861
log
plain
-rwxr-xr-x
normalize_punctuation
793
log
plain
-rwxr-xr-x
num_tok
138
log
plain
-rwxr-xr-x
odd
142
log
plain
-rwxr-xr-x
parse-stanford.sh
496
log
plain
-rwxr-xr-x
paste_pairs
229
log
plain
-rwxr-xr-x
per_sentence_bleu
632
log
plain
-rwxr-xr-x
per_sentence_bleu_kbest
685
log
plain
-rwxr-xr-x
per_sentence_ter
812
log
plain
-rwxr-xr-x
preprocess
356
log
plain
-rwxr-xr-x
pt_bloom
493
log
plain
-rwxr-xr-x
round
93
log
plain
-rwxr-xr-x
ruby_eval
73
log
plain
-rwxr-xr-x
rule_shapes
454
log
plain
-rwxr-xr-x
sample
1247
log
plain
-rwxr-xr-x
select
434
log
plain
-rwxr-xr-x
shard
2429
log
plain
-rwxr-xr-x
sort_features
197
log
plain
-rwxr-xr-x
splitpipes
358
log
plain
-rwxr-xr-x
stddev
489
log
plain
-rwxr-xr-x
strips
68
log
plain
-rwxr-xr-x
sum
90
log
plain
-rwxr-xr-x
tc
100
log
plain
d---------
test
180
log
plain
-rwxr-xr-x
tf-idf
1277
log
plain
-rwxr-xr-x
to_ascii
257
log
plain
-rwxr-xr-x
tokenizer.no-escape.perl
10147
log
plain
-rwxr-xr-x
toks
148
log
plain
-rwxr-xr-x
traintestsplit
1377
log
plain
-rwxr-xr-x
var
467
log
plain
-rwxr-xr-x
vocab
125
log
plain
-rwxr-xr-x
wrap-xml.perl
934
log
plain