summary | refs | log | tree | commit | diff
path: root/model
diff options
context:
space:
mode:
Diffstat (limited to 'model')
-rw-r--r-- model/cdec.ini 28
-rw-r--r-- model/dtrain.ini 4
-rw-r--r-- model/grammar.gz bin 0 -> 121 bytes
-rw-r--r-- model/nc-wmt11.en.srilm.gz bin 0 -> 16017291 bytes
-rwxr-xr-x model/run 4
-rw-r--r-- model/src.gz bin 0 -> 70 bytes
-rw-r--r-- model/weights.init 12
7 files changed, 48 insertions, 0 deletions
diff --git a/model/cdec.ini b/model/cdec.ini
new file mode 100644
index 0000000..36f53ac
--- /dev/null
+++ b/model/cdec.ini
@@ -0,0 +1,28 @@
+formalism=scfg
+add_pass_through_rules=true
+scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+grammar=grammar.gz
+feature_function=WordPenalty
+feature_function=KLanguageModel nc-wmt11.en.srilm.gz
+# all currently working feature functions for translation:
+# (with those features active that were used in the ACL paper)
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+feature_function=RuleIdentityFeatures
+feature_function=RuleSourceBigramFeatures
+feature_function=RuleTargetBigramFeatures
+feature_function=RuleShape
+#feature_function=LexicalFeatures 1 1 1
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
+weights=weights.init
diff --git a/model/dtrain.ini b/model/dtrain.ini
new file mode 100644
index 0000000..b6d29bb
--- /dev/null
+++ b/model/dtrain.ini
@@ -0,0 +1,4 @@
+decoder_conf=./cdec.ini # config for cdec
+k=100 # use 100best lists
+N=4 # optimize (approx.) BLEU4
+margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
new file mode 100644
index 0000000..84eb73c
--- /dev/null
+++ b/model/grammar.gz
Binary files differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
new file mode 100644
index 0000000..7ce8105
--- /dev/null
+++ b/model/nc-wmt11.en.srilm.gz
Binary files differ
diff --git a/model/run b/model/run
new file mode 100755
index 0000000..43f20b0
--- /dev/null
+++ b/model/run
@@ -0,0 +1,4 @@
+#!/bin/bash -x
+
+~/src/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc
+
diff --git a/model/src.gz b/model/src.gz
new file mode 100644
index 0000000..9aa247f
--- /dev/null
+++ b/model/src.gz
Binary files differ
diff --git a/model/weights.init b/model/weights.init
new file mode 100644
index 0000000..0d09f9f
--- /dev/null
+++ b/model/weights.init
@@ -0,0 +1,12 @@
+CountEF 0.1
+EgivenFCoherent -0.1
+Glue 0.01
+IsSingletonF -0.01
+IsSingletonFE -0.01
+LanguageModel 0.1
+LanguageModel_OOV -1
+MaxLexFgivenE -0.1
+MaxLexEgivenF -0.1
+PassThrough -0.1
+SampleCountF -0.1
+WordPenalty -0.1