1 files changed, 159 insertions, 0 deletions
diff --git a/inbetwmert.sh b/inbetwmert.sh
new file mode 100755
index 0000000..94f2b84
--- /dev/null
+++ b/inbetwmert.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+
+# mmert v0.2
+# manipulate mert-moses.pl script
+# Copyright 2011
+# Patrick Simianer
+# Heidelberg University, ICL
+#
+# This file is part of MMERT.
+#
+# MMERT is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# MMERT is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with MMERT.  If not, see <http://www.gnu.org/licenses/>.
+
+
+function usage()
+{
+    echo "Usage: $0 <suffix>"
+    exit
+}
+
+if [ -z "$1" ]; then usage; fi
+
+
+MMERTPKG=~/mmert/example/   	     # base directory
+BIN=$MMERTPKG/bin/      	         # binaries (moses, mert, extractor)
+DECODER=$BIN/moses                   # decoder
+SCRIPTS=$MMERTPKG/scripts/           # moses scripts folder
+FOLDER_PREFIX=mert_$1                # directory for logs (created in current directory)
+WORKDIR=mmert_$1                     # working directory for mmert (created in current directory)
+PARALLEL=2                           # number moses/mert processes to run in parallel
+DECODER_FLAGS="-th 1"	             # additional decoder flags, e.g. '-th 8' for 8 threads per moses instance, needs moses with thread support
+TASKS=(A B C D)                      # used to identify tasks
+TASKL="A,B,C,D"                      # same as above as string, tasks ids separated by ','
+NUMTASKS=${#TASKS[@]}                # for parallelization, number of tasks
+INI=$MMERTPKG/ini/moses.ini          # one moses ini for all tasks (e.g. pooled model and/or same start weights)? or prefix for individual inis
+SET=dev                              # dev or devtest (only used to build filenames)?
+#declare -A INIS
+#INIS[${TASKS[0]}]=$INI/$SET/${TASKS[0]}/moses.ini # individual inis (for individual models and/or individual start weights)
+#INIS[${TASKS[1]}]=$INI/$SET/${TASKS[1]}/moses.ini
+#INIS[${TASKS[2]}]=$INI/$SET/${TASKS[2]}/moses.ini
+#INIS[${TASKS[3]}]=$INI/$SET/${TASKS[3]}/moses.ini
+#INIS[${TASKS[4]}]=$INI/$SET/${TASKS[4]}/moses.ini
+#INIS[${TASKS[5]}]=$INI/$SET/${TASKS[5]}/moses.ini
+#INIS[${TASKS[6]}]=$INI/$SET/${TASKS[6]}/moses.ini
+
+# the next variables enable the script to locate your dev set(s), please set accordingly
+# see also run_mert_wrapper() function 
+FR=de                          # source language
+EN=en                          # target language
+TUNEFILE_PREFIX=epmini-$SET    # to build filenames of dev(test) sets
+
+MAX_ITER=100                   # max mert iterations
+NBEST=100                      # n for nbest lists
+INBETW=./regmtl.py             # script to run after MERT runs finished
+NUM_WEIGHTS=14                 # dimension, length of weight vector
+MIN_CHANGE=0.01                # minimum change in average vector, stopping criterion
+LAMBDA=0.01                    # regularization parameter
+FIRST_AVG=0                    # 0: zero vector, 1: provide run0.avector.txt yourself (in $WORKDIR)
+
+# parameters
+#  $1 FR tuning data
+#  $2 EN tuning data
+#  $3 /path/to/ini
+#  $4 task id
+#  $5 --continue
+#
+#
+function run_mert()
+{
+    ./mert-moses.pl \
+        $1 \
+        $2 \
+        $DECODER \
+        $3 \
+        --no-filter-phrase-table \
+        --working-dir $FOLDER_PREFIX"_$4" \
+        --rootdir $SCRIPTS \
+        --decoder-flags "$DECODER_FLAGS" \
+        --mertdir $BIN \
+        --inputtype=0 \
+        --maximum-iterations=9999 \
+        --efficient_scorenbest_flag \
+        --nocase \
+        --nonorm \
+        --nbest=$NBEST \
+        $5
+}
+
+function run_mert_wrapper()
+{
+    T=$1
+    echo -e "\n ===> $IT ========>\n\n" >> $WORKDIR/mert.$T.out >> $WORKDIR/mert.$T.err
+    # replace $INI with ${INIS[$T]} to use separate inis
+    run_mert $MMERTPKG/data/$TUNEFILE_PREFIX-$T.$FR $MMERTPKG/data/$TUNEFILE_PREFIX-$T.$EN $INI $T $CONT >> $WORKDIR/mert.$T.out 2>> $WORKDIR/mert.$T.err
+}
+
+function wait_for()
+{
+    echo "Waiting for ${#WAITFOR[@]} MERT procs..."
+    for pid in ${WAITFOR[@]}; do
+        wait $pid;
+    done
+}
+
+
+if [ ! -d "$WORKDIR" ]; then
+    mkdir $WORKDIR
+fi
+
+IT=0
+while true; do
+    IT=$(($IT+1))
+    if [ $IT -eq 1 ]; then
+        echo "First iteration"
+        CONT="";
+    else
+        echo -e "\nContinue with $IT"
+        CONT="--continue";
+    fi
+
+    # first half
+    WAITFOR=()
+    for (( i = 1; i <= $PARALLEL; i++ )); do
+        echo "Start for ${TASKS[$i-1]}"
+        run_mert_wrapper ${TASKS[$i-1]} &
+        WAITFOR+=( $! )
+    done
+    wait_for $WAITFOR
+
+    # second half
+    WAITFOR=()
+    for (( i = $PARALLEL+1; i <= $NUMTASKS; i++)); do
+        echo "Start for ${TASKS[$i-1]}"
+        run_mert_wrapper ${TASKS[$i-1]} &        
+        WAITFOR+=( $! )
+    done
+    wait_for $WAITFOR
+
+    echo "Running $INBETW ..."
+    $INBETW $FOLDER_PREFIX $WORKDIR $TASKL $IT $NUM_WEIGHTS $MIN_CHANGE $LAMBDA $FIRST_AVG
+
+    if [ -f "$WORKDIR/CONVERGED" ]; then break; fi
+    if [ $IT -eq $MAX_ITER ]; then
+    	echo "Reached global iteration limit ($MAX_ITER), stopping.";
+        break;
+    fi
+done
+echo 'done'
+