diff options
Diffstat (limited to 'inbetwmert.sh')
-rwxr-xr-x | inbetwmert.sh | 159 |
1 files changed, 159 insertions, 0 deletions
#!/bin/bash

# mmert v0.2
# manipulate mert-moses.pl script
# Copyright 2011
# Patrick Simianer
# Heidelberg University, ICL
#
# This file is part of MMERT.
#
# MMERT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# MMERT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with MMERT. If not, see <http://www.gnu.org/licenses/>.
#
# Usage: inbetwmert.sh <suffix>
#
# Repeatedly runs ./mert-moses.pl once per task (at most $PARALLEL at a time),
# then runs $INBETW, until $INBETW leaves a file named CONVERGED in $WORKDIR
# or $MAX_ITER global iterations have been done.


# Print the usage line to stderr and abort with a non-zero status.
usage() {
  echo "Usage: $0 <suffix>" >&2
  exit 1
}

# Print an error message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ -z "${1:-}" ]]; then usage; fi


MMERTPKG=~/mmert/example/         # base directory
BIN=$MMERTPKG/bin/                # binaries (moses, mert, extractor)
DECODER=$BIN/moses                # decoder
SCRIPTS=$MMERTPKG/scripts/        # moses scripts folder
FOLDER_PREFIX="mert_$1"           # directory for logs (created in current directory)
WORKDIR="mmert_$1"                # working directory for mmert (created in current directory)
PARALLEL=2                        # number moses/mert processes to run in parallel
DECODER_FLAGS="-th 1"             # additional decoder flags, e.g. '-th 8' for 8 threads per moses instance, needs moses with thread support
TASKS=(A B C D)                   # used to identify tasks
TASKL=$(IFS=,; echo "${TASKS[*]}") # same as above as string, task ids separated by ','
NUMTASKS=${#TASKS[@]}             # for parallelization, number of tasks
INI=$MMERTPKG/ini/moses.ini       # one moses ini for all tasks (e.g. pooled model and/or same start weights)? or prefix for individual inis
SET=dev                           # dev or devtest (only used to build filenames)?

#declare -A INIS
#INIS[${TASKS[0]}]=$INI/$SET/${TASKS[0]}/moses.ini # individual inis (for individual models and/or individual start weights)
#INIS[${TASKS[1]}]=$INI/$SET/${TASKS[1]}/moses.ini
#INIS[${TASKS[2]}]=$INI/$SET/${TASKS[2]}/moses.ini
#INIS[${TASKS[3]}]=$INI/$SET/${TASKS[3]}/moses.ini
#INIS[${TASKS[4]}]=$INI/$SET/${TASKS[4]}/moses.ini
#INIS[${TASKS[5]}]=$INI/$SET/${TASKS[5]}/moses.ini
#INIS[${TASKS[6]}]=$INI/$SET/${TASKS[6]}/moses.ini

# the next variables enable the script to locate your dev set(s), please set accordingly
# see also run_mert_wrapper() function
FR=de                             # source language
EN=en                             # target language
TUNEFILE_PREFIX=epmini-$SET       # to build filenames of dev(test) sets

MAX_ITER=100                      # max global iterations (one MERT round per task plus one $INBETW run each)
NBEST=100                         # n for nbest lists
INBETW=./regmtl.py                # script to run after MERT runs finished
NUM_WEIGHTS=14                    # dimension, length of weight vector
MIN_CHANGE=0.01                   # minimum change in average vector, stopping criterion
LAMBDA=0.01                       # regularization parameter
FIRST_AVG=0                       # 0: zero vector, 1: provide run0.avector.txt yourself (in $WORKDIR)

# The batching loop below advances by $PARALLEL; anything < 1 would never terminate.
if ! [[ "$PARALLEL" =~ ^[1-9][0-9]*$ ]]; then
  die "PARALLEL must be a positive integer (got '$PARALLEL')"
fi

# Run ./mert-moses.pl for a single task.
#
# parameters
#  $1 FR tuning data
#  $2 EN tuning data
#  $3 /path/to/ini
#  $4 task id
#  $5 --continue (optional; omitted entirely when empty)
#
# Globals read: DECODER, FOLDER_PREFIX, SCRIPTS, DECODER_FLAGS, BIN, NBEST
run_mert() {
  # ${5:+"$5"} expands to nothing at all when $5 is empty, so mert-moses.pl
  # never receives an empty-string argument on the first iteration.
  ./mert-moses.pl \
    "$1" \
    "$2" \
    "$DECODER" \
    "$3" \
    --no-filter-phrase-table \
    --working-dir "${FOLDER_PREFIX}_$4" \
    --rootdir "$SCRIPTS" \
    --decoder-flags "$DECODER_FLAGS" \
    --mertdir "$BIN" \
    --inputtype=0 \
    --maximum-iterations=9999 \
    --efficient_scorenbest_flag \
    --nocase \
    --nonorm \
    --nbest="$NBEST" \
    ${5:+"$5"}
}

# Run MERT for task $1, appending its stdout/stderr to
# $WORKDIR/mert.<task>.out and $WORKDIR/mert.<task>.err.
#
# Globals read: IT (current global iteration), CONT ("" or "--continue"),
#               WORKDIR, MMERTPKG, TUNEFILE_PREFIX, FR, EN, INI
run_mert_wrapper() {
  local task=$1
  local out_log="$WORKDIR/mert.$task.out"
  local err_log="$WORKDIR/mert.$task.err"
  local marker
  printf -v marker '\n ===> %s ========>\n\n\n' "$IT"

  # Write the iteration marker to BOTH logs. (Chaining two '>>' redirections
  # on one command sends the text only to the last file.)
  printf '%s' "$marker" >> "$out_log"
  printf '%s' "$marker" >> "$err_log"

  # replace $INI with ${INIS[$task]} to use separate inis
  run_mert \
    "$MMERTPKG/data/$TUNEFILE_PREFIX-$task.$FR" \
    "$MMERTPKG/data/$TUNEFILE_PREFIX-$task.$EN" \
    "$INI" \
    "$task" \
    "$CONT" \
    >> "$out_log" 2>> "$err_log"
}

# Block until every background job whose PID is given as an argument has
# finished. Jobs that exit non-zero are reported on stderr.
# Returns 0 if all jobs succeeded, 1 otherwise.
wait_for() {
  local pid
  local failed=0
  echo "Waiting for $# MERT procs..."
  for pid in "$@"; do
    if ! wait "$pid"; then
      echo "Warning: MERT process $pid exited with non-zero status (see $WORKDIR/mert.*.err)" >&2
      failed=1
    fi
  done
  return "$failed"
}


mkdir -p -- "$WORKDIR" || die "Cannot create working directory '$WORKDIR'"

IT=0
while true; do
  IT=$((IT + 1))
  if (( IT == 1 )); then
    echo "First iteration"
    CONT=""
  else
    echo -e "\nContinue with $IT"
    CONT="--continue"
  fi

  # Run all tasks in batches of at most $PARALLEL concurrent MERT processes;
  # each batch is awaited before the next one starts.
  for (( start = 0; start < NUMTASKS; start += PARALLEL )); do
    WAITFOR=()
    for task in "${TASKS[@]:start:PARALLEL}"; do
      echo "Start for $task"
      run_mert_wrapper "$task" &
      WAITFOR+=( "$!" )
    done
    wait_for "${WAITFOR[@]}"
  done

  echo "Running $INBETW ..."
  # $INBETW is left unquoted on purpose so it may hold a command plus
  # arguments (e.g. "python ./regmtl.py").
  # shellcheck disable=SC2086
  if ! $INBETW "$FOLDER_PREFIX" "$WORKDIR" "$TASKL" "$IT" "$NUM_WEIGHTS" "$MIN_CHANGE" "$LAMBDA" "$FIRST_AVG"; then
    echo "Warning: $INBETW exited with non-zero status in iteration $IT" >&2
  fi

  if [[ -f "$WORKDIR/CONVERGED" ]]; then break; fi
  if (( IT == MAX_ITER )); then
    echo "Reached global iteration limit ($MAX_ITER), stopping."
    break
  fi
done
echo 'done'