#!/bin/bash
# mmert v0.2
# manipulate mert-moses.pl script
# Copyright 2011
# Patrick Simianer
# Heidelberg University, ICL
#
# This file is part of MMERT.
#
# MMERT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# MMERT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with MMERT. If not, see .
function usage()
{
echo "Usage: $0 "
exit
}
if [ -z "$1" ]; then usage; fi
MMERTPKG=~/mmert/example/ # base directory
BIN=$MMERTPKG/bin/ # binaries (moses, mert, extractor)
DECODER=$BIN/moses # decoder
SCRIPTS=$MMERTPKG/scripts/ # moses scripts folder
FOLDER_PREFIX=mert_$1 # directory for logs (created in current directory)
WORKDIR=mmert_$1 # working directory for mmert (created in current directory)
PARALLEL=2 # number moses/mert processes to run in parallel
DECODER_FLAGS="-th 1" # additional decoder flags, e.g. '-th 8' for 8 threads per moses instance, needs moses with thread support
TASKS=(A B C D) # used to identify tasks
TASKL="A,B,C,D" # same as above as string, tasks ids separated by ','
NUMTASKS=${#TASKS[@]} # for parallelization, number of tasks
INI=$MMERTPKG/ini/moses.ini # one moses ini for all tasks (e.g. pooled model and/or same start weights)? or prefix for individual inis
SET=dev # dev or devtest (only used to build filenames)?
#declare -A INIS
#INIS[${TASKS[0]}]=$INI/$SET/${TASKS[0]}/moses.ini # individual inis (for individual models and/or individual start weights)
#INIS[${TASKS[1]}]=$INI/$SET/${TASKS[1]}/moses.ini
#INIS[${TASKS[2]}]=$INI/$SET/${TASKS[2]}/moses.ini
#INIS[${TASKS[3]}]=$INI/$SET/${TASKS[3]}/moses.ini
#INIS[${TASKS[4]}]=$INI/$SET/${TASKS[4]}/moses.ini
#INIS[${TASKS[5]}]=$INI/$SET/${TASKS[5]}/moses.ini
#INIS[${TASKS[6]}]=$INI/$SET/${TASKS[6]}/moses.ini
# the next variables enable the script to locate your dev set(s), please set accordingly
# see also run_mert_wrapper() function
FR=de # source language
EN=en # target language
TUNEFILE_PREFIX=epmini-$SET # to build filenames of dev(test) sets
MAX_ITER=100 # max mert iterations
NBEST=100 # n for nbest lists
INBETW=./regmtl.py # script to run after MERT runs finished
NUM_WEIGHTS=14 # dimension, length of weight vector
MIN_CHANGE=0.01 # minimum change in average vector, stopping criterion
LAMBDA=0.01 # regularization parameter
FIRST_AVG=0 # 0: zero vector, 1: provide run0.avector.txt yourself (in $WORKDIR)
# parameters
# $1 FR tuning data
# $2 EN tuning data
# $3 /path/to/ini
# $4 task id
# $5 --continue
#
#
function run_mert()
{
./mert-moses.pl \
$1 \
$2 \
$DECODER \
$3 \
--no-filter-phrase-table \
--working-dir $FOLDER_PREFIX"_$4" \
--rootdir $SCRIPTS \
--decoder-flags "$DECODER_FLAGS" \
--mertdir $BIN \
--inputtype=0 \
--maximum-iterations=9999 \
--efficient_scorenbest_flag \
--nocase \
--nonorm \
--nbest=$NBEST \
$5
}
function run_mert_wrapper()
{
T=$1
echo -e "\n ===> $IT ========>\n\n" >> $WORKDIR/mert.$T.out >> $WORKDIR/mert.$T.err
# replace $INI with ${INIS[$T]} to use separate inis
run_mert $MMERTPKG/data/$TUNEFILE_PREFIX-$T.$FR $MMERTPKG/data/$TUNEFILE_PREFIX-$T.$EN $INI $T $CONT >> $WORKDIR/mert.$T.out 2>> $WORKDIR/mert.$T.err
}
function wait_for()
{
echo "Waiting for ${#WAITFOR[@]} MERT procs..."
for pid in ${WAITFOR[@]}; do
wait $pid;
done
}
if [ ! -d "$WORKDIR" ]; then
mkdir $WORKDIR
fi
IT=0
while true; do
IT=$(($IT+1))
if [ $IT -eq 1 ]; then
echo "First iteration"
CONT="";
else
echo -e "\nContinue with $IT"
CONT="--continue";
fi
# first half
WAITFOR=()
for (( i = 1; i <= $PARALLEL; i++ )); do
echo "Start for ${TASKS[$i-1]}"
run_mert_wrapper ${TASKS[$i-1]} &
WAITFOR+=( $! )
done
wait_for $WAITFOR
# second half
WAITFOR=()
for (( i = $PARALLEL+1; i <= $NUMTASKS; i++)); do
echo "Start for ${TASKS[$i-1]}"
run_mert_wrapper ${TASKS[$i-1]} &
WAITFOR+=( $! )
done
wait_for $WAITFOR
echo "Running $INBETW ..."
$INBETW $FOLDER_PREFIX $WORKDIR $TASKL $IT $NUM_WEIGHTS $MIN_CHANGE $LAMBDA $FIRST_AVG
if [ -f "$WORKDIR/CONVERGED" ]; then break; fi
if [ $IT -eq $MAX_ITER ]; then
echo "Reached global iteration limit ($MAX_ITER), stopping.";
break;
fi
done
echo 'done'