author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-31 13:57:24 +0200
---|---|---
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-31 13:57:24 +0200
commit | f1ba05780db1705493d9afb562332498b93d26f1 (patch) |
tree | fb429a657ba97f33e8140742de9bc74d9fc88e75 |
parent | aadabfdf37dfd451485277cb77fad02f77b361c6 (diff) |
parent | 317d650f6cb1e24ac6f3be6f7bf9d4246a59e0e5 (diff) |
Merge remote-tracking branch 'upstream/master'
153 files changed, 3947 insertions, 5374 deletions
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,33 @@
+cdec/
+creg/bin/
+creg/creg
+creg/json_feature_map_lexer.cc
+decoder/bin/
+gi/pf/align-tl
+gi/pf/bayes_lattice_score
+gi/pf/nuisance_test
+gi/pf/pf_test
+gi/pf/pyp_lm
+jam-files/bjam
+jam-files/engine/bin.linuxx86_64/
+jam-files/engine/bootstrap/
+klm/lm/bin/
+klm/lm/libkenlm.so
+klm/lm/libkenutil.so
+klm/lm/query
+klm/util/bin/
+mteval/bin/
+rampion/rampion_cccp
+rst_parser/librst.a
+rst_parser/mst_train
+rst_parser/rst_parse
+rst_parser/rst_train
+training/liblbfgs/bin/
+training/liblbfgs/liblbfgs.a
+training/liblbfgs/ll_test
+utils/atools
+utils/bin/
+utils/crp_test
 mira/kbest_mira
 utils/m_test
 sa-extract/calignment.c
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..326609d2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+before_script:
+ - sudo apt-get install libboost-program-options-dev
+ - sudo apt-get install libboost-regex-dev
+ - sudo apt-get install libboost-test-dev
+ - sudo apt-get install flex
+ - autoreconf -ifv
+ - ./configure
+script: make
+after_script:
+ - ./tests/run-system-tests.pl
diff --git a/Jamroot b/Jamroot
--- a/Jamroot
+++ b/Jamroot
@@ -26,13 +26,14 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa
   requirements += <define>HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ;
 }
 
-# project : requirements $(requirements) ;
 project : requirements $(requirements) <toolset>darwin:<link>static ;
-project : default-build <threading>single <warnings>on <variant>release ;
+project : default-build <warnings>on <variant>release ;
 
-install-bin-libs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec creg//creg phrasinator//programs mira//kbest_mira ;
+install-bin-libs dpmert//programs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec phrasinator//programs mira//kbest_mira ;
 
-build-projects mteval decoder klm/lm training/liblbfgs creg ;
+install perl-scripts : dpmert//dpmert.pl : <location>$(bindir) ;
+
+build-projects mteval decoder dpmert klm/lm training/liblbfgs ;
 
 #Compile everything ending with _test.cc into a test and run it.
 rule all_tests ( targets * : dependencies : properties * ) {
diff --git a/Makefile.am b/Makefile.am
index 0e08ee72..ea5e2b3f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,7 @@
 # warning - the subdirectories in the following list should
 # be kept in topologically sorted order. Also, DO NOT introduce
 # cyclic dependencies between these directories!
-SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training training/liblbfgs creg mira dtrain dpmert pro-train rampion extools gi/pf gi/markov_al rst_parser
+SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training training/liblbfgs mira dtrain dpmert pro-train rampion extools gi/pf gi/markov_al rst_parser
 
 #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava
 
diff --git a/README.scons b/README.scons
deleted file mode 100644
index eb56a666..00000000
--- a/README.scons
+++ /dev/null
@@ -1,21 +0,0 @@
-Building cdec with the scons build system:
-
-While we don't build all components of cdec under scons (yet),
-scons makes things much simpler and reliable. Currently,
-building cdec with scons is experimental.
-
-Step-by-step:
-1) Obtain a Python V2.4 or later (you probably already have this)
-2) Download and install Scons v2.0+ from http://www.scons.org/download.php
-   $ wget http://prdownloads.sourceforge.net/scons/scons-2.0.1.tar.gz
-   $ tar -xvzf scons-2.0.1.tar.gz
-   $ cd scons-2.0.1
-   $ python setup.py build
-   $ sudo setup.py install #(if you have root)
-   $ setup.py install --home=~/prefix #(to install to a user-space prefix instead)
-
-3) Run scons. Some examples:
-   $ scons #(just build the decoder)
-   $ scons -j16 #(build using 16 cores)
-   $ scons -h #(get help)
-   $ scons --with-boost=~/prefix #(use ~/prefix as the boost directory)
diff --git a/SConstruct b/SConstruct
deleted file mode 100644
index 93d740b7..00000000
--- a/SConstruct
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/python
-
-# EXPERIMENTAL and HACKY version of cdec build in scons
-
-# TODO: Persist these so that you don't have to specify flags every time
-# http://www.scons.org/wiki/SavingVariablesToAFile
-AddOption('--prefix', dest='prefix', type='string', nargs=1, action='store', metavar='DIR',
-          help='installation prefix')
-AddOption('--with-boost', dest='boost', type='string', nargs=1, action='store', metavar='DIR',
-          help='boost installation directory (if in a non-standard location)')
-AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', metavar='DIR',
-          help='path to Global Lexical Coherence package (optional)')
-AddOption('--with-mpi', dest='mpi', action='store_true',
-          help='build tools that use Message Passing Interface? (optional)')
-AddOption('--efence', dest='efence', action='store_true',
-          help='use electric fence for debugging memory corruptions')
-
-platform = ARGUMENTS.get('OS', Platform())
-include = Split('decoder utils klm mteval training .')
-env = Environment(PREFIX=GetOption('prefix'),
-                  PLATFORM = platform,
-#                  BINDIR = bin,
-#                  INCDIR = include,
-#                  LIBDIR = lib,
-                  CPPPATH = include,
-                  LIBPATH = [],
-                  LIBS = Split('boost_program_options boost_serialization boost_thread z'),
-                  CCFLAGS=Split('-g -O3 -DHAVE_SCONS'))
-
-# Do some autoconf-like sanity checks (http://www.scons.org/wiki/SconsAutoconf)
-conf = Configure(env)
-print('Checking if the environment is sane...')
-if not conf.CheckCXX():
-    print('!! Your compiler and/or environment is not correctly configured.')
-    Exit(1)
-if not conf.CheckFunc('printf'):
-    print('!! Your compiler and/or environment is not correctly configured.')
-    Exit(1)
-#env = conf.Finish()
-
-boost = GetOption('boost')
-if boost:
-    print 'Using Boost at {0}'.format(boost)
-    env.Append(CCFLAGS='-DHAVE_BOOST',
-               CPPPATH=boost+'/include',
-               LIBPATH=boost+'/lib')
-
-if not conf.CheckLib('boost_program_options'):
-    print "Boost library 'boost_program_options' not found"
-    Exit(1)
-#if not conf.CheckHeader('boost/math/special_functions/digamma.hpp'):
-#    print "Boost header 'digamma.hpp' not found"
-#    Exit(1)
-
-mpi = GetOption('mpi')
-if mpi:
-    if not conf.CheckHeader('mpi.h'):
-        print "MPI header 'mpi.h' not found"
-        Exit(1)
-
-if GetOption('efence'):
-    env.Append(LIBS=Split('efence Segfault'))
-
-print('Environment is sane.')
-print
-
-srcs = []
-
-# TODO: Get rid of config.h
-
-glc = GetOption('glc')
-if glc:
-    print 'Using Global Lexical Coherence package at {0}'.format(glc)
-    env.Append(CCFLAGS='-DHAVE_GLC',
-               CPPPATH=[glc, glc+'/cdec'])
-    srcs.append(glc+'/string_util.cc')
-    srcs.append(glc+'/sys_util.cc')
-    srcs.append(glc+'/debug.cc')
-    srcs.append(glc+'/feature-factory.cc')
-    srcs.append(glc+'/cdec/ff_glc.cc')
-
-for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc', 'vest/*.cc']:
-    srcs.extend([ file for file in Glob(pattern)
-                  if not 'test' in str(file)
-                     and 'build_binary.cc' not in str(file)
-                     and 'ngram_query.cc' not in str(file)
-                     and 'mbr_kbest.cc' not in str(file)
-                     and 'sri.cc' not in str(file)
-                     and 'fast_score.cc' not in str(file)
-                     and 'cdec.cc' not in str(file)
-                     and 'mr_' not in str(file)
-                     and 'utils/ts.cc' != str(file)
-                ])
-
-print 'Found {0} source files'.format(len(srcs))
-def comb(cc, srcs):
-    x = [cc]
-    x.extend(srcs)
-    return x
-
-env.Program(target='decoder/cdec', source=comb('decoder/cdec.cc', srcs))
-# TODO: The various decoder tests
-# TODO: extools
-env.Program(target='klm/lm/build_binary', source=comb('klm/lm/build_binary.cc', srcs))
-# TODO: klm ngram_query and tests
-env.Program(target='mteval/fast_score', source=comb('mteval/fast_score.cc', srcs))
-env.Program(target='mteval/mbr_kbest', source=comb('mteval/mbr_kbest.cc', srcs))
-#env.Program(target='mteval/scorer_test', source=comb('mteval/fast_score.cc', srcs))
-# TODO: phrasinator
-
-# TODO: Various training binaries
-env.Program(target='training/model1', source=comb('training/model1.cc', srcs))
-env.Program(target='training/augment_grammar', source=comb('training/augment_grammar.cc', srcs))
-env.Program(target='training/grammar_convert', source=comb('training/grammar_convert.cc', srcs))
-#env.Program(target='training/optimize_test', source=comb('training/optimize_test.cc', srcs))
-env.Program(target='training/collapse_weights', source=comb('training/collapse_weights.cc', srcs))
-#env.Program(target='training/lbfgs_test', source=comb('training/lbfgs_test.cc', srcs))
-#env.Program(target='training/mr_optimize_reduce', source=comb('training/mr_optimize_reduce.cc', srcs))
-env.Program(target='training/mr_em_map_adapter', source=comb('training/mr_em_map_adapter.cc', srcs))
-env.Program(target='training/mr_reduce_to_weights', source=comb('training/mr_reduce_to_weights.cc', srcs))
-env.Program(target='training/mr_em_adapted_reduce', source=comb('training/mr_em_adapted_reduce.cc', srcs))
-
-env.Program(target='vest/sentserver', source=['vest/sentserver.c'], LINKFLAGS='-all-static')
-env.Program(target='vest/sentclient', source=['vest/sentclient.c'], LINKFLAGS='-all-static')
-env.Program(target='vest/mr_vest_generate_mapper_input',
-            source=comb('vest/mr_vest_generate_mapper_input.cc', srcs))
-env.Program(target='vest/mr_vest_map', source=comb('vest/mr_vest_map.cc', srcs))
-env.Program(target='vest/mr_vest_reduce', source=comb('vest/mr_vest_reduce.cc', srcs))
-#env.Program(target='vest/lo_test', source=comb('vest/lo_test.cc', srcs))
-# TODO: util tests
-
-if mpi:
-    env.Program(target='training/mpi_online_optimize', source=comb('training/mpi_online_optimize.cc', srcs))
-    env.Program(target='training/mpi_batch_optimize', source=comb('training/mpi_batch_optimize.cc', srcs))
-    env.Program(target='training/compute_cllh', source=comb('training/compute_cllh.cc', srcs))
-    env.Program(target='training/cllh_filter_grammar', source=comb('training/cllh_filter_grammar.cc', srcs))
-
diff --git a/configure.ac b/configure.ac
index 6d2a8c60..0635e8dc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -132,4 +132,4 @@ fi
 
 CPPFLAGS="$CPPFLAGS -DHAVE_CONFIG_H"
 
-AC_OUTPUT(Makefile rst_parser/Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile training/liblbfgs/Makefile creg/Makefile dpmert/Makefile pro-train/Makefile rampion/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile dtrain/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
+AC_OUTPUT(Makefile rst_parser/Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile training/liblbfgs/Makefile dpmert/Makefile pro-train/Makefile rampion/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile dtrain/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
diff --git a/creg/Jamfile b/creg/Jamfile
deleted file mode 100644
index cfed2388..00000000
--- a/creg/Jamfile
+++ /dev/null
@@ -1,6 +0,0 @@
-import lex ;
-
-exe creg : creg.cc json_feature_map_lexer.ll ..//utils ../training//liblbfgs ..//boost_program_options : <include>../training <include>. : <library>..//z ;
-
-alias programs : creg ;
-
diff --git a/creg/Makefile.am b/creg/Makefile.am
deleted file mode 100644
index 9e25b838..00000000
--- a/creg/Makefile.am
+++ /dev/null
@@ -1,11 +0,0 @@
-bin_PROGRAMS = \
-  creg
-
-creg_SOURCES = creg.cc json_feature_map_lexer.cc
-creg_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
-
-json_feature_map_lexer.cc: json_feature_map_lexer.ll
-	$(LEX) -s -8 -CF -o$@ $<
-
-AM_CPPFLAGS = -W -Wall -DNDEBUG -I$(top_srcdir)/utils -I$(top_srcdir)/training
-
diff --git a/creg/README b/creg/README
deleted file mode 100644
index 28a71d11..00000000
--- a/creg/README
+++ /dev/null
@@ -1,17 +0,0 @@
-creg is a fast tool for training linear and logistic regression models with
-l_1 and l_2 regularization. Its data (feature and response) format is
-compatible with ARKRegression.
-
-EXAMPLES
-
-Logistic regression example (training only):
-  $ ./creg -x test_data/iris.trainfeat -y test_data/iris.trainresp --l1 1.0 > weights.txt
-
-Logistic regression example (training and testing):
-  $ ./creg -x test_data/iris.trainfeat -y test_data/iris.trainresp --l1 1.0 \
-       -t test_data/iris.testfeat -s test_data/iris.testresp > weights.txt
-
-Linear regression example (training and testing):
-  $ ./creg -n -x test_data/auto-mpg.trainfeat -y test_data/auto-mpg.trainresp --l2 1000 \
-       -t test_data/auto-mpg.testfeat -s test_data/auto-mpg.testresp > weights.txt
-
diff --git a/creg/creg.cc b/creg/creg.cc
deleted file mode 100644
index b145ac49..00000000
--- a/creg/creg.cc
+++ /dev/null
@@ -1,384 +0,0 @@
-#include <cstdlib>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-#include <limits>
-#include <cmath>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "json_feature_map_lexer.h"
-#include "prob.h"
-#include "filelib.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "liblbfgs/lbfgs++.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("training_features,x", po::value<string>(), "File containing training instance features (ARKRegression format)")
-        ("training_responses,y", po::value<string>(), "File containing training response features (ARKRegression format)")
-        ("linear,n", "Linear (rather than logistic) regression")
-        ("l1",po::value<double>()->default_value(0.0), "l_1 regularization strength")
-        ("l2",po::value<double>()->default_value(0.0), "l_2 regularization strength")
-        ("test_features,t", po::value<string>(), "File containing training instance features (ARKRegression format)")
-        ("test_responses,s", po::value<string>(), "File containing training response features (ARKRegression format)")
-        ("weights,w", po::value<string>(), "Initial weights")
-        ("epsilon,e", po::value<double>()->default_value(1e-4), "Epsilon for convergence test. Terminates when ||g|| < epsilon * max(1, ||w||)")
-        ("memory_buffers,m",po::value<unsigned>()->default_value(40), "Number of memory buffers for LBFGS")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("help") || !conf->count("training_features") || !conf->count("training_responses")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct TrainingInstance {
-  SparseVector<float> x;
-  union {
-    unsigned label;  // for categorical predictions
-    float value;     // for continuous predictions
-  } y;
-};
-
-struct ReaderHelper {
-  explicit ReaderHelper(vector<TrainingInstance>* xyp) : xy_pairs(xyp), lc(), flag() {}
-  unordered_map<string, unsigned> id2ind;
-  vector<TrainingInstance>* xy_pairs;
-  int lc;
-  bool flag;
-};
-
-void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) {
-  ReaderHelper& rh = *reinterpret_cast<ReaderHelper*>(extra);
-  ++rh.lc;
-  if (rh.lc % 1000 == 0) { cerr << '.'; rh.flag = true; }
-  if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; }
-  const unordered_map<string, unsigned>::iterator it = rh.id2ind.find(id);
-  if (it == rh.id2ind.end()) {
-    cerr << "Unlabeled example in line " << rh.lc << " (key=" << id << ')' << endl;
-    abort();
-  }
-  (*rh.xy_pairs)[it->second - 1].x = fmap;
-}
-
-void ReadLabeledInstances(const string& ffeats,
-                          const string& fresp,
-                          const bool is_continuous,
-                          vector<TrainingInstance>* xy_pairs,
-                          vector<string>* labels) {
-  bool flag = false;
-  xy_pairs->clear();
-  int lc = 0;
-  ReaderHelper rh(xy_pairs);
-  unordered_map<string, unsigned> label2id;
-  cerr << "Reading responses from " << fresp << " ..." << endl;
-  ReadFile fr(fresp);
-  for (unsigned i = 0; i < labels->size(); ++i)
-    label2id[(*labels)[i]] = i;
-  istream& in = *fr.stream();
-  string line;
-  while(getline(in, line)) {
-    ++lc;
-    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
-    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
-    if (line.size() == 0) continue;
-    if (line[0] == '#') continue;
-    unsigned p = 0;
-    while (p < line.size() && line[p] != ' ' && line[p] != '\t') { ++p; }
-    unsigned& ind = rh.id2ind[line.substr(0, p)];
-    if (ind != 0) { cerr << "ID " << line.substr(0, p) << " duplicated in line " << lc << endl; abort(); }
-    while (p < line.size() && (line[p] == ' ' || line[p] == '\t')) { ++p; }
-    assert(p < line.size());
-    xy_pairs->push_back(TrainingInstance());
-    ind = xy_pairs->size();
-    if (is_continuous) {
-      xy_pairs->back().y.value = strtof(&line[p], 0);
-    } else {  // categorical predictions
-      unordered_map<string, unsigned>::iterator it = label2id.find(line.substr(p));
-      if (it == label2id.end()) {
-        const string label = line.substr(p);
-        it = label2id.insert(make_pair(label, labels->size())).first;
-        labels->push_back(label);
-      }
-      xy_pairs->back().y.label = it->second;  // label id
-    }
-  }
-  if (flag) cerr << endl;
-  if (!is_continuous) {
-    cerr << "LABELS:";
-    for (unsigned j = 0; j < labels->size(); ++j)
-      cerr << " " << (*labels)[j];
-    cerr << endl;
-  }
-  cerr << "Reading features from " << ffeats << " ..." << endl;
-  ReadFile ff(ffeats);
-  JSONFeatureMapLexer::ReadRules(ff.stream(), ReaderCB, &rh);
-  if (rh.flag) cerr << endl;
-}
-
-// helper base class (not polymorphic- just a container and some helper functions) for loss functions
-// real loss functions should implement double operator()(const vector<double>& x, double* g),
-// which should evaluate f(x) and g = f'(x)
-struct BaseLoss {
-  // dimp1 = number of categorial outputs possible for logistic regression
-  // for linear regression, it should be 1 more than the dimension of the response variable
-  BaseLoss(
-      const vector<TrainingInstance>& tr,
-      unsigned dimp1,
-      unsigned numfeats,
-      unsigned ll2) : training(tr), K(dimp1), p(numfeats), l2(ll2) {}
-
-  // weight vector layout for K classes, with p features
-  //   w[0 : K-1] = bias weights
-  //   w[y*p + K : y*p + K + p - 1] = feature weights for y^th class
-  // this representation is used in ComputeDotProducts and GradAdd
-  void ComputeDotProducts(const SparseVector<float>& fx,  // feature vector of x
-                          const vector<double>& w,  // full weight vector
-                          vector<double>* pdotprods) const {
-    vector<double>& dotprods = *pdotprods;
-    const unsigned km1 = K - 1;
-    dotprods.resize(km1);
-    for (unsigned y = 0; y < km1; ++y)
-      dotprods[y] = w[y];  // bias terms
-    for (SparseVector<float>::const_iterator it = fx.begin(); it != fx.end(); ++it) {
-      const float fval = it->second;
-      const unsigned fid = it->first;
-      for (unsigned y = 0; y < km1; ++y)
-        dotprods[y] += w[fid + y * p + km1] * fval;
-    }
-  }
-
-  double ApplyRegularizationTerms(const vector<double>& weights,
-                                  double* g) const {
-    double reg = 0;
-    for (size_t i = K - 1; i < weights.size(); ++i) {
-      const double& w_i = weights[i];
-      reg += l2 * w_i * w_i;
-      g[i] += 2 * l2 * w_i;
-    }
-    return reg;
-  }
-
-  void GradAdd(const SparseVector<float>& fx,
-               const unsigned y,
-               const double scale,
-               double* acc) const {
-    acc[y] += scale;  // class bias
-    for (SparseVector<float>::const_iterator it = fx.begin();
-         it != fx.end(); ++it)
-      acc[it->first + y * p + K - 1] += it->second * scale;
-  }
-
-  const vector<TrainingInstance>& training;
-  const unsigned K, p;
-  const double l2;
-};
-
-struct UnivariateSquaredLoss : public BaseLoss {
-  UnivariateSquaredLoss(
-      const vector<TrainingInstance>& tr,
-      unsigned numfeats,
-      const double l2) : BaseLoss(tr, 2, numfeats, l2) {}
-
-  // evaluate squared loss and gradient
-  double operator()(const vector<double>& x, double* g) const {
-    fill(g, g + x.size(), 0.0);
-    double cll = 0;
-    vector<double> dotprods(1);  // univariate prediction
-    for (unsigned i = 0; i < training.size(); ++i) {
-      const SparseVector<float>& fmapx = training[i].x;
-      const double refy = training[i].y.value;
-      ComputeDotProducts(fmapx, x, &dotprods);
-      double diff = dotprods[0] - refy;
-      cll += diff * diff;
-
-      double scale = 2 * diff;
-      GradAdd(fmapx, 0, scale, g);
-    }
-    double reg = ApplyRegularizationTerms(x, g);
-    return cll + reg;
-  }
-
-  // return root mse
-  double Evaluate(const vector<TrainingInstance>& test,
-                  const vector<double>& w) const {
-    vector<double> dotprods(1);  // K-1 degrees of freedom
-    double mse = 0;
-    for (unsigned i = 0; i < test.size(); ++i) {
-      const SparseVector<float>& fmapx = test[i].x;
-      const float refy = test[i].y.value;
-      ComputeDotProducts(fmapx, w, &dotprods);
-      double diff = dotprods[0] - refy;
-      cerr << "line=" << (i+1) << " true=" << refy << " pred=" << dotprods[0] << endl;
-      mse += diff * diff;
-    }
-    mse /= test.size();
-    return sqrt(mse);
-  }
-};
-
-struct MulticlassLogLoss : public BaseLoss {
-  MulticlassLogLoss(
-      const vector<TrainingInstance>& tr,
-      unsigned k,
-      unsigned numfeats,
-      const double l2) : BaseLoss(tr, k, numfeats, l2) {}
-
-  // evaluate log loss and gradient
-  double operator()(const vector<double>& x, double* g) const {
-    fill(g, g + x.size(), 0.0);
-    vector<double> dotprods(K - 1);  // K-1 degrees of freedom
-    vector<prob_t> probs(K);
-    double cll = 0;
-    for (unsigned i = 0; i < training.size(); ++i) {
-      const SparseVector<float>& fmapx = training[i].x;
-      const unsigned refy = training[i].y.label;
-      //cerr << "FMAP: " << fmapx << endl;
-      ComputeDotProducts(fmapx, x, &dotprods);
-      prob_t z;
-      for (unsigned j = 0; j < dotprods.size(); ++j)
-        z += (probs[j] = prob_t(dotprods[j], init_lnx()));
-      z += (probs.back() = prob_t::One());
-      for (unsigned y = 0; y < probs.size(); ++y) {
-        probs[y] /= z;
-        //cerr << "  p(y=" << y << ")=" << probs[y].as_float() << "\tz=" << z << endl;
-      }
-      cll -= log(probs[refy]);  // log p(y | x)
-
-      for (unsigned y = 0; y < dotprods.size(); ++y) {
-        double scale = probs[y].as_float();
-        if (y == refy) { scale -= 1.0; }
-        GradAdd(fmapx, y, scale, g);
-      }
-    }
-    double reg = ApplyRegularizationTerms(x, g);
-    return cll + reg;
-  }
-
-  double Evaluate(const vector<TrainingInstance>& test,
-                  const vector<double>& w) const {
-    vector<double> dotprods(K - 1);  // K-1 degrees of freedom
-    double correct = 0;
-    for (unsigned i = 0; i < test.size(); ++i) {
-      const SparseVector<float>& fmapx = test[i].x;
-      const unsigned refy = test[i].y.label;
-      ComputeDotProducts(fmapx, w, &dotprods);
-      double best = 0;
-      unsigned besty = dotprods.size();
-      for (unsigned y = 0; y < dotprods.size(); ++y)
-        if (dotprods[y] > best) { best = dotprods[y]; besty = y; }
-      if (refy == besty) { ++correct; }
-    }
-    return correct / test.size();
-  }
-};
-
-template <class LossFunction>
-double LearnParameters(LossFunction& loss,
-                       const double l1,
-                       const unsigned l1_start,
-                       const unsigned memory_buffers,
-                       const double eps,
-                       vector<double>* px) {
-  LBFGS<LossFunction> lbfgs(px, loss, memory_buffers, l1, l1_start, eps);
-  lbfgs.MinimizeFunction();
-  return 0;
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  string line;
-  double l1 = conf["l1"].as<double>();
-  double l2 = conf["l2"].as<double>();
-  const unsigned memory_buffers = conf["memory_buffers"].as<unsigned>();
-  const double epsilon = conf["epsilon"].as<double>();
-  if (l1 < 0.0) {
-    cerr << "L1 strength must be >= 0\n";
-    return 1;
-  }
-  if (l2 < 0.0) {
-    cerr << "L2 strength must be >= 0\n";
-    return 2;
-  }
-
-  const bool is_continuous = conf.count("linear");
-  const string xfile = conf["training_features"].as<string>();
-  const string yfile = conf["training_responses"].as<string>();
-  vector<string> labels; // only populated for non-continuous models
-  vector<TrainingInstance> training, test;
-  ReadLabeledInstances(xfile, yfile, is_continuous, &training, &labels);
-  if (conf.count("test_features")) {
-    const string txfile = conf["test_features"].as<string>();
-    const string tyfile = conf["test_responses"].as<string>();
-    ReadLabeledInstances(txfile, tyfile, is_continuous, &test, &labels);
-  }
-
-  if (conf.count("weights")) {
-    cerr << "Initial weights are not implemented, please implement." << endl;
-    // TODO read weights for categorical and continuous predictions
-    // can't use normal cdec weight framework
-    abort();
-  }
-
-  cerr << "         Number of features: " << FD::NumFeats() << endl;
-  cerr << "Number of training examples: " << training.size() << endl;
-  const unsigned p = FD::NumFeats();
-  cout.precision(15);
-
-  if (conf.count("linear")) {  // linear regression
-    vector<double> weights(1 + FD::NumFeats(), 0.0);
-    cerr << "       Number of parameters: " << weights.size() << endl;
-    UnivariateSquaredLoss loss(training, p, l2);
-    LearnParameters(loss, l1, 1, memory_buffers, epsilon, &weights);
-
-    if (test.size())
-      cerr << "Held-out root MSE: " << loss.Evaluate(test, weights) << endl;
-
-    cout << p << "\t***CONTINUOUS***" << endl;
-    cout << "***BIAS***\t" << weights[0] << endl;
-    for (unsigned f = 0; f < p; ++f) {
-      const double w = weights[1 + f];
-      if (w)
-        cout << FD::Convert(f) << "\t" << w << endl;
-    }
-  } else {  // logistic regression
-    vector<double> weights((1 + FD::NumFeats()) * (labels.size() - 1), 0.0);
-    cerr << "       Number of parameters: " << weights.size() << endl;
-    cerr << "           Number of labels: " << labels.size() << endl;
-    const unsigned K = labels.size();
-    const unsigned km1 = K - 1;
-    MulticlassLogLoss loss(training, K, p, l2);
-    LearnParameters(loss, l1, km1, memory_buffers, epsilon, &weights);
-
-    if (test.size())
-      cerr << "Held-out accuracy: " << loss.Evaluate(test, weights) << endl;
-
-    cout << p << "\t***CATEGORICAL***";
-    for (unsigned y = 0; y < K; ++y)
-      cout << '\t' << labels[y];
-    cout << endl;
-    for (unsigned y = 0; y < km1; ++y)
-      cout << labels[y] << "\t***BIAS***\t" << weights[y] << endl;
-    for (unsigned y = 0; y < km1; ++y) {
-      for (unsigned f = 0; f < p; ++f) {
-        const double w = weights[km1 + y * p + f];
-        if (w)
-          cout << labels[y] << "\t" << FD::Convert(f) << "\t" << w << endl;
-      }
-    }
-  }
-
-  return 0;
-}
-
diff --git a/creg/json_feature_map_lexer.h b/creg/json_feature_map_lexer.h
deleted file mode 100644
index 3324aa29..00000000
--- a/creg/json_feature_map_lexer.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _RULE_LEXER_H_
-#define _RULE_LEXER_H_
-
-#include <iostream>
-#include <string>
-
-#include "sparse_vector.h"
-
-struct JSONFeatureMapLexer {
-  typedef void (*FeatureMapCallback)(const std::string& id, const SparseVector<float>& fmap, void* extra);
-  static void ReadRules(std::istream* in, FeatureMapCallback func, void* extra);
-};
-
-#endif
-
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
deleted file mode 100644
index f9ce7977..00000000
--- a/creg/json_feature_map_lexer.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-%option nounput
-%{
-
-#include "json_feature_map_lexer.h"
-#include "fdict.h"
-#include "fast_sparse_vector.h"
-
-#define YY_DECL int json_fmap_yylex (void)
-#undef YY_INPUT
-#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount())
-#define YY_SKIP_YYWRAP 1
-int yywrap() { return 1; }
-
-JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL;
-void* json_fmap_callback_extra = NULL;
-std::istream* jfmap_stream = NULL;
-bool fl = true;
-unsigned spos = 0;
-char featname[16000];
-#define MAX_FEATS 20000
-std::pair<int, float> featmap[MAX_FEATS];
-unsigned curfeat = 0;
-std::string instid;
-
-inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) {
-  uint32_t cp;
-  if((w1 & 0xfc00) == 0xd800) {
-    if((w2 & 0xfc00) == 0xdc00) {
-      cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff));
-    } else {
-      abort();
-    }
-  } else {
-    cp = w1;
-  }
-
-
-  if(cp < 0x80) {
-    putf8[0] = static_cast<char>(cp);
-    return 1;
-  } else if(cp < 0x0800) {
-    putf8[0] = 0xc0 | ((cp >> 6) & 0x1f);
-    putf8[1] = 0x80 | (cp & 0x3f);
-    return 2;
-  } else if(cp < 0x10000) {
-    putf8[0] = 0xe0 | ((cp >> 6) & 0x0f);
-    putf8[1] = 0x80 | ((cp >> 6) & 0x3f);
-    putf8[2] = 0x80 | (cp & 0x3f);
-    return 3;
-  } else if(cp < 0x1fffff) {
-    putf8[0] = 0xf0 | ((cp >> 18) & 0x07);
-    putf8[1] = 0x80 | ((cp >> 12) & 0x3f);
-    putf8[2] = 0x80 | ((cp >> 6) & 0x3f);
-    putf8[3] = 0x80 | (cp & 0x3f);
-    return 4;
-  } else {
-    abort();
-  }
-  return 0;
-}
-
-%}
-
-ID [^ \t\n\r]+
-HEX_D [a-fA-F0-9]
-INT [-]?[0-9]+
-DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?)
-WS [ \t\r\n]
-LCB [{]
-RCB [}]
-UNESCAPED_CH [^\"\\\b\n\r\f\t]
-
-%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE
-%%
-
-<INITIAL>{ID} { instid = yytext; BEGIN(JSON); }
-
-<JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); }
-
-<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x;
-                               json_fmap_callback(instid, x, json_fmap_callback_extra);
-                               curfeat = 0;
-                               BEGIN(INITIAL);}
-
-<PREVAL>\" { BEGIN(STRING); spos=0; }
-
-<STRING>\" { featname[spos] = 0;
-             featmap[curfeat].first = FD::Convert(featname);
-             BEGIN(JSONVAL);
-           }
-<STRING>{UNESCAPED_CH} { featname[spos++] = yytext[0]; }
-<STRING>\\\" { featname[spos++] = '"'; }
-<STRING>\\\\ { featname[spos++] = '\\'; }
-<STRING>\\\/ { featname[spos++] = '/'; }
-<STRING>\\b { }
-<STRING>\\f { }
-<STRING>\\n { }
-<STRING>\\r { }
-<STRING>\\t { }
-<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16);
-                                          spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1;
-                                        }
-
-<JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); }
-<DOUBLE>{DOUBLE} { featmap[curfeat++].second = strtod(yytext, 0);
-                   BEGIN(POSTVAL); }
-
-<POSTVAL>{WS}*,{WS}* { BEGIN(PREVAL); }
-<POSTVAL>{WS}*{RCB}\n* {
-   const SparseVector<float> x(&featmap[0], &featmap[curfeat]);
-   json_fmap_callback(instid, x, json_fmap_callback_extra);
-   curfeat = 0;
-   BEGIN(INITIAL);
-  }
-
-<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); }
-
-%%
-
-void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) {
-  json_fmap_callback = func;
-  json_fmap_callback_extra = extra;
-  jfmap_stream = in;
-  json_fmap_yylex();
-}
-
-#if 0
-void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) {
-  (void) extra;
-  static int cc = 0;
-  cc++;
-}
-
-int main() {
-  JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
-}
-#endif
diff --git a/creg/test_data/auto-mpg.testfeat b/creg/test_data/auto-mpg.testfeat
deleted file mode 100644
index 91c21e99..00000000
--- a/creg/test_data/auto-mpg.testfeat
+++ /dev/null
@@ -1,93 +0,0 @@
-test1_fiat_124b {"horsepower": 76.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 88.0, "weight": 2065.0}
-test2_dodge_aspen {"horsepower": 110.0, "acceleration": 18.7, "cylinders": 6.0, "displacement": 225.0, "weight": 3620.0}
-test3_toyota_celica_gt_liftback {"horsepower": 95.0, "acceleration": 14.8, "cylinders": 4.0, "displacement": 134.0, "weight": 2515.0}
-test4_amc_matador {"horsepower": 110.0, "acceleration": 18.0, "cylinders": 6.0, "displacement": 258.0, "weight": 3632.0}
-test5_audi_5000s_(diesel) {"horsepower": 67.0, "acceleration": 19.9, "cylinders": 5.0, "displacement": 121.0, "weight": 2950.0}
-test6_datsun_310 {"horsepower": 65.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 86.0, "weight": 2019.0}
-test7_chevrolet_woody {"horsepower": 60.0, "acceleration": 22.1, "cylinders": 4.0, "displacement": 98.0, "weight": 2164.0}
-test8_volkswagen_1131_deluxe_sedan {"horsepower": 46.0, "acceleration": 20.5, "cylinders": 4.0, "displacement": 97.0, "weight": 1835.0}
-test9_plymouth_satellite_sebring {"horsepower": 105.0, "acceleration": 16.5, "cylinders": 6.0, "displacement": 225.0, "weight": 3613.0}
-test10_amc_matador {"horsepower": 120.0, "acceleration": 13.9, "cylinders": 8.0, "displacement": 304.0, "weight": 3962.0}
-test11_chevrolet_chevette {"horsepower": 52.0, "acceleration": 22.2, "cylinders": 4.0, "displacement": 85.0, "weight": 2035.0}
-test12_buick_century_special {"horsepower": 105.0, "acceleration": 15.8, "cylinders": 6.0, "displacement": 231.0, "weight": 3380.0}
-test13_ford_escort_2h {"horsepower": 65.0, "acceleration": 20.7, "cylinders": 4.0, "displacement": 98.0, "weight": 2380.0}
-test14_ford_torino_500 {"horsepower": 88.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3302.0}
-test15_dodge_aries_wagon_(sw) {"horsepower": 92.0, "acceleration": 14.4, "cylinders": 4.0, "displacement": 156.0, "weight": 2620.0}
-test16_plymouth_valiant {"horsepower": 105.0, "acceleration": 16.5, "cylinders": 6.0, "displacement": 225.0, "weight": 3121.0}
-test17_amc_pacer {"horsepower": 90.0, "acceleration": 17.0, "cylinders": 6.0, "displacement": 232.0, "weight": 3211.0}
-test18_honda_civic {"horsepower": 97.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 120.0, "weight": 2489.0}
-test19_ford_ltd {"horsepower": 148.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 351.0, "weight": 4657.0}
-test20_volkswagen_rabbit {"horsepower": 70.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 90.0, "weight": 1937.0}
-test21_honda_civic_1500_gl {"horsepower": 67.0, "acceleration": 13.8, "cylinders": 4.0, "displacement": 91.0, "weight": 1850.0}
-test22_chevrolet_impala {"horsepower": 165.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 350.0, "weight": 4209.0}
-test23_chevrolet_chevette {"horsepower": 63.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 98.0, "weight": 2051.0}
-test24_amc_concord {"horsepower": 90.0, "acceleration": 17.2, "cylinders": 6.0, "displacement": 232.0, "weight": 3210.0}
-test25_ford_pinto {"horsepower": 83.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 140.0, "weight": 2639.0}
-test26_ford_country_squire_(sw) {"horsepower": 142.0, "acceleration": 14.3, "cylinders": 8.0, "displacement": 351.0, "weight": 4054.0}
-test27_plymouth_valiant_custom {"horsepower": 95.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 225.0, "weight": 3264.0}
-test28_chevy_s-10 {"horsepower": 82.0, "acceleration": 19.4, "cylinders": 4.0, "displacement": 119.0, "weight": 2720.0}
-test29_maxda_rx3 {"horsepower": 90.0, "acceleration": 13.5, "cylinders": 3.0, "displacement": 70.0, "weight": 2124.0}
-test30_plymouth_sapporo {"horsepower": 105.0, "acceleration": 16.7, "cylinders": 4.0, "displacement": 156.0, "weight": 2745.0}
-test31_amc_hornet {"horsepower": 90.0, "acceleration": 17.6, "cylinders": 6.0, "displacement": 232.0, "weight": 3085.0}
-test32_chevrolet_caprice_classic {"horsepower": 145.0, "acceleration": 12.5, "cylinders": 8.0, "displacement": 305.0, "weight": 3880.0}
-test33_pontiac_sunbird_coupe {"horsepower": 88.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 151.0, "weight": 2740.0}
-test34_mercury_marquis_brougham {"horsepower": 198.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 429.0, "weight": 4952.0}
-test35_mercury_monarch_ghia {"horsepower": 139.0, "acceleration": 12.8, "cylinders": 8.0, "displacement": 302.0, "weight": 3570.0}
-test36_ford_country_squire_(sw) {"horsepower": 170.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 400.0, "weight": 4746.0}
-test37_audi_fox {"horsepower": 83.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 98.0, "weight": 2219.0}
-test38_triumph_tr7_coupe {"horsepower": 88.0, "acceleration": 15.1, "cylinders": 4.0, "displacement": 122.0, "weight": 2500.0}
-test39_dodge_d100 {"horsepower": 150.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 318.0, "weight": 3755.0}
-test40_mercury_cougar_brougham {"horsepower": 130.0, "acceleration": 14.9, "cylinders": 8.0, "displacement": 302.0, "weight": 4295.0}
-test41_pontiac_phoenix_lj {"horsepower": 105.0, "acceleration": 19.2, "cylinders": 6.0, "displacement": 231.0, "weight": 3535.0}
-test42_vw_rabbit_custom {"horsepower": 71.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 89.0, "weight": 1925.0}
-test43_peugeot_504 {"horsepower": 87.0, "acceleration": 17.5, "cylinders": 4.0, "displacement": 110.0, "weight": 2672.0}
-test44_datsun_310_gx {"horsepower": 67.0, "acceleration": 16.2, "cylinders": 4.0, "displacement": 91.0, "weight": 1995.0}
-test45_dodge_monaco_(sw) {"horsepower": 180.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 383.0, "weight": 4955.0}
-test46_chevrolet_monza_2+2 {"horsepower": 110.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 262.0, "weight": 3221.0}
-test47_capri_ii {"horsepower": 92.0, "acceleration": 14.9, "cylinders": 4.0, "displacement": 140.0, "weight": 2572.0}
-test48_mazda_rx2_coupe {"horsepower": 97.0, "acceleration": 13.5, "cylinders": 3.0, "displacement": 70.0, "weight": 2330.0}
-test49_plymouth_fury {"horsepower": 95.0, "acceleration": 19.0, "cylinders": 6.0, "displacement": 225.0, "weight": 3785.0}
-test50_plymouth_fury_iii {"horsepower": 215.0, "acceleration": 8.5, "cylinders": 8.0, "displacement": 440.0, "weight": 4312.0}
-test51_dodge_st._regis {"horsepower": 135.0, "acceleration": 15.2, "cylinders": 8.0, "displacement": 318.0, "weight": 3830.0}
-test52_amc_spirit_dl {"horsepower": 80.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 121.0, "weight": 2670.0}
-test53_peugeot_504_(sw) {"horsepower": 87.0, "acceleration": 19.5, "cylinders": 4.0, "displacement": 120.0, "weight": 2979.0}
-test54_chevroelt_chevelle_malibu {"horsepower": 105.0, "acceleration": 18.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3897.0}
-test55_opel_1900 {"horsepower": 90.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 116.0, "weight": 2123.0}
-test56_dodge_challenger_se {"horsepower": 170.0, "acceleration": 10.0, "cylinders": 8.0, "displacement": 383.0, "weight": 3563.0}
-test57_vw_pickup {"horsepower": 52.0, "acceleration": 24.6, "cylinders": 4.0, "displacement": 97.0, "weight": 2130.0}
-test58_peugeot_604sl {"horsepower": 133.0, "acceleration": 15.8, "cylinders": 6.0, "displacement": 163.0, "weight": 3410.0}
-test59_mazda_glc_4 {"horsepower": 68.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 91.0, "weight": 1985.0}
-test60_audi_4000 {"horsepower": 78.0, "acceleration": 15.8, "cylinders": 4.0, "displacement": 97.0, "weight": 2188.0}
-test61_plymouth_duster {"horsepower": 95.0, "acceleration": 16.5, "cylinders": 6.0, "displacement": 198.0, "weight": 3102.0}
-test62_dodge_magnum_xe {"horsepower": 140.0, "acceleration": 13.7, "cylinders": 8.0, "displacement": 318.0, "weight": 4080.0}
-test63_buick_century_limited {"horsepower": 110.0, "acceleration": 16.4, "cylinders": 6.0, "displacement": 181.0, "weight": 2945.0}
-test64_chevrolet_monte_carlo_s {"horsepower": 145.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 350.0, "weight": 4082.0}
-test65_amc_matador {"horsepower": 150.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 304.0, "weight": 3672.0}
-test66_honda_accord {"horsepower": 75.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 107.0, "weight": 2205.0}
-test67_plymouth_volare {"horsepower": 100.0, "acceleration": 17.2, "cylinders": 6.0, "displacement": 225.0, "weight": 3430.0}
-test68_ford_galaxie_500 {"horsepower": 198.0, "acceleration": 10.0, "cylinders": 8.0, "displacement": 429.0, "weight": 4341.0}
-test69_chevrolet_citation {"horsepower": 110.0, "acceleration": 12.6, "cylinders": 6.0, "displacement": 173.0, "weight": 2725.0}
-test70_amc_ambassador_sst {"horsepower": 150.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 304.0, "weight": 3672.0}
-test71_ford_thunderbird {"horsepower": 149.0, "acceleration": 14.5, "cylinders": 8.0, "displacement": 351.0, "weight": 4335.0}
-test72_pontiac_phoenix {"horsepower": 90.0, "acceleration": 13.2, "cylinders": 4.0, "displacement": 151.0, "weight": 2556.0}
-test73_vw_rabbit {"horsepower": 76.0, "acceleration": 14.7, "cylinders": 4.0, "displacement": 98.0, "weight": 2144.0}
-test74_bmw_2002 {"horsepower": 113.0, "acceleration": 12.5, "cylinders": 4.0, "displacement": 121.0, "weight": 2234.0}
-test75_dodge_colt {"horsepower": 105.0, "acceleration": 14.4, "cylinders": 4.0, "displacement": 156.0, "weight": 2800.0}
-test76_mercury_capri_2000 {"horsepower": 86.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 122.0, "weight": 2220.0}
-test77_ford_galaxie_500 {"horsepower": 153.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 351.0, "weight": 4154.0}
-test78_volkswagen_rabbit_custom_diesel {"horsepower": 48.0, "acceleration": 21.5, "cylinders": 4.0, "displacement": 90.0, "weight": 1985.0}
-test79_dodge_aspen_se {"horsepower": 100.0, "acceleration": 17.7, "cylinders": 6.0, "displacement": 225.0, "weight": 3651.0}
-test80_mazda_glc_deluxe {"horsepower": 52.0, "acceleration": 19.4, "cylinders": 4.0, "displacement": 78.0, "weight": 1985.0}
-test81_mazda_glc_custom {"horsepower": 68.0, "acceleration": 17.6, "cylinders": 4.0, "displacement": 91.0, "weight": 1970.0}
-test82_audi_100ls {"horsepower": 91.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 114.0, "weight": 2582.0}
-test83_oldsmobile_delta_88_royale {"horsepower": 160.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 350.0, "weight": 4456.0}
-test84_ford_gran_torino {"horsepower": 137.0, "acceleration": 14.5, "cylinders": 8.0, "displacement": 302.0, "weight": 4042.0}
-test85_plymouth_cricket {"horsepower": 70.0, "acceleration": 20.5, "cylinders": 4.0, "displacement": 91.0, "weight": 1955.0}
-test86_plymouth_fury_iii {"horsepower": 150.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 318.0, "weight": 4096.0}
-test87_plymouth_horizon {"horsepower": 70.0, "acceleration": 13.2, "cylinders": 4.0, "displacement": 105.0, "weight": 2200.0}
-test88_peugeot_504 {"horsepower": 88.0, "acceleration": 21.9, "cylinders": 4.0, "displacement": 120.0, "weight": 3270.0}
-test89_dodge_dart_custom {"horsepower": 150.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 318.0, "weight": 3399.0}
-test90_pontiac_j2000_se_hatchback {"horsepower": 85.0, "acceleration": 16.2, "cylinders": 4.0, "displacement": 112.0, "weight": 2575.0}
-test91_toyota_starlet {"horsepower": 58.0, "acceleration": 16.9, "cylinders": 4.0, "displacement": 79.0, "weight": 1755.0}
-test92_mazda_glc_custom_l {"horsepower": 68.0, "acceleration": 18.2, "cylinders": 4.0, "displacement": 91.0, "weight": 2025.0}
-test93_fiat_124_tc {"horsepower": 75.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 116.0, "weight": 2246.0}
diff --git a/creg/test_data/auto-mpg.testresp b/creg/test_data/auto-mpg.testresp
deleted file mode 100644
index 4f2cad37..00000000
--- a/creg/test_data/auto-mpg.testresp
+++ /dev/null
@@ -1,93 +0,0 @@
-test1_fiat_124b 30.0
-test2_dodge_aspen 18.6
-test3_toyota_celica_gt_liftback 21.1
-test4_amc_matador 16.0
-test5_audi_5000s_(diesel) 36.4
-test6_datsun_310 37.2
-test7_chevrolet_woody 24.5
-test8_volkswagen_1131_deluxe_sedan 26.0
-test9_plymouth_satellite_sebring 18.0
-test10_amc_matador 15.5
-test11_chevrolet_chevette 29.0
-test12_buick_century_special 20.6
-test13_ford_escort_2h 29.9
-test14_ford_torino_500 19.0
-test15_dodge_aries_wagon_(sw) 25.8
-test16_plymouth_valiant 18.0
-test17_amc_pacer 19.0
-test18_honda_civic 24.0
-test19_ford_ltd 14.0
-test20_volkswagen_rabbit 29.0
-test21_honda_civic_1500_gl 44.6
-test22_chevrolet_impala 14.0
-test23_chevrolet_chevette 30.5
-test24_amc_concord 19.4
-test25_ford_pinto 23.0
-test26_ford_country_squire_(sw) 15.5
-test27_plymouth_valiant_custom 19.0
-test28_chevy_s-10 31.0
-test29_maxda_rx3 18.0
-test30_plymouth_sapporo 23.2
-test31_amc_hornet 22.5
-test32_chevrolet_caprice_classic 17.5
-test33_pontiac_sunbird_coupe 24.5
-test34_mercury_marquis_brougham 12.0
-test35_mercury_monarch_ghia 20.2
-test36_ford_country_squire_(sw) 13.0
-test37_audi_fox 29.0
-test38_triumph_tr7_coupe 35.0
-test39_dodge_d100 13.0
-test40_mercury_cougar_brougham 15.0
-test41_pontiac_phoenix_lj 19.2
-test42_vw_rabbit_custom 31.9
-test43_peugeot_504 25.0
-test44_datsun_310_gx 38.0
-test45_dodge_monaco_(sw) 12.0
-test46_chevrolet_monza_2+2 20.0
-test47_capri_ii 25.0
-test48_mazda_rx2_coupe 19.0
-test49_plymouth_fury 18.0
-test50_plymouth_fury_iii 14.0
-test51_dodge_st._regis 18.2
-test52_amc_spirit_dl 27.4
-test53_peugeot_504_(sw) 21.0
-test54_chevroelt_chevelle_malibu 16.0
-test55_opel_1900 28.0
-test56_dodge_challenger_se 15.0
-test57_vw_pickup 44.0
-test58_peugeot_604sl 16.2
-test59_mazda_glc_4 34.1
-test60_audi_4000 34.3
-test61_plymouth_duster 20.0
-test62_dodge_magnum_xe 17.5
-test63_buick_century_limited 25.0
-test64_chevrolet_monte_carlo_s 15.0
-test65_amc_matador 14.0
-test66_honda_accord 36.0
-test67_plymouth_volare 20.5
-test68_ford_galaxie_500 15.0
-test69_chevrolet_citation 23.5
-test70_amc_ambassador_sst 17.0
-test71_ford_thunderbird 16.0
-test72_pontiac_phoenix 33.5
-test73_vw_rabbit 41.5
-test74_bmw_2002 26.0
-test75_dodge_colt 27.9
-test76_mercury_capri_2000 23.0
-test77_ford_galaxie_500 14.0
-test78_volkswagen_rabbit_custom_diesel 43.1
-test79_dodge_aspen_se 20.0
-test80_mazda_glc_deluxe 32.8
-test81_mazda_glc_custom 31.0
-test82_audi_100ls 20.0
-test83_oldsmobile_delta_88_royale 12.0
-test84_ford_gran_torino 14.0
-test85_plymouth_cricket 26.0
-test86_plymouth_fury_iii 14.0
-test87_plymouth_horizon 34.2
-test88_peugeot_504 19.0
-test89_dodge_dart_custom 15.0
-test90_pontiac_j2000_se_hatchback 31.0
-test91_toyota_starlet 39.1
-test92_mazda_glc_custom_l 37.0
-test93_fiat_124_tc 26.0
diff --git a/creg/test_data/auto-mpg.trainfeat b/creg/test_data/auto-mpg.trainfeat
deleted file mode 100644
index 20b87d3f..00000000
--- a/creg/test_data/auto-mpg.trainfeat
+++ /dev/null
@@ -1,300 +0,0 @@
-train1_buick_century_350 {"horsepower": 175.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 350.0, "weight": 4100.0}
-train2_audi_100_ls {"horsepower": 90.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 107.0, "weight": 2430.0}
-train3_chrysler_newport_royal {"horsepower": 190.0, "acceleration": 12.5, "cylinders": 8.0, "displacement": 400.0, "weight": 4422.0}
-train4_mercury_zephyr_6 {"horsepower": 85.0, "acceleration": 18.2, "cylinders": 6.0, "displacement": 200.0, "weight": 2990.0}
-train5_volkswagen_model_111 {"horsepower": 60.0, "acceleration": 19.0, "cylinders": 4.0, "displacement": 97.0, "weight": 1834.0}
-train6_dodge_monaco_brougham {"horsepower": 145.0, "acceleration": 13.7, "cylinders": 8.0, "displacement": 318.0, "weight": 4140.0}
-train7_mercedes-benz_240d {"horsepower": 67.0, "acceleration": 21.8, "cylinders": 4.0, "displacement": 146.0, "weight": 3250.0}
-train8_datsun_510_(sw) {"horsepower": 92.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 97.0, "weight": 2288.0}
-train9_opel_manta {"horsepower": 75.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 116.0, "weight": 2158.0}
-train10_renault_12_(sw) {"horsepower": 69.0, "acceleration": 18.0, "cylinders": 4.0, "displacement": 96.0, "weight": 2189.0}
-train11_chevrolet_nova {"horsepower": 100.0, "acceleration": 17.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3336.0}
-train12_plymouth_arrow_gs {"horsepower": 96.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 122.0, "weight": 2300.0}
-train13_toyota_mark_ii {"horsepower": 108.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 156.0, "weight": 2930.0}
-train14_chevrolet_caprice_classic {"horsepower": 150.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 400.0, "weight": 4464.0}
-train15_ford_maverick {"horsepower": 88.0, "acceleration": 16.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3021.0}
-train16_pontiac_lemans_v6 {"horsepower": 115.0, "acceleration": 15.4, "cylinders": 6.0, "displacement": 231.0, "weight": 3245.0}
-train17_ford_granada_gl {"horsepower": 88.0, "acceleration": 17.1, "cylinders": 6.0, "displacement": 200.0, "weight": 3060.0}
-train18_chevrolet_monte_carlo_landau {"horsepower": 170.0, "acceleration": 11.4, "cylinders": 8.0, "displacement": 350.0, "weight": 4165.0}
-train19_subaru {"horsepower": 93.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 108.0, "weight": 2391.0}
-train20_volkswagen_411_(sw) {"horsepower": 76.0, "acceleration": 18.0, "cylinders": 4.0, "displacement": 121.0, "weight": 2511.0}
-train21_chrysler_lebaron_salon {"horsepower": 85.0, "acceleration": 16.6, "cylinders": 6.0, "displacement": 225.0, "weight": 3465.0}
-train22_volkswagen_jetta {"horsepower": 74.0, "acceleration": 14.2, "cylinders": 4.0, "displacement": 105.0, "weight": 2190.0}
-train23_dodge_coronet_brougham {"horsepower": 150.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 318.0, "weight": 4190.0}
-train24_pontiac_safari_(sw) {"horsepower": 175.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 400.0, "weight": 5140.0}
-train25_mazda_rx-7_gs {"horsepower": 100.0, "acceleration": 12.5, "cylinders": 3.0, "displacement": 70.0, "weight": 2420.0}
-train26_datsun_210 {"horsepower": 65.0, "acceleration": 19.2, "cylinders": 4.0, "displacement": 85.0, "weight": 2110.0}
-train27_vw_rabbit_c_(diesel) {"horsepower": 48.0, "acceleration": 21.7, "cylinders": 4.0, "displacement": 90.0, "weight": 2085.0}
-train28_nissan_stanza_xe {"horsepower": 88.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 120.0, "weight": 2160.0}
-train29_audi_5000 {"horsepower": 103.0, "acceleration": 15.9, "cylinders": 5.0, "displacement": 131.0, "weight": 2830.0}
-train30_chevrolet_malibu {"horsepower": 145.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 350.0, "weight": 3988.0}
-train31_mercury_capri_v6 {"horsepower": 107.0, "acceleration": 14.0, "cylinders": 6.0, "displacement": 155.0, "weight": 2472.0}
-train32_datsun_b210_gx {"horsepower": 70.0, "acceleration": 18.6, "cylinders": 4.0, "displacement": 85.0, "weight": 2070.0}
-train33_volkswagen_dasher {"horsepower": 67.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 79.0, "weight": 1963.0}
-train34_volvo_145e_(sw) {"horsepower": 112.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 121.0, "weight": 2933.0}
-train35_chrysler_lebaron_medallion {"horsepower": 92.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 156.0, "weight": 2585.0}
-train36_pontiac_catalina {"horsepower": 170.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 400.0, "weight": 4668.0}
-train37_toyota_tercel {"horsepower": 62.0, "acceleration": 17.3, "cylinders": 4.0, "displacement": 89.0, "weight": 2050.0}
-train38_datsun_f-10_hatchback {"horsepower": 70.0, "acceleration": 16.8, "cylinders": 4.0, "displacement": 85.0, "weight": 1945.0}
-train39_chrysler_lebaron_town_@_country_(sw) {"horsepower": 150.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 360.0, "weight": 3940.0}
-train40_buick_century {"horsepower": 110.0, "acceleration": 15.8, "cylinders": 6.0, "displacement": 231.0, "weight": 3415.0}
-train41_amc_matador_(sw) {"horsepower": 150.0, "acceleration": 12.5, "cylinders": 8.0, "displacement": 304.0, "weight": 3892.0}
-train42_honda_civic {"horsepower": 67.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 91.0, "weight": 1965.0}
-train43_dodge_coronet_custom_(sw) {"horsepower": 150.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 318.0, "weight": 4457.0}
-train44_bmw_320i {"horsepower": 110.0, "acceleration": 12.8, "cylinders": 4.0, "displacement": 121.0, "weight": 2600.0}
-train45_mercury_marquis {"horsepower": 208.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 429.0, "weight": 4633.0}
-train46_amc_matador_(sw) {"horsepower": 150.0, "acceleration": 15.5, "cylinders": 8.0, "displacement": 304.0, "weight": 4257.0}
-train47_ford_ltd {"horsepower": 158.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 351.0, "weight": 4363.0}
-train48_toyota_corolla_1200 {"horsepower": 65.0, "acceleration": 19.0, "cylinders": 4.0, "displacement": 71.0, "weight": 1773.0}
-train49_amc_ambassador_brougham {"horsepower": 175.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 360.0, "weight": 3821.0}
-train50_ford_galaxie_500 {"horsepower": 153.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 351.0, "weight": 4129.0}
-train51_amc_concord_d/l {"horsepower": 120.0, "acceleration": 15.1, "cylinders": 6.0, "displacement": 258.0, "weight": 3410.0}
-train52_amc_matador {"horsepower": 100.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 232.0, "weight": 3288.0}
-train53_toyota_corona {"horsepower": 96.0, "acceleration": 13.5, "cylinders": 4.0, "displacement": 134.0, "weight": 2702.0}
-train54_plymouth_reliant {"horsepower": 84.0, "acceleration": 15.7, "cylinders": 4.0, "displacement": 135.0, "weight": 2490.0}
-train55_ford_pinto {"horsepower": 97.0, "acceleration": 14.5, "cylinders": 6.0, "displacement": 171.0, "weight": 2984.0}
-train56_datsun_810 {"horsepower": 97.0, "acceleration": 14.5, "cylinders": 6.0, "displacement": 146.0, "weight": 2815.0}
-train57_fiat_x1.9 {"horsepower": 67.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 79.0, "weight": 2000.0}
-train58_mercury_zephyr {"horsepower": 85.0, "acceleration": 16.7, "cylinders": 6.0, "displacement": 200.0, "weight": 3070.0}
-train59_toyota_corolla_1600_(sw) {"horsepower": 88.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 97.0, "weight": 2100.0}
-train60_chevrolet_chevette {"horsepower": 68.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 98.0, "weight": 2155.0}
-train61_toyota_cressida {"horsepower": 116.0, "acceleration": 12.6, "cylinders": 6.0, "displacement": 168.0, "weight": 2900.0}
-train62_toyota_corolla {"horsepower": 70.0, "acceleration": 16.9, "cylinders": 4.0, "displacement": 108.0, "weight": 2245.0}
-train63_datsun_710 {"horsepower": 97.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 119.0, "weight": 2545.0}
-train64_mercury_grand_marquis {"horsepower": 138.0, "acceleration": 13.2, "cylinders": 8.0, "displacement": 351.0, "weight": 3955.0}
-train65_plymouth_champ {"horsepower": 64.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 86.0, "weight": 1875.0}
-train66_datsun_200-sx {"horsepower": 97.0, "acceleration": 14.9, "cylinders": 4.0, "displacement": 119.0, "weight": 2405.0}
-train67_chevrolet_nova {"horsepower": 105.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3459.0}
-train68_hi_1200d {"horsepower": 193.0, "acceleration": 18.5, "cylinders": 8.0, "displacement": 304.0, "weight": 4732.0}
-train69_ford_country {"horsepower": 167.0, "acceleration": 12.5, "cylinders": 8.0, "displacement": 400.0, "weight": 4906.0}
-train70_plymouth_valiant {"horsepower": 100.0, "acceleration": 15.4, "cylinders": 6.0, "displacement": 225.0, "weight": 3233.0}
-train71_amc_gremlin {"horsepower": 100.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 232.0, "weight": 2914.0}
-train72_ford_pinto_runabout {"horsepower": 86.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 122.0, "weight": 2226.0}
-train73_chevrolet_monte_carlo_landau {"horsepower": 145.0, "acceleration": 13.2, "cylinders": 8.0, "displacement": 305.0, "weight": 3425.0}
-train74_amc_concord_dl_6 {"horsepower": 90.0, "acceleration": 18.2, "cylinders": 6.0, "displacement": 232.0, "weight": 3265.0}
-train75_pontiac_firebird {"horsepower": 100.0, "acceleration": 15.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3282.0}
-train76_chevrolet_cavalier_2-door {"horsepower": 88.0, "acceleration": 18.0, "cylinders": 4.0, "displacement": 112.0, "weight": 2395.0}
-train77_mercedes-benz_280s {"horsepower": 120.0, "acceleration": 16.7, "cylinders": 6.0, "displacement": 168.0, "weight": 3820.0}
-train78_dodge_d200 {"horsepower": 210.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 318.0, "weight": 4382.0}
-train79_amc_hornet {"horsepower": 97.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 199.0, "weight": 2774.0}
-train80_plymouth_reliant {"horsepower": 84.0, "acceleration": 12.9, "cylinders": 4.0, "displacement": 135.0, "weight": 2385.0}
-train81_amc_hornet {"horsepower": 100.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 232.0, "weight": 2901.0}
-train82_toyota_corona_liftback {"horsepower": 90.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 134.0, "weight": 2711.0}
-train83_ford_maverick {"horsepower": 72.0, "acceleration": 19.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3158.0}
-train84_cadillac_eldorado {"horsepower": 125.0, "acceleration": 17.4, "cylinders": 8.0, "displacement": 350.0, "weight": 3900.0}
-train85_mercury_monarch {"horsepower": 72.0, "acceleration": 21.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3432.0}
-train86_buick_skylark {"horsepower": 105.0, "acceleration": 16.9, "cylinders": 6.0, "displacement": 231.0, "weight": 3425.0}
-train87_chevrolet_cavalier {"horsepower": 88.0, "acceleration": 19.6, "cylinders": 4.0, "displacement": 112.0, "weight": 2605.0}
-train88_chevrolet_vega {"horsepower": 90.0, "acceleration": 19.5, "cylinders": 4.0, "displacement": 140.0, "weight": 2408.0}
-train89_ford_torino {"horsepower": 140.0, "acceleration": 10.5, "cylinders": 8.0, "displacement": 302.0, "weight": 3449.0}
-train90_chevrolet_chevelle_malibu {"horsepower": 130.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 307.0, "weight": 3504.0}
-train91_toyota_corona {"horsepower": 52.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 76.0, "weight": 1649.0}
-train92_buick_opel_isuzu_deluxe {"horsepower": 80.0, "acceleration": 14.8, "cylinders": 4.0, "displacement": 111.0, "weight": 2155.0}
-train93_volvo_244dl {"horsepower": 98.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 121.0, "weight": 2945.0}
-train94_opel_1900 {"horsepower": 81.0, "acceleration": 16.9, "cylinders": 4.0, "displacement": 116.0, "weight": 2220.0}
-train95_peugeot_304 {"horsepower": 70.0, "acceleration": 19.5, "cylinders": 4.0, "displacement": 79.0, "weight": 2074.0}
-train96_toyota_celica_gt {"horsepower": 96.0, "acceleration": 13.9, "cylinders": 4.0, "displacement": 144.0, "weight": 2665.0}
-train97_buick_estate_wagon_(sw) {"horsepower": 155.0, "acceleration": 14.9, "cylinders": 8.0, "displacement": 350.0, "weight": 4360.0}
-train98_volvo_245 {"horsepower": 102.0, "acceleration": 15.7, "cylinders": 4.0, "displacement": 130.0, "weight": 3150.0}
-train99_chevrolet_impala {"horsepower": 150.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 400.0, "weight": 4997.0}
-train100_chevrolet_bel_air {"horsepower": 145.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 350.0, "weight": 4440.0}
-train101_datsun_pl510 {"horsepower": 88.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 97.0, "weight": 2130.0}
-train102_saab_99gle {"horsepower": 115.0, "acceleration": 15.7, "cylinders": 4.0, "displacement": 121.0, "weight": 2795.0}
-train103_renault_12tl {"horsepower": 83.0, "acceleration": 15.3, "cylinders": 4.0, "displacement": 101.0, "weight": 2202.0}
-train104_pontiac_catalina {"horsepower": 175.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 400.0, "weight": 4385.0}
-train105_subaru {"horsepower": 67.0, "acceleration": 17.8, "cylinders": 4.0, "displacement": 97.0, "weight": 2065.0}
-train106_chevrolet_chevette {"horsepower": 70.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 98.0, "weight": 2120.0}
-train107_saab_99le {"horsepower": 115.0, "acceleration": 13.5, "cylinders": 4.0, "displacement": 121.0, "weight": 2671.0}
-train108_amc_rebel_sst {"horsepower": 150.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 304.0, "weight": 3433.0}
-train109_ford_gran_torino {"horsepower": 140.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 302.0, "weight": 4141.0}
-train110_ford_mustang_ii_2+2 {"horsepower": 89.0, "acceleration": 15.8, "cylinders": 4.0, "displacement": 140.0, "weight": 2755.0}
-train111_ford_pinto {"horsepower": 80.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 122.0, "weight": 2451.0}
-train112_toyota_corolla {"horsepower": 75.0, "acceleration": 16.8, "cylinders": 4.0, "displacement": 108.0, "weight": 2350.0}
-train113_toyota_carina {"horsepower": 88.0, "acceleration": 19.0, "cylinders": 4.0, "displacement": 97.0, "weight": 2279.0}
-train114_amc_hornet {"horsepower": 100.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 232.0, "weight": 2945.0}
-train115_plymouth_horizon_4 {"horsepower": 63.0, "acceleration": 14.9, "cylinders": 4.0, "displacement": 105.0, "weight": 2215.0}
-train116_chevrolet_nova {"horsepower": 105.0, "acceleration": 14.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3353.0}
-train117_fiat_128 {"horsepower": 75.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 90.0, "weight": 2108.0}
-train118_dodge_coronet_custom {"horsepower": 150.0, "acceleration": 12.5, "cylinders": 8.0, "displacement": 318.0, "weight": 3777.0}
-train119_volkswagen_rabbit {"horsepower": 71.0, "acceleration": 12.2, "cylinders": 4.0, "displacement": 97.0, "weight": 1825.0}
-train120_chevrolet_camaro {"horsepower": 90.0, "acceleration": 17.3, "cylinders": 4.0, "displacement": 151.0, "weight": 2950.0}
-train121_ford_gran_torino_(sw) {"horsepower": 140.0, "acceleration": 16.0, "cylinders": 8.0, "displacement": 302.0, "weight": 4294.0}
-train122_subaru_dl {"horsepower": 67.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 97.0, "weight": 1985.0}
-train123_mazda_626 {"horsepower": 75.0, "acceleration": 17.5, "cylinders": 4.0, "displacement": 120.0, "weight": 2542.0}
-train124_oldsmobile_cutlass_salon_brougham {"horsepower": 90.0, "acceleration": 22.2, "cylinders": 8.0, "displacement": 260.0, "weight": 3420.0}
-train125_volvo_144ea {"horsepower": 112.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 121.0, "weight": 2868.0}
-train126_plymouth_satellite {"horsepower": 150.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 318.0, "weight": 3436.0}
-train127_amc_gremlin {"horsepower": 100.0, "acceleration": 15.0, "cylinders": 6.0, "displacement": 232.0, "weight": 2789.0}
-train128_chevrolet_malibu_classic_(sw) {"horsepower": 125.0, "acceleration": 15.0, "cylinders": 8.0, "displacement": 267.0, "weight": 3605.0} -train129_toyota_corolla {"horsepower": 75.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 97.0, "weight": 2171.0} -train130_buick_skylark_320 {"horsepower": 165.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 350.0, "weight": 3693.0} -train131_toyota_corona_mark_ii {"horsepower": 95.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 113.0, "weight": 2372.0} -train132_ford_gran_torino {"horsepower": 152.0, "acceleration": 12.8, "cylinders": 8.0, "displacement": 351.0, "weight": 4215.0} -train133_dodge_colt {"horsepower": 79.0, "acceleration": 17.7, "cylinders": 4.0, "displacement": 98.0, "weight": 2255.0} -train134_pontiac_ventura_sj {"horsepower": 110.0, "acceleration": 16.2, "cylinders": 6.0, "displacement": 250.0, "weight": 3645.0} -train135_oldsmobile_cutlass_ls {"horsepower": 105.0, "acceleration": 19.0, "cylinders": 8.0, "displacement": 350.0, "weight": 3725.0} -train136_toyouta_corona_mark_ii_(sw) {"horsepower": 97.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 120.0, "weight": 2506.0} -train137_amc_gremlin {"horsepower": 100.0, "acceleration": 13.0, "cylinders": 6.0, "displacement": 232.0, "weight": 2634.0} -train138_chevrolet_malibu {"horsepower": 95.0, "acceleration": 18.2, "cylinders": 6.0, "displacement": 200.0, "weight": 3155.0} -train139_oldsmobile_starfire_sx {"horsepower": 85.0, "acceleration": 17.6, "cylinders": 4.0, "displacement": 151.0, "weight": 2855.0} -train140_amc_ambassador_dpl {"horsepower": 190.0, "acceleration": 8.5, "cylinders": 8.0, "displacement": 390.0, "weight": 3850.0} -train141_volkswagen_scirocco {"horsepower": 71.0, "acceleration": 14.9, "cylinders": 4.0, "displacement": 89.0, "weight": 1990.0} -train142_plymouth_satellite_custom_(sw) {"horsepower": 150.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 318.0, "weight": 4077.0} -train143_toyota_corolla_liftback {"horsepower": 75.0, "acceleration": 18.2, "cylinders": 4.0, "displacement": 97.0, "weight": 2265.0} -train144_ford_fairmont {"horsepower": 88.0, "acceleration": 18.1, "cylinders": 4.0, "displacement": 140.0, "weight": 2870.0} -train145_plymouth_fury_gran_sedan {"horsepower": 150.0, "acceleration": 14.5, "cylinders": 8.0, "displacement": 318.0, "weight": 4237.0} -train146_buick_century_luxus_(sw) {"horsepower": 150.0, "acceleration": 14.5, "cylinders": 8.0, "displacement": 350.0, "weight": 4699.0} -train147_ford_granada_ghia {"horsepower": 78.0, "acceleration": 21.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3574.0} -train148_dodge_charger_2.2 {"horsepower": 84.0, "acceleration": 13.0, "cylinders": 4.0, "displacement": 135.0, "weight": 2370.0} -train149_chevrolet_concours {"horsepower": 110.0, "acceleration": 16.4, "cylinders": 6.0, "displacement": 250.0, "weight": 3520.0} -train150_toyota_corona {"horsepower": 95.0, "acceleration": 14.2, "cylinders": 4.0, "displacement": 134.0, "weight": 2560.0} -train151_pontiac_grand_prix_lj {"horsepower": 180.0, "acceleration": 11.1, "cylinders": 8.0, "displacement": 400.0, "weight": 4220.0} -train152_dodge_colt_m/m {"horsepower": 83.0, "acceleration": 15.9, "cylinders": 4.0, "displacement": 98.0, "weight": 2075.0} -train153_pontiac_astro {"horsepower": 78.0, "acceleration": 18.5, "cylinders": 4.0, "displacement": 140.0, "weight": 2592.0} -train154_amc_gremlin {"horsepower": 90.0, "acceleration": 15.0, "cylinders": 6.0, "displacement": 199.0, "weight": 2648.0} 
-train155_fiat_strada_custom {"horsepower": 69.0, "acceleration": 14.7, "cylinders": 4.0, "displacement": 91.0, "weight": 2130.0} -train156_toyota_corona_hardtop {"horsepower": 95.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 113.0, "weight": 2278.0} -train157_datsun_b210 {"horsepower": 67.0, "acceleration": 19.0, "cylinders": 4.0, "displacement": 79.0, "weight": 1950.0} -train158_volkswagen_dasher {"horsepower": 78.0, "acceleration": 14.1, "cylinders": 4.0, "displacement": 97.0, "weight": 2190.0} -train159_plymouth_duster {"horsepower": 95.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 198.0, "weight": 2904.0} -train160_mazda_626 {"horsepower": 74.0, "acceleration": 18.3, "cylinders": 4.0, "displacement": 120.0, "weight": 2635.0} -train161_buick_lesabre_custom {"horsepower": 155.0, "acceleration": 13.5, "cylinders": 8.0, "displacement": 350.0, "weight": 4502.0} -train162_chevrolet_monte_carlo {"horsepower": 150.0, "acceleration": 9.5, "cylinders": 8.0, "displacement": 400.0, "weight": 3761.0} -train163_peugeot_505s_turbo_diesel {"horsepower": 80.0, "acceleration": 20.4, "cylinders": 4.0, "displacement": 141.0, "weight": 3230.0} -train164_datsun_610 {"horsepower": 94.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 108.0, "weight": 2379.0} -train165_ford_granada {"horsepower": 98.0, "acceleration": 19.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3525.0} -train166_pontiac_grand_prix {"horsepower": 230.0, "acceleration": 9.5, "cylinders": 8.0, "displacement": 400.0, "weight": 4278.0} -train167_cadillac_seville {"horsepower": 180.0, "acceleration": 12.1, "cylinders": 8.0, "displacement": 350.0, "weight": 4380.0} -train168_amc_concord {"horsepower": 90.0, "acceleration": 20.1, "cylinders": 4.0, "displacement": 151.0, "weight": 3003.0} -train169_ford_mustang_gl {"horsepower": 86.0, "acceleration": 15.6, "cylinders": 4.0, "displacement": 140.0, "weight": 2790.0} -train170_amc_matador {"horsepower": 110.0, "acceleration": 19.0, "cylinders": 6.0, "displacement": 258.0, "weight": 3730.0} -train171_ford_gran_torino_(sw) {"horsepower": 140.0, "acceleration": 16.0, "cylinders": 8.0, "displacement": 302.0, "weight": 4638.0} -train172_datsun_510_hatchback {"horsepower": 92.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 119.0, "weight": 2434.0} -train173_dodge_rampage {"horsepower": 84.0, "acceleration": 11.6, "cylinders": 4.0, "displacement": 135.0, "weight": 2295.0} -train174_datsun_b-210 {"horsepower": 70.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 85.0, "weight": 1990.0} -train175_datsun_210_mpg {"horsepower": 65.0, "acceleration": 19.4, "cylinders": 4.0, "displacement": 85.0, "weight": 1975.0} -train176_plymouth_horizon_tc3 {"horsepower": 70.0, "acceleration": 14.9, "cylinders": 4.0, "displacement": 105.0, "weight": 2150.0} -train177_ford_pinto {"horsepower": 85.0, "acceleration": 18.5, "cylinders": 4.0, "displacement": 122.0, "weight": 2310.0} -train178_chrysler_new_yorker_brougham {"horsepower": 215.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 440.0, "weight": 4735.0} -train179_chrysler_cordoba {"horsepower": 190.0, "acceleration": 12.2, "cylinders": 8.0, "displacement": 400.0, "weight": 4325.0} -train180_oldsmobile_cutlass_salon_brougham {"horsepower": 110.0, "acceleration": 15.5, "cylinders": 8.0, "displacement": 260.0, "weight": 3365.0} -train181_plymouth_satellite_custom {"horsepower": 105.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 225.0, "weight": 3439.0} -train182_maxda_glc_deluxe 
{"horsepower": 65.0, "acceleration": 15.2, "cylinders": 4.0, "displacement": 86.0, "weight": 1975.0} -train183_datsun_1200 {"horsepower": 69.0, "acceleration": 18.0, "cylinders": 4.0, "displacement": 72.0, "weight": 1613.0} -train184_ford_pinto {"horsepower": 72.0, "acceleration": 13.6, "cylinders": 4.0, "displacement": 140.0, "weight": 2565.0} -train185_volkswagen_rabbit_l {"horsepower": 74.0, "acceleration": 15.3, "cylinders": 4.0, "displacement": 105.0, "weight": 1980.0} -train186_audi_100ls {"horsepower": 95.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 115.0, "weight": 2694.0} -train187_chevrolet_citation {"horsepower": 115.0, "acceleration": 11.3, "cylinders": 6.0, "displacement": 173.0, "weight": 2595.0} -train188_buick_electra_225_custom {"horsepower": 225.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 455.0, "weight": 4951.0} -train189_datsun_200sx {"horsepower": 100.0, "acceleration": 14.8, "cylinders": 4.0, "displacement": 119.0, "weight": 2615.0} -train190_dodge_colt_hatchback_custom {"horsepower": 80.0, "acceleration": 14.4, "cylinders": 4.0, "displacement": 98.0, "weight": 1915.0} -train191_honda_accord_lx {"horsepower": 68.0, "acceleration": 16.6, "cylinders": 4.0, "displacement": 98.0, "weight": 2135.0} -train192_plymouth_custom_suburb {"horsepower": 170.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 360.0, "weight": 4654.0} -train193_toyota_mark_ii {"horsepower": 122.0, "acceleration": 13.5, "cylinders": 6.0, "displacement": 156.0, "weight": 2807.0} -train194_oldsmobile_omega_brougham {"horsepower": 115.0, "acceleration": 12.9, "cylinders": 6.0, "displacement": 173.0, "weight": 2700.0} -train195_ford_ltd_landau {"horsepower": 129.0, "acceleration": 13.4, "cylinders": 8.0, "displacement": 302.0, "weight": 3725.0} -train196_chevy_c10 {"horsepower": 145.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 350.0, "weight": 4055.0} -train197_dodge_colt {"horsepower": 75.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 90.0, "weight": 2125.0} -train198_oldsmobile_cutlass_supreme {"horsepower": 110.0, "acceleration": 19.0, "cylinders": 8.0, "displacement": 260.0, "weight": 4060.0} -train199_ford_pinto_(sw) {"horsepower": 86.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 122.0, "weight": 2395.0} -train200_chevrolet_vega {"horsepower": 75.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 140.0, "weight": 2542.0} -train201_pontiac_catalina_brougham {"horsepower": 175.0, "acceleration": 11.5, "cylinders": 8.0, "displacement": 400.0, "weight": 4464.0} -train202_ford_futura {"horsepower": 139.0, "acceleration": 11.2, "cylinders": 8.0, "displacement": 302.0, "weight": 3205.0} -train203_honda_civic {"horsepower": 53.0, "acceleration": 17.4, "cylinders": 4.0, "displacement": 91.0, "weight": 1795.0} -train204_ford_mustang {"horsepower": 88.0, "acceleration": 14.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3139.0} -train205_chevrolet_vega_(sw) {"horsepower": 72.0, "acceleration": 19.0, "cylinders": 4.0, "displacement": 140.0, "weight": 2408.0} -train206_buick_century {"horsepower": 110.0, "acceleration": 21.0, "cylinders": 6.0, "displacement": 231.0, "weight": 3907.0} -train207_dodge_aspen {"horsepower": 90.0, "acceleration": 18.7, "cylinders": 6.0, "displacement": 225.0, "weight": 3381.0} -train208_subaru_dl {"horsepower": 67.0, "acceleration": 18.0, "cylinders": 4.0, "displacement": 97.0, "weight": 2145.0} -train209_plymouth_fury_iii {"horsepower": 150.0, "acceleration": 13.5, "cylinders": 8.0, 
"displacement": 318.0, "weight": 4135.0} -train210_fiat_128 {"horsepower": 49.0, "acceleration": 19.5, "cylinders": 4.0, "displacement": 68.0, "weight": 1867.0} -train211_volvo_264gl {"horsepower": 125.0, "acceleration": 13.6, "cylinders": 6.0, "displacement": 163.0, "weight": 3140.0} -train212_dodge_aries_se {"horsepower": 84.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 135.0, "weight": 2525.0} -train213_datsun_510 {"horsepower": 97.0, "acceleration": 14.7, "cylinders": 4.0, "displacement": 119.0, "weight": 2300.0} -train214_chevrolet_chevelle_malibu_classic {"horsepower": 100.0, "acceleration": 17.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3781.0} -train215_ford_ranger {"horsepower": 79.0, "acceleration": 18.6, "cylinders": 4.0, "displacement": 120.0, "weight": 2625.0} -train216_vw_dasher_(diesel) {"horsepower": 48.0, "acceleration": 23.7, "cylinders": 4.0, "displacement": 90.0, "weight": 2335.0} -train217_chevy_c20 {"horsepower": 200.0, "acceleration": 15.0, "cylinders": 8.0, "displacement": 307.0, "weight": 4376.0} -train218_buick_skyhawk {"horsepower": 110.0, "acceleration": 15.0, "cylinders": 6.0, "displacement": 231.0, "weight": 3039.0} -train219_peugeot_504 {"horsepower": 88.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 120.0, "weight": 2957.0} -train220_ford_f250 {"horsepower": 215.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 360.0, "weight": 4615.0} -train221_dodge_diplomat {"horsepower": 140.0, "acceleration": 13.2, "cylinders": 8.0, "displacement": 318.0, "weight": 3735.0} -train222_honda_civic_(auto) {"horsepower": 67.0, "acceleration": 15.7, "cylinders": 4.0, "displacement": 91.0, "weight": 1965.0} -train223_chevrolet_caprice_classic {"horsepower": 130.0, "acceleration": 15.4, "cylinders": 8.0, "displacement": 305.0, "weight": 3840.0} -train224_toyota_corolla_1200 {"horsepower": 65.0, "acceleration": 21.0, "cylinders": 4.0, "displacement": 71.0, "weight": 1836.0} -train225_plymouth_volare_premier_v8 {"horsepower": 150.0, "acceleration": 13.2, "cylinders": 8.0, "displacement": 318.0, "weight": 3940.0} -train226_opel_manta {"horsepower": 78.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 97.0, "weight": 2300.0} -train227_mercedes_benz_300d {"horsepower": 77.0, "acceleration": 20.1, "cylinders": 5.0, "displacement": 183.0, "weight": 3530.0} -train228_chevrolet_cavalier_wagon {"horsepower": 88.0, "acceleration": 18.6, "cylinders": 4.0, "displacement": 112.0, "weight": 2640.0} -train229_vw_rabbit {"horsepower": 70.0, "acceleration": 14.2, "cylinders": 4.0, "displacement": 90.0, "weight": 1937.0} -train230_toyota_corolla {"horsepower": 75.0, "acceleration": 15.2, "cylinders": 4.0, "displacement": 108.0, "weight": 2265.0} -train231_buick_estate_wagon_(sw) {"horsepower": 225.0, "acceleration": 10.0, "cylinders": 8.0, "displacement": 455.0, "weight": 3086.0} -train232_ford_f108 {"horsepower": 130.0, "acceleration": 15.0, "cylinders": 8.0, "displacement": 302.0, "weight": 3870.0} -train233_oldsmobile_omega {"horsepower": 180.0, "acceleration": 11.0, "cylinders": 8.0, "displacement": 350.0, "weight": 3664.0} -train234_oldsmobile_cutlass_ciera_(diesel) {"horsepower": 85.0, "acceleration": 17.0, "cylinders": 6.0, "displacement": 262.0, "weight": 3015.0} -train235_chevrolet_impala {"horsepower": 220.0, "acceleration": 9.0, "cylinders": 8.0, "displacement": 454.0, "weight": 4354.0} -train236_volkswagen_dasher {"horsepower": 71.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 90.0, "weight": 2223.0} 
-train237_ford_fairmont_futura {"horsepower": 92.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 140.0, "weight": 2865.0} -train238_datsun_210 {"horsepower": 65.0, "acceleration": 19.2, "cylinders": 4.0, "displacement": 85.0, "weight": 2020.0} -train239_honda_accord_cvcc {"horsepower": 68.0, "acceleration": 18.5, "cylinders": 4.0, "displacement": 98.0, "weight": 2045.0} -train240_mazda_glc {"horsepower": 65.0, "acceleration": 17.9, "cylinders": 4.0, "displacement": 86.0, "weight": 2110.0} -train241_chevrolet_citation {"horsepower": 90.0, "acceleration": 16.5, "cylinders": 4.0, "displacement": 151.0, "weight": 2678.0} -train242_datsun_810_maxima {"horsepower": 120.0, "acceleration": 13.8, "cylinders": 6.0, "displacement": 146.0, "weight": 2930.0} -train243_ford_maverick {"horsepower": 85.0, "acceleration": 16.0, "cylinders": 6.0, "displacement": 200.0, "weight": 2587.0} -train244_fiat_124_sport_coupe {"horsepower": 90.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 98.0, "weight": 2265.0} -train245_chevrolet_vega {"horsepower": 72.0, "acceleration": 19.5, "cylinders": 4.0, "displacement": 140.0, "weight": 2401.0} -train246_chevrolet_chevelle_malibu_classic {"horsepower": 140.0, "acceleration": 13.0, "cylinders": 8.0, "displacement": 305.0, "weight": 4215.0} -train247_buick_skylark {"horsepower": 84.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 151.0, "weight": 2635.0} -train248_renault_5_gtl {"horsepower": 58.0, "acceleration": 18.6, "cylinders": 4.0, "displacement": 79.0, "weight": 1825.0} -train249_plymouth_duster {"horsepower": 95.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 198.0, "weight": 2833.0} -train250_buick_regal_sport_coupe_(turbo) {"horsepower": 165.0, "acceleration": 13.4, "cylinders": 6.0, "displacement": 231.0, "weight": 3445.0} -train251_mercury_lynx_l {"horsepower": 70.0, "acceleration": 17.3, "cylinders": 4.0, "displacement": 98.0, "weight": 2125.0} -train252_dodge_aspen_6 {"horsepower": 110.0, "acceleration": 16.6, "cylinders": 6.0, "displacement": 225.0, "weight": 3360.0} -train253_oldsmobile_vista_cruiser {"horsepower": 180.0, "acceleration": 12.5, "cylinders": 8.0, "displacement": 350.0, "weight": 4499.0} -train254_amc_pacer_d/l {"horsepower": 95.0, "acceleration": 17.8, "cylinders": 6.0, "displacement": 258.0, "weight": 3193.0} -train255_ford_fairmont_4 {"horsepower": 88.0, "acceleration": 17.3, "cylinders": 4.0, "displacement": 140.0, "weight": 2890.0} -train256_chevrolet_chevelle_concours_(sw) {"horsepower": 130.0, "acceleration": 14.0, "cylinders": 8.0, "displacement": 307.0, "weight": 4098.0} -train257_honda_accord {"horsepower": 72.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 107.0, "weight": 2290.0} -train258_dodge_colt_hardtop {"horsepower": 80.0, "acceleration": 17.0, "cylinders": 4.0, "displacement": 97.5, "weight": 2126.0} -train259_amc_hornet_sportabout_(sw) {"horsepower": 110.0, "acceleration": 13.5, "cylinders": 6.0, "displacement": 258.0, "weight": 2962.0} -train260_toyota_corona {"horsepower": 95.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 113.0, "weight": 2228.0} -train261_ford_fiesta {"horsepower": 66.0, "acceleration": 14.4, "cylinders": 4.0, "displacement": 98.0, "weight": 1800.0} -train262_datsun_280-zx {"horsepower": 132.0, "acceleration": 11.4, "cylinders": 6.0, "displacement": 168.0, "weight": 2910.0} -train263_chevrolet_impala {"horsepower": 165.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 350.0, "weight": 4274.0} -train264_toyota_corolla {"horsepower": 
75.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 97.0, "weight": 2155.0} -train265_chevrolet_chevelle_malibu {"horsepower": 100.0, "acceleration": 15.5, "cylinders": 6.0, "displacement": 250.0, "weight": 3329.0} -train266_volvo_diesel {"horsepower": 76.0, "acceleration": 19.6, "cylinders": 6.0, "displacement": 145.0, "weight": 3160.0} -train267_pontiac_phoenix {"horsepower": 90.0, "acceleration": 18.0, "cylinders": 4.0, "displacement": 151.0, "weight": 2735.0} -train268_dodge_colt_(sw) {"horsepower": 80.0, "acceleration": 15.0, "cylinders": 4.0, "displacement": 98.0, "weight": 2164.0} -train269_vokswagen_rabbit {"horsepower": 62.0, "acceleration": 15.3, "cylinders": 4.0, "displacement": 89.0, "weight": 1845.0} -train270_peugeot_504 {"horsepower": 71.0, "acceleration": 24.8, "cylinders": 4.0, "displacement": 141.0, "weight": 3190.0} -train271_chevrolet_vega_2300 {"horsepower": 90.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 140.0, "weight": 2264.0} -train272_ford_fairmont_(auto) {"horsepower": 85.0, "acceleration": 15.8, "cylinders": 6.0, "displacement": 200.0, "weight": 2965.0} -train273_plymouth_grand_fury {"horsepower": 150.0, "acceleration": 14.5, "cylinders": 8.0, "displacement": 318.0, "weight": 4498.0} -train274_plymouth_'cuda_340 {"horsepower": 160.0, "acceleration": 8.0, "cylinders": 8.0, "displacement": 340.0, "weight": 3609.0} -train275_ford_granada_l {"horsepower": 112.0, "acceleration": 14.7, "cylinders": 6.0, "displacement": 232.0, "weight": 2835.0} -train276_volkswagen_rabbit_custom {"horsepower": 78.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 97.0, "weight": 1940.0} -train277_honda_civic_cvcc {"horsepower": 53.0, "acceleration": 17.5, "cylinders": 4.0, "displacement": 91.0, "weight": 1795.0} -train278_chevrolet_nova_custom {"horsepower": 100.0, "acceleration": 18.0, "cylinders": 6.0, "displacement": 250.0, "weight": 3278.0} -train279_honda_civic_cvcc {"horsepower": 60.0, "acceleration": 16.4, "cylinders": 4.0, "displacement": 91.0, "weight": 1800.0} -train280_volkswagen_type_3 {"horsepower": 54.0, "acceleration": 23.5, "cylinders": 4.0, "displacement": 97.0, "weight": 2254.0} -train281_toyota_corolla_tercel {"horsepower": 60.0, "acceleration": 18.8, "cylinders": 4.0, "displacement": 89.0, "weight": 1968.0} -train282_datsun_pl510 {"horsepower": 88.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 97.0, "weight": 2130.0} -train283_ford_maverick {"horsepower": 81.0, "acceleration": 17.6, "cylinders": 6.0, "displacement": 200.0, "weight": 3012.0} -train284_dodge_omni {"horsepower": 75.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 105.0, "weight": 2230.0} -train285_plymouth_volare_custom {"horsepower": 100.0, "acceleration": 17.7, "cylinders": 6.0, "displacement": 225.0, "weight": 3630.0} -train286_datsun_710 {"horsepower": 61.0, "acceleration": 19.0, "cylinders": 4.0, "displacement": 83.0, "weight": 2003.0} -train287_honda_civic_1300 {"horsepower": 60.0, "acceleration": 16.1, "cylinders": 4.0, "displacement": 81.0, "weight": 1760.0} -train288_saab_99le {"horsepower": 110.0, "acceleration": 14.0, "cylinders": 4.0, "displacement": 121.0, "weight": 2660.0} -train289_honda_prelude {"horsepower": 75.0, "acceleration": 14.4, "cylinders": 4.0, "displacement": 107.0, "weight": 2210.0} -train290_buick_skylark_limited {"horsepower": 90.0, "acceleration": 16.0, "cylinders": 4.0, "displacement": 151.0, "weight": 2670.0} -train291_pontiac_catalina {"horsepower": 225.0, "acceleration": 10.0, "cylinders": 8.0, "displacement": 
455.0, "weight": 4425.0} -train292_mazda_rx-4 {"horsepower": 110.0, "acceleration": 13.5, "cylinders": 3.0, "displacement": 80.0, "weight": 2720.0} -train293_volkswagen_super_beetle {"horsepower": 46.0, "acceleration": 21.0, "cylinders": 4.0, "displacement": 97.0, "weight": 1950.0} -train294_ford_fairmont_(man) {"horsepower": 88.0, "acceleration": 15.4, "cylinders": 4.0, "displacement": 140.0, "weight": 2720.0} -train295_fiat_131 {"horsepower": 86.0, "acceleration": 15.5, "cylinders": 4.0, "displacement": 107.0, "weight": 2464.0} -train296_ford_escort_4w {"horsepower": 65.0, "acceleration": 16.2, "cylinders": 4.0, "displacement": 98.0, "weight": 2045.0} -train297_plymouth_horizon_miser {"horsepower": 63.0, "acceleration": 14.7, "cylinders": 4.0, "displacement": 105.0, "weight": 2125.0} -train298_ford_mustang_ii {"horsepower": 129.0, "acceleration": 12.0, "cylinders": 8.0, "displacement": 302.0, "weight": 3169.0} -train299_saab_99e {"horsepower": 95.0, "acceleration": 17.5, "cylinders": 4.0, "displacement": 104.0, "weight": 2375.0} -train300_fiat_124b {"horsepower": 76.0, "acceleration": 14.5, "cylinders": 4.0, "displacement": 88.0, "weight": 2065.0} diff --git a/creg/test_data/auto-mpg.trainresp b/creg/test_data/auto-mpg.trainresp deleted file mode 100644 index ece7fa37..00000000 --- a/creg/test_data/auto-mpg.trainresp +++ /dev/null @@ -1,300 +0,0 @@ -train1_buick_century_350 13.0 -train2_audi_100_ls 24.0 -train3_chrysler_newport_royal 13.0 -train4_mercury_zephyr_6 19.8 -train5_volkswagen_model_111 27.0 -train6_dodge_monaco_brougham 15.5 -train7_mercedes-benz_240d 30.0 -train8_datsun_510_(sw) 28.0 -train9_opel_manta 24.0 -train10_renault_12_(sw) 26.0 -train11_chevrolet_nova 15.0 -train12_plymouth_arrow_gs 25.5 -train13_toyota_mark_ii 19.0 -train14_chevrolet_caprice_classic 13.0 -train15_ford_maverick 18.0 -train16_pontiac_lemans_v6 21.5 -train17_ford_granada_gl 20.2 -train18_chevrolet_monte_carlo_landau 15.5 -train19_subaru 26.0 -train20_volkswagen_411_(sw) 22.0 -train21_chrysler_lebaron_salon 17.6 -train22_volkswagen_jetta 33.0 -train23_dodge_coronet_brougham 16.0 -train24_pontiac_safari_(sw) 13.0 -train25_mazda_rx-7_gs 23.7 -train26_datsun_210 40.8 -train27_vw_rabbit_c_(diesel) 44.3 -train28_nissan_stanza_xe 36.0 -train29_audi_5000 20.3 -train30_chevrolet_malibu 13.0 -train31_mercury_capri_v6 21.0 -train32_datsun_b210_gx 39.4 -train33_volkswagen_dasher 26.0 -train34_volvo_145e_(sw) 18.0 -train35_chrysler_lebaron_medallion 26.0 -train36_pontiac_catalina 16.0 -train37_toyota_tercel 37.7 -train38_datsun_f-10_hatchback 33.5 -train39_chrysler_lebaron_town_@_country_(sw) 18.5 -train40_buick_century 22.4 -train41_amc_matador_(sw) 15.0 -train42_honda_civic 38.0 -train43_dodge_coronet_custom_(sw) 14.0 -train44_bmw_320i 21.5 -train45_mercury_marquis 11.0 -train46_amc_matador_(sw) 14.0 -train47_ford_ltd 13.0 -train48_toyota_corolla_1200 31.0 -train49_amc_ambassador_brougham 13.0 -train50_ford_galaxie_500 14.0 -train51_amc_concord_d/l 18.1 -train52_amc_matador 18.0 -train53_toyota_corona 24.0 -train54_plymouth_reliant 27.2 -train55_ford_pinto 18.0 -train56_datsun_810 22.0 -train57_fiat_x1.9 31.0 -train58_mercury_zephyr 20.8 -train59_toyota_corolla_1600_(sw) 27.0 -train60_chevrolet_chevette 30.0 -train61_toyota_cressida 25.4 -train62_toyota_corolla 34.0 -train63_datsun_710 24.0 -train64_mercury_grand_marquis 16.5 -train65_plymouth_champ 39.0 -train66_datsun_200-sx 23.9 -train67_chevrolet_nova 18.0 -train68_hi_1200d 9.0 -train69_ford_country 12.0 -train70_plymouth_valiant 22.0 -train71_amc_gremlin 
20.0 -train72_ford_pinto_runabout 21.0 -train73_chevrolet_monte_carlo_landau 19.2 -train74_amc_concord_dl_6 20.2 -train75_pontiac_firebird 19.0 -train76_chevrolet_cavalier_2-door 34.0 -train77_mercedes-benz_280s 16.5 -train78_dodge_d200 11.0 -train79_amc_hornet 18.0 -train80_plymouth_reliant 30.0 -train81_amc_hornet 19.0 -train82_toyota_corona_liftback 29.8 -train83_ford_maverick 15.0 -train84_cadillac_eldorado 23.0 -train85_mercury_monarch 15.0 -train86_buick_skylark 20.5 -train87_chevrolet_cavalier 28.0 -train88_chevrolet_vega 20.0 -train89_ford_torino 17.0 -train90_chevrolet_chevelle_malibu 18.0 -train91_toyota_corona 31.0 -train92_buick_opel_isuzu_deluxe 30.0 -train93_volvo_244dl 22.0 -train94_opel_1900 25.0 -train95_peugeot_304 30.0 -train96_toyota_celica_gt 32.0 -train97_buick_estate_wagon_(sw) 16.9 -train98_volvo_245 20.0 -train99_chevrolet_impala 11.0 -train100_chevrolet_bel_air 15.0 -train101_datsun_pl510 27.0 -train102_saab_99gle 21.6 -train103_renault_12tl 27.0 -train104_pontiac_catalina 14.0 -train105_subaru 32.3 -train106_chevrolet_chevette 32.1 -train107_saab_99le 25.0 -train108_amc_rebel_sst 16.0 -train109_ford_gran_torino 16.0 -train110_ford_mustang_ii_2+2 25.5 -train111_ford_pinto 26.0 -train112_toyota_corolla 32.4 -train113_toyota_carina 20.0 -train114_amc_hornet 18.0 -train115_plymouth_horizon_4 34.7 -train116_chevrolet_nova 22.0 -train117_fiat_128 24.0 -train118_dodge_coronet_custom 15.0 -train119_volkswagen_rabbit 29.5 -train120_chevrolet_camaro 27.0 -train121_ford_gran_torino_(sw) 13.0 -train122_subaru_dl 30.0 -train123_mazda_626 31.3 -train124_oldsmobile_cutlass_salon_brougham 23.9 -train125_volvo_144ea 19.0 -train126_plymouth_satellite 18.0 -train127_amc_gremlin 18.0 -train128_chevrolet_malibu_classic_(sw) 19.2 -train129_toyota_corolla 29.0 -train130_buick_skylark_320 15.0 -train131_toyota_corona_mark_ii 24.0 -train132_ford_gran_torino 14.5 -train133_dodge_colt 26.0 -train134_pontiac_ventura_sj 18.5 -train135_oldsmobile_cutlass_ls 26.6 -train136_toyouta_corona_mark_ii_(sw) 23.0 -train137_amc_gremlin 19.0 -train138_chevrolet_malibu 20.5 -train139_oldsmobile_starfire_sx 23.8 -train140_amc_ambassador_dpl 15.0 -train141_volkswagen_scirocco 31.5 -train142_plymouth_satellite_custom_(sw) 14.0 -train143_toyota_corolla_liftback 26.0 -train144_ford_fairmont 26.4 -train145_plymouth_fury_gran_sedan 14.0 -train146_buick_century_luxus_(sw) 13.0 -train147_ford_granada_ghia 18.0 -train148_dodge_charger_2.2 36.0 -train149_chevrolet_concours 17.5 -train150_toyota_corona 27.5 -train151_pontiac_grand_prix_lj 16.0 -train152_dodge_colt_m/m 33.5 -train153_pontiac_astro 23.0 -train154_amc_gremlin 21.0 -train155_fiat_strada_custom 37.3 -train156_toyota_corona_hardtop 24.0 -train157_datsun_b210 31.0 -train158_volkswagen_dasher 30.5 -train159_plymouth_duster 23.0 -train160_mazda_626 31.6 -train161_buick_lesabre_custom 13.0 -train162_chevrolet_monte_carlo 15.0 -train163_peugeot_505s_turbo_diesel 28.1 -train164_datsun_610 22.0 -train165_ford_granada 18.5 -train166_pontiac_grand_prix 16.0 -train167_cadillac_seville 16.5 -train168_amc_concord 24.3 -train169_ford_mustang_gl 27.0 -train170_amc_matador 15.0 -train171_ford_gran_torino_(sw) 14.0 -train172_datsun_510_hatchback 37.0 -train173_dodge_rampage 32.0 -train174_datsun_b-210 32.0 -train175_datsun_210_mpg 37.0 -train176_plymouth_horizon_tc3 34.5 -train177_ford_pinto 19.0 -train178_chrysler_new_yorker_brougham 13.0 -train179_chrysler_cordoba 15.5 -train180_oldsmobile_cutlass_salon_brougham 19.9 -train181_plymouth_satellite_custom 16.0 
-train182_maxda_glc_deluxe 34.1 -train183_datsun_1200 35.0 -train184_ford_pinto 26.5 -train185_volkswagen_rabbit_l 36.0 -train186_audi_100ls 23.0 -train187_chevrolet_citation 28.8 -train188_buick_electra_225_custom 12.0 -train189_datsun_200sx 32.9 -train190_dodge_colt_hatchback_custom 35.7 -train191_honda_accord_lx 29.5 -train192_plymouth_custom_suburb 13.0 -train193_toyota_mark_ii 20.0 -train194_oldsmobile_omega_brougham 26.8 -train195_ford_ltd_landau 17.6 -train196_chevy_c10 13.0 -train197_dodge_colt 28.0 -train198_oldsmobile_cutlass_supreme 17.0 -train199_ford_pinto_(sw) 22.0 -train200_chevrolet_vega 25.0 -train201_pontiac_catalina_brougham 14.0 -train202_ford_futura 18.1 -train203_honda_civic 33.0 -train204_ford_mustang 18.0 -train205_chevrolet_vega_(sw) 22.0 -train206_buick_century 17.0 -train207_dodge_aspen 19.1 -train208_subaru_dl 33.8 -train209_plymouth_fury_iii 15.0 -train210_fiat_128 29.0 -train211_volvo_264gl 17.0 -train212_dodge_aries_se 29.0 -train213_datsun_510 27.2 -train214_chevrolet_chevelle_malibu_classic 16.0 -train215_ford_ranger 28.0 -train216_vw_dasher_(diesel) 43.4 -train217_chevy_c20 10.0 -train218_buick_skyhawk 21.0 -train219_peugeot_504 23.0 -train220_ford_f250 10.0 -train221_dodge_diplomat 19.4 -train222_honda_civic_(auto) 32.0 -train223_chevrolet_caprice_classic 17.0 -train224_toyota_corolla_1200 32.0 -train225_plymouth_volare_premier_v8 13.0 -train226_opel_manta 26.0 -train227_mercedes_benz_300d 25.4 -train228_chevrolet_cavalier_wagon 27.0 -train229_vw_rabbit 29.0 -train230_toyota_corolla 32.2 -train231_buick_estate_wagon_(sw) 14.0 -train232_ford_f108 13.0 -train233_oldsmobile_omega 11.0 -train234_oldsmobile_cutlass_ciera_(diesel) 38.0 -train235_chevrolet_impala 14.0 -train236_volkswagen_dasher 25.0 -train237_ford_fairmont_futura 24.0 -train238_datsun_210 31.8 -train239_honda_accord_cvcc 31.5 -train240_mazda_glc 46.6 -train241_chevrolet_citation 28.0 -train242_datsun_810_maxima 24.2 -train243_ford_maverick 21.0 -train244_fiat_124_sport_coupe 26.0 -train245_chevrolet_vega 21.0 -train246_chevrolet_chevelle_malibu_classic 17.5 -train247_buick_skylark 26.6 -train248_renault_5_gtl 36.0 -train249_plymouth_duster 22.0 -train250_buick_regal_sport_coupe_(turbo) 17.7 -train251_mercury_lynx_l 36.0 -train252_dodge_aspen_6 20.6 -train253_oldsmobile_vista_cruiser 12.0 -train254_amc_pacer_d/l 17.5 -train255_ford_fairmont_4 22.3 -train256_chevrolet_chevelle_concours_(sw) 13.0 -train257_honda_accord 32.4 -train258_dodge_colt_hardtop 25.0 -train259_amc_hornet_sportabout_(sw) 18.0 -train260_toyota_corona 25.0 -train261_ford_fiesta 36.1 -train262_datsun_280-zx 32.7 -train263_chevrolet_impala 13.0 -train264_toyota_corolla 28.0 -train265_chevrolet_chevelle_malibu 17.0 -train266_volvo_diesel 30.7 -train267_pontiac_phoenix 27.0 -train268_dodge_colt_(sw) 28.0 -train269_vokswagen_rabbit 29.8 -train270_peugeot_504 27.2 -train271_chevrolet_vega_2300 28.0 -train272_ford_fairmont_(auto) 20.2 -train273_plymouth_grand_fury 16.0 -train274_plymouth_'cuda_340 14.0 -train275_ford_granada_l 22.0 -train276_volkswagen_rabbit_custom 29.0 -train277_honda_civic_cvcc 33.0 -train278_chevrolet_nova_custom 16.0 -train279_honda_civic_cvcc 36.1 -train280_volkswagen_type_3 23.0 -train281_toyota_corolla_tercel 38.1 -train282_datsun_pl510 27.0 -train283_ford_maverick 24.0 -train284_dodge_omni 30.9 -train285_plymouth_volare_custom 19.0 -train286_datsun_710 32.0 -train287_honda_civic_1300 35.1 -train288_saab_99le 24.0 -train289_honda_prelude 33.7 -train290_buick_skylark_limited 28.4 -train291_pontiac_catalina 
14.0 -train292_mazda_rx-4 21.5 -train293_volkswagen_super_beetle 26.0 -train294_ford_fairmont_(man) 25.1 -train295_fiat_131 28.0 -train296_ford_escort_4w 34.4 -train297_plymouth_horizon_miser 38.0 -train298_ford_mustang_ii 13.0 -train299_saab_99e 25.0 -train300_fiat_124b 30.0 diff --git a/creg/test_data/iris.testfeat b/creg/test_data/iris.testfeat deleted file mode 100644 index f7528f81..00000000 --- a/creg/test_data/iris.testfeat +++ /dev/null @@ -1,50 +0,0 @@ -100 {"sepal-length": 4.9, "sepal-width": 2.5, "petal-length": 4.5, "petal-width": 1.7} -101 {"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.0} -102 {"sepal-length": 4.4, "sepal-width": 3.0, "petal-length": 1.3, "petal-width": 0.2} -103 {"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2} -104 {"sepal-length": 5.0, "sepal-width": 3.0, "petal-length": 1.6, "petal-width": 0.2} -105 {"sepal-length": 5.1, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2} -106 {"sepal-length": 5.5, "sepal-width": 2.3, "petal-length": 4.0, "petal-width": 1.3} -107 {"sepal-length": 5.5, "sepal-width": 2.6, "petal-length": 4.4, "petal-width": 1.2} -108 {"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.7, "petal-width": 0.2} -109 {"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.7, "petal-width": 1.0} -110 {"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.0, "petal-width": 1.7} -111 {"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.2} -112 {"sepal-length": 5.5, "sepal-width": 4.2, "petal-length": 1.4, "petal-width": 0.2} -113 {"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.5} -114 {"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1} -115 {"sepal-length": 7.7, "sepal-width": 2.6, "petal-length": 6.9, "petal-width": 2.3} -116 {"sepal-length": 5.0, "sepal-width": 3.6, "petal-length": 1.4, "petal-width": 0.2} -117 {"sepal-length": 6.3, "sepal-width": 2.3, "petal-length": 4.4, "petal-width": 1.3} -118 {"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.1} -119 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9} -120 {"sepal-length": 5.2, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.4} -121 {"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.6, "petal-width": 0.6} -122 {"sepal-length": 5.0, "sepal-width": 3.2, "petal-length": 1.2, "petal-width": 0.2} -123 {"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.3} -124 {"sepal-length": 5.5, "sepal-width": 2.5, "petal-length": 4.0, "petal-width": 1.3} -125 {"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5} -126 {"sepal-length": 6.6, "sepal-width": 3.0, "petal-length": 4.4, "petal-width": 1.4} -127 {"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.6, "petal-width": 0.2} -128 {"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 5.1, "petal-width": 1.8} -129 {"sepal-length": 6.2, "sepal-width": 3.4, "petal-length": 5.4, "petal-width": 2.3} -130 {"sepal-length": 5.6, "sepal-width": 2.8, "petal-length": 4.9, "petal-width": 2.0} -131 {"sepal-length": 5.7, "sepal-width": 2.9, "petal-length": 4.2, "petal-width": 1.3} -132 {"sepal-length": 6.2, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3} -133 {"sepal-length": 6.0, "sepal-width": 3.4, "petal-length": 4.5, "petal-width": 1.6} -134 {"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.7, "petal-width": 0.4} -135 
{"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 6.0, "petal-width": 2.5} -136 {"sepal-length": 6.5, "sepal-width": 3.2, "petal-length": 5.1, "petal-width": 2.0} -137 {"sepal-length": 5.1, "sepal-width": 2.5, "petal-length": 3.0, "petal-width": 1.1} -138 {"sepal-length": 4.3, "sepal-width": 3.0, "petal-length": 1.1, "petal-width": 0.1} -139 {"sepal-length": 5.7, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 2.0} -140 {"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 5.0, "petal-width": 1.5} -141 {"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 5.3, "petal-width": 2.3} -142 {"sepal-length": 6.5, "sepal-width": 2.8, "petal-length": 4.6, "petal-width": 1.5} -143 {"sepal-length": 5.5, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.2} -144 {"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2} -145 {"sepal-length": 4.6, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.3} -146 {"sepal-length": 5.7, "sepal-width": 2.6, "petal-length": 3.5, "petal-width": 1.0} -147 {"sepal-length": 5.8, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 2.4} -148 {"sepal-length": 7.7, "sepal-width": 2.8, "petal-length": 6.7, "petal-width": 2.0} -149 {"sepal-length": 6.3, "sepal-width": 2.9, "petal-length": 5.6, "petal-width": 1.8} diff --git a/creg/test_data/iris.testresp b/creg/test_data/iris.testresp deleted file mode 100644 index 0952e4da..00000000 --- a/creg/test_data/iris.testresp +++ /dev/null @@ -1,50 +0,0 @@ -100 Iris-virginica -101 Iris-virginica -102 Iris-setosa -103 Iris-setosa -104 Iris-setosa -105 Iris-setosa -106 Iris-versicolor -107 Iris-versicolor -108 Iris-setosa -109 Iris-versicolor -110 Iris-versicolor -111 Iris-virginica -112 Iris-setosa -113 Iris-versicolor -114 Iris-setosa -115 Iris-virginica -116 Iris-setosa -117 Iris-versicolor -118 Iris-virginica -119 Iris-virginica -120 Iris-versicolor -121 Iris-setosa -122 Iris-setosa -123 Iris-virginica -124 Iris-versicolor -125 Iris-versicolor -126 Iris-versicolor -127 Iris-setosa -128 Iris-virginica -129 Iris-virginica -130 Iris-virginica -131 Iris-versicolor -132 Iris-versicolor -133 Iris-versicolor -134 Iris-setosa -135 Iris-virginica -136 Iris-virginica -137 Iris-versicolor -138 Iris-setosa -139 Iris-virginica -140 Iris-virginica -141 Iris-virginica -142 Iris-versicolor -143 Iris-setosa -144 Iris-setosa -145 Iris-setosa -146 Iris-versicolor -147 Iris-virginica -148 Iris-virginica -149 Iris-virginica diff --git a/creg/test_data/iris.trainfeat b/creg/test_data/iris.trainfeat deleted file mode 100644 index a930a446..00000000 --- a/creg/test_data/iris.trainfeat +++ /dev/null @@ -1,100 +0,0 @@ -0 {"sepal-length": 5.4, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5} -1 {"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.4} -2 {"sepal-length": 5.0, "sepal-width": 3.3, "petal-length": 1.4, "petal-width": 0.2} -3 {"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.5, "petal-width": 1.3} -4 {"sepal-length": 6.4, "sepal-width": 3.1, "petal-length": 5.5, "petal-width": 1.8} -5 {"sepal-length": 7.9, "sepal-width": 3.8, "petal-length": 6.4, "petal-width": 2.0} -6 {"sepal-length": 5.9, "sepal-width": 3.2, "petal-length": 4.8, "petal-width": 1.8} -7 {"sepal-length": 6.7, "sepal-width": 2.5, "petal-length": 5.8, "petal-width": 1.8} -8 {"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.4, "petal-width": 1.4} -9 {"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 4.9, "petal-width": 1.5} -10 {"sepal-length": 6.1, 
"sepal-width": 2.9, "petal-length": 4.7, "petal-width": 1.4} -11 {"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 4.7, "petal-width": 1.6} -12 {"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.7, "petal-width": 1.5} -13 {"sepal-length": 6.2, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.8} -14 {"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.3} -15 {"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.3, "petal-width": 0.4} -16 {"sepal-length": 7.4, "sepal-width": 2.8, "petal-length": 6.1, "petal-width": 1.9} -17 {"sepal-length": 7.2, "sepal-width": 3.2, "petal-length": 6.0, "petal-width": 1.8} -18 {"sepal-length": 5.7, "sepal-width": 3.8, "petal-length": 1.7, "petal-width": 0.3} -19 {"sepal-length": 4.5, "sepal-width": 2.3, "petal-length": 1.3, "petal-width": 0.3} -20 {"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.1, "petal-width": 1.3} -21 {"sepal-length": 6.8, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 2.1} -22 {"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 2.2} -23 {"sepal-length": 4.4, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2} -24 {"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 1.9} -25 {"sepal-length": 4.4, "sepal-width": 2.9, "petal-length": 1.4, "petal-width": 0.2} -26 {"sepal-length": 4.9, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.2} -27 {"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.4} -28 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.2} -29 {"sepal-length": 5.6, "sepal-width": 2.5, "petal-length": 3.9, "petal-width": 1.1} -30 {"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.3} -31 {"sepal-length": 5.6, "sepal-width": 2.7, "petal-length": 4.2, "petal-width": 1.3} -32 {"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.2} -33 {"sepal-length": 6.4, "sepal-width": 2.7, "petal-length": 5.3, "petal-width": 1.9} -34 {"sepal-length": 5.8, "sepal-width": 4.0, "petal-length": 1.2, "petal-width": 0.2} -35 {"sepal-length": 5.2, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.2} -36 {"sepal-length": 7.6, "sepal-width": 3.0, "petal-length": 6.6, "petal-width": 2.1} -37 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9} -38 {"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 4.0, "petal-width": 1.0} -39 {"sepal-length": 7.7, "sepal-width": 3.0, "petal-length": 6.1, "petal-width": 2.3} -40 {"sepal-length": 5.1, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.4} -41 {"sepal-length": 6.1, "sepal-width": 2.6, "petal-length": 5.6, "petal-width": 1.4} -42 {"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 5.6, "petal-width": 2.4} -43 {"sepal-length": 7.7, "sepal-width": 3.8, "petal-length": 6.7, "petal-width": 2.2} -44 {"sepal-length": 5.1, "sepal-width": 3.3, "petal-length": 1.7, "petal-width": 0.5} -45 {"sepal-length": 6.3, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 1.5} -46 {"sepal-length": 5.0, "sepal-width": 2.0, "petal-length": 3.5, "petal-width": 1.0} -47 {"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.5, "petal-width": 0.3} -48 {"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1} -49 {"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.9, "petal-width": 1.8} -50 {"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.1} -51 
{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 1.8} -52 {"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.7, "petal-width": 1.2} -53 {"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.0, "petal-width": 1.3} -54 {"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1} -55 {"sepal-length": 6.8, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.4} -56 {"sepal-length": 6.3, "sepal-width": 2.7, "petal-length": 4.9, "petal-width": 1.8} -57 {"sepal-length": 4.6, "sepal-width": 3.2, "petal-length": 1.4, "petal-width": 0.2} -58 {"sepal-length": 6.3, "sepal-width": 3.4, "petal-length": 5.6, "petal-width": 2.4} -59 {"sepal-length": 5.7, "sepal-width": 4.4, "petal-length": 1.5, "petal-width": 0.4} -60 {"sepal-length": 6.4, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3} -61 {"sepal-length": 7.2, "sepal-width": 3.6, "petal-length": 6.1, "petal-width": 2.5} -62 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 4.1, "petal-width": 1.0} -63 {"sepal-length": 6.0, "sepal-width": 3.0, "petal-length": 4.8, "petal-width": 1.8} -64 {"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.6, "petal-width": 0.2} -65 {"sepal-length": 6.9, "sepal-width": 3.2, "petal-length": 5.7, "petal-width": 2.3} -66 {"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 4.5, "petal-width": 1.5} -67 {"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.4, "petal-width": 2.1} -68 {"sepal-length": 5.2, "sepal-width": 3.5, "petal-length": 1.5, "petal-width": 0.2} -69 {"sepal-length": 5.3, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2} -70 {"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.8, "petal-width": 1.1} -71 {"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.9, "petal-width": 0.2} -72 {"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.1, "petal-width": 1.3} -73 {"sepal-length": 4.9, "sepal-width": 2.4, "petal-length": 3.3, "petal-width": 1.0} -74 {"sepal-length": 6.2, "sepal-width": 2.2, "petal-length": 4.5, "petal-width": 1.5} -75 {"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.5} -76 {"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.6, "petal-width": 1.4} -77 {"sepal-length": 4.6, "sepal-width": 3.6, "petal-length": 1.0, "petal-width": 0.2} -78 {"sepal-length": 7.0, "sepal-width": 3.2, "petal-length": 4.7, "petal-width": 1.4} -79 {"sepal-length": 6.6, "sepal-width": 2.9, "petal-length": 4.6, "petal-width": 1.3} -80 {"sepal-length": 5.4, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2} -81 {"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.3} -82 {"sepal-length": 7.2, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 1.6} -83 {"sepal-length": 7.1, "sepal-width": 3.0, "petal-length": 5.9, "petal-width": 2.1} -84 {"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 4.9, "petal-width": 1.5} -85 {"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.1} -86 {"sepal-length": 7.3, "sepal-width": 2.9, "petal-length": 6.3, "petal-width": 1.8} -87 {"sepal-length": 6.0, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.6} -88 {"sepal-length": 6.8, "sepal-width": 3.2, "petal-length": 5.9, "petal-width": 2.3} -89 {"sepal-length": 4.6, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.2} -90 {"sepal-length": 4.8, "sepal-width": 3.1, "petal-length": 1.6, "petal-width": 0.2} -91 {"sepal-length": 5.0, "sepal-width": 2.3, "petal-length": 3.3, 
"petal-width": 1.0} -92 {"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.1, "petal-width": 2.3} -93 {"sepal-length": 5.7, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.2} -94 {"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.9, "petal-width": 0.4} -95 {"sepal-length": 6.0, "sepal-width": 2.9, "petal-length": 4.5, "petal-width": 1.5} -96 {"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.2} -97 {"sepal-length": 5.2, "sepal-width": 4.1, "petal-length": 1.5, "petal-width": 0.1} -98 {"sepal-length": 5.6, "sepal-width": 2.9, "petal-length": 3.6, "petal-width": 1.3} -99 {"sepal-length": 5.8, "sepal-width": 2.6, "petal-length": 4.0, "petal-width": 1.2} diff --git a/creg/test_data/iris.trainresp b/creg/test_data/iris.trainresp deleted file mode 100644 index d77bc6a2..00000000 --- a/creg/test_data/iris.trainresp +++ /dev/null @@ -1,100 +0,0 @@ -0 Iris-versicolor -1 Iris-setosa -2 Iris-setosa -3 Iris-versicolor -4 Iris-virginica -5 Iris-virginica -6 Iris-versicolor -7 Iris-virginica -8 Iris-versicolor -9 Iris-versicolor -10 Iris-versicolor -11 Iris-versicolor -12 Iris-versicolor -13 Iris-virginica -14 Iris-setosa -15 Iris-setosa -16 Iris-virginica -17 Iris-virginica -18 Iris-setosa -19 Iris-setosa -20 Iris-versicolor -21 Iris-virginica -22 Iris-virginica -23 Iris-setosa -24 Iris-virginica -25 Iris-setosa -26 Iris-setosa -27 Iris-setosa -28 Iris-versicolor -29 Iris-versicolor -30 Iris-setosa -31 Iris-versicolor -32 Iris-setosa -33 Iris-virginica -34 Iris-setosa -35 Iris-setosa -36 Iris-virginica -37 Iris-virginica -38 Iris-versicolor -39 Iris-virginica -40 Iris-setosa -41 Iris-virginica -42 Iris-virginica -43 Iris-virginica -44 Iris-setosa -45 Iris-virginica -46 Iris-versicolor -47 Iris-setosa -48 Iris-setosa -49 Iris-virginica -50 Iris-virginica -51 Iris-virginica -52 Iris-versicolor -53 Iris-versicolor -54 Iris-setosa -55 Iris-versicolor -56 Iris-virginica -57 Iris-setosa -58 Iris-virginica -59 Iris-setosa -60 Iris-versicolor -61 Iris-virginica -62 Iris-versicolor -63 Iris-virginica -64 Iris-setosa -65 Iris-virginica -66 Iris-versicolor -67 Iris-virginica -68 Iris-setosa -69 Iris-setosa -70 Iris-versicolor -71 Iris-setosa -72 Iris-versicolor -73 Iris-versicolor -74 Iris-versicolor -75 Iris-virginica -76 Iris-versicolor -77 Iris-setosa -78 Iris-versicolor -79 Iris-versicolor -80 Iris-setosa -81 Iris-setosa -82 Iris-virginica -83 Iris-virginica -84 Iris-versicolor -85 Iris-setosa -86 Iris-virginica -87 Iris-versicolor -88 Iris-virginica -89 Iris-setosa -90 Iris-setosa -91 Iris-versicolor -92 Iris-virginica -93 Iris-versicolor -94 Iris-setosa -95 Iris-versicolor -96 Iris-setosa -97 Iris-setosa -98 Iris-versicolor -99 Iris-versicolor diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 63939221..ed79aaf0 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -84,7 +84,7 @@ class ActiveChart { const GrammarIter* ni = gptr_->Extend(symbol); if (!ni) return; Hypergraph::TailNodeVector na(ant_nodes_.size() + 1); - for (int i = 0; i < ant_nodes_.size(); ++i) + for (unsigned i = 0; i < ant_nodes_.size(); ++i) na[i] = ant_nodes_[i]; na[ant_nodes_.size()] = node_index; out_cell->push_back(ActiveItem(ni, na, lattice_cost)); @@ -154,7 +154,7 @@ PassiveChart::PassiveChart(const string& goal, goal_idx_(-1), lc_fid_(FD::Convert("LatticeCost")) { act_chart_.resize(grammars_.size()); - for (int i = 0; i < grammars_.size(); ++i) + for (unsigned i = 0; i < grammars_.size(); ++i) act_chart_[i] = new 
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index cd7e66e9..d6ee651a 100644
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -229,13 +229,13 @@ template <> struct null_for<RHS> {
   static RHS null;
 };
-*/
 template <>
-BinRhs null_traits<BinRhs>::null(std::numeric_limits<int>::min(),std::numeric_limits<int>::min());
+BinRhs null_traits<BinRhs>::xnull(std::numeric_limits<int>::min(),std::numeric_limits<int>::min());
 template <>
-RHS null_traits<RHS>::null(1,std::numeric_limits<int>::min());
+RHS null_traits<RHS>::xnull(1,std::numeric_limits<int>::min());
+*/
 
 template <class Rhs>
 struct add_virtual_rules {
@@ -250,7 +250,7 @@ struct add_virtual_rules {
   R2L rhs2lhs; // an rhs maps to this -virtntid, or original id if length 1
   bool name_nts;
   add_virtual_rules(CFG &cfg,bool name_nts=false) : nts(cfg.nts),rules(cfg.rules),newnt(-nts.size()),newruleid(rules.size()),name_nts(name_nts) {
-    HASH_MAP_EMPTY(rhs2lhs,null_traits<Rhs>::null);
+    HASH_MAP_EMPTY(rhs2lhs,null_traits<Rhs>::xnull);
   }
   NTHandle get_virt(Rhs const& r) {
     NTHandle nt=get_default(rhs2lhs,r,newnt);
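The cfg.cc hunk renames the null_traits sentinel from null to xnull and extends the adjacent comment block over the old specializations; the patch gives no rationale, but the rename plausibly sidesteps a clash with another `null` name. Reduced to a sketch, the pattern is a per-type "empty slot" marker consumed by HASH_MAP_EMPTY (BinRhs as in cfg.cc; the surrounding CFG types are assumed):

#include <limits>
#include <utility>

typedef std::pair<int, int> BinRhs;  // binary right-hand side, as in cfg.cc

// A per-type sentinel meaning "no entry"; the hash map behind
// HASH_MAP_EMPTY reserves exactly such a value as its empty key.
template <class T> struct null_traits {
  static T xnull;
};
template <>
BinRhs null_traits<BinRhs>::xnull(std::numeric_limits<int>::min(),
                                  std::numeric_limits<int>::min());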
diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h
index e93731c3..5544920e 100644
--- a/decoder/ff_bleu.h
+++ b/decoder/ff_bleu.h
@@ -6,7 +6,6 @@
 #include "hg.h"
 #include "ff.h"
-#include "config.h"
 
 class BLEUModelImpl;
diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc
index 3daa85ac..43528405 100644
--- a/decoder/ff_dwarf.cc
+++ b/decoder/ff_dwarf.cc
@@ -519,7 +519,7 @@ void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const m
   while (idx>=0) {
     if (l[idx].size()>0) {
       if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) {
-        *lfw++;
+        lfw++;
       }
     }
     idx-=l[idx][0].dist2next;
@@ -528,7 +528,7 @@ void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const m
   while (idx<l.size()) {
     if (l[idx].size()>0) {
       if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) {
-        *rfw++;
+        rfw++;
      }
     }
     idx+=l[idx][0].dist2next;
@@ -787,7 +787,7 @@ bool Dwarf::generalizeOrientation(CountTable* table, const std::map<WordID,WordI
       }
     }
   }
-  
+  return false; // no idea if this is right
 }
diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index a4b26f7c..7a84add7 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -373,15 +373,17 @@ boost::shared_ptr<FeatureFunction> KLanguageModelFactory::Create(std::string par
     if (!RecognizeBinary(filename.c_str(), m)) m = HASH_PROBING;
 
   switch (m) {
-    case HASH_PROBING:
+    case PROBING:
       return CreateModel<ProbingModel>(param);
-    case TRIE_SORTED:
+    case REST_PROBING:
+      return CreateModel<RestProbingModel>(param);
+    case TRIE:
       return CreateModel<TrieModel>(param);
-    case ARRAY_TRIE_SORTED:
+    case ARRAY_TRIE:
       return CreateModel<ArrayTrieModel>(param);
-    case QUANT_TRIE_SORTED:
+    case QUANT_TRIE:
       return CreateModel<QuantTrieModel>(param);
-    case QUANT_ARRAY_TRIE_SORTED:
+    case QUANT_ARRAY_TRIE:
       return CreateModel<QuantArrayTrieModel>(param);
     default:
       UTIL_THROW(util::Exception, "Unrecognized kenlm binary file type " << (unsigned)m);
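Three distinct fixes sit above: ff_bleu.h drops an unconditional config.h include (ff_lm.h below keeps its include but guards it with HAVE_CONFIG_H instead), ff_klm.cc is resynced with kenlm's renamed ModelType enumerators and gains a RestProbingModel case, and ff_dwarf.cc loses the dereference in *lfw++ / *rfw++ and gains a return value on generalizeOrientation's fall-through path. The dereference fix is a precedence point worth spelling out: postfix ++ binds tighter than unary *, so *p++ reads and discards the pointee while advancing the pointer. A standalone illustration (not from the patch):

#include <cassert>

int main() {
  int a[2] = {10, 20};
  int* p = a;
  *p++;                // parses as *(p++): a[0] is read and discarded
                       // (this line itself warns -Wunused-value, hence the fix)
  assert(p == a + 1);  // only the pointer advanced
  assert(a[0] == 10);  // the value is untouched; (*p)++ would modify it
  return 0;
}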
// Chris Dyer, Mar 2011 diff --git a/decoder/hg.cc b/decoder/hg.cc index 180986d7..0dcbe91f 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -56,7 +56,7 @@ struct less_ve { Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges(EdgeProbs const& ev) { - for (int i=0;i<nodes_.size();++i) { + for (unsigned i=0;i<nodes_.size();++i) { EdgesVector &ie=nodes_[i].in_edges_; std::sort(ie.begin(),ie.end(),less_ve(ev)); } @@ -70,9 +70,9 @@ prob_t Hypergraph::ComputeEdgeViterbi(EdgeProbs *ev) const { } prob_t Hypergraph::ComputeEdgeViterbi(NodeProbs const& nv,EdgeProbs *ev) const { - int ne=edges_.size(); + unsigned ne=edges_.size(); ev->resize(ne); - for (int i=0;i<ne;++i) { + for (unsigned i=0;i<ne;++i) { Edge const& e=edges_[i]; prob_t r=e.edge_prob_; TailNodeVector const& t=e.tail_nodes_; @@ -162,7 +162,7 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co SparseVector<prob_t>, ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2); posts->resize(edges_.size()); - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) (*posts)[i] = prob_t(pv.value(i)); return inside; } @@ -175,7 +175,7 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const { SparseVector<TropicalValue>, ViterbiTransitionEventWeightFunction>(*this, &pv); post->resize(edges_.size()); - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) (*post)[i] = pv.value(i).v_; return viterbi_weight.v_; } @@ -183,12 +183,12 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const { void Hypergraph::PushWeightsToSource(double scale) { vector<prob_t> posts; ComputeEdgePosteriors(scale, &posts); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); - for (int j = 0; j < node.out_edges_.size(); ++j) + for (unsigned j = 0; j < node.out_edges_.size(); ++j) z += posts[node.out_edges_[j]]; - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { edges_[node.out_edges_[j]].edge_prob_ = posts[node.out_edges_[j]] / z; } } @@ -201,7 +201,7 @@ struct vpusher : public vector<TropicalValue> { void operator()(int n,int /*ei*/,Hypergraph::Edge &e) const { Hypergraph::TailNodeVector const& t=e.tail_nodes_; prob_t p=e.edge_prob_; - for (int i=0;i<t.size();++i) + for (unsigned i=0;i<t.size();++i) p*=(*this)[t[i]].v_; e.feature_values_.set_value(fid,log(e.edge_prob_=p/(*this)[n].v_)); } @@ -229,12 +229,12 @@ prob_t Hypergraph::PushViterbiWeightsToGoal(int fid) { prob_t Hypergraph::PushWeightsToGoal(double scale) { vector<prob_t> posts; const prob_t inside_z = ComputeEdgePosteriors(scale, &posts); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); - for (int j = 0; j < node.in_edges_.size(); ++j) + for (unsigned j = 0; j < node.in_edges_.size(); ++j) z += posts[node.in_edges_[j]]; - for (int j = 0; j < node.in_edges_.size(); ++j) { + for (unsigned j = 0; j < node.in_edges_.size(); ++j) { edges_[node.in_edges_[j]].edge_prob_ = posts[node.in_edges_[j]] / z; } } @@ -257,7 +257,7 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith if (run_inside_algorithm) { const EdgeExistsWeightFunction wf(prune_edge); vector<Boolean> reachable; - bool goal_derivable = Inside/* <Boolean, EdgeExistsWeightFunction> */(*this, &reachable, wf); 
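PushWeightsToSource and PushWeightsToGoal above share one recipe: compute a posterior for every edge at the chosen scale, then renormalize the edges incident to each node so that they sum to one locally. A condensed sketch of the goal-direction step, with double standing in for prob_t and plain structs for the hypergraph (all names here are illustrative, not cdec's):

    #include <vector>

    struct Node { std::vector<int> in_edges; };
    struct Edge { double edge_prob; };

    // Renormalize each node's incoming edges by their posterior mass
    // (cf. PushWeightsToGoal); assumes every node carries nonzero mass.
    void PushToGoal(std::vector<Node>& nodes, std::vector<Edge>& edges,
                    const std::vector<double>& posts) {
      for (unsigned i = 0; i < nodes.size(); ++i) {
        double z = 0;
        for (unsigned j = 0; j < nodes[i].in_edges.size(); ++j)
          z += posts[nodes[i].in_edges[j]];
        for (unsigned j = 0; j < nodes[i].in_edges.size(); ++j)
          edges[nodes[i].in_edges[j]].edge_prob =
              posts[nodes[i].in_edges[j]] / z;
      }
    }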
+ bool goal_derivable = Inside<Boolean, EdgeExistsWeightFunction>(*this, &reachable, wf); if (!goal_derivable) { edges_.clear(); nodes_.clear(); @@ -266,11 +266,11 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith } assert(reachable.size() == nodes_.size()); - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { bool prune = prune_edge[i]; if (!prune) { const Edge& edge = edges_[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { if (!reachable[edge.tail_nodes_[j]]) { prune = true; break; @@ -299,7 +299,7 @@ void Hypergraph::MarginPrune(vector<prob_t> const& io,prob_t cutoff,vector<bool> cerr<<"Finishing prune for "<<prune.size()<<" edges; CUTOFF=" << cutoff << endl; } unsigned pc = 0; - for (int i = 0; i < io.size(); ++i) { + for (unsigned i = 0; i < io.size(); ++i) { cutoff*=creep; // start more permissive, then become less generous. this is barely more than 1. we want to do this because it's a disaster if something lower in a derivation tree is deleted, but the higher thing remains (unless safe_inside) const bool prune_edge = (io[i] < cutoff); if (prune_edge) { @@ -325,11 +325,11 @@ bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask* assert(!use_beam||alpha>0); assert(!use_density||density>=1); assert(!use_sum_prod_semiring||scale>0); - int rnum=edges_.size(); + unsigned rnum=edges_.size(); if (use_density) { const int plen = ViterbiPathLength(*this); vector<WordID> bp; - rnum = min(rnum, static_cast<int>(density * static_cast<double>(plen))); + rnum = min(rnum, static_cast<unsigned>(density * plen)); cerr << "Density pruning: keep "<<rnum<<" of "<<edges_.size()<<" edges (viterbi = "<<plen<<" edges)"<<endl; if (rnum == edges_.size()) { cerr << "No pruning required: denisty already sufficient\n"; @@ -357,7 +357,7 @@ bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask* if (use_beam) { prob_t best=prob_t::One(); if (use_sum_prod_semiring) { - for (int i = 0; i < mm.size(); ++i) + for (unsigned i = 0; i < mm.size(); ++i) if (mm[i] > best) best = mm[i]; } prob_t beam_cut=best*prob_t::exp(-alpha); @@ -386,10 +386,10 @@ void Hypergraph::PrintGraphviz() const { << "\" shape=\"rect\"];\n"; Hypergraph::TailNodeVector indorder(edge.tail_nodes_.size(), 0); int ntc = 0; - for (int i = 0; i < edge.rule_->e_.size(); ++i) { + for (unsigned i = 0; i < edge.rule_->e_.size(); ++i) { if (edge.rule_->e_[i] <= 0) indorder[ntc++] = 1 + (-1 * edge.rule_->e_[i]); } - for (int i = 0; i < edge.tail_nodes_.size(); ++i) { + for (unsigned i = 0; i < edge.tail_nodes_.size(); ++i) { cerr << " " << edge.tail_nodes_[i] << " -> A_" << ei; if (edge.tail_nodes_.size() > 1) { cerr << " [label=\"" << indorder[i] << "\"]"; @@ -414,8 +414,8 @@ void Hypergraph::PrintGraphviz() const { void Hypergraph::Union(const Hypergraph& other) { if (&other == this) return; if (nodes_.empty()) { nodes_ = other.nodes_; edges_ = other.edges_; return; } - int noff = nodes_.size(); - int eoff = edges_.size(); + unsigned noff = nodes_.size(); + unsigned eoff = edges_.size(); int ogoal = other.nodes_.size() - 1; int cgoal = noff - 1; // keep a single goal node, so add nodes.size - 1 @@ -428,15 +428,15 @@ void Hypergraph::Union(const Hypergraph& other) { Node& cn = nodes_[i + noff]; cn.id_ = i + noff; cn.in_edges_.resize(on.in_edges_.size()); - for (int j = 0; j < on.in_edges_.size(); ++j) + for (unsigned j = 0; j < on.in_edges_.size(); ++j) cn.in_edges_[j] = 
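The creeping cutoff in MarginPrune above deserves a gloss: the threshold is multiplied by a factor barely above one at every edge, so edges visited early (lower in the forest, given bottom-up order) face a laxer bar than edges visited late. As the in-code comment says, this avoids deleting something deep in a derivation while keeping what was built on top of it. A stripped-down sketch under those assumptions:

    #include <vector>

    // 'io' holds inside*outside scores, one per edge, in bottom-up order.
    std::vector<bool> MarginPruneSketch(const std::vector<double>& io,
                                        double cutoff,
                                        double creep = 1.0000001) {
      std::vector<bool> prune(io.size());
      for (unsigned i = 0; i < io.size(); ++i) {
        cutoff *= creep;               // the bar rises as we move up
        prune[i] = (io[i] < cutoff);
      }
      return prune;
    }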
on.in_edges_[j] + eoff; cn.out_edges_.resize(on.out_edges_.size()); - for (int j = 0; j < on.out_edges_.size(); ++j) + for (unsigned j = 0; j < on.out_edges_.size(); ++j) cn.out_edges_[j] = on.out_edges_[j] + eoff; } - for (int i = 0; i < other.edges_.size(); ++i) { + for (unsigned i = 0; i < other.edges_.size(); ++i) { const Edge& oe = other.edges_[i]; Edge& ce = edges_[i + eoff]; ce.id_ = i + eoff; @@ -449,7 +449,7 @@ void Hypergraph::Union(const Hypergraph& other) { ce.head_node_ = oe.head_node_ + noff; } ce.tail_nodes_.resize(oe.tail_nodes_.size()); - for (int j = 0; j < oe.tail_nodes_.size(); ++j) + for (unsigned j = 0; j < oe.tail_nodes_.size(); ++j) ce.tail_nodes_[j] = oe.tail_nodes_[j] + noff; } @@ -460,16 +460,6 @@ void Hypergraph::PruneUnreachable(int goal_node_id) { TopologicallySortNodesAndEdges(goal_node_id, NULL); } -void Hypergraph::RemoveNoncoaccessibleStates(int goal_node_id) { - if (goal_node_id < 0) goal_node_id += nodes_.size(); - assert(goal_node_id >= 0); - assert(goal_node_id < nodes_.size()); - - // I don't get it: does TopologicallySortNodesAndEdges not remove things that don't connect to goal_index? it uses goal_index just to order things? InsideOutside pruning can do this anyway (nearly infinite beam, viterbi semiring) - // TODO finish implementation - abort(); -} - struct DFSContext { int node; int edge_iter; @@ -559,7 +549,7 @@ void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, } #ifndef HG_EDGES_TOPO_SORTED int ec = 0; - for (int i = 0; i < reloc_edge.size(); ++i) { + for (unsigned i = 0; i < reloc_edge.size(); ++i) { int& cp = reloc_edge[i]; if (cp >= 0) { cp = ec++; } } @@ -576,34 +566,34 @@ void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, cerr << endl; #endif bool no_op = true; - for (int i = 0; i < reloc_node.size() && no_op; ++i) - if (reloc_node[i] != i) no_op = false; - for (int i = 0; i < reloc_edge.size() && no_op; ++i) - if (reloc_edge[i] != i) no_op = false; + for (unsigned i = 0; i < reloc_node.size() && no_op; ++i) + if (reloc_node[i] != static_cast<int>(i)) no_op = false; + for (unsigned i = 0; i < reloc_edge.size() && no_op; ++i) + if (reloc_edge[i] != static_cast<int>(i)) no_op = false; if (no_op) return; - for (int i = 0; i < reloc_node.size(); ++i) { + for (unsigned i = 0; i < reloc_node.size(); ++i) { Node& node = nodes_[i]; node.id_ = reloc_node[i]; int c = 0; - for (int j = 0; j < node.in_edges_.size(); ++j) { + for (unsigned j = 0; j < node.in_edges_.size(); ++j) { const int new_index = reloc_edge[node.in_edges_[j]]; if (new_index >= 0) node.in_edges_[c++] = new_index; } node.in_edges_.resize(c); c = 0; - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { const int new_index = reloc_edge[node.out_edges_[j]]; if (new_index >= 0) node.out_edges_[c++] = new_index; } node.out_edges_.resize(c); } - for (int i = 0; i < reloc_edge.size(); ++i) { + for (unsigned i = 0; i < reloc_edge.size(); ++i) { Edge& edge = edges_[i]; edge.id_ = reloc_edge[i]; edge.head_node_ = reloc_node[edge.head_node_]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) edge.tail_nodes_[j] = reloc_node[edge.tail_nodes_[j]]; } edges_.erase(remove_if(edges_.begin(), edges_.end(), BadId<Edge>()), edges_.end()); @@ -623,7 +613,7 @@ void Hypergraph::EpsilonRemove(WordID eps) { kUnaryRule.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); } vector<bool> kill(edges_.size(), false); - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 
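TopologicallySortNodesAndEdges communicates deletions and reorderings through the reloc_node/reloc_edge arrays above: reloc[i] holds the new index of item i, or a negative value if it was dropped, and survivors are compacted with a running counter before every stored index is remapped through the table. The compaction idiom, isolated:

    #include <vector>

    // reloc[i] < 0 marks item i deleted; survivors are renumbered densely
    // in visitation order (mirrors the reloc_edge loop above).
    void Compact(std::vector<int>& reloc) {
      int next = 0;
      for (unsigned i = 0; i < reloc.size(); ++i)
        if (reloc[i] >= 0) reloc[i] = next++;
    }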
0; i < edges_.size(); ++i) { const Edge& edge = edges_[i]; if (edge.tail_nodes_.empty() && edge.rule_->f_.size() == 1 && @@ -637,7 +627,7 @@ void Hypergraph::EpsilonRemove(WordID eps) { // same sequence via different paths through the input forest // this needs to be investigated and fixed } else { - for (int j = 0; j < node.out_edges_.size(); ++j) + for (unsigned j = 0; j < node.out_edges_.size(); ++j) edges_[node.out_edges_[j]].feature_values_ += edge.feature_values_; // cerr << "PROMOTED " << edge.feature_values_ << endl; } @@ -646,19 +636,19 @@ void Hypergraph::EpsilonRemove(WordID eps) { } bool created_eps = false; PruneEdges(kill); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Node& node = nodes_[i]; if (node.in_edges_.empty()) { - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { Edge& edge = edges_[node.out_edges_[j]]; if (edge.rule_->Arity() == 2) { assert(edge.rule_->f_.size() == 2); assert(edge.rule_->e_.size() == 2); edge.rule_ = kUnaryRule; - int cur = node.id_; + unsigned cur = node.id_; int t = -1; assert(edge.tail_nodes_.size() == 2); - for (int i = 0; i < 2; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; } + for (unsigned i = 0; i < 2u; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; } assert(t != -1); edge.tail_nodes_.resize(1); edge.tail_nodes_[0] = t; @@ -712,14 +702,14 @@ HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const { HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges,NodeMask &kn) const { kn.clear(); kn.resize(nodes_.size()); - for (int n=0;n<nodes_.size();++n) { // this nested iteration gives us edges in topo order too + for (unsigned n=0;n<nodes_.size();++n) { // this nested iteration gives us edges in topo order too EdgesVector const& es=nodes_[n].in_edges_; - for (int i=0;i<es.size();++i) { + for (unsigned i=0;i<es.size();++i) { int ei=es[i]; if (keep_edges[ei]) { const Edge& e = edges_[ei]; TailNodeVector const& tails=e.tail_nodes_; - for (int j=0;j<e.tail_nodes_.size();++j) { + for (unsigned j=0;j<e.tail_nodes_.size();++j) { if (!kn[tails[j]]) { keep_edges[ei]=false; goto next_edge; @@ -738,11 +728,11 @@ HypergraphP Hypergraph::CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask indices_after e2(keep_edges); HypergraphP ret(new Hypergraph(n2.n_kept, e2.n_kept, is_linear_chain_)); Nodes &rn=ret->nodes_; - for (int i=0;i<nodes_.size();++i) + for (unsigned i=0;i<nodes_.size();++i) if (n2.keeping(i)) rn[n2[i]].copy_reindex(nodes_[i],n2,e2); Edges &re=ret->edges_; - for (int i=0;i<edges_.size();++i) + for (unsigned i=0;i<edges_.size();++i) if (e2.keeping(i)) re[e2[i]].copy_reindex(edges_[i],n2,e2); return ret; @@ -750,11 +740,11 @@ HypergraphP Hypergraph::CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask void Hypergraph::TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const { - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { if (ke[i]) { const Edge& edge = edges_[i]; TailNodeVector const& tails=edge.tail_nodes_; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { if (!kn[tails[j]]) { ke[i]=false; goto next_edge; @@ -766,18 +756,18 @@ void Hypergraph::TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const } void Hypergraph::set_ids() { - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) edges_[i].id_=i; - for (int i = 0; i < nodes_.size(); 
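CreateEdgeSubset and TightenEdgeMask above enforce the same invariant: an edge may be kept only if every one of its tail nodes is kept. A compact restatement of that check without the goto, using plain masks:

    #include <vector>

    void Tighten(std::vector<bool>& keep_edge,
                 const std::vector<bool>& keep_node,
                 const std::vector<std::vector<int> >& tails) {
      for (unsigned i = 0; i < keep_edge.size(); ++i) {
        if (!keep_edge[i]) continue;
        for (unsigned j = 0; j < tails[i].size(); ++j)
          if (!keep_node[tails[i][j]]) { keep_edge[i] = false; break; }
      }
    }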
++i) + for (unsigned i = 0; i < nodes_.size(); ++i) nodes_[i].id_=i; } void Hypergraph::check_ids() const { - for (int i = 0; i < edges_.size(); ++i) - assert(edges_[i].id_==i); - for (int i = 0; i < nodes_.size(); ++i) - assert(nodes_[i].id_==i); + for (unsigned i = 0; i < edges_.size(); ++i) + assert(edges_[i].id_==static_cast<int>(i)); + for (unsigned i = 0; i < nodes_.size(); ++i) + assert(nodes_[i].id_==static_cast<int>(i)); } HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const { @@ -796,15 +786,15 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const set_ids(); # endif EdgeMask used(edges_.size()); - for (int i = 0; i < vit_edges.size(); ++i) + for (unsigned i = 0; i < vit_edges.size(); ++i) used[vit_edges[i]->id_]=true; return CreateEdgeSubset(used); #else map<int, int> old2new_node; int num_new_nodes = 0; - for (int i = 0; i < vit_edges.size(); ++i) { + for (unsigned i = 0; i < vit_edges.size(); ++i) { const Edge& edge = *vit_edges[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); if (old2new_node.count(edge.head_node_) == 0) { old2new_node[edge.head_node_] = num_new_nodes; ++num_new_nodes; @@ -820,7 +810,7 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const new_node.id_ = it->second; } - for (int i = 0; i < vit_edges.size(); ++i) { + for (unsigned i = 0; i < vit_edges.size(); ++i) { const Edge& old_edge = *vit_edges[i]; Edge& new_edge = out->edges_[i]; new_edge = old_edge; @@ -828,7 +818,7 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const const int new_head_node = old2new_node[old_edge.head_node_]; new_edge.head_node_ = new_head_node; out->nodes_[new_head_node].in_edges_.push_back(i); - for (int j = 0; j < old_edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < old_edge.tail_nodes_.size(); ++j) { const int new_tail_node = old2new_node[old_edge.tail_nodes_[j]]; new_edge.tail_nodes_[j] = new_tail_node; out->nodes_[new_tail_node].out_edges_.push_back(i); diff --git a/decoder/hg.h b/decoder/hg.h index dfa4ac6d..91d25f01 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -43,7 +43,7 @@ public: Hypergraph() : is_linear_chain_(false) {} // SmallVector is a fast, small vector<int> implementation for sizes <= 2 - typedef SmallVectorInt TailNodeVector; // indices in nodes_ + typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ typedef std::vector<int> EdgesVector; // indices in edges_ // TODO get rid of cat_? @@ -396,7 +396,7 @@ public: // (inner product) to set the edge probabilities template <class V> void Reweight(const V& weights) { - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { Edge& e = edges_[i]; e.edge_prob_.logeq(e.feature_values_.dot(weights)); } @@ -457,8 +457,6 @@ public: void PruneUnreachable(int goal_node_id); // DEPRECATED - void RemoveNoncoaccessibleStates(int goal_node_id = -1); - // remove edges from the hypergraph if prune_edge[edge_id] is true // note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept. 
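Reweight, shown in the hg.h hunk above, is the log-linear scoring step: each edge's probability is set to the exponential of the dot product between its feature vector and the weight vector (logeq assigns in log space, avoiding under- and overflow). In plain scalar form, as a sketch:

    #include <cmath>
    #include <vector>

    // p(e) = exp(f(e) . w), cf. e.edge_prob_.logeq(e.feature_values_.dot(weights));
    // assumes both vectors are dense and of equal length.
    double EdgeProb(const std::vector<double>& feats,
                    const std::vector<double>& w) {
      double dot = 0;
      for (unsigned i = 0; i < feats.size(); ++i) dot += feats[i] * w[i];
      return std::exp(dot);
    }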
void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false); @@ -524,7 +522,7 @@ public: template <class V> void visit_edges(V &v) { - for (int i=0;i<edges_.size();++i) + for (unsigned i=0;i<edges_.size();++i) v(edges_[i].head_node_,i,edges_[i]); } diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc index 8752838f..6e3bfee6 100644 --- a/decoder/hg_intersect.cc +++ b/decoder/hg_intersect.cc @@ -19,12 +19,12 @@ using namespace std; struct RuleFilter { unordered_map<vector<WordID>, bool, boost::hash<vector<WordID> > > exists_; bool true_lattice; - RuleFilter(const Lattice& target, int max_phrase_size) { + RuleFilter(const Lattice& target, unsigned max_phrase_size) { true_lattice = false; - for (int i = 0; i < target.size(); ++i) { + for (unsigned i = 0; i < target.size(); ++i) { vector<WordID> phrase; - int lim = min(static_cast<int>(target.size()), i + max_phrase_size); - for (int j = i; j < lim; ++j) { + const unsigned lim = min(static_cast<unsigned>(target.size()), i + max_phrase_size); + for (unsigned j = i; j < lim; ++j) { if (target[j].size() > 1) { true_lattice = true; break; } phrase.push_back(target[j][0].label); exists_[phrase] = true; @@ -37,10 +37,10 @@ struct RuleFilter { // TODO do some smarter filtering for lattices if (true_lattice) return false; // don't filter "true lattice" input const vector<WordID>& e = r.e(); - for (int i = 0; i < e.size(); ++i) { + for (unsigned i = 0; i < e.size(); ++i) { if (e[i] <= 0) continue; vector<WordID> phrase; - for (int j = i; j < e.size(); ++j) { + for (unsigned j = i; j < e.size(); ++j) { if (e[j] <= 0) break; phrase.push_back(e[j]); if (exists_.count(phrase) == 0) return true; @@ -55,7 +55,7 @@ static bool FastLinearIntersect(const Lattice& target, Hypergraph* hg) { vector<bool> prune(hg->edges_.size(), false); set<int> cov; map<const TRule*, TRulePtr> inverted_rules; - for (int i = 0; i < prune.size(); ++i) { + for (unsigned i = 0; i < prune.size(); ++i) { Hypergraph::Edge& edge = hg->edges_[i]; if (edge.Arity() == 0) { const int trg_index = edge.prev_i_; @@ -87,12 +87,12 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { vector<bool> rem(hg->edges_.size(), false); const RuleFilter filter(target, 15); // TODO make configurable - for (int i = 0; i < rem.size(); ++i) + for (unsigned i = 0; i < rem.size(); ++i) rem[i] = filter(*hg->edges_[i].rule_); hg->PruneEdges(rem, true); - const int nedges = hg->edges_.size(); - const int nnodes = hg->nodes_.size(); + const unsigned nedges = hg->edges_.size(); + const unsigned nnodes = hg->nodes_.size(); TextGrammar* g = new TextGrammar; GrammarPtr gp(g); @@ -100,7 +100,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { // each node in the translation forest becomes a "non-terminal" in the new // grammar, create the labels here const string kSEP = "_"; - for (int i = 0; i < nnodes; ++i) { + for (unsigned i = 0; i < nnodes; ++i) { const char* pstr = "CAT"; if (hg->nodes_[i].cat_ < 0) pstr = TD::Convert(-hg->nodes_[i].cat_); @@ -108,7 +108,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { } // construct the grammar - for (int i = 0; i < nedges; ++i) { + for (unsigned i = 0; i < nedges; ++i) { const Hypergraph::Edge& edge = hg->edges_[i]; const vector<WordID>& tgt = edge.rule_->e(); const vector<WordID>& src = edge.rule_->f(); @@ -122,7 +122,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { e.resize(src.size()); // parses using the source side! 
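RuleFilter above avoids rescanning the reference for every rule: it enumerates all reference substrings up to max_phrase_size once into a hash map, then rejects any rule containing a contiguous terminal span that never occurs in the table. The indexing step, sketched with strings in place of WordID vectors (a std::set stands in for the hash map):

    #include <set>
    #include <string>
    #include <vector>

    // Index every substring (phrase) of 'ref' up to length k,
    // mirroring RuleFilter's exists_ map.
    std::set<std::vector<std::string> > IndexPhrases(
        const std::vector<std::string>& ref, unsigned k) {
      std::set<std::vector<std::string> > exists;
      for (unsigned i = 0; i < ref.size(); ++i) {
        std::vector<std::string> phrase;
        for (unsigned j = i; j < ref.size() && j < i + k; ++j) {
          phrase.push_back(ref[j]);
          exists.insert(phrase);
        }
      }
      return exists;
    }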
Hypergraph::TailNodeVector tn(edge.tail_nodes_.size()); int ntc = 0; - for (int j = 0; j < tgt.size(); ++j) { + for (unsigned j = 0; j < tgt.size(); ++j) { const WordID& cur = tgt[j]; if (cur > 0) { f[j] = cur; @@ -133,7 +133,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { } } ntc = 0; - for (int j = 0; j < src.size(); ++j) { + for (unsigned j = 0; j < src.size(); ++j) { const WordID& cur = src[j]; if (cur > 0) { e[j] = cur; diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 734c2ce8..bfb2fb80 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -28,7 +28,7 @@ struct HGReader : public JSONParser { hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node); } } - void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorInt& tail) { + void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) { Hypergraph::Edge* edge = hg.AddEdge(rule, tail); feats->swap(edge->feature_values_); edge->i_ = spans[0]; @@ -229,7 +229,7 @@ struct HGReader : public JSONParser { } string rp; string cat; - SmallVectorInt tail; + SmallVectorUnsigned tail; vector<int> in_edges; TRulePtr cur_rule; map<int, TRulePtr> rules; @@ -488,13 +488,13 @@ int getInt(const std::string& in, int &c) #define MAX_NODES 100000000 // parse ('foo', 0.23) void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) { - if (get(in,c++) != '(') { assert(!"PCN/PLF parse error: expected ( at start of cn alt block\n"); } + if (get(in,c++) != '(') { cerr << "PCN/PLF parse error: expected (\n"; abort(); } vector<WordID> ewords(2, 0); ewords[1] = TD::Convert(getEscapedString(in,c)); TRulePtr r(new TRule(ewords)); r->ComputeArity(); // cerr << "RULE: " << r->AsString() << endl; - if (get(in,c++) != ',') { cerr << in << endl; assert(!"PCN/PLF parse error: expected , after string\n"); } + if (get(in,c++) != ',') { cerr << in << endl; cerr << "PCN/PLF parse error: expected , after string\n"; abort(); } size_t cnNext = 1; std::vector<float> probs; probs.push_back(getFloat(in,c)); @@ -508,10 +508,9 @@ void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) { if (probs.size()>1) { cnNext = static_cast<size_t>(probs.back()); probs.pop_back(); - if (cnNext < 1) { cerr << cnNext << endl; - assert(!"PCN/PLF parse error: bad link length at last element of cn alt block\n"); } + if (cnNext < 1) { cerr << cnNext << endl << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; abort(); } } - if (get(in,c++) != ')') { assert(!"PCN/PLF parse error: expected ) at end of cn alt block\n"); } + if (get(in,c++) != ')') { cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; abort(); } eatws(in,c); Hypergraph::TailNodeVector tail(1, cur_node); Hypergraph::Edge* edge = hg->AddEdge(r, tail); diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index dc96f1a9..bb7f9fcc 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -31,24 +31,24 @@ template<class WeightType, class WeightFunction> WeightType Inside(const Hypergraph& hg, std::vector<WeightType>* result = NULL, const WeightFunction& weight = WeightFunction()) { - const int num_nodes = hg.nodes_.size(); + const unsigned num_nodes = hg.nodes_.size(); std::vector<WeightType> dummy; std::vector<WeightType>& inside_score = result ? 
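The ReadPLFEdge hunks above replace assert(!"message") with an explicit cerr plus abort() for good reason: assert compiles to nothing under -DNDEBUG, so a release-build decoder would sail silently past a malformed PLF lattice. Before and after, in a hypothetical miniature:

    #include <cassert>
    #include <cstdlib>
    #include <iostream>

    void check_paren(char c) {
      // Before: aborts with the message in debug builds only; under
      // -DNDEBUG the entire check disappears.
      if (c != '(') { assert(!"PCN/PLF parse error: expected ("); }
      // After: fails loudly in every build configuration.
      if (c != '(') {
        std::cerr << "PCN/PLF parse error: expected (\n";
        std::abort();
      }
    }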
*result : dummy; inside_score.clear(); inside_score.resize(num_nodes); // std::fill(inside_score.begin(), inside_score.end(), WeightType()); // clear handles - for (int i = 0; i < num_nodes; ++i) { + for (unsigned i = 0; i < num_nodes; ++i) { WeightType* const cur_node_inside_score = &inside_score[i]; Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; - const int num_in_edges = in.size(); + const unsigned num_in_edges = in.size(); if (num_in_edges == 0) { *cur_node_inside_score = WeightType(1); //FIXME: why not call weight(edge) instead? continue; } - for (int j = 0; j < num_in_edges; ++j) { + for (unsigned j = 0; j < num_in_edges; ++j) { const Hypergraph::Edge& edge = hg.edges_[in[j]]; WeightType score = weight(edge); - for (int k = 0; k < edge.tail_nodes_.size(); ++k) { + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) { const int tail_node_index = edge.tail_nodes_[k]; score *= inside_score[tail_node_index]; } @@ -67,7 +67,7 @@ void Outside(const Hypergraph& hg, ) { assert(result); const int num_nodes = hg.nodes_.size(); - assert(inside_score.size() == num_nodes); + assert(static_cast<int>(inside_score.size()) == num_nodes); std::vector<WeightType>& outside_score = *result; outside_score.clear(); outside_score.resize(num_nodes); diff --git a/decoder/kbest.h b/decoder/kbest.h index 03a8311c..9af3a20e 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -43,7 +43,7 @@ namespace KBest { traverse(tf), w(wf), g(hg), nds(g.nodes_.size()), k_prime(k) {} ~KBestDerivations() { - for (int i = 0; i < freelist.size(); ++i) + for (unsigned i = 0; i < freelist.size(); ++i) delete freelist[i]; } @@ -86,7 +86,7 @@ namespace KBest { // Hypergraph::Edge const * operator ->() const { return d->edge; } }; - EdgeHandle operator()(int t,int taili,EdgeHandle const& parent) const { + EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const { return EdgeHandle(nds[t].D[parent.d->j[taili]]); } @@ -98,7 +98,7 @@ namespace KBest { size_t operator()(const Derivation* d) const { size_t x = 5381; x = ((x << 5) + x) ^ d->edge->id_; - for (int i = 0; i < d->j.size(); ++i) + for (unsigned i = 0; i < d->j.size(); ++i) x = ((x << 5) + x) ^ d->j[i]; return x; } @@ -121,7 +121,7 @@ namespace KBest { explicit NodeDerivationState(const DerivationFilter& f = DerivationFilter()) : filter(f) {} }; - Derivation* LazyKthBest(int v, int k) { + Derivation* LazyKthBest(unsigned v, unsigned k) { NodeDerivationState& s = GetCandidates(v); CandidateHeap& cand = s.cand; DerivationList& D = s.D; @@ -139,7 +139,7 @@ namespace KBest { Derivation* d = cand.back(); cand.pop_back(); std::vector<const T*> ants(d->edge->Arity()); - for (int j = 0; j < ants.size(); ++j) + for (unsigned j = 0; j < ants.size(); ++j) ants[j] = &LazyKthBest(d->edge->tail_nodes_[j], d->j[j])->yield; traverse(*d->edge, ants, &d->yield); if (!filter(d->yield)) { @@ -171,12 +171,12 @@ namespace KBest { return freelist.back(); } - NodeDerivationState& GetCandidates(int v) { + NodeDerivationState& GetCandidates(unsigned v) { NodeDerivationState& s = nds[v]; if (!s.D.empty() || !s.cand.empty()) return s; const Hypergraph::Node& node = g.nodes_[v]; - for (int i = 0; i < node.in_edges_.size(); ++i) { + for (unsigned i = 0; i < node.in_edges_.size(); ++i) { const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]]; SmallVectorInt jv(edge.Arity(), 0); Derivation* d = CreateDerivation(edge, jv); @@ -184,7 +184,7 @@ namespace KBest { s.cand.push_back(d); } - const int effective_k = std::min(k_prime, s.cand.size()); + const unsigned 
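Inside, at the top of this hunk, is the generalized forward pass over a hypergraph: the score of a node is the sum over its incoming edges of the edge weight times the product of its tail scores, computed in one bottom-up sweep (leaves get weight one, as in the code above). A self-contained sketch with doubles in place of the templated weight type:

    #include <vector>

    struct E { double w; std::vector<int> tails; };

    // in_edges[v] lists node v's incoming hyperedges; nodes are assumed
    // topologically ordered, so tails finish before their heads.
    std::vector<double> InsideScores(
        const std::vector<std::vector<E> >& in_edges) {
      std::vector<double> inside(in_edges.size());
      for (unsigned v = 0; v < in_edges.size(); ++v) {
        if (in_edges[v].empty()) { inside[v] = 1.0; continue; }
        double sum = 0;
        for (unsigned j = 0; j < in_edges[v].size(); ++j) {
          double score = in_edges[v][j].w;
          for (unsigned k = 0; k < in_edges[v][j].tails.size(); ++k)
            score *= inside[in_edges[v][j].tails[k]];
          sum += score;
        }
        inside[v] = sum;
      }
      return inside;
    }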
effective_k = std::min(k_prime, s.cand.size()); const typename CandidateHeap::iterator kth = s.cand.begin() + effective_k; std::nth_element(s.cand.begin(), kth, s.cand.end(), DerivationCompare()); s.cand.resize(effective_k); @@ -194,7 +194,7 @@ namespace KBest { } void LazyNext(const Derivation* d, CandidateHeap* cand, UniqueDerivationSet* ds) { - for (int i = 0; i < d->j.size(); ++i) { + for (unsigned i = 0; i < d->j.size(); ++i) { SmallVectorInt j = d->j; ++j[i]; const Derivation* ant = LazyKthBest(d->edge->tail_nodes_[i], j[i]); @@ -205,8 +205,12 @@ namespace KBest { if (new_d) { cand->push_back(new_d); std::push_heap(cand->begin(), cand->end(), HeapCompare()); +#ifdef NDEBUG + ds->insert(new_d).second; // insert into uniqueness set +#else bool inserted = ds->insert(new_d).second; // insert into uniqueness set assert(inserted); +#endif } } } diff --git a/decoder/maxtrans_blunsom.cc b/decoder/maxtrans_blunsom.cc index 6efab454..774e4170 100644 --- a/decoder/maxtrans_blunsom.cc +++ b/decoder/maxtrans_blunsom.cc @@ -73,7 +73,7 @@ struct Candidate { prob_t p = prob_t::One(); // cerr << "\nEstimating application of " << in_edge.rule_->AsString() << endl; vector<const vector<WordID>* > ants(tail.size()); - for (int i = 0; i < tail.size(); ++i) { + for (unsigned i = 0; i < tail.size(); ++i) { const Candidate& ant = *D[in_edge.tail_nodes_[i]][j_[i]]; ants[i] = &ant.state_; assert(ant.IsIncorporatedIntoHypergraph()); @@ -99,7 +99,7 @@ ostream& operator<<(ostream& os, const Candidate& cand) { else { os << "+LM_node=" << cand.node_index_; } os << " edge=" << cand.in_edge_->id_; os << " j=<"; - for (int i = 0; i < cand.j_.size(); ++i) + for (unsigned i = 0; i < cand.j_.size(); ++i) os << (i==0 ? "" : " ") << cand.j_[i]; os << "> vit=" << log(cand.inside_prob_); os << " est=" << log(cand.est_prob_); @@ -127,7 +127,7 @@ struct CandidateUniquenessHash { size_t operator()(const Candidate* c) const { size_t x = 5381; x = ((x << 5) + x) ^ c->in_edge_->id_; - for (int i = 0; i < c->j_.size(); ++i) + for (unsigned i = 0; i < c->j_.size(); ++i) x = ((x << 5) + x) ^ c->j_[i]; return x; } @@ -154,12 +154,12 @@ public: } void Apply() { - int num_nodes = in.nodes_.size(); - int goal_id = num_nodes - 1; - int pregoal = goal_id - 1; + const unsigned num_nodes = in.nodes_.size(); + const unsigned goal_id = num_nodes - 1; + const unsigned pregoal = goal_id - 1; assert(in.nodes_[pregoal].out_edges_.size() == 1); cerr << " "; - for (int i = 0; i < in.nodes_.size(); ++i) { + for (unsigned i = 0; i < in.nodes_.size(); ++i) { cerr << '.'; KBest(i, i == goal_id); } @@ -174,9 +174,9 @@ public: private: void FreeAll() { - for (int i = 0; i < D.size(); ++i) { + for (unsigned i = 0; i < D.size(); ++i) { CandidateList& D_i = D[i]; - for (int j = 0; j < D_i.size(); ++j) + for (unsigned j = 0; j < D_i.size(); ++j) delete D_i[j]; } D.clear(); @@ -216,7 +216,7 @@ public: CandidateList freelist; cand.reserve(in_edges.size()); UniqueCandidateSet unique_cands; - for (int i = 0; i < in_edges.size(); ++i) { + for (unsigned i = 0; i < in_edges.size(); ++i) { const Hypergraph::Edge& edge = in.edges_[in_edges[i]]; const JVector j(edge.tail_nodes_.size(), 0); cand.push_back(new Candidate(edge, j, D, is_goal)); @@ -242,20 +242,20 @@ public: sort(D_v.begin(), D_v.end(), EstProbSorter()); // cerr << " expanded to " << D_v.size() << " nodes\n"; - for (int i = 0; i < cand.size(); ++i) + for (unsigned i = 0; i < cand.size(); ++i) delete cand[i]; // freelist is necessary since even after an item merged, it still stays in // the unique set so it 
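The NDEBUG guard added to kbest.h above exists because the insert must still happen in release builds: writing assert(ds->insert(new_d).second) would have discarded the side effect together with the check once NDEBUG strips the assert. The committed pattern keeps the call unconditionally and only inspects its result in debug builds:

    #include <cassert>
    #include <set>

    void add_unique(std::set<int>& s, int x) {
      // Wrong: under -DNDEBUG the whole expression, insert included, vanishes.
      //   assert(s.insert(x).second);
    #ifdef NDEBUG
      s.insert(x);                        // keep the side effect
    #else
      bool inserted = s.insert(x).second;
      assert(inserted);                   // uniqueness checked in debug only
    #endif
    }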
can't be deleted til now - for (int i = 0; i < freelist.size(); ++i) + for (unsigned i = 0; i < freelist.size(); ++i) delete freelist[i]; } void PushSucc(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* cs) { CandidateHeap& cand = *pcand; - for (int i = 0; i < item.j_.size(); ++i) { + for (unsigned i = 0; i < item.j_.size(); ++i) { JVector j = item.j_; ++j[i]; - if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) { + if (static_cast<unsigned>(j[i]) < D[item.in_edge_->tail_nodes_[i]].size()) { Candidate query_unique(*item.in_edge_, j); if (cs->count(&query_unique) == 0) { Candidate* new_cand = new Candidate(*item.in_edge_, j, D, is_goal); diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 15abb600..185f979a 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -33,7 +33,7 @@ struct SCFGTranslatorImpl { { if(conf.count("grammar")){ vector<string> gfiles = conf["grammar"].as<vector<string> >(); - for (int i = 0; i < gfiles.size(); ++i) { + for (unsigned i = 0; i < gfiles.size(); ++i) { if (!SILENT) cerr << "Reading SCFG grammar from " << gfiles[i] << endl; TextGrammar* g = new TextGrammar(gfiles[i]); g->SetMaxSpan(max_span_limit); @@ -132,7 +132,7 @@ struct SCFGTranslatorImpl { g->SetGrammarName("PassThrough"); glist.push_back(GrammarPtr(g)); } - for (int gi = 0; gi < glist.size(); ++gi) { + for (unsigned gi = 0; gi < glist.size(); ++gi) { if(printGrammarsUsed) cerr << "Using grammar::" << glist[gi]->GetGrammarName() << endl; } @@ -147,7 +147,7 @@ struct SCFGTranslatorImpl { forest->Reweight(weights); if (use_ctf_) { Hypergraph::Node& goal_node = *(forest->nodes_.end()-1); - foreach(int edge_id, goal_node.in_edges_) + foreach(unsigned edge_id, goal_node.in_edges_) RefineRule(forest->edges_[edge_id].rule_, ctf_iterations_); double alpha = ctf_alpha_; bool found_parse=false; @@ -155,7 +155,7 @@ struct SCFGTranslatorImpl { cerr << "Coarse-to-fine source parse, alpha=" << alpha << endl; found_parse = true; Hypergraph refined_forest = *forest; - for (int j=0; j < ctf_iterations_; ++j) { + for (unsigned j=0; j < ctf_iterations_; ++j) { cerr << viterbi_stats(refined_forest," Coarse forest",true,show_tree_structure_); cerr << " Iteration " << (j+1) << ": Pruning forest... "; refined_forest.BeamPruneInsideOutside(1.0, false, alpha, NULL); @@ -178,7 +178,7 @@ struct SCFGTranslatorImpl { if (!found_parse){ if (ctf_exhaustive_){ cerr << "Last resort: refining coarse forest without pruning..."; - for (int j=0; j < ctf_iterations_; ++j) { + for (unsigned j=0; j < ctf_iterations_; ++j) { if (RefineForest(forest)){ cerr << " Refinement succeeded." 
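PushSucc above is the cube-pruning successor step: from a candidate whose back-pointer vector is j, each neighbor bumps exactly one coordinate, and is admitted only if that derivation index exists and has not been enqueued before (hence the uniqueness set). The enumeration, isolated from the heap bookkeeping:

    #include <vector>

    // From j = (j0,...,jn-1), emit each (j0,...,ji+1,...,jn-1); the caller
    // bounds-checks against D[tail_i].size() and filters duplicates.
    std::vector<std::vector<int> > Successors(const std::vector<int>& j) {
      std::vector<std::vector<int> > result;
      for (unsigned i = 0; i < j.size(); ++i) {
        std::vector<int> next = j;
        ++next[i];
        result.push_back(next);
      }
      return result;
    }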
<< endl; forest->Reweight(weights); @@ -213,7 +213,7 @@ struct SCFGTranslatorImpl { Hypergraph::Edge& edge = forest->edges_[edge_id]; std::vector<int> nt_positions; TRulePtr& coarse_rule_ptr = edge.rule_; - for(int i=0; i< coarse_rule_ptr->f_.size(); ++i){ + for(unsigned i=0; i< coarse_rule_ptr->f_.size(); ++i){ if (coarse_rule_ptr->f_[i] < 0) nt_positions.push_back(i); } @@ -225,7 +225,7 @@ struct SCFGTranslatorImpl { // fine rules apply only if state splits on tail nodes match fine rule nonterminals foreach(TRulePtr& fine_rule_ptr, *(coarse_rule_ptr->fine_rules_)) { Hypergraph::TailNodeVector tail; - for (int pos_i=0; pos_i<nt_positions.size(); ++pos_i){ + for (unsigned pos_i=0; pos_i<nt_positions.size(); ++pos_i){ WordID fine_cat = fine_rule_ptr->f_[nt_positions[pos_i]]; Split2Node::iterator it = s2n.find(StateSplit(edge.tail_nodes_[pos_i], fine_cat)); diff --git a/decoder/trule.cc b/decoder/trule.cc index 141b8faa..187a003d 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -18,7 +18,7 @@ bool TRule::IsGoal() const { } static WordID ConvertTrgString(const string& w) { - int len = w.size(); + const unsigned len = w.size(); WordID id = 0; // [X,0] or [0] // for target rules, we ignore the category, just keep the index @@ -33,7 +33,7 @@ static WordID ConvertTrgString(const string& w) { } static WordID ConvertSrcString(const string& w, bool mono = false) { - int len = w.size(); + const unsigned len = w.size(); // [X,0] // for source rules, we keep the category and ignore the index (source rules are // always numbered 1, 2, 3... @@ -60,7 +60,7 @@ static WordID ConvertSrcString(const string& w, bool mono = false) { static WordID ConvertLHS(const string& w) { if (w[0] == '[') { - int len = w.size(); + const unsigned len = w.size(); if (len < 3) { cerr << "Format error: " << w << endl; exit(1); } return TD::Convert(w.substr(1, len-2)) * -1; } else { @@ -100,6 +100,8 @@ namespace { // callback for lexer int n_assigned=0; void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { + (void) ctf_level; + (void) coarse_rule; TRule *assignto=(TRule *)extra; *assignto=*new_rule; ++n_assigned; @@ -143,15 +145,15 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { string ss; getline(is, ss); //cerr << "L: " << ss << endl; - int start = 0; - int len = ss.size(); + unsigned start = 0; + unsigned len = ss.size(); const size_t ppos = ss.find(" |||"); if (ppos != string::npos) { len = ppos; } while (start < len) { while(start < len && (ss[start] == ' ' || ss[start] == ';')) ++start; if (start == len) break; - int end = start + 1; + unsigned end = start + 1; while(end < len && (ss[end] != '=' && ss[end] != ' ' && ss[end] != ';')) ++end; if (end == len || ss[end] == ' ' || ss[end] == ';') { @@ -188,7 +190,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } f_ = e_; int x = ConvertLHS("[X]"); - for (int i = 0; i < f_.size(); ++i) + for (unsigned i = 0; i < f_.size(); ++i) if (f_[i] <= 0) { f_[i] = x; } } else { cerr << "F: " << format << endl; @@ -197,7 +199,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { if (mono) { e_ = f_; int ci = 0; - for (int i = 0; i < e_.size(); ++i) + for (unsigned i = 0; i < e_.size(); ++i) if (e_[i] < 0) e_[i] = ci--; } @@ -208,7 +210,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { bool TRule::SanityCheck() const { vector<int> used(f_.size(), 0); int 
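The (void) casts added to assign_trule above are the portable idiom for a deliberately unused parameter: the lexer fixes the callback's signature, so the arguments cannot simply be removed, and the casts silence -Wunused-parameter without changing behavior. In a hypothetical miniature:

    int last_rule = -1;

    // Signature dictated by the caller; ctf_level/extra are not needed here.
    void on_rule(int rule_id, unsigned ctf_level, void* extra) {
      (void) ctf_level;
      (void) extra;
      last_rule = rule_id;
    }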
ac = 0; - for (int i = 0; i < e_.size(); ++i) { + for (unsigned i = 0; i < e_.size(); ++i) { int ind = e_[i]; if (ind > 0) continue; ind = -ind; @@ -238,7 +240,7 @@ string TRule::AsString(bool verbose) const { if (lhs_ && verbose) { os << '[' << TD::Convert(lhs_ * -1) << "] |||"; } - for (int i = 0; i < f_.size(); ++i) { + for (unsigned i = 0; i < f_.size(); ++i) { const WordID& w = f_[i]; if (w < 0) { int wi = w * -1; @@ -249,7 +251,7 @@ string TRule::AsString(bool verbose) const { } } os << " ||| "; - for (int i =0; i<e_.size(); ++i) { + for (unsigned i =0; i<e_.size(); ++i) { if (i) os << ' '; const WordID& w = e_[i]; if (w < 1) @@ -261,7 +263,7 @@ string TRule::AsString(bool verbose) const { os << " ||| " << scores_; if (!a_.empty()) { os << " |||"; - for (int i = 0; i < a_.size(); ++i) + for (unsigned i = 0; i < a_.size(); ++i) os << ' ' << a_[i]; } } diff --git a/decoder/trule.h b/decoder/trule.h index 8eb2a059..6a33d052 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -76,7 +76,7 @@ class TRule { void ESubstitute(const std::vector<const std::vector<WordID>* >& var_values, std::vector<WordID>* result) const { - int vc = 0; + unsigned vc = 0; result->clear(); for (std::vector<WordID>::const_iterator i = e_.begin(); i != e_.end(); ++i) { const WordID& c = *i; @@ -95,7 +95,7 @@ class TRule { void FSubstitute(const std::vector<const std::vector<WordID>* >& var_values, std::vector<WordID>* result) const { - int vc = 0; + unsigned vc = 0; result->clear(); for (std::vector<WordID>::const_iterator i = f_.begin(); i != f_.end(); ++i) { const WordID& c = *i; diff --git a/dpmert/Jamfile b/dpmert/Jamfile new file mode 100644 index 00000000..bc4b079b --- /dev/null +++ b/dpmert/Jamfile @@ -0,0 +1,32 @@ +import testing ; +import lex ; +import option ; + +lib dpmert : + ces.cc + error_surface.cc + line_optimizer.cc + mert_geometry.cc + ..//utils + ..//mteval + ..//decoder + ../klm/lm//kenlm + ..//boost_program_options + : <include>. + : : + <library>..//utils + <library>..//mteval + <library>../klm/lm//kenlm + <library>..//boost_program_options + <include>. 
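ESubstitute and FSubstitute above lean on cdec's WordID convention: positive ids are terminal words, while ids below one are variable slots whose negation indexes the antecedent yields in var_values. A simplified splice under that convention:

    #include <vector>
    typedef int WordID;  // cdec convention: > 0 terminal, <= 0 variable slot

    void Substitute(const std::vector<WordID>& e,
                    const std::vector<const std::vector<WordID>* >& var_values,
                    std::vector<WordID>* result) {
      result->clear();
      for (unsigned i = 0; i < e.size(); ++i) {
        if (e[i] > 0) {
          result->push_back(e[i]);                       // terminal: copy
        } else {
          const std::vector<WordID>& ant = *var_values[-e[i]];
          result->insert(result->end(), ant.begin(), ant.end());  // splice
        }
      }
    }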
+ ; + +all_tests [ glob *_test.cc ] : dpmert : <testing.arg>$(TOP)/dpmert/test_data ; + +exe sentserver : sentserver.c : <threading>multi ; +exe sentclient : sentclient.c ; +exe mr_dpmert_generate_mapper_input : mr_dpmert_generate_mapper_input.cc dpmert ..//boost_program_options ; +exe mr_dpmert_map : mr_dpmert_map.cc dpmert ..//boost_program_options ; +exe mr_dpmert_reduce : mr_dpmert_reduce.cc dpmert ..//boost_program_options ; + +alias programs : sentserver sentclient mr_dpmert_generate_mapper_input mr_dpmert_map mr_dpmert_reduce ; diff --git a/dpmert/ces.cc b/dpmert/ces.cc index c6cb1cdf..157b2d17 100644 --- a/dpmert/ces.cc +++ b/dpmert/ces.cc @@ -25,7 +25,7 @@ void ComputeErrorSurface(const SegmentEvaluator& ss, env->resize(ienv.size()); SufficientStats prev_score; // defaults to 0 int j = 0; - for (int i = 0; i < ienv.size(); ++i) { + for (unsigned i = 0; i < ienv.size(); ++i) { const MERTPoint& seg = *ienv[i]; vector<WordID> trans; #if 0 diff --git a/dpmert/divide_refs.py b/dpmert/divide_refs.py new file mode 100755 index 00000000..b478f918 --- /dev/null +++ b/dpmert/divide_refs.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +import sys + +(numRefs, outPrefix) = sys.argv[1:] +numRefs = int(numRefs) + +outs = [open(outPrefix+str(i), "w") for i in range(numRefs)] + +i = 0 +for line in sys.stdin: + outs[i].write(line) + i = (i + 1) % numRefs + +for out in outs: + out.close() diff --git a/dpmert/lo_test.cc b/dpmert/lo_test.cc index d47a95b5..2daf87bb 100644 --- a/dpmert/lo_test.cc +++ b/dpmert/lo_test.cc @@ -88,7 +88,7 @@ BOOST_AUTO_TEST_CASE(TestConvexHullInside) { if (!d) break; cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; } - for (int i = 0; i < segs.size(); ++i) { + for (unsigned i = 0; i < segs.size(); ++i) { cerr << "seg=" << i << endl; vector<WordID> trans; segs[i]->ConstructTranslation(&trans); @@ -118,13 +118,15 @@ BOOST_AUTO_TEST_CASE( TestS1) { to_optimize.push_back(fPhraseModel_1); to_optimize.push_back(fPhraseModel_2); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? 
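divide_refs.py, added above, deals reference lines out round-robin: line i of stdin goes to the file named outPrefix followed by i modulo numRefs. So, for example, running divide_refs.py 2 ref. on input whose lines alternate between two reference sets would produce ref.0 and ref.1, one reference set per file.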
boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + Hypergraph hg; - ReadFile rf("./test_data/0.json.gz"); + ReadFile rf(path + "/0.json.gz"); HypergraphIO::ReadFromJSON(rf.stream(), &hg); hg.Reweight(wts); Hypergraph hg2; - ReadFile rf2("./test_data/1.json.gz"); + ReadFile rf2(path + "/1.json.gz"); HypergraphIO::ReadFromJSON(rf2.stream(), &hg2); hg2.Reweight(wts); @@ -149,7 +151,7 @@ BOOST_AUTO_TEST_CASE( TestS1) { &rng, &axes); assert(axes.size() == 10 + to_optimize.size()); - for (int i = 0; i < axes.size(); ++i) + for (unsigned i = 0; i < axes.size(); ++i) cerr << axes[i] << endl; const SparseVector<double>& axis = axes[0]; diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc index 59d4f24f..199cd23a 100644 --- a/dpmert/mr_dpmert_generate_mapper_input.cc +++ b/dpmert/mr_dpmert_generate_mapper_input.cc @@ -52,12 +52,15 @@ int main(int argc, char** argv) { Weights::InitFromFile(conf["weights"].as<string>(), &w, &features); Weights::InitSparseVector(w, &origin); const string forest_repository = conf["forest_repository"].as<string>(); - assert(DirectoryExists(forest_repository)); + if (!DirectoryExists(forest_repository)) { + cerr << "Forest repository directory " << forest_repository << " not found!\n"; + return 1; + } if (conf.count("optimize_feature") > 0) features=conf["optimize_feature"].as<vector<string> >(); vector<SparseVector<weight_t> > directions; vector<int> fids(features.size()); - for (int i = 0; i < features.size(); ++i) + for (unsigned i = 0; i < features.size(); ++i) fids[i] = FD::Convert(features[i]); LineOptimizer::CreateOptimizationDirections( fids, diff --git a/dpmert/mr_dpmert_map.cc b/dpmert/mr_dpmert_map.cc index f3304f0f..d1efcf96 100644 --- a/dpmert/mr_dpmert_map.cc +++ b/dpmert/mr_dpmert_map.cc @@ -52,7 +52,7 @@ bool ReadSparseVectorString(const string& s, SparseVector<double>* v) { vector<string> fields; Tokenize(s, ';', &fields); if (fields.empty()) return false; - for (int i = 0; i < fields.size(); ++i) { + for (unsigned i = 0; i < fields.size(); ++i) { vector<string> pair(2); Tokenize(fields[i], '=', &pair); if (pair.size() != 2) { diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index abae1e3b..b9549c6e 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -15,9 +15,15 @@ my $CCONFIG = { 'StarCluster' => { 'HOST_REGEXP' => qr/compute-\d+\.internal$/, 'JobControl' => 'qsub', - 'QSubMemFlag' => '-l mem', + 'QSubMemFlag' => '-l mem=', 'DefaultJobs' => 20, }, + 'Cab' => { + 'HOST_REGEXP' => qr/cab\.ark\.cs\.cmu\.edu$|cab\.local$/, + 'JobControl' => 'qsub', + 'QSubMemFlag' => '-l mem=', + 'DefaultJobs' => 8 + }, 'LTICluster' => { 'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/, 'JobControl' => 'qsub', diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d deleted file mode 100644 index 082cb15b..00000000 --- a/gi/scfg/abc/Release/IConv.d +++ /dev/null @@ -1,3 +0,0 @@ -IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp - -../../utils/IConv.hpp: diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d deleted file mode 100644 index 586d4d60..00000000 --- a/gi/scfg/abc/Release/Util.d +++ /dev/null @@ -1,8 +0,0 @@ -Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d deleted file mode 100644 index 
553752ca..00000000 --- a/gi/scfg/abc/Release/agrammar.d +++ /dev/null @@ -1,205 +0,0 @@ -agrammar.d agrammar.o: ../agrammar.cc \ - /home/tnguyen/ws10smt/decoder/rule_lexer.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp 
\ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/filelib.h \ - /home/tnguyen/ws10smt/decoder/gzstream.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/home/tnguyen/ws10smt/decoder/rule_lexer.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - 
-/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/filelib.h: - -/home/tnguyen/ws10smt/decoder/gzstream.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_test Binary files differdeleted file mode 100755 index 1ba94218..00000000 --- a/gi/scfg/abc/Release/dict_test +++ /dev/null diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar deleted file mode 100644 index 75fac3a0..00000000 --- a/gi/scfg/abc/Release/grammar +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr deleted file mode 100644 index e4e327cf..00000000 --- a/gi/scfg/abc/Release/grammar.pr +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano . ||| [1] old man . 
||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154 -[X] ||| el ||| the ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154 diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile deleted file mode 100644 index 25949e74..00000000 --- a/gi/scfg/abc/Release/makefile +++ /dev/null @@ -1,66 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -#-include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -endif - -#-include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables - -# All Target -all: scfg - -# Tool invocations - -# scfg.o: ../scfg.cpp -# @echo 'Building file: $<' -# @echo 'Invoking: GCC C++ Compiler' -# g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# @echo 'Finished building: $<' -# @echo ' ' - -scfg: $(OBJS) $(USER_OBJS) - @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - /bin/sh ../../../../libtool --tag=CXX --mode=link g++ -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc - @echo 'Finished building target: $@' - @echo ' ' -#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder -lpthread -ldl -lm $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg" -# Other Targets -clean: - -$(RM) $(OBJS)$(C++_DEPS)$(EXECUTABLES)$(CC_DEPS)$(C_DEPS)$(CPP_DEPS)$(CXX_DEPS)$(C_UPPER_DEPS) scfg - -@echo ' ' - -.PHONY: all clean dependents -.SECONDARY: - --include ../makefile.targets diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl deleted file mode 100644 index f82a8e5a..00000000 --- a/gi/scfg/abc/Release/process_grammar.pl +++ /dev/null @@ -1,36 +0,0 @@ -#!perl - -use warnings; -use strict; - -my $grammar_file = $ARGV[0]; - -my %nt_count; #maps nt--> count rules whose lhs is nt - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (<G>){ - - chomp(); - - s/\|\|\|.*//g; - s/\s//g; - - $nt_count{$_}++; -} - - -close (G); - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (<G>){ - - 
diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfg
deleted file mode 100755
index 3faa52cc..00000000
--- a/gi/scfg/abc/Release/scfg
+++ /dev/null
Binary files differ
diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d
deleted file mode 100644
index b3cfbbb5..00000000
--- a/gi/scfg/abc/Release/scfg.d
+++ /dev/null
@@ -1,213 +0,0 @@
-scfg.d scfg.o: ../scfg.cpp \
- /export/ws10smt/software/include/boost/shared_ptr.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \
- /export/ws10smt/software/include/boost/config.hpp \
- /export/ws10smt/software/include/boost/config/user.hpp \
- /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \
- /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \
- /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \
- /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \
- /export/ws10smt/software/include/boost/config/select_platform_config.hpp \
- /export/ws10smt/software/include/boost/config/platform/linux.hpp \
- /export/ws10smt/software/include/boost/config/posix_features.hpp \
- /export/ws10smt/software/include/boost/config/suffix.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \
- /export/ws10smt/software/include/boost/assert.hpp \
- /export/ws10smt/software/include/boost/checked_delete.hpp \
- /export/ws10smt/software/include/boost/throw_exception.hpp \
- /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \
- /export/ws10smt/software/include/boost/detail/workaround.hpp \
- /export/ws10smt/software/include/boost/exception/exception.hpp \
- /export/ws10smt/software/include/boost/current_function.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \
- /export/ws10smt/software/include/boost/memory_order.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \
- /export/ws10smt/software/include/boost/pointer_cast.hpp \
- /home/tnguyen/ws10smt/decoder/lattice.h \
- /home/tnguyen/ws10smt/decoder/wordid.h \
- /home/tnguyen/ws10smt/decoder/array2d.h \
- /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \
- /home/tnguyen/ws10smt/decoder/grammar.h \
- /home/tnguyen/ws10smt/decoder/lattice.h \
- /home/tnguyen/ws10smt/decoder/trule.h \
- /home/tnguyen/ws10smt/decoder/sparse_vector.h \
- /home/tnguyen/ws10smt/decoder/fdict.h \
- /home/tnguyen/ws10smt/decoder/dict.h \
- /export/ws10smt/software/include/boost/functional/hash.hpp \
- /export/ws10smt/software/include/boost/functional/hash/hash.hpp \
- /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \
- /export/ws10smt/software/include/boost/limits.hpp \
- /export/ws10smt/software/include/boost/integer/static_log2.hpp \
- /export/ws10smt/software/include/boost/integer_fwd.hpp \
- /export/ws10smt/software/include/boost/cstdint.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \
- /export/ws10smt/software/include/boost/detail/container_fwd.hpp \
- /home/tnguyen/ws10smt/decoder/hg.h \
- /home/tnguyen/ws10smt/decoder/small_vector.h \
- /home/tnguyen/ws10smt/decoder/prob.h \
- /home/tnguyen/ws10smt/decoder/logval.h \
- /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \
- /home/tnguyen/ws10smt/decoder/grammar.h \
- /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \
- ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
-
-/export/ws10smt/software/include/boost/shared_ptr.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp:
-
-/export/ws10smt/software/include/boost/config.hpp:
-
-/export/ws10smt/software/include/boost/config/user.hpp:
-
-/export/ws10smt/software/include/boost/config/select_compiler_config.hpp:
-
-/export/ws10smt/software/include/boost/config/compiler/gcc.hpp:
-
-/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp:
-
-/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp:
-
-/export/ws10smt/software/include/boost/config/select_platform_config.hpp:
-
-/export/ws10smt/software/include/boost/config/platform/linux.hpp:
-
-/export/ws10smt/software/include/boost/config/posix_features.hpp:
-
-/export/ws10smt/software/include/boost/config/suffix.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp:
-
-/export/ws10smt/software/include/boost/assert.hpp:
-
-/export/ws10smt/software/include/boost/checked_delete.hpp:
-
-/export/ws10smt/software/include/boost/throw_exception.hpp:
-
-/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp:
-
-/export/ws10smt/software/include/boost/detail/workaround.hpp:
-
-/export/ws10smt/software/include/boost/exception/exception.hpp:
-
-/export/ws10smt/software/include/boost/current_function.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
-
-/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp:
-
-/export/ws10smt/software/include/boost/memory_order.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp:
-
-/export/ws10smt/software/include/boost/pointer_cast.hpp:
-
-/home/tnguyen/ws10smt/decoder/lattice.h:
-
-/home/tnguyen/ws10smt/decoder/wordid.h:
-
-/home/tnguyen/ws10smt/decoder/array2d.h:
-
-/home/tnguyen/ws10smt/decoder/tdict.h:
-
-../agrammar.h:
-
-/home/tnguyen/ws10smt/decoder/grammar.h:
-
-/home/tnguyen/ws10smt/decoder/lattice.h:
-
-/home/tnguyen/ws10smt/decoder/trule.h:
-
-/home/tnguyen/ws10smt/decoder/sparse_vector.h:
-
-/home/tnguyen/ws10smt/decoder/fdict.h:
-
-/home/tnguyen/ws10smt/decoder/dict.h:
-
-/export/ws10smt/software/include/boost/functional/hash.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/hash.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp:
-
-/export/ws10smt/software/include/boost/limits.hpp:
-
-/export/ws10smt/software/include/boost/integer/static_log2.hpp:
-
-/export/ws10smt/software/include/boost/integer_fwd.hpp:
-
-/export/ws10smt/software/include/boost/cstdint.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/extensions.hpp:
-
-/export/ws10smt/software/include/boost/detail/container_fwd.hpp:
-
-/home/tnguyen/ws10smt/decoder/hg.h:
-
-/home/tnguyen/ws10smt/decoder/small_vector.h:
-
-/home/tnguyen/ws10smt/decoder/prob.h:
-
-/home/tnguyen/ws10smt/decoder/logval.h:
-
-/home/tnguyen/ws10smt/decoder/bottom_up_parser.h:
-
-/home/tnguyen/ws10smt/decoder/grammar.h:
-
-/home/tnguyen/ws10smt/decoder/hg_intersect.h:
-
-../../utils/ParamsArray.h:
-
-../../utils/Util.h:
-
-../../utils/UtfConverter.h:
-
-../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk
deleted file mode 100644
index 6c7070aa..00000000
--- a/gi/scfg/abc/Release/sources.mk
+++ /dev/null
@@ -1,27 +0,0 @@
-################################################################################
-# Automatically-generated file. Do not edit!
-################################################################################
-
-C_UPPER_SRCS :=
-C_SRCS :=
-CPP_SRCS :=
-O_SRCS :=
-ASM_SRCS :=
-S_SRCS :=
-C++_SRCS :=
-CXX_SRCS :=
-CC_SRCS :=
-OBJ_SRCS :=
-OBJS :=
-C++_DEPS :=
-EXECUTABLES :=
-CC_DEPS :=
-C_DEPS :=
-CPP_DEPS :=
-CXX_DEPS :=
-C_UPPER_DEPS :=
-
-# Every subdirectory with source files must be described here
-SUBDIRS := \
-. \
-
diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk
deleted file mode 100644
index 49080b36..00000000
--- a/gi/scfg/abc/Release/subdir.mk
+++ /dev/null
@@ -1,59 +0,0 @@
-
-################################################################################
-# Automatically-generated file. Do not edit!
-################################################################################
-
-# Add inputs and outputs from these tool invocations to the build variables
-CPP_SRCS += \
-../../utils/Util.cc \
-../agrammar.cc \
-../scfg.cpp
-
-
-OBJS += \
-./Util.o \
-./agrammar.o \
-./scfg.o
-
-
-CPP_DEPS += \
-./Util.d \
-./agrammar.d \
-./scfg.d
-
-# Each subdirectory must supply rules for building sources it contributes
-# %.o: ../%.cpp
-#	@echo 'Building file: $<'
-#	@echo 'Invoking: GCC C++ Compiler'
-#	g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-#
-#	@echo ' '
-
-%.o: ../../utils/%.cc
-	@echo 'Building file: $<'
-	@echo 'Invoking: GCC C++ Compiler'
-	g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-	@echo 'Finished building: $<'
-	@echo ' '
-
-%.o: ../../utils/%.c
-	@echo 'Building file: $<'
-	@echo 'Invoking: GCC C++ Compiler'
-	g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-	@echo 'Finished building: $<'
-	@echo ' '
-
-%.o: ../%.cpp
-	@echo 'Building file: $<'
-	@echo 'Invoking: GCC C++ Compiler'
-	g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-	@echo 'Finished building: $<'
-	@echo ' '
-
-%.o: ../%.cc
-	@echo 'Building file: $<'
-	@echo 'Invoking: GCC C++ Compiler'
-	g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-	@echo 'Finished building: $<'
-	@echo ' '
-
diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar
deleted file mode 100644
index 9df1b77d..00000000
--- a/gi/scfg/abc/Release/tmp.grammar
+++ /dev/null
@@ -1,2 +0,0 @@
-[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
\ No newline at end of file
diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar
deleted file mode 120000
index 50dea8df..00000000
--- a/gi/scfg/abc/Release/toy-grammar
+++ /dev/null
@@ -1 +0,0 @@
-/export/ws10smt/toy-grammar/
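The deleted agrammar.cc below implements the nonterminal-splitting step: each occurrence of a nonterminal X in a rule may be rewritten as any of n split symbols X+0 ... X+n-1, so a rule with k occurrences expands into n^k new rules, each with k*log(n) added to its MinusLogP cost (plus a small random epsilon to break symmetry between the otherwise identical splits). Its j_vector loop enumerates the substitutions by reading off the base-n digits of a counter. A standalone sketch of that enumeration, with illustrative names only:

// Illustrative sketch (not from the commit) of the base-n counting used
// by AddSplitNonTerminal in the deleted agrammar.cc below.
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  const int n = 2;  // split symbols per nonterminal (max_split_)
  const int k = 3;  // occurrences of the nonterminal in one rule
  const int total = static_cast<int>(std::pow(static_cast<double>(n), k));
  for (int j = 0; j < total; ++j) {        // one new rule per value of j
    int rest = j;
    std::vector<int> choice(k);
    for (int pos = 0; pos < k; ++pos) {    // base-n digits of j, least significant first
      choice[pos] = rest % n;
      rest /= n;
    }
    std::cout << "rule " << j << ":";
    for (int pos = 0; pos < k; ++pos) std::cout << " X+" << choice[pos];
    std::cout << "\n";
  }
  return 0;
}

For n = 2 and k = 3 this prints the eight substitution patterns, matching the n^k = cnt_newrules count computed in the code below.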
\ No newline at end of file diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.out Binary files differdeleted file mode 100755 index 0467acf0..00000000 --- a/gi/scfg/abc/a.out +++ /dev/null diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc deleted file mode 100644 index 016a0189..00000000 --- a/gi/scfg/abc/agrammar.cc +++ /dev/null @@ -1,489 +0,0 @@ -#include <algorithm> -#include <utility> -#include <map> - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include "agrammar.h" -#include "../utils/Util.h" - - - -aTRule::aTRule(TRulePtr rule){ - - this -> e_ = rule->e_; - this -> f_ = rule->f_; - this ->lhs_ = rule->lhs_; - this -> arity_ = rule->arity_; - this -> scores_ = rule->scores_; - ResetScore(0.00000001); -} - -bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ - if (rule1->lhs_ != rule2->lhs_) return false; - if (rule1->f_.size() != rule2->f_.size()) return false; - if (rule1->e_.size() != rule2->e_.size()) return false; - - for (int i=0; i<rule1->f_.size(); i++) - if (rule1->f_.at(i) != rule2->f_.at(i)) return false; - for (int i=0; i<rule1->e_.size(); i++) - if (rule1->e_.at(i) != rule2->e_.at(i)) return false; - return true; -} - - -//const vector<TRulePtr> Grammar::NO_RULES; - -void aRemoveRule(vector<TRulePtr> & v, const TRulePtr & rule){ // remove rule from v if found - for (int i=0; i< v.size(); i++) - if (equal(v[i], rule )){ - // cout<<"erase rule from vector:"<<rule->AsString()<<endl; - v.erase(v.begin()+i); - } -} - -void aRemoveRule(vector<NTRule> & v, const NTRule & ntrule){ // remove rule from v if found - for (int i=0; i< v.size(); i++) - if (equal(v[i].rule_, ntrule.rule_ )){ - // cout<<"erase rule from vector:"<<rule->AsString()<<endl; - v.erase(v.begin()+i); - } -} - -struct aTextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - - void RemoveRule(const TRulePtr & rule ){ - aRemoveRule(rules_, rule); - } - - - int Arity() const { - return rules_.front()->Arity(); - } - - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - private: - vector<TRulePtr> rules_; -}; - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - map<WordID, aTextGrammarNode> tree_; - aTextRuleBin* rb_; -}; - -struct aTGImpl { - aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const string& file) : - max_span_(10), - pimpl_(new aTGImpl) { - ReadFromFile(file); -} - -const GrammarIter* aTextGrammar::GetRoot() const { - return &pimpl_->root_; -} - -void aTextGrammar::SetGoalNT(const string & goal_str){ - goalID = TD::Convert(goal_str); - -} - -void getNTRule( const TRulePtr & rule, map<WordID, NTRule> & ntrule_map){ - - NTRule lhs_ntrule(rule, rule->lhs_ * -1); - ntrule_map[rule->lhs_ * -1] = lhs_ntrule; - - for (int i=0; i< (rule->f_).size(); i++) - if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ - NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); - ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; - 
} -} - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - } - - //add the rule to lhs_rules_ - lhs_rules_[rule->lhs_* -1].push_back(rule); - - //add the rule to nt_rules_ - map<WordID, NTRule> ntrule_map; - getNTRule (rule, ntrule_map); - for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ - nt_rules_[it->first].push_back(it->second); - } -} - -void aTextGrammar::RemoveRule(const TRulePtr & rule){ - // cout<<"Remove rule: "<<rule->AsString()<<endl; - if (rule->IsUnary()) { - aRemoveRule(rhs2unaries_[rule->f().front()], rule); - aRemoveRule(unaries_, rule); - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; -// if (cur->rb_ == NULL) -// cur->rb_ = new aTextRuleBin; - cur->rb_->RemoveRule(rule); - } - - //remove rules from lhs_rules_ - - aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); - - - //remove the rule from nt_rules_ - map<WordID, NTRule> ntrule_map; - getNTRule (rule, ntrule_map); - for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ - aRemoveRule(nt_rules_[it->first], it->second); - } - -} - -void aTextGrammar::RemoveNonterminal(WordID wordID){ - vector<NTRule> rules = nt_rules_[wordID]; -// // remove the nonterminal from ntrules_ - nt_rules_.erase(wordID); - for (int i =0; i<rules.size(); i++) - RemoveRule(rules[i].rule_); - sum_probs_.erase(wordID); - cnt_rules.erase(wordID); - -} - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - - - -void aTextGrammar::AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts){ - - vector<NTRule> rules = nt_rules_[nt_old]; - - // cout<<"\n\n\n start add splitting rules"<<endl; - - const double epsilon = 0.001; - for (int i=0; i<rules.size(); i++){ - NTRule old_rule = rules.at(i); - vector<int> ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old - //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' - //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, - //hence the rules' new probabilities have to divide to cnt also - // cout<<"splitting NT in rule "<<old_rule.rule_->AsString()<<endl; - -// cout<<"nt position in the rules"<<endl; -// for (int j=0; j<ntPos.size();j++) cout<<ntPos[j]<<" "; cout<<endl; - - int cnt_newrules = pow( nts.size(), ntPos.size() ); - // cout<<"cnt_newrules="<<cnt_newrules<<endl; - - double log_nts_size = log(nts.size()); - - - map<WordID, int> cnt_addepsilon; //cnt_addepsilon and cont_minusepsilon to track the number of rules epsilon is added or minus for each lhs nonterminal, ideally we want these two numbers are equal - map<WordID, int> cnt_minusepsilon; - cnt_addepsilon[old_rule.rule_->lhs_] = 0; - cnt_minusepsilon[old_rule.rule_->lhs_] = 0; - for (int j =0; j<nts.size(); j++) { cnt_addepsilon[nts[j] ] = 0; cnt_minusepsilon[nts[j] ] = 0;} - - - for (int j=0; j<cnt_newrules; j++){ //each j represents a new rule - //convert j to a vector of size ntPos.size(), each entry in the vector >=0 and <nts.size() - int mod = nts.size(); - vector <int> j_vector(ntPos.size(), 0); //initiate the vector to all 
0 - int j_tmp =j; - for (int k=0; k<ntPos.size(); k++){ - j_vector[k] = j_tmp % mod; - j_tmp = (j_tmp - j_vector[k]) / mod; - } - // cout<<"print vector j_vector"<<endl; - // for (int k=0; k<ntPos.size();k++) cout<<j_vector[k]<<" "; cout<<endl; - //now use the vector to create a new rule - TRulePtr newrule(new aTRule()); - - newrule -> e_ = (old_rule.rule_)->e_; - newrule -> f_ = old_rule.rule_->f_; - newrule->lhs_ = old_rule.rule_->lhs_; - newrule -> arity_ = old_rule.rule_->arity_; - newrule -> scores_ = old_rule.rule_->scores_; - - // cout<<"end up update score\n"; - if (ntPos[0] == -1){ //update the lhs - newrule->lhs_ = nts[j_vector[0]] * -1; - - //score has to randomly add/minus a small epsilon to break the balance - if (nts.size() >1 && ntPos.size() >1){ - // cout<<"start to add/minus epsilon"<<endl; - if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon - newrule-> scores_ -= epsilon; - else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) - newrule-> scores_ += epsilon; - else{ - double random = rand()/RAND_MAX; - if (random > .5){ - newrule-> scores_ += epsilon; - cnt_addepsilon[newrule->lhs_]++; - } - else{ - newrule-> scores_ -= epsilon; - cnt_minusepsilon[newrule->lhs_]++; - } - } - } - - - for (int k=1; k<ntPos.size(); k++){//update f_ - // cout<<"ntPos[k]="<<ntPos[k]<<endl; - newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; //update the ntPos[k-1]-th nonterminal in f_ to the j_vector[k] NT in nts - } - newrule -> scores_ += (ntPos.size() -1) * log_nts_size; - - - } - else{ - //score has to randomly add/minus a small epsilon to break the balance - if ( ntPos.size() >0 && nts.size()>1){ - // cout<<"start to add/minus epsilon"<<endl; - if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon - newrule-> scores_ -= epsilon; - else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 ) - newrule-> scores_ += epsilon; - else{ - double random = rand()/RAND_MAX; - if (random > .5){ - newrule-> scores_ += epsilon; - cnt_addepsilon[newrule->lhs_]++; - } - else{ - newrule-> scores_ -= epsilon; - cnt_minusepsilon[newrule->lhs_]++; - } - } - } - - - for (int k=0; k<ntPos.size(); k++){ //update f_ - // cout<<"ntPos[k]="<<ntPos[k]<<endl; - newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; - } - newrule -> scores_ += ntPos.size() * log_nts_size; - } - this->AddRule (newrule); - }//add new rules for each grammar rules - - } //iterate through all grammar rules - -} - - -void aTextGrammar::splitNonterminal(WordID wordID){ - - //first added the splits nonterminal into the TD dictionary - - string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split - - vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits - for (int i =0; i< this->max_split_; i++){ - string split_str = old_str + "+" + itos(i); - WordID splitID = TD::Convert(split_str); - v_splits.push_back(splitID); - - } - - // grSplitNonterminals[wordID] = v_splits; - - //print split nonterminas of wordID - // v_splits = grSplitNonterminals[wordID]; - // cout<<"print split nonterminals\n"; - // for (int i =0; i<v_splits.size(); i++) - // cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - - AddSplitNonTerminal(wordID, v_splits); - RemoveNonterminal(wordID); - - // grSplitNonterminals.erase (grSplitNonterminals.find(WordID) ); - - if 
(wordID == goalID){ //add rule X-> X1; X->X2,... if X is the goal NT - for (int i =0; i<v_splits.size(); i++){ - TRulePtr rule (new aTRule()); - rule ->lhs_ = goalID * -1; - rule ->f_.push_back(v_splits[i] * -1); - rule->e_.push_back(0); - - rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); - AddRule(rule); - } - - } - -} - - -void aTextGrammar::splitAllNonterminals(){ - map<WordID, vector<TRulePtr> >::const_iterator it; - vector<WordID> v ; // WordID >0 - for (it = lhs_rules_.begin(); it != lhs_rules_.end(); it++) //iterate through all nts - if (it->first != goalID || lhs_rules_.size() ==1) - v.push_back(it->first); - - for (int i=0; i< v.size(); i++) - splitNonterminal(v[i]); -} - - -void aTextGrammar::PrintAllRules(const string & filename) const{ - - - cerr<<"print grammar to "<<filename<<endl; - - ofstream outfile(filename.c_str()); - if (!outfile.good()) { - cerr << "error opening output file " << filename << endl; - exit(1); - } - - map<WordID, vector<TRulePtr > >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - - vector<TRulePtr> v = it-> second; - for (int i =0; i< v.size(); i++){ - outfile<<v[i]->AsString()<<"\t"<<endl; - } - } -} - - -void aTextGrammar::ResetScore(){ - - map<WordID, vector<TRulePtr > >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - vector<TRulePtr> v = it-> second; - for (int i =0; i< v.size(); i++){ - // cerr<<"Reset score of Rule "<<v[i]->AsString()<<endl; - boost::static_pointer_cast<aTRule>(v[i])->ResetScore(alpha_ /v.size()); - } - lhs_rules_[it->first] = v; - sum_probs_[it->first] = alpha_; - } - -} - -void aTextGrammar::UpdateScore(){ - - map<WordID, vector<TRulePtr > >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - vector<TRulePtr> v = it-> second; - for (int i =0; i< v.size(); i++){ - boost::static_pointer_cast<aTRule>(v[i])->UpdateScore(sum_probs_[it->first] ); - } - - // cerr<<"sum_probs_[it->first] ="<<sum_probs_[it->first] <<endl; - sum_probs_[it->first] = alpha_; - } - -} - - -void aTextGrammar::UpdateHgProsteriorProb(Hypergraph & hg){ - std::vector<prob_t> posts ; - - prob_t goal_score = hg.ComputeEdgePosteriors(1, &posts); - for (int i =0; i<posts.size(); i++){ - - //cout<<posts[i]<<endl; - Hypergraph::Edge& e = hg.edges_[i]; - string goalstr("Goal"); - string str_lhs = TD::Convert(e.rule_->lhs_ * -1); - - if (str_lhs.find(goalstr) != string::npos) - continue; - - // cerr<<e.rule_->AsString()<<endl; - // cerr<<e.rule_->parent_rule_->AsString()<<endl; - - boost::static_pointer_cast<aTRule>(e.rule_->parent_rule_)->AddProb(posts[i] / goal_score); - // cerr<<"add count for rule\n"; -// cerr<<"posts[i]="<<posts[i]<<" goal_score="<<goal_score<<endl; -// cerr<<"posts[i] /goal_score="<<(posts[i] /goal_score)<<endl; - sum_probs_[e.rule_->parent_rule_->lhs_* -1 ] += posts[i] /goal_score; - - } - - -} - - -void aTextGrammar::PrintNonterminalRules(WordID nt) const{ - vector< NTRule > v; - map<WordID, vector<NTRule> >::const_iterator mit= nt_rules_.find(nt); - if (mit == nt_rules_.end()) - return; - - v = mit->second; - - for (vector<NTRule>::const_iterator it = v.begin(); it != v.end(); it++) - cout<<it->rule_->AsString()<<endl; -} - -static void AddRuleHelper(const TRulePtr& new_rule, void* extra) { - aTRule *p = new aTRule(new_rule); - - static_cast<aTextGrammar*>(extra)->AddRule(TRulePtr(p)); -} - -void aTextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - RuleLexer::ReadRules(in.stream(), 
&AddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - diff --git a/gi/scfg/abc/agrammar.h b/gi/scfg/abc/agrammar.h deleted file mode 100644 index 0910aae6..00000000 --- a/gi/scfg/abc/agrammar.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef AGRAMMAR_H_ -#define AGRAMMAR_H_ - -#include "grammar.h" -#include "hg.h" - - -using namespace std; - -class aTRule: public TRule{ - public: - aTRule() : TRule(){ResetScore(0.00000001); } - aTRule(TRulePtr rule_); - - void ResetScore(double initscore){//cerr<<"Reset Score "<<this->AsString()<<endl; - sum_scores_.set_value(FD::Convert("Prob"), initscore);} - void AddProb(double p ){ - // cerr<<"in AddProb p="<<p<<endl; - // cerr<<"prob sumscores ="<<sum_scores_[FD::Convert("Prob")]<<endl; - sum_scores_.add_value(FD::Convert("Prob"), p); - // cerr<<"after AddProb\n"; - } - - void UpdateScore(double sumprob){ - double minuslogp = 0 - log( sum_scores_.value(FD::Convert("Prob")) /sumprob); - if (sumprob< sum_scores_.value(FD::Convert("Prob"))){ - cerr<<"UpdateScore sumprob="<<sumprob<< " sum_scores_.value(FD::Convert(\"Prob\"))="<< sum_scores_.value(FD::Convert("Prob"))<< this->AsString()<<endl; - exit(1); - } - this->scores_.set_value(FD::Convert("MinusLogP"), minuslogp); - - } - private: - SparseVector<double> sum_scores_; -}; - - -class aTGImpl; -struct NTRule{ - - NTRule(){}; - NTRule(const TRulePtr & rule, WordID nt){ - nt_ = nt; - rule_ = rule; - - if (rule->lhs_ * -1 == nt) - ntPos_.push_back(-1); - - for (int i=0; i< rule->f().size(); i++) - if (rule->f().at(i) * -1 == nt) - ntPos_.push_back(i); - - - } - - TRulePtr rule_; - WordID nt_; //the labelID of the nt (nt_>0); - - vector<int> ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_() - //i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP - -}; - - -struct aTextGrammar : public Grammar { - aTextGrammar(); - aTextGrammar(const std::string& file); - void SetMaxSpan(int m) { max_span_ = m; } - - virtual const GrammarIter* GetRoot() const; - void AddRule(const TRulePtr& rule); - void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j, int distance) const; - const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; - - void AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts); - void setMaxSplit(int max_split); - void splitNonterminal(WordID wordID); - - - void splitAllNonterminals(); - - void PrintAllRules(const string & filename) const; - void PrintNonterminalRules(WordID nt) const; - void SetGoalNT(const string & goal_str); - - void ResetScore(); - - void UpdateScore(); - - void UpdateHgProsteriorProb(Hypergraph & hg); - - void set_alpha(double alpha){alpha_ = alpha;} - private: - - void RemoveRule(const TRulePtr & rule); - void RemoveNonterminal(WordID wordID); - - int max_span_; - int max_split_; - boost::shared_ptr<aTGImpl> pimpl_; - - map <WordID, vector<TRulePtr> > lhs_rules_;// WordID >0 - map <WordID, vector<NTRule> > nt_rules_; - - map <WordID, double> sum_probs_; - map <WordID, double> cnt_rules; - - double alpha_; - - // map<WordID, vector<WordID> > grSplitNonterminals; - WordID goalID; -}; - - -#endif diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc deleted file mode 100644 index 33d70dfc..00000000 --- a/gi/scfg/abc/old_agrammar.cc +++ /dev/null @@ -1,383 +0,0 @@ -#include "agrammar.h" -#include "Util.h" - -#include <algorithm> -#include <utility> -#include <map> - 
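The agrammar.h interface above also carries the bookkeeping for the reestimation loop driven from scfg.cpp further down: ResetScore seeds every rule's expected count with alpha/|rules(LHS)|, AddProb accumulates posterior counts from each parsed sentence pair, and UpdateScore turns the accumulated count into the MinusLogP feature by normalising over all rules sharing a left-hand side. A minimal numeric illustration of that final step (the counts are made up):

// Illustrative sketch (not from the commit) of the normalisation done by
// aTRule::UpdateScore above: cost = -log(expected count / total for LHS).
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  // Expected counts for three rules sharing one LHS nonterminal,
  // e.g. as accumulated from hypergraph edge posteriors.
  std::vector<double> counts;
  counts.push_back(2.0);
  counts.push_back(1.5);
  counts.push_back(0.5);
  double sum = 0.0;
  for (std::size_t i = 0; i < counts.size(); ++i) sum += counts[i];
  for (std::size_t i = 0; i < counts.size(); ++i) {
    double minus_log_p = -std::log(counts[i] / sum);  // new MinusLogP value
    std::cout << "rule " << i << " MinusLogP=" << minus_log_p << "\n";
  }
  return 0;
}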
-#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include <iostream> -#include <fstream> - -map<WordID, vector<WordID> > grSplitNonterminals; -//const vector<TRulePtr> Grammar::NO_RULES; - - -// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){ -// vector<TRulePtr> vRules; //outputs - -// vector<WordID> f = rule->f(); -// vector<vector<WordID> > newfvector; -// for (int i =0; i< f.size(); i++){ -// if (f[i] == wordID){ -// newfvector.push_back(v); -// } -// else -// newfvector.push_back(vector<WordID> (1, f[i])); -// } - -// //now creates new rules; - - -// return vRules; -// } - - -struct aTextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - int Arity() const { - return rules_.front()->Arity(); - } - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - - - vector<TRulePtr> getRules(){ return rules_;} - - - void substituteF(vector<WordID> & f_path, map<WordID, vector<WordID> > & grSplitNonterminals){ - //this substituteF method is different with substituteF procedure found in cdec code; - // - //aTextRuleBin has a collection of rules with the same f() on the rhs, - //substituteF() replaces the f_ of all the rules with f_path vector, - //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals - - vector <TRulePtr> newrules; - for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){ - assert(f_path.size() == (*it)->f_.size()); - - if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){ - (*it)->f_ = f_path; - } - else{ // split the lhs NT, - vector<WordID> new_lhs = grSplitNonterminals[ (*it)->lhs_ ]; - for (vector<WordID>::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){ - TRulePtr newrule; - newrule -> e_ = (*it)->e_; - newrule -> f_ = (*it)->f_; - newrule->lhs_ = *vit; - newrule -> scores_ = (*it)->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - } - rules_.erase(it); - } - } - - //now add back newrules(output of splitting lhs_) to rules_ - rules_.insert(newrules.begin(),newrules.begin(), newrules.end()); - } - -private: - vector<TRulePtr> rules_; -}; - - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - - aTextGrammarNode(const aTextGrammarNode & a){ - nonterminals_ = a.nonterminals_; - tree_ = a.tree_; - rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over - } - - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - void DFS(); - - void visit (); //todo: make this as a function pointer - - vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node; - set<WordID> nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0 - map<WordID, aTextGrammarNode> tree_; - aTextRuleBin* rb_; - - void print_path(){ //for debug only - cout<<"path="<<endl; - for (int i =0; i< path_.size(); i++) - cout<<path_[i]<<" "; - cout<<endl; - } -}; - -void 
aTextGrammarNode::DFS(){ //because the grammar is a tree without circle, DFS does not require to color the nodes - - visit(); - - for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin(); it != tree_.end(); it++){ - (it->second).DFS(); - } -} - - -void aTextGrammarNode::visit( ){ - - cout<<"start visit()"<<endl; - - cout<<"got grSplitNonterminals"<<endl; -// if (grSplitNonterminals.find(*it) != grSplitNonterminals.end()){ //split this *it nonterminal -// vector<WordID> vsplits = grSplitNonterminals[*it]; //split *it into vsplits - - //iterate through next terminals/nonterminals in tree_ - vector<WordID> tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_ - - for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){ - cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl; - - map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 ); - - if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split - cout<<"not split\n"; - tree_[it->first ].path_ = path_; - tree_[it->first ].path_.push_back(it->first); - cout<<"in visit() tree_[it->first ].path_= "; - tree_[it->first ].print_path(); - continue; - } - - - cout<<"tmp2"; - vector<WordID> vsplits = grSplitNonterminals[it->first * -1]; - // vector<WordID> vsplits = git->second; - cout<<"tmp3"; - // vector<WordID> vsplits = agrammar_ ->splitNonterminals_[it->first * -1]; - cout <<"got vsplits"<<endl; - for (int i =0 ; i<vsplits.size(); i++){ - // nonterminals_.insert(vsplits[i]); //add vsplits[i] into nonterminals_ of the current TextGrammarNode - tree_[vsplits[i] * -1] = aTextGrammarNode(tree_[it->first]); //cp the subtree to new nonterminal - tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees - tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1); - tree_[vsplits[i] * -1].print_path(); - } - - //remove the old node: - tobe_removedNTs.push_back(it->first); - - } - - for (int i =0; i<tobe_removedNTs.size(); i++) - tree_.erase(tobe_removedNTs[i]); - - if (tree_.size() ==0){ //the last (terminal/nonterminal - cout<<"inside visit(): the last terminal/nonterminal"<<endl; - rb_->substituteF(path_, grSplitNonterminals); - - } - cout<<"visit() end"<<endl; -} - -struct aTGImpl { - aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const std::string& file) : - max_span_(10), - pimpl_(new aTGImpl) { - ReadFromFile(file); -} - - -const GrammarIter* aTextGrammar::GetRoot() const { - return &pimpl_->root_; -} - - -void aTextGrammar::addNonterminal(WordID wordID){ - //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map<WordID, int>) nonterminals_ of grammar - //if the input parameter wordID<0 then do nothing - - if (wordID <0){ //it is a nonterminal - - map<WordID, int>::iterator it = nonterminals_.find(wordID * -1); - if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals) - nonterminals_[wordID * -1] = 1; - } -} - - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - //add the LHS nonterminal to nonterminals_ map - - this->addNonterminal(rule->lhs_); - - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - if (rule->f().front() <0) - //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a 
nonterminal then multiply by -1) - this->addNonterminal(rule->f().front()); - - - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i){ - if (rule->f_[i] <0){ - cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set - this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals - } - cur = &cur->tree_[rule->f_[i]]; - - } - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - - } -} - -static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) { - static_cast<aTextGrammar*>(extra)->AddRule(new_rule); -} - - -void aTextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - - -////Linh added - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - -void aTextGrammar::printAllNonterminals() const{ - for (map<WordID, int>::const_iterator it =nonterminals_.begin(); - it != nonterminals_.end(); it++){ - if (it->second >0){ - cout <<it->first<<"\t"<<TD::Convert(it->first)<<endl; - } - } - -} - - -void aTextGrammar::splitNonterminal(WordID wordID){ - - //first added the splits nonterminal into the TD dictionary - - string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split - - vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits - for (int i =0; i< this->max_split_; i++){ - string split_str = old_str + "+" + itos(i); - WordID splitID = TD::Convert(split_str); - v_splits.push_back(splitID); - nonterminals_[splitID] = 1; - } - - grSplitNonterminals[wordID] = v_splits; - //set wordID to be an inactive nonterminal - nonterminals_[wordID] = 0; - - //print split nonterminas of wordID - v_splits = grSplitNonterminals[wordID]; - cout<<"print split nonterminals\n"; - for (int i =0; i<v_splits.size(); i++) - cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - - - //now update in grammar rules and gramar tree: - vector<TRulePtr> newrules; - //first unary rules: - //iterate through unary rules - for (int i =0; i < unaries_.size(); i++){ - TRulePtr rule = unaries_[i]; - WordID lhs = rule.lhs_; - if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal - && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too - ){ - vector<WordID> rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals' - vector<WordID> lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals' - for (int k =0; k <rhs_nonterminals.size(); k++) - for (int j =0; j <lhs_nonterminals.size(); j++){ - TRulePtr newrule; - newrule -> e_ = rule->e_; - newrule -> f_ = rhs_nonterminals[k]->f_; - newrule->lhs_ = lhs_nonterminals[j]->lhs_; - newrule -> scores_ = rule->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - - //update - } - } - else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal - - - } - } - - // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){ - - // } - // 
if (rule->IsUnary()) { - // rhs2unaries_[rule->f().front()].push_back(rule); - // unaries_.push_back(rule); - // if (rule->f().front() <0) - // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - // this->addNonterminal(rule->f().front()); - - - pimpl_->root_.DFS(); - -} - - -// void aTextGrammar::splitNonterminal0(WordID wordID){ - -// TextGrammarNode* cur = &pimpl_->root_; -// for (int i = 0; i < rule->f_.size(); ++i) -// cur = &cur->tree_[rule->f_[i]]; - -// } - -void aTextGrammar::splitAllNonterminals(){ - - -} - diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h deleted file mode 100644 index d68c2548..00000000 --- a/gi/scfg/abc/old_agrammar.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _AGRAMMAR_H_ -#define _AGRAMMAR_H_ - -#include "grammar.h" - -using namespace std; - -class aTGImpl; - -struct aTextGrammar : public Grammar { - aTextGrammar(); - aTextGrammar(const std::string& file); - void SetMaxSpan(int m) { max_span_ = m; } - - virtual const GrammarIter* GetRoot() const; - void AddRule(const TRulePtr& rule); - void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j, int distance) const; - const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; - - void setMaxSplit(int max_split); - - void printAllNonterminals() const; - void addNonterminal(WordID wordID); - - void splitAllNonterminals(); - void splitNonterminal(WordID wordID); - - // inline map<WordID, vector<WordID> > & getSplitNonterminals(){return splitNonterminals_;} - // map<WordID, vector<WordID> > splitNonterminals_; - private: - int max_span_; - boost::shared_ptr<aTGImpl> pimpl_; - int max_split_; - - map<WordID, int> nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar - - - -}; - - - - -#endif diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp deleted file mode 100644 index 1e59fb4a..00000000 --- a/gi/scfg/abc/scfg.cpp +++ /dev/null @@ -1,277 +0,0 @@ -#include <iostream> -#include <fstream> - -#include <boost/shared_ptr.hpp> -#include <boost/pointer_cast.hpp> -#include "lattice.h" -#include "tdict.h" -#include "agrammar.h" -#include "bottom_up_parser.h" -#include "hg.h" -#include "hg_intersect.h" -#include "../utils/ParamsArray.h" - - -using namespace std; - -vector<string> src_corpus; -vector<string> tgt_corpus; - -bool openParallelCorpora(string & input_filename){ - ifstream input_file; - - input_file.open(input_filename.c_str()); - if (!input_file) { - cerr << "Cannot open input file " << input_filename << ". Exiting..." 
<< endl; - return false; - } - - int line =0; - while (!input_file.eof()) { - // get a line of source language data - // cerr<<"new line "<<ctr<<endl; - string str; - - getline(input_file, str); - line++; - if (str.length()==0){ - cerr<<" sentence number "<<line<<" is empty, skip the sentence\n"; - continue; - } - string delimiters("|||"); - - vector<string> v = tokenize(str, delimiters); - - if ( (v.size() != 2) and (v.size() != 3) ) { - cerr<<str<<endl; - cerr<<" source or target sentence is not found in sentence number "<<line<<" , skip the sentence\n"; - continue; - } - - src_corpus.push_back(v[0]); - tgt_corpus.push_back(v[1]); - } - return true; -} - - -typedef aTextGrammar aGrammar; -aGrammar * load_grammar(string & grammar_filename){ - cerr<<"start_load_grammar "<<grammar_filename<<endl; - - aGrammar * test = new aGrammar(grammar_filename); - - return test; -} - -Lattice convertSentenceToLattice(const string & str){ - - std::vector<WordID> vID; - TD::ConvertSentence(str , &vID); - Lattice lsentence; - lsentence.resize(vID.size()); - - for (int i=0; i<vID.size(); i++){ - - lsentence[i].push_back( LatticeArc(vID[i], 0.0, 1) ); - } - - // if(!lsentence.IsSentence()) - // cout<<"not a sentence"<<endl; - - return lsentence; - -} - -bool parseSentencePair(const string & goal_sym, const string & src, const string & tgt, GrammarPtr & g, Hypergraph &hg){ - - - // cout<<" Start parse the sentence pairs\n"<<endl; - Lattice lsource = convertSentenceToLattice(src); - - //parse the source sentence by the grammar - - vector<GrammarPtr> grammars(1, g); - - ExhaustiveBottomUpParser parser = ExhaustiveBottomUpParser(goal_sym, grammars); - - if (!parser.Parse(lsource, &hg)){ - - cerr<<"source sentence is not parsed by the grammar!"<<endl; - return false; - } - - //intersect the hg with the target sentence - Lattice ltarget = convertSentenceToLattice(tgt); - - //forest.PrintGraphviz(); - if (!HG::Intersect(ltarget, & hg)) return false; - - SparseVector<double> reweight; - - reweight.set_value(FD::Convert("MinusLogP"), -1 ); - hg.Reweight(reweight); - - return true; - -} - - - - -int main(int argc, char** argv){ - - ParamsArray params(argc, argv); - params.setDescription("scfg models"); - - params.addConstraint("grammar_file", "grammar file (default ./grammar.pr )", true); // optional - - params.addConstraint("input_file", "parallel input file (default ./parallel_corpora)", true); //optional - - params.addConstraint("output_file", "grammar output file (default ./grammar_output)", true); //optional - - params.addConstraint("goal_symbol", "top nonterminal symbol (default: X)", true); //optional - - params.addConstraint("split", "split one nonterminal into 'split' nonterminals (default: 2)", true); //optional - - params.addConstraint("prob_iters", "number of iterations (default: 10)", true); //optional - - params.addConstraint("split_iters", "number of splitting iterations (default: 3)", true); //optional - - params.addConstraint("alpha", "alpha (default: 0.1)", true); //optional - - if (!params.runConstraints("scfg")) { - return 0; - } - cerr<<"get parametters\n\n\n"; - - - string grammar_file = params.asString("grammar_file", "./grammar.pr"); - - string input_file = params.asString("input_file", "parallel_corpora"); - - string output_file = params.asString("output_file", "grammar_output"); - - string goal_sym = params.asString("goal_symbol", "X"); - - int max_split = atoi(params.asString("split", "2").c_str()); - - int prob_iters = atoi(params.asString("prob_iters", "2").c_str()); - int split_iters = 
atoi(params.asString("split_iters", "1").c_str()); - double alpha = atof(params.asString("alpha", ".001").c_str()); - - ///// - cerr<<"grammar_file ="<<grammar_file<<endl; - cerr<<"input_file ="<< input_file<<endl; - cerr<<"output_file ="<< output_file<<endl; - cerr<<"goal_sym ="<< goal_sym<<endl; - cerr<<"max_split ="<< max_split<<endl; - cerr<<"prob_iters ="<< prob_iters<<endl; - cerr<<"split_iters ="<< split_iters<<endl; - cerr<<"alpha ="<< alpha<<endl; - ////////////////////////// - - cerr<<"\n\nLoad parallel corpus...\n"; - if (! openParallelCorpora(input_file)) - exit(1); - - cerr<<"Load grammar file ...\n"; - aGrammar * agrammar = load_grammar(grammar_file); - agrammar->SetGoalNT(goal_sym); - agrammar->setMaxSplit(max_split); - agrammar->set_alpha(alpha); - - srand(123); - - GrammarPtr g( agrammar); - Hypergraph hg; - - int data_size = src_corpus.size(); - int cnt_unparsed =0; - for (int i =0; i <split_iters; i++){ - - cerr<<"Split Nonterminals, iteration "<<(i+1)<<endl; - agrammar->PrintAllRules(output_file+".s" + itos(i+1)); - agrammar->splitAllNonterminals(); - - //vector<string> src_corpus; - //vector<string> tgt_corpus; - - for (int j=0; j<prob_iters; j++){ - cerr<<"reset grammar score\n"; - agrammar->ResetScore(); - // cerr<<"done reset grammar score\n"; - for (int k=0; k <data_size; k++){ - string src = src_corpus[k]; - - string tgt = tgt_corpus[k]; - cerr <<"parse sentence pair: "<<src<<" ||| "<<tgt<<endl; - - if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ - cerr<<"target sentence is not parsed by the grammar!\n"; - //return 1; - cnt_unparsed++; - continue; - - } - - cerr<<"update edge posterior prob"<<endl; - boost::static_pointer_cast<aGrammar>(g)->UpdateHgProsteriorProb(hg); - hg.clear(); - if (k%1000 ==0 ) cerr<<"sentences "<<k<<endl; - } - cerr<<"cnt_unparased="<<cnt_unparsed<<endl; - boost::static_pointer_cast<aGrammar>(g)->UpdateScore(); - } - boost::static_pointer_cast<aGrammar>(g)->PrintAllRules(output_file+".e" + itos(i+1)); - } - - - - - - - - - - // // agrammar->ResetScore(); - // // agrammar->UpdateScore(); - // if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ - // cerr<<"target sentence is not parsed by the grammar!\n"; - // return 1; - - // } - // // hg.PrintGraphviz(); - // //hg.clear(); - - // agrammar->PrintAllRules(); - // /*split grammar*/ - // cout<<"split NTs\n"; - // cerr<<"first of all write all nonterminals"<<endl; - // // agrammar->printAllNonterminals(); - // cout<<"after split nonterminal"<<endl; - // agrammar->PrintAllRules(); - // Hypergraph hg1; - // if (! 
parseSentencePair(goal_sym, src, tgt, g, hg1) ){ - // cerr<<"target sentence is not parsed by the grammar!\n"; - // return 1; - - // } - - // hg1.PrintGraphviz(); - - - // agrammar->splitNonterminal(15); - // cout<<"after split nonterminal"<<TD::Convert(15)<<endl; - // agrammar->PrintAllRules(); - - - /*load training corpus*/ - - - /*for each sentence pair in training corpus*/ - - // forest.PrintGraphviz(); - /*calculate expected count*/ - -} diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp deleted file mode 100644 index 967a601d..00000000 --- a/gi/scfg/abc/tmp.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include <iostream> -#include <set> -#include <vector> -using namespace std; - -int x = 5; - -class A{A(){x++;}}; -// { -// int a_; - -// }; - -class B: public A{ - - int b_; -}; - -int main(){ - - cout<<"Hello World"; - set<int> s; - - s.insert(1); - s.insert(2); - - x++; - cout<<"x="<<x<<endl; - - vector<int> t; - t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4); - for(vector<int>::iterator it = t.begin(); it != t.end(); it++){ - if (*it ==2) t.erase(it); - cout <<*it<<endl; - } -} diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index 9c75c247..957f4f43 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -74,6 +74,7 @@ if $(with-macports) { } else { with-boost = [ option.get "with-boost" ] ; + with-boost ?= [ os.environ "BOOST_ROOT" ] ; if $(with-boost) { L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ; boost-search = <search>$(with-boost)/lib <search>$(with-boost)/lib64 ; @@ -86,6 +87,8 @@ else { boost-include = ; } } + +requirements = ; #Are we linking static binaries against shared boost? boost-auto-shared = [ auto-shared "boost_program_options" : $(L-boost-search) ] ; @@ -95,8 +98,15 @@ rule boost-lib ( name macro ) { #versions of boost do not have -mt tagged versions of all libraries. Sadly, #boost.jam does not handle this correctly. if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt" ] { - lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name) ; - lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt ; +# if [ test_flags $(L-boost-search)" -lboost_"$(name) ] { +# lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name) ; +# lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt ; +# } else { + if ! <threading>multi in $(requirements) { + requirements += <threading>multi ; + } + lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt ; +# } } else { lib inner_boost_$(name) : : $(boost-search) <name>boost_$(name) ; } @@ -148,7 +158,6 @@ rule external-lib ( name : search-path * ) { local ignored = @($(build-log):E=$(script)) ; } -requirements = ; { #Boost jam's static clang is buggy. 
requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <toolset>clang:<link>shared ; @@ -173,9 +182,10 @@ if $(prefix) { } else { prefix = $(TOP)/dist$(GITTAG) ; } + +bindir = [ option.get "bindir" : $(prefix)/bin ] ; +libdir = [ option.get "libdir" : $(prefix)/lib ] ; rule install-bin-libs ( deps * ) { - local bindir = [ option.get "bindir" : $(prefix)/bin ] ; - local libdir = [ option.get "libdir" : $(prefix)/lib ] ; install prefix-bin : $(deps) : <location>$(bindir) <install-dependencies>on <install-type>EXE <link>shared:<dll-path>$(libdir) ; install prefix-lib : $(deps) : <location>$(libdir) <install-dependencies>on <install-type>LIB <link>shared:<dll-path>$(libdir) ; } diff --git a/klm/lm/Jamfile b/klm/lm/Jamfile index b84dbb35..b1971d88 100644 --- a/klm/lm/Jamfile +++ b/klm/lm/Jamfile @@ -1,4 +1,4 @@ -lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc virtual_interface.cc vocab.cc ../util//kenutil : <include>.. : : <include>.. <library>../util//kenutil ; +lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc value_build.cc virtual_interface.cc vocab.cc ../util//kenutil : <include>.. : : <include>.. <library>../util//kenutil ; import testing ; diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am index 54fd7f68..a12c5f03 100644 --- a/klm/lm/Makefile.am +++ b/klm/lm/Makefile.am @@ -24,6 +24,7 @@ libklm_a_SOURCES = \ search_trie.cc \ trie.cc \ trie_sort.cc \ + value_build.cc \ virtual_interface.cc \ vocab.cc diff --git a/klm/lm/binary_format.cc b/klm/lm/binary_format.cc index 4796f6d1..a56e998e 100644 --- a/klm/lm/binary_format.cc +++ b/klm/lm/binary_format.cc @@ -57,7 +57,7 @@ struct Sanity { } }; -const char *kModelNames[6] = {"hashed n-grams with probing", "hashed n-grams with sorted uniform find", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"}; +const char *kModelNames[6] = {"probing hash tables", "probing hash tables with rest costs", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"}; std::size_t TotalHeaderSize(unsigned char order) { return ALIGN8(sizeof(Sanity) + sizeof(FixedWidthParameters) + sizeof(uint64_t) * order); diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index 8cbb69d0..c4a01cb4 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -66,16 +66,28 @@ uint8_t ParseBitCount(const char *from) { return val; } +void ParseFileList(const char *from, std::vector<std::string> &to) { + to.clear(); + while (true) { + const char *i; + for (i = from; *i && *i != ' '; ++i) {} + to.push_back(std::string(from, i - from)); + if (!*i) break; + from = i + 1; + } +} + void ShowSizes(const char *file, const lm::ngram::Config &config) { std::vector<uint64_t> counts; util::FilePiece f(file); lm::ReadARPACounts(f, counts); - std::size_t sizes[5]; + std::size_t sizes[6]; sizes[0] = ProbingModel::Size(counts, config); - sizes[1] = TrieModel::Size(counts, config); - sizes[2] = QuantTrieModel::Size(counts, config); - sizes[3] = ArrayTrieModel::Size(counts, config); - sizes[4] = QuantArrayTrieModel::Size(counts, config); + sizes[1] = RestProbingModel::Size(counts, config); + sizes[2] = TrieModel::Size(counts, config); + sizes[3] = QuantTrieModel::Size(counts, config); + sizes[4] = 
ArrayTrieModel::Size(counts, config); + sizes[5] = QuantArrayTrieModel::Size(counts, config); std::size_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(size_t)); std::size_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(size_t)); std::size_t divide; @@ -99,10 +111,11 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) { for (long int i = 0; i < length - 2; ++i) std::cout << ' '; std::cout << prefix << "B\n" "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n" - "trie " << std::setw(length) << (sizes[1] / divide) << " without quantization\n" - "trie " << std::setw(length) << (sizes[2] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n" - "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n" - "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n"; + "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r -p " << config.probing_multiplier << "\n" + "trie " << std::setw(length) << (sizes[2] / divide) << " without quantization\n" + "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n" + "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n" + "trie " << std::setw(length) << (sizes[5] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n"; } void ProbingQuantizationUnsupported() { @@ -118,10 +131,10 @@ int main(int argc, char *argv[]) { using namespace lm::ngram; try { - bool quantize = false, set_backoff_bits = false, bhiksha = false, set_write_method = false; + bool quantize = false, set_backoff_bits = false, bhiksha = false, set_write_method = false, rest = false; lm::ngram::Config config; int opt; - while ((opt = getopt(argc, argv, "q:b:a:u:p:t:m:w:si")) != -1) { + while ((opt = getopt(argc, argv, "q:b:a:u:p:t:m:w:sir:")) != -1) { switch(opt) { case 'q': config.prob_bits = ParseBitCount(optarg); @@ -164,6 +177,11 @@ int main(int argc, char *argv[]) { case 'i': config.positive_log_probability = lm::SILENT; break; + case 'r': + rest = true; + ParseFileList(optarg, config.rest_lower_files); + config.rest_function = Config::REST_LOWER; + break; default: Usage(argv[0]); } @@ -174,35 +192,48 @@ int main(int argc, char *argv[]) { } if (optind + 1 == argc) { ShowSizes(argv[optind], config); - } else if (optind + 2 == argc) { + return 0; + } + const char *model_type; + const char *from_file; + + if (optind + 2 == argc) { + model_type = "probing"; + from_file = argv[optind]; config.write_mmap = argv[optind + 1]; - if (quantize || set_backoff_bits) ProbingQuantizationUnsupported(); - ProbingModel(argv[optind], config); } else if (optind + 3 == argc) { - const char *model_type = argv[optind]; - const char *from_file = argv[optind + 1]; + model_type = argv[optind]; + from_file = argv[optind + 1]; config.write_mmap = argv[optind + 2]; - if (!strcmp(model_type, "probing")) { - if 
(!set_write_method) config.write_method = Config::WRITE_AFTER; - if (quantize || set_backoff_bits) ProbingQuantizationUnsupported(); + } else { + Usage(argv[0]); + } + if (!strcmp(model_type, "probing")) { + if (!set_write_method) config.write_method = Config::WRITE_AFTER; + if (quantize || set_backoff_bits) ProbingQuantizationUnsupported(); + if (rest) { + RestProbingModel(from_file, config); + } else { ProbingModel(from_file, config); - } else if (!strcmp(model_type, "trie")) { - if (!set_write_method) config.write_method = Config::WRITE_MMAP; - if (quantize) { - if (bhiksha) { - QuantArrayTrieModel(from_file, config); - } else { - QuantTrieModel(from_file, config); - } + } + } else if (!strcmp(model_type, "trie")) { + if (rest) { + std::cerr << "Rest + trie is not supported yet." << std::endl; + return 1; + } + if (!set_write_method) config.write_method = Config::WRITE_MMAP; + if (quantize) { + if (bhiksha) { + QuantArrayTrieModel(from_file, config); } else { - if (bhiksha) { - ArrayTrieModel(from_file, config); - } else { - TrieModel(from_file, config); - } + QuantTrieModel(from_file, config); } } else { - Usage(argv[0]); + if (bhiksha) { + ArrayTrieModel(from_file, config); + } else { + TrieModel(from_file, config); + } } } else { Usage(argv[0]); diff --git a/klm/lm/config.cc b/klm/lm/config.cc index dbe762b3..f9d988ca 100644 --- a/klm/lm/config.cc +++ b/klm/lm/config.cc @@ -19,6 +19,7 @@ Config::Config() : write_mmap(NULL), write_method(WRITE_AFTER), include_vocab(true), + rest_function(REST_MAX), prob_bits(8), backoff_bits(8), pointer_bhiksha_bits(22), diff --git a/klm/lm/config.hh b/klm/lm/config.hh index 01b75632..739cee9c 100644 --- a/klm/lm/config.hh +++ b/klm/lm/config.hh @@ -1,11 +1,13 @@ #ifndef LM_CONFIG__ #define LM_CONFIG__ -#include <iosfwd> - #include "lm/lm_exception.hh" #include "util/mmap.hh" +#include <iosfwd> +#include <string> +#include <vector> + /* Configuration for ngram model. Separate header to reduce pollution. */ namespace lm { @@ -63,23 +65,33 @@ struct Config { const char *temporary_directory_prefix; // Level of complaining to do when loading from ARPA instead of binary format. - typedef enum {ALL, EXPENSIVE, NONE} ARPALoadComplain; + enum ARPALoadComplain {ALL, EXPENSIVE, NONE}; ARPALoadComplain arpa_complain; // While loading an ARPA file, also write out this binary format file. Set // to NULL to disable. const char *write_mmap; - typedef enum { + enum WriteMethod { WRITE_MMAP, // Map the file directly. WRITE_AFTER // Write after we're done. - } WriteMethod; + }; WriteMethod write_method; // Include the vocab in the binary file? Only effective if write_mmap != NULL. bool include_vocab; + // Left rest options. Only used when the model includes rest costs. + enum RestFunction { + REST_MAX, // Maximum of any score to the left + REST_LOWER, // Use lower-order files given below. + }; + RestFunction rest_function; + // Only used for REST_LOWER. + std::vector<std::string> rest_lower_files; + + // Quantization options. Only effective for QuantTrieModel. 
One value is // reserved for each of prob and backoff, so 2^bits - 1 buckets will be used diff --git a/klm/lm/left.hh b/klm/lm/left.hh index a07f9803..c00af88a 100644 --- a/klm/lm/left.hh +++ b/klm/lm/left.hh @@ -39,7 +39,7 @@ #define LM_LEFT__ #include "lm/max_order.hh" -#include "lm/model.hh" +#include "lm/state.hh" #include "lm/return.hh" #include "util/murmur_hash.hh" @@ -49,72 +49,6 @@ namespace lm { namespace ngram { -struct Left { - bool operator==(const Left &other) const { - return - (length == other.length) && - pointers[length - 1] == other.pointers[length - 1]; - } - - int Compare(const Left &other) const { - if (length != other.length) return length < other.length ? -1 : 1; - if (pointers[length - 1] > other.pointers[length - 1]) return 1; - if (pointers[length - 1] < other.pointers[length - 1]) return -1; - return 0; - } - - bool operator<(const Left &other) const { - if (length != other.length) return length < other.length; - return pointers[length - 1] < other.pointers[length - 1]; - } - - void ZeroRemaining() { - for (uint64_t * i = pointers + length; i < pointers + kMaxOrder - 1; ++i) - *i = 0; - } - - unsigned char length; - uint64_t pointers[kMaxOrder - 1]; -}; - -inline size_t hash_value(const Left &left) { - return util::MurmurHashNative(&left.length, 1, left.pointers[left.length - 1]); -} - -struct ChartState { - bool operator==(const ChartState &other) { - return (left == other.left) && (right == other.right) && (full == other.full); - } - - int Compare(const ChartState &other) const { - int lres = left.Compare(other.left); - if (lres) return lres; - int rres = right.Compare(other.right); - if (rres) return rres; - return (int)full - (int)other.full; - } - - bool operator<(const ChartState &other) const { - return Compare(other) == -1; - } - - void ZeroRemaining() { - left.ZeroRemaining(); - right.ZeroRemaining(); - } - - Left left; - bool full; - State right; -}; - -inline size_t hash_value(const ChartState &state) { - size_t hashes[2]; - hashes[0] = hash_value(state.left); - hashes[1] = hash_value(state.right); - return util::MurmurHashNative(hashes, sizeof(size_t) * 2, state.full); -} - template <class M> class RuleScore { public: explicit RuleScore(const M &model, ChartState &out) : model_(model), out_(out), left_done_(false), prob_(0.0) { @@ -131,29 +65,30 @@ template <class M> class RuleScore { void Terminal(WordIndex word) { State copy(out_.right); FullScoreReturn ret(model_.FullScore(copy, word, out_.right)); - prob_ += ret.prob; - if (left_done_) return; + if (left_done_) { prob_ += ret.prob; return; } if (ret.independent_left) { + prob_ += ret.prob; left_done_ = true; return; } out_.left.pointers[out_.left.length++] = ret.extend_left; + prob_ += ret.rest; if (out_.right.length != copy.length + 1) left_done_ = true; } // Faster version of NonTerminal for the case where the rule begins with a non-terminal. 
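The Terminal() change above is the heart of the rest-cost patch to RuleScore: while the left edge of the rule can still extend, the provisional rest estimate (ret.rest) is charged and the extend-left pointer is recorded for later correction; once left_done_ is set, the exact conditional probability (ret.prob) is charged instead. A minimal standalone sketch of that bookkeeping, with simplified stand-in types rather than the real KenLM API:

#include <cstdint>
#include <iostream>

// Stand-in for lm::FullScoreReturn with the new rest field.
struct Ret { float prob; float rest; bool independent_left; uint64_t extend_left; };

struct Accumulator {
  float prob_;
  bool left_done_;
  uint64_t pointers_[4];
  unsigned char length_;

  Accumulator() : prob_(0.0f), left_done_(false), length_(0) {}

  // Mirrors the branch order in Terminal()/ProcessRet() above.
  void Process(const Ret &ret, bool right_grew) {
    if (left_done_) { prob_ += ret.prob; return; }
    if (ret.independent_left) { prob_ += ret.prob; left_done_ = true; return; }
    pointers_[length_++] = ret.extend_left;  // remember for later UnRest correction
    prob_ += ret.rest;                       // provisional score for now
    if (!right_grew) left_done_ = true;
  }
};

int main() {
  Accumulator acc;
  acc.Process(Ret{-2.0f, -1.5f, false, 7}, true);  // still extendable: charge rest
  acc.Process(Ret{-0.5f, -0.4f, true, 0}, true);   // independent left: charge prob
  std::cout << acc.prob_ << "\n";                  // -1.5 + -0.5 = -2
}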
- void BeginNonTerminal(const ChartState &in, float prob) { + void BeginNonTerminal(const ChartState &in, float prob = 0.0) { prob_ = prob; out_ = in; - left_done_ = in.full; + left_done_ = in.left.full; } - void NonTerminal(const ChartState &in, float prob) { + void NonTerminal(const ChartState &in, float prob = 0.0) { prob_ += prob; if (!in.left.length) { - if (in.full) { + if (in.left.full) { for (const float *i = out_.right.backoff; i < out_.right.backoff + out_.right.length; ++i) prob_ += *i; left_done_ = true; out_.right = in.right; @@ -163,12 +98,15 @@ template <class M> class RuleScore { if (!out_.right.length) { out_.right = in.right; - if (left_done_) return; + if (left_done_) { + prob_ += model_.UnRest(in.left.pointers, in.left.pointers + in.left.length, 1); + return; + } if (out_.left.length) { left_done_ = true; } else { out_.left = in.left; - left_done_ = in.full; + left_done_ = in.left.full; } return; } @@ -186,7 +124,7 @@ template <class M> class RuleScore { std::swap(back, back2); } - if (in.full) { + if (in.left.full) { for (const float *i = back; i != back + next_use; ++i) prob_ += *i; left_done_ = true; out_.right = in.right; @@ -213,10 +151,17 @@ template <class M> class RuleScore { float Finish() { // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram. - out_.full = left_done_ || (out_.left.length == model_.Order() - 1); + out_.left.full = left_done_ || (out_.left.length == model_.Order() - 1); return prob_; } + void Reset() { + prob_ = 0.0; + left_done_ = false; + out_.left.length = 0; + out_.right.length = 0; + } + private: bool ExtendLeft(const ChartState &in, unsigned char &next_use, unsigned char extend_length, const float *back_in, float *back_out) { ProcessRet(model_.ExtendLeft( @@ -228,8 +173,9 @@ template <class M> class RuleScore { if (next_use != out_.right.length) { left_done_ = true; if (!next_use) { - out_.right = in.right; // Early exit. 
+ out_.right = in.right; + prob_ += model_.UnRest(in.left.pointers + extend_length, in.left.pointers + in.left.length, extend_length + 1); return true; } } @@ -238,13 +184,17 @@ template <class M> class RuleScore { } void ProcessRet(const FullScoreReturn &ret) { - prob_ += ret.prob; - if (left_done_) return; + if (left_done_) { + prob_ += ret.prob; + return; + } if (ret.independent_left) { + prob_ += ret.prob; left_done_ = true; return; } out_.left.pointers[out_.left.length++] = ret.extend_left; + prob_ += ret.rest; } const M &model_; diff --git a/klm/lm/left_test.cc b/klm/lm/left_test.cc index c85e5efa..b23e6a0f 100644 --- a/klm/lm/left_test.cc +++ b/klm/lm/left_test.cc @@ -24,7 +24,7 @@ template <class M> void Short(const M &m) { Term("loin"); BOOST_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001); } - BOOST_CHECK(base.full); + BOOST_CHECK(base.left.full); BOOST_CHECK_EQUAL(2, base.left.length); BOOST_CHECK_EQUAL(1, base.right.length); VCheck("loin", base.right.words[0]); @@ -40,7 +40,7 @@ template <class M> void Short(const M &m) { BOOST_CHECK_EQUAL(3, more_left.left.length); BOOST_CHECK_EQUAL(1, more_left.right.length); VCheck("loin", more_left.right.words[0]); - BOOST_CHECK(more_left.full); + BOOST_CHECK(more_left.left.full); ChartState shorter; { @@ -52,7 +52,7 @@ template <class M> void Short(const M &m) { BOOST_CHECK_EQUAL(1, shorter.left.length); BOOST_CHECK_EQUAL(1, shorter.right.length); VCheck("loin", shorter.right.words[0]); - BOOST_CHECK(shorter.full); + BOOST_CHECK(shorter.left.full); } template <class M> void Charge(const M &m) { @@ -66,7 +66,7 @@ template <class M> void Charge(const M &m) { BOOST_CHECK_EQUAL(1, base.left.length); BOOST_CHECK_EQUAL(1, base.right.length); VCheck("more", base.right.words[0]); - BOOST_CHECK(base.full); + BOOST_CHECK(base.left.full); ChartState extend; { @@ -78,7 +78,7 @@ template <class M> void Charge(const M &m) { BOOST_CHECK_EQUAL(2, extend.left.length); BOOST_CHECK_EQUAL(1, extend.right.length); VCheck("more", extend.right.words[0]); - BOOST_CHECK(extend.full); + BOOST_CHECK(extend.left.full); ChartState tobos; { @@ -91,9 +91,9 @@ template <class M> void Charge(const M &m) { BOOST_CHECK_EQUAL(1, tobos.right.length); } -template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &words) { +template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) { float ret = 0.0; - State right = m.NullContextState(); + State right = begin_sentence ? 
m.BeginSentenceState() : m.NullContextState(); for (std::vector<WordIndex>::const_iterator i = words.begin(); i != words.end(); ++i) { State copy(right); ret += m.Score(copy, *i, right); @@ -101,12 +101,12 @@ template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &w return ret; } -template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &words) { +template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) { float ret = 0.0; ChartState state; state.left.length = 0; state.right.length = 0; - state.full = false; + state.left.full = false; for (std::vector<WordIndex>::const_reverse_iterator i = words.rbegin(); i != words.rend(); ++i) { ChartState copy(state); RuleScore<M> score(m, state); @@ -114,10 +114,17 @@ template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &w score.NonTerminal(copy, ret); ret = score.Finish(); } + if (begin_sentence) { + ChartState copy(state); + RuleScore<M> score(m, state); + score.BeginSentence(); + score.NonTerminal(copy, ret); + ret = score.Finish(); + } return ret; } -template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &words) { +template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) { std::vector<std::pair<ChartState, float> > states(words.size()); for (unsigned int i = 0; i < words.size(); ++i) { RuleScore<M> score(m, states[i].first); @@ -137,7 +144,19 @@ template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &wo } std::swap(states, upper); } - return states.empty() ? 0 : states.back().second; + + if (states.empty()) return 0.0; + + if (begin_sentence) { + ChartState ignored; + RuleScore<M> score(m, ignored); + score.BeginSentence(); + score.NonTerminal(states.front().first, states.front().second); + return score.Finish(); + } else { + return states.front().second; + } + } template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vector<WordIndex> &out) { @@ -148,16 +167,15 @@ template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vec } #define TEXT_TEST(str) \ -{ \ - std::vector<WordIndex> words; \ LookupVocab(m, str, words); \ - float expect = LeftToRight(m, words); \ - BOOST_CHECK_CLOSE(expect, RightToLeft(m, words), 0.001); \ - BOOST_CHECK_CLOSE(expect, TreeMiddle(m, words), 0.001); \ -} + expect = LeftToRight(m, words, rest); \ + BOOST_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \ + BOOST_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \ // Build sentences, or parts thereof, from right to left. -template <class M> void GrowBig(const M &m) { +template <class M> void GrowBig(const M &m, bool rest = false) { + std::vector<WordIndex> words; + float expect; TEXT_TEST("in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>"); TEXT_TEST("on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>"); TEXT_TEST("on a little more loin also would consider higher to look good"); @@ -171,6 +189,14 @@ template <class M> void GrowBig(const M &m) { TEXT_TEST("consider higher"); } +template <class M> void GrowSmall(const M &m, bool rest = false) { + std::vector<WordIndex> words; + float expect; + TEXT_TEST("in biarritz watching considering looking . 
</s>"); + TEXT_TEST("in biarritz watching considering looking ."); + TEXT_TEST("in biarritz"); +} + template <class M> void AlsoWouldConsiderHigher(const M &m) { ChartState also; { @@ -210,7 +236,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) { } BOOST_CHECK_EQUAL(1, consider.left.length); BOOST_CHECK_EQUAL(1, consider.right.length); - BOOST_CHECK(!consider.full); + BOOST_CHECK(!consider.left.full); ChartState higher; float higher_score; @@ -222,7 +248,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) { BOOST_CHECK_CLOSE(-1.509559, higher_score, 0.001); BOOST_CHECK_EQUAL(1, higher.left.length); BOOST_CHECK_EQUAL(1, higher.right.length); - BOOST_CHECK(!higher.full); + BOOST_CHECK(!higher.left.full); VCheck("higher", higher.right.words[0]); BOOST_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001); @@ -234,7 +260,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) { BOOST_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001); } BOOST_CHECK_EQUAL(2, consider_higher.left.length); - BOOST_CHECK(!consider_higher.full); + BOOST_CHECK(!consider_higher.left.full); ChartState full; { @@ -246,12 +272,6 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) { BOOST_CHECK_EQUAL(4, full.right.length); } -template <class M> void GrowSmall(const M &m) { - TEXT_TEST("in biarritz watching considering looking . </s>"); - TEXT_TEST("in biarritz watching considering looking ."); - TEXT_TEST("in biarritz"); -} - #define CHECK_SCORE(str, val) \ { \ float got = val; \ @@ -315,7 +335,7 @@ template <class M> void FullGrow(const M &m) { CHECK_SCORE("looking . </s>", l2_scores[1] = score.Finish()); } BOOST_CHECK_EQUAL(l2[1].left.length, 1); - BOOST_CHECK(l2[1].full); + BOOST_CHECK(l2[1].left.full); ChartState top; { @@ -362,6 +382,13 @@ BOOST_AUTO_TEST_CASE(ArrayTrieAll) { Everything<ArrayTrieModel>(); } +BOOST_AUTO_TEST_CASE(RestProbing) { + Config config; + config.messages = NULL; + RestProbingModel m(FileLocation(), config); + GrowBig(m, true); +} + } // namespace } // namespace ngram } // namespace lm diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh index 71cd23dd..aff9de27 100644 --- a/klm/lm/max_order.hh +++ b/klm/lm/max_order.hh @@ -6,7 +6,7 @@ namespace ngram { // Having this limit means that State can be // (kMaxOrder - 1) * sizeof(float) bytes instead of // sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead -const unsigned char kMaxOrder = 6; +const unsigned char kMaxOrder = 5; } // namespace ngram } // namespace lm diff --git a/klm/lm/model.cc b/klm/lm/model.cc index 478ebed1..c081788c 100644 --- a/klm/lm/model.cc +++ b/klm/lm/model.cc @@ -38,10 +38,13 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge State begin_sentence = State(); begin_sentence.length = 1; begin_sentence.words[0] = vocab_.BeginSentence(); - begin_sentence.backoff[0] = search_.unigram.Lookup(begin_sentence.words[0]).backoff; + typename Search::Node ignored_node; + bool ignored_independent_left; + uint64_t ignored_extend_left; + begin_sentence.backoff[0] = search_.LookupUnigram(begin_sentence.words[0], ignored_node, ignored_independent_left, ignored_extend_left).Backoff(); State null_context = State(); null_context.length = 0; - P::Init(begin_sentence, null_context, vocab_, search_.MiddleEnd() - search_.MiddleBegin() + 2); + P::Init(begin_sentence, null_context, vocab_, search_.Order()); } template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const 
Parameters &params, const Config &config, int fd) { @@ -50,6 +53,9 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT search_.LoadedBinary(); } +namespace { +} // namespace + template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromARPA(const char *file, const Config &config) { // Backing file is the ARPA. Steal it so we can make the backing file the mmap output if any. util::FilePiece f(backing_.file.release(), file, config.messages); @@ -79,8 +85,8 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT if (!vocab_.SawUnk()) { assert(config.unknown_missing != THROW_UP); // Default probabilities for unknown. - search_.unigram.Unknown().backoff = 0.0; - search_.unigram.Unknown().prob = config.unknown_missing_logprob; + search_.UnknownUnigram().backoff = 0.0; + search_.UnknownUnigram().prob = config.unknown_missing_logprob; } FinishFile(config, kModelType, kVersion, counts, vocab_.UnkCountChangePadding(), backing_); } catch (util::Exception &e) { @@ -109,20 +115,22 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search, // Add the backoff weights for n-grams of order start to (context_rend - context_rbegin). unsigned char start = ret.ngram_length; if (context_rend - context_rbegin < static_cast<std::ptrdiff_t>(start)) return ret; + + bool independent_left; + uint64_t extend_left; + typename Search::Node node; if (start <= 1) { - ret.prob += search_.unigram.Lookup(*context_rbegin).backoff; + ret.prob += search_.LookupUnigram(*context_rbegin, node, independent_left, extend_left).Backoff(); start = 2; - } - typename Search::Node node; - if (!search_.FastMakeNode(context_rbegin, context_rbegin + start - 1, node)) { + } else if (!search_.FastMakeNode(context_rbegin, context_rbegin + start - 1, node)) { return ret; } - float backoff; // i is the order of the backoff we're looking for. - typename Search::MiddleIter mid_iter = search_.MiddleBegin() + start - 2; - for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++mid_iter) { - if (!search_.LookupMiddleNoProb(*mid_iter, *i, backoff, node)) break; - ret.prob += backoff; + unsigned char order_minus_2 = 0; + for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++order_minus_2) { + typename Search::MiddlePointer p(search_.LookupMiddle(order_minus_2, *i, node, independent_left, extend_left)); + if (!p.Found()) break; + ret.prob += p.Backoff(); } return ret; } @@ -134,17 +142,20 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT out_state.length = 0; return; } - FullScoreReturn ignored; typename Search::Node node; - search_.LookupUnigram(*context_rbegin, out_state.backoff[0], node, ignored); + bool independent_left; + uint64_t extend_left; + out_state.backoff[0] = search_.LookupUnigram(*context_rbegin, node, independent_left, extend_left).Backoff(); out_state.length = HasExtension(out_state.backoff[0]) ?
1 : 0; float *backoff_out = out_state.backoff + 1; - typename Search::MiddleIter mid(search_.MiddleBegin()); - for (const WordIndex *i = context_rbegin + 1; i < context_rend; ++i, ++backoff_out, ++mid) { - if (!search_.LookupMiddleNoProb(*mid, *i, *backoff_out, node)) { + unsigned char order_minus_2 = 0; + for (const WordIndex *i = context_rbegin + 1; i < context_rend; ++i, ++backoff_out, ++order_minus_2) { + typename Search::MiddlePointer p(search_.LookupMiddle(order_minus_2, *i, node, independent_left, extend_left)); + if (!p.Found()) { std::copy(context_rbegin, context_rbegin + out_state.length, out_state.words); return; } + *backoff_out = p.Backoff(); if (HasExtension(*backoff_out)) out_state.length = i - context_rbegin + 1; } std::copy(context_rbegin, context_rbegin + out_state.length, out_state.words); @@ -158,43 +169,29 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search, float *backoff_out, unsigned char &next_use) const { FullScoreReturn ret; - float subtract_me; - typename Search::Node node(search_.Unpack(extend_pointer, extend_length, subtract_me)); - ret.prob = subtract_me; - ret.ngram_length = extend_length; - next_use = 0; - // If this function is called, then it does depend on left words. - ret.independent_left = false; - ret.extend_left = extend_pointer; - typename Search::MiddleIter mid_iter(search_.MiddleBegin() + extend_length - 1); - const WordIndex *i = add_rbegin; - for (; ; ++i, ++backoff_out, ++mid_iter) { - if (i == add_rend) { - // Ran out of words. - for (const float *b = backoff_in + ret.ngram_length - extend_length; b < backoff_in + (add_rend - add_rbegin); ++b) ret.prob += *b; - ret.prob -= subtract_me; - return ret; - } - if (mid_iter == search_.MiddleEnd()) break; - if (ret.independent_left || !search_.LookupMiddle(*mid_iter, *i, *backoff_out, node, ret)) { - // Didn't match a word. - ret.independent_left = true; - for (const float *b = backoff_in + ret.ngram_length - extend_length; b < backoff_in + (add_rend - add_rbegin); ++b) ret.prob += *b; - ret.prob -= subtract_me; - return ret; - } - ret.ngram_length = mid_iter - search_.MiddleBegin() + 2; - if (HasExtension(*backoff_out)) next_use = i - add_rbegin + 1; - } - - if (ret.independent_left || !search_.LookupLongest(*i, ret.prob, node)) { - // The last backoff weight, for Order() - 1. - ret.prob += backoff_in[i - add_rbegin]; + typename Search::Node node; + if (extend_length == 1) { + typename Search::UnigramPointer ptr(search_.LookupUnigram(static_cast<WordIndex>(extend_pointer), node, ret.independent_left, ret.extend_left)); + ret.rest = ptr.Rest(); + ret.prob = ptr.Prob(); + assert(!ret.independent_left); } else { - ret.ngram_length = P::Order(); + typename Search::MiddlePointer ptr(search_.Unpack(extend_pointer, extend_length, node)); + ret.rest = ptr.Rest(); + ret.prob = ptr.Prob(); + ret.extend_left = extend_pointer; + // If this function is called, then it does depend on left words. + ret.independent_left = false; } - ret.independent_left = true; + float subtract_me = ret.rest; + ret.ngram_length = extend_length; + next_use = extend_length; + ResumeScore(add_rbegin, add_rend, extend_length - 1, node, backoff_out, next_use, ret); + next_use -= extend_length; + // Charge backoffs. 
+ for (const float *b = backoff_in + ret.ngram_length - extend_length; b < backoff_in + (add_rend - add_rbegin); ++b) ret.prob += *b; ret.prob -= subtract_me; + ret.rest -= subtract_me; return ret; } @@ -215,66 +212,83 @@ void CopyRemainingHistory(const WordIndex *from, State &out_state) { * new_word. */ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search, VocabularyT>::ScoreExceptBackoff( - const WordIndex *context_rbegin, - const WordIndex *context_rend, + const WordIndex *const context_rbegin, + const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const { FullScoreReturn ret; // ret.ngram_length contains the last known non-blank ngram length. ret.ngram_length = 1; - float *backoff_out(out_state.backoff); typename Search::Node node; - search_.LookupUnigram(new_word, *backoff_out, node, ret); + typename Search::UnigramPointer uni(search_.LookupUnigram(new_word, node, ret.independent_left, ret.extend_left)); + out_state.backoff[0] = uni.Backoff(); + ret.prob = uni.Prob(); + ret.rest = uni.Rest(); + // This is the length of the context that should be used for continuation to the right. - out_state.length = HasExtension(*backoff_out) ? 1 : 0; + out_state.length = HasExtension(out_state.backoff[0]) ? 1 : 0; // We'll write the word anyway since it will probably be used and does no harm being there. out_state.words[0] = new_word; if (context_rbegin == context_rend) return ret; - ++backoff_out; - - // Ok start by looking up the bigram. - const WordIndex *hist_iter = context_rbegin; - typename Search::MiddleIter mid_iter(search_.MiddleBegin()); - for (; ; ++mid_iter, ++hist_iter, ++backoff_out) { - if (hist_iter == context_rend) { - // Ran out of history. Typically no backoff, but this could be a blank. - CopyRemainingHistory(context_rbegin, out_state); - // ret.prob was already set. - return ret; - } - if (mid_iter == search_.MiddleEnd()) break; + ResumeScore(context_rbegin, context_rend, 0, node, out_state.backoff + 1, out_state.length, ret); + CopyRemainingHistory(context_rbegin, out_state); + return ret; +} - if (ret.independent_left || !search_.LookupMiddle(*mid_iter, *hist_iter, *backoff_out, node, ret)) { - // Didn't find an ngram using hist_iter. - CopyRemainingHistory(context_rbegin, out_state); - // ret.prob was already set. - ret.independent_left = true; - return ret; - } - ret.ngram_length = hist_iter - context_rbegin + 2; +template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::ResumeScore(const WordIndex *hist_iter, const WordIndex *const context_rend, unsigned char order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const { + for (; ; ++order_minus_2, ++hist_iter, ++backoff_out) { + if (hist_iter == context_rend) return; + if (ret.independent_left) return; + if (order_minus_2 == P::Order() - 2) break; + + typename Search::MiddlePointer pointer(search_.LookupMiddle(order_minus_2, *hist_iter, node, ret.independent_left, ret.extend_left)); + if (!pointer.Found()) return; + *backoff_out = pointer.Backoff(); + ret.prob = pointer.Prob(); + ret.rest = pointer.Rest(); + ret.ngram_length = order_minus_2 + 2; if (HasExtension(*backoff_out)) { - out_state.length = ret.ngram_length; + next_use = ret.ngram_length; } } - - // It passed every lookup in search_.middle. All that's left is to check search_.longest. - if (!ret.independent_left && search_.LookupLongest(*hist_iter, ret.prob, node)) { - // It's an P::Order()-gram. 
+ ret.independent_left = true; + typename Search::LongestPointer longest(search_.LookupLongest(*hist_iter, node)); + if (longest.Found()) { + ret.prob = longest.Prob(); + ret.rest = ret.prob; // There is no blank in longest_. ret.ngram_length = P::Order(); } - // This handles (N-1)-grams and N-grams. - CopyRemainingHistory(context_rbegin, out_state); - ret.independent_left = true; +} + +template <class Search, class VocabularyT> float GenericModel<Search, VocabularyT>::InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const { + float ret; + typename Search::Node node; + if (first_length == 1) { + if (pointers_begin >= pointers_end) return 0.0; + bool independent_left; + uint64_t extend_left; + typename Search::UnigramPointer ptr(search_.LookupUnigram(static_cast<WordIndex>(*pointers_begin), node, independent_left, extend_left)); + ret = ptr.Prob() - ptr.Rest(); + ++first_length; + ++pointers_begin; + } else { + ret = 0.0; + } + for (const uint64_t *i = pointers_begin; i < pointers_end; ++i, ++first_length) { + typename Search::MiddlePointer ptr(search_.Unpack(*i, first_length, node)); + ret += ptr.Prob() - ptr.Rest(); + } return ret; } -template class GenericModel<ProbingHashedSearch, ProbingVocabulary>; // HASH_PROBING -template class GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary>; // TRIE_SORTED +template class GenericModel<HashedSearch<BackoffValue>, ProbingVocabulary>; +template class GenericModel<HashedSearch<RestValue>, ProbingVocabulary>; +template class GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary>; template class GenericModel<trie::TrieSearch<DontQuantize, trie::ArrayBhiksha>, SortedVocabulary>; -template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary>; // TRIE_SORTED_QUANT +template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary>; template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::ArrayBhiksha>, SortedVocabulary>; } // namespace detail diff --git a/klm/lm/model.hh b/klm/lm/model.hh index 6ea62a78..be872178 100644 --- a/klm/lm/model.hh +++ b/klm/lm/model.hh @@ -9,6 +9,8 @@ #include "lm/quantize.hh" #include "lm/search_hashed.hh" #include "lm/search_trie.hh" +#include "lm/state.hh" +#include "lm/value.hh" #include "lm/vocab.hh" #include "lm/weights.hh" @@ -23,48 +25,6 @@ namespace util { class FilePiece; } namespace lm { namespace ngram { - -// This is a POD but if you want memcmp to return the same as operator==, call -// ZeroRemaining first. -class State { - public: - bool operator==(const State &other) const { - if (length != other.length) return false; - return !memcmp(words, other.words, length * sizeof(WordIndex)); - } - - // Three way comparison function. - int Compare(const State &other) const { - if (length != other.length) return length < other.length ? -1 : 1; - return memcmp(words, other.words, length * sizeof(WordIndex)); - } - - bool operator<(const State &other) const { - if (length != other.length) return length < other.length; - return memcmp(words, other.words, length * sizeof(WordIndex)) < 0; - } - - // Call this before using raw memcmp. 
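The State class removed here from model.hh (it moves to the new lm/state.hh) keeps the convention named in the comment above and in the ZeroRemaining() definition that follows: words and backoff are only meaningful up to length, so the tail must be zeroed before raw memcmp or whole-array hashing can stand in for member-wise comparison. A small self-contained illustration of the pitfall, using simplified types rather than the real State:

#include <cstring>
#include <iostream>

const unsigned char kMaxOrder = 5;

// Simplified stand-in for lm::ngram::State.
struct MiniState {
  unsigned int words[kMaxOrder - 1];
  float backoff[kMaxOrder - 1];
  unsigned char length;

  void ZeroRemaining() {
    for (unsigned char i = length; i < kMaxOrder - 1; ++i) {
      words[i] = 0;
      backoff[i] = 0.0f;
    }
  }
};

int main() {
  MiniState a = MiniState(), b = MiniState();
  a.length = b.length = 1;
  a.words[0] = b.words[0] = 42;
  a.words[1] = 7;  // simulate stale data beyond length...
  b.words[1] = 9;  // ...that differs between logically equal states
  std::cout << (std::memcmp(a.words, b.words, sizeof(a.words)) == 0) << "\n";  // 0
  a.ZeroRemaining();
  b.ZeroRemaining();
  std::cout << (std::memcmp(a.words, b.words, sizeof(a.words)) == 0) << "\n";  // 1
}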
- void ZeroRemaining() { - for (unsigned char i = length; i < kMaxOrder - 1; ++i) { - words[i] = 0; - backoff[i] = 0.0; - } - } - - unsigned char Length() const { return length; } - - // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD. - // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit. - WordIndex words[kMaxOrder - 1]; - float backoff[kMaxOrder - 1]; - unsigned char length; -}; - -inline size_t hash_value(const State &state) { - return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length); -} - namespace detail { // Should return the same results as SRI. @@ -119,8 +79,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod /* More efficient version of FullScore where a partial n-gram has already * been scored. - * NOTE: THE RETURNED .prob IS RELATIVE, NOT ABSOLUTE. So for example, if - * the n-gram does not end up extending further left, then 0 is returned. + * NOTE: THE RETURNED .rest AND .prob ARE RELATIVE TO THE .rest RETURNED BEFORE. */ FullScoreReturn ExtendLeft( // Additional context in reverse order. This will update add_rend to @@ -136,12 +95,24 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod // Amount of additional content that should be considered by the next call. unsigned char &next_use) const; + /* Return probabilities minus rest costs for an array of pointers. The + * first length should be the length of the n-gram to which pointers_begin + * points. + */ + float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const { + // Compiler should optimize this if away. + return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0; + } + private: friend void lm::ngram::LoadLM<>(const char *file, const Config &config, GenericModel<Search, VocabularyT> &to); static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config); - FullScoreReturn ScoreExceptBackoff(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const; + FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const; + + // Score bigrams and above. Do not include backoff. + void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const; // Appears after Size in the cc file. void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config); @@ -150,32 +121,38 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod void InitializeFromARPA(const char *file, const Config &config); + float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const; + Backing &MutableBacking() { return backing_; } Backing backing_; VocabularyT vocab_; - typedef typename Search::Middle Middle; - Search search_; }; } // namespace detail -// These must also be instantiated in the cc file. -typedef ::lm::ngram::ProbingVocabulary Vocabulary; -typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingModel; // HASH_PROBING -// Default implementation. 
No real reason for it to be the default. -typedef ProbingModel Model; +// Instead of typedef, inherit. This allows the Model etc to be forward declared. +// Oh the joys of C and C++. +#define LM_COMMA() , +#define LM_NAME_MODEL(name, from)\ +class name : public from {\ + public:\ + name(const char *file, const Config &config = Config()) : from(file, config) {}\ +}; -// Smaller implementation. -typedef ::lm::ngram::SortedVocabulary SortedVocabulary; -typedef detail::GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary> TrieModel; // TRIE_SORTED -typedef detail::GenericModel<trie::TrieSearch<DontQuantize, trie::ArrayBhiksha>, SortedVocabulary> ArrayTrieModel; +LM_NAME_MODEL(ProbingModel, detail::GenericModel<detail::HashedSearch<BackoffValue> LM_COMMA() ProbingVocabulary>); +LM_NAME_MODEL(RestProbingModel, detail::GenericModel<detail::HashedSearch<RestValue> LM_COMMA() ProbingVocabulary>); +LM_NAME_MODEL(TrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>); +LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>); +LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>); +LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>); -typedef detail::GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary> QuantTrieModel; // QUANT_TRIE_SORTED -typedef detail::GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::ArrayBhiksha>, SortedVocabulary> QuantArrayTrieModel; +// Default implementation. No real reason for it to be the default. 
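The LM_NAME_MODEL macro above trades typedefs for thin derived classes because a typedef of a template specialization cannot be forward-declared, while a named class can; other headers may now write class ProbingModel; without pulling in model.hh. The typedefs that follow merely restore the historical Model and Vocabulary names. A minimal sketch of the difference, with illustrative names rather than the actual KenLM declarations:

#include <iostream>

template <class Search, class Vocab> class GenericModel {
 public:
  explicit GenericModel(const char *file) : file_(file) {}
  const char *File() const { return file_; }
 private:
  const char *file_;
};

struct HashedSearch {};
struct ProbingVocab {};

// With a typedef, "class ProbingModel;" elsewhere would conflict:
//   typedef GenericModel<HashedSearch, ProbingVocab> ProbingModel;
// Inheriting instead gives the specialization a real class name, so other
// headers can forward-declare it without seeing the template at all.
class ProbingModel : public GenericModel<HashedSearch, ProbingVocab> {
 public:
  explicit ProbingModel(const char *file)
      : GenericModel<HashedSearch, ProbingVocab>(file) {}
};

void Use(const ProbingModel &m) { std::cout << m.File() << "\n"; }

int main() {
  ProbingModel m("example.arpa");
  Use(m);
}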
+typedef ::lm::ngram::ProbingVocabulary Vocabulary; +typedef ProbingModel Model; } // namespace ngram } // namespace lm diff --git a/klm/lm/model_test.cc b/klm/lm/model_test.cc index 461704d4..8a122c60 100644 --- a/klm/lm/model_test.cc +++ b/klm/lm/model_test.cc @@ -30,7 +30,15 @@ const char *TestNoUnkLocation() { return "test_nounk.arpa"; } return boost::unit_test::framework::master_test_suite().argv[2]; +} +template <class Model> State GetState(const Model &model, const char *word, const State &in) { + WordIndex context[in.length + 1]; + context[0] = model.GetVocabulary().Index(word); + std::copy(in.words, in.words + in.length, context + 1); + State ret; + model.GetState(context, context + in.length + 1, ret); + return ret; } #define StartTest(word, ngram, score, indep_left) \ @@ -42,14 +50,7 @@ const char *TestNoUnkLocation() { BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \ BOOST_CHECK_GE(std::min<unsigned char>(ngram, 5 - 1), out.length); \ BOOST_CHECK_EQUAL(indep_left, ret.independent_left); \ - {\ - WordIndex context[state.length + 1]; \ - context[0] = model.GetVocabulary().Index(word); \ - std::copy(state.words, state.words + state.length, context + 1); \ - State get_state; \ - model.GetState(context, context + state.length + 1, get_state); \ - BOOST_CHECK_EQUAL(out, get_state); \ - } + BOOST_CHECK_EQUAL(out, GetState(model, word, state)); #define AppendTest(word, ngram, score, indep_left) \ StartTest(word, ngram, score, indep_left) \ @@ -182,7 +183,7 @@ template <class M> void ExtendLeftTest(const M &model) { FullScoreReturn extend_none(model.ExtendLeft(NULL, NULL, NULL, little.extend_left, 1, NULL, next_use)); BOOST_CHECK_EQUAL(0, next_use); BOOST_CHECK_EQUAL(little.extend_left, extend_none.extend_left); - BOOST_CHECK_CLOSE(0.0, extend_none.prob, 0.001); + BOOST_CHECK_CLOSE(little.prob - little.rest, extend_none.prob, 0.001); BOOST_CHECK_EQUAL(1, extend_none.ngram_length); const WordIndex a = model.GetVocabulary().Index("a"); @@ -191,7 +192,7 @@ template <class M> void ExtendLeftTest(const M &model) { FullScoreReturn extend_a(model.ExtendLeft(&a, &a + 1, &backoff_in, little.extend_left, 1, backoff_out, next_use)); BOOST_CHECK_EQUAL(1, next_use); BOOST_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001); - BOOST_CHECK_CLOSE(-0.09132547 - kLittleProb, extend_a.prob, 0.001); + BOOST_CHECK_CLOSE(-0.09132547 - little.rest, extend_a.prob, 0.001); BOOST_CHECK_EQUAL(2, extend_a.ngram_length); BOOST_CHECK(!extend_a.independent_left); @@ -199,7 +200,7 @@ template <class M> void ExtendLeftTest(const M &model) { FullScoreReturn extend_on(model.ExtendLeft(&on, &on + 1, &backoff_in, extend_a.extend_left, 2, backoff_out, next_use)); BOOST_CHECK_EQUAL(1, next_use); BOOST_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001); - BOOST_CHECK_CLOSE(-0.0283603 - -0.09132547, extend_on.prob, 0.001); + BOOST_CHECK_CLOSE(-0.0283603 - (extend_a.rest + little.rest), extend_on.prob, 0.001); BOOST_CHECK_EQUAL(3, extend_on.ngram_length); BOOST_CHECK(!extend_on.independent_left); @@ -209,7 +210,7 @@ template <class M> void ExtendLeftTest(const M &model) { BOOST_CHECK_EQUAL(2, next_use); BOOST_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001); BOOST_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001); - BOOST_CHECK_CLOSE(-0.0283603 - kLittleProb, extend_both.prob, 0.001); + BOOST_CHECK_CLOSE(-0.0283603 - little.rest, extend_both.prob, 0.001); BOOST_CHECK_EQUAL(3, extend_both.ngram_length); BOOST_CHECK(!extend_both.independent_left); BOOST_CHECK_EQUAL(extend_on.extend_left, extend_both.extend_left); @@ -399,7 
+400,10 @@ template <class ModelT> void BinaryTest() { } BOOST_AUTO_TEST_CASE(write_and_read_probing) { - BinaryTest<Model>(); + BinaryTest<ProbingModel>(); +} +BOOST_AUTO_TEST_CASE(write_and_read_rest_probing) { + BinaryTest<RestProbingModel>(); } BOOST_AUTO_TEST_CASE(write_and_read_trie) { BinaryTest<TrieModel>(); @@ -414,6 +418,18 @@ BOOST_AUTO_TEST_CASE(write_and_read_quant_array_trie) { BinaryTest<QuantArrayTrieModel>(); } +BOOST_AUTO_TEST_CASE(rest_max) { + Config config; + config.arpa_complain = Config::NONE; + config.messages = NULL; + + RestProbingModel model(TestLocation(), config); + State state, out; + FullScoreReturn ret(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("."), state)); + BOOST_CHECK_CLOSE(-0.2705918, ret.rest, 0.001); + BOOST_CHECK_CLOSE(-0.01916512, model.FullScore(state, model.GetVocabulary().EndSentence(), out).rest, 0.001); +} + } // namespace } // namespace ngram } // namespace lm diff --git a/klm/lm/model_type.hh b/klm/lm/model_type.hh index 5057ed25..8b35c793 100644 --- a/klm/lm/model_type.hh +++ b/klm/lm/model_type.hh @@ -6,10 +6,17 @@ namespace ngram { /* Not the best numbering system, but it grew this way for historical reasons * and I want to preserve existing binary files. */ -typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2, QUANT_TRIE_SORTED=3, ARRAY_TRIE_SORTED=4, QUANT_ARRAY_TRIE_SORTED=5} ModelType; +typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType; -const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE_SORTED - TRIE_SORTED); -const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE_SORTED - TRIE_SORTED); +// Historical names. +const ModelType HASH_PROBING = PROBING; +const ModelType TRIE_SORTED = TRIE; +const ModelType QUANT_TRIE_SORTED = QUANT_TRIE; +const ModelType ARRAY_TRIE_SORTED = ARRAY_TRIE; +const ModelType QUANT_ARRAY_TRIE_SORTED = QUANT_ARRAY_TRIE; + +const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE - TRIE); +const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE - TRIE); } // namespace ngram } // namespace lm diff --git a/klm/lm/ngram_query.cc b/klm/lm/ngram_query.cc index 8f7a0e1c..49757d9a 100644 --- a/klm/lm/ngram_query.cc +++ b/klm/lm/ngram_query.cc @@ -12,22 +12,24 @@ int main(int argc, char *argv[]) { ModelType model_type; if (RecognizeBinary(argv[1], model_type)) { switch(model_type) { - case HASH_PROBING: + case PROBING: Query<lm::ngram::ProbingModel>(argv[1], sentence_context, std::cin, std::cout); break; - case TRIE_SORTED: + case REST_PROBING: + Query<lm::ngram::RestProbingModel>(argv[1], sentence_context, std::cin, std::cout); + break; + case TRIE: Query<TrieModel>(argv[1], sentence_context, std::cin, std::cout); break; - case QUANT_TRIE_SORTED: + case QUANT_TRIE: Query<QuantTrieModel>(argv[1], sentence_context, std::cin, std::cout); break; - case ARRAY_TRIE_SORTED: + case ARRAY_TRIE: Query<ArrayTrieModel>(argv[1], sentence_context, std::cin, std::cout); break; - case QUANT_ARRAY_TRIE_SORTED: + case QUANT_ARRAY_TRIE: Query<QuantArrayTrieModel>(argv[1], sentence_context, std::cin, std::cout); break; - case HASH_SORTED: default: std::cerr << "Unrecognized kenlm model type " << model_type << std::endl; abort(); @@ -35,8 +37,8 @@ int main(int argc, char *argv[]) { } else { Query<ProbingModel>(argv[1], sentence_context, std::cin, std::cout); } - - PrintUsage("Total time including destruction:\n"); + std::cerr << "Total time including destruction:\n"; + 
util::PrintUsage(std::cerr); } catch (const std::exception &e) { std::cerr << e.what() << std::endl; return 1; diff --git a/klm/lm/ngram_query.hh b/klm/lm/ngram_query.hh index 4990df22..dfcda170 100644 --- a/klm/lm/ngram_query.hh +++ b/klm/lm/ngram_query.hh @@ -3,51 +3,20 @@ #include "lm/enumerate_vocab.hh" #include "lm/model.hh" +#include "util/usage.hh" #include <cstdlib> -#include <fstream> #include <iostream> +#include <ostream> +#include <istream> #include <string> -#include <ctype.h> -#if !defined(_WIN32) && !defined(_WIN64) -#include <sys/resource.h> -#include <sys/time.h> -#endif - namespace lm { namespace ngram { -#if !defined(_WIN32) && !defined(_WIN64) -float FloatSec(const struct timeval &tv) { - return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000000.0); -} -#endif - -void PrintUsage(const char *message) { -#if !defined(_WIN32) && !defined(_WIN64) - struct rusage usage; - if (getrusage(RUSAGE_SELF, &usage)) { - perror("getrusage"); - return; - } - std::cerr << message; - std::cerr << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n'; - - // Linux doesn't set memory usage :-(. - std::ifstream status("/proc/self/status", std::ios::in); - std::string line; - while (getline(status, line)) { - if (!strncmp(line.c_str(), "VmRSS:\t", 7)) { - std::cerr << "rss " << (line.c_str() + 7) << '\n'; - break; - } - } -#endif -} - template <class Model> void Query(const Model &model, bool sentence_context, std::istream &in_stream, std::ostream &out_stream) { - PrintUsage("Loading statistics:\n"); + std::cerr << "Loading statistics:\n"; + util::PrintUsage(std::cerr); typename Model::State state, out; lm::FullScoreReturn ret; std::string word; @@ -84,13 +53,13 @@ template <class Model> void Query(const Model &model, bool sentence_context, std out_stream << "</s>=" << model.GetVocabulary().EndSentence() << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t'; } out_stream << "Total: " << total << " OOV: " << oov << '\n'; - } - PrintUsage("After queries:\n"); + } + std::cerr << "After queries:\n"; + util::PrintUsage(std::cerr); } template <class M> void Query(const char *file, bool sentence_context, std::istream &in_stream, std::ostream &out_stream) { Config config; -// config.load_method = util::LAZY; M model(file, config); Query(model, sentence_context, in_stream, out_stream); } diff --git a/klm/lm/quantize.cc b/klm/lm/quantize.cc index a8e0cb21..b58c3f3f 100644 --- a/klm/lm/quantize.cc +++ b/klm/lm/quantize.cc @@ -47,9 +47,7 @@ void SeparatelyQuantize::UpdateConfigFromBinary(int fd, const std::vector<uint64 util::AdvanceOrThrow(fd, -3); } -void SeparatelyQuantize::SetupMemory(void *start, const Config &config) { - // Reserve 8 byte header for bit counts. - start_ = reinterpret_cast<float*>(static_cast<uint8_t*>(start) + 8); +void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) { prob_bits_ = config.prob_bits; backoff_bits_ = config.backoff_bits; // We need the reserved values. @@ -57,25 +55,35 @@ void SeparatelyQuantize::SetupMemory(void *start, const Config &config) { if (config.backoff_bits == 0) UTIL_THROW(ConfigException, "You can't quantize backoff to zero"); if (config.prob_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing probability supports at most 25 bits. 
Currently you have requested " << static_cast<unsigned>(config.prob_bits) << " bits."); if (config.backoff_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing backoff supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.backoff_bits) << " bits."); + // Reserve 8 byte header for bit counts. + actual_base_ = static_cast<uint8_t*>(base); + float *start = reinterpret_cast<float*>(actual_base_ + 8); + for (unsigned char i = 0; i < order - 2; ++i) { + tables_[i][0] = Bins(prob_bits_, start); + start += (1ULL << prob_bits_); + tables_[i][1] = Bins(backoff_bits_, start); + start += (1ULL << backoff_bits_); + } + longest_ = tables_[order - 2][0] = Bins(prob_bits_, start); } void SeparatelyQuantize::Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff) { TrainProb(order, prob); // Backoff - float *centers = start_ + TableStart(order) + ProbTableLength(); + float *centers = tables_[order - 2][1].Populate(); *(centers++) = kNoExtensionBackoff; *(centers++) = kExtensionBackoff; MakeBins(backoff, centers, (1ULL << backoff_bits_) - 2); } void SeparatelyQuantize::TrainProb(uint8_t order, std::vector<float> &prob) { - float *centers = start_ + TableStart(order); + float *centers = tables_[order - 2][0].Populate(); MakeBins(prob, centers, (1ULL << prob_bits_)); } void SeparatelyQuantize::FinishedLoading(const Config &config) { - uint8_t *actual_base = reinterpret_cast<uint8_t*>(start_) - 8; + uint8_t *actual_base = actual_base_; *(actual_base++) = kSeparatelyQuantizeVersion; // version *(actual_base++) = config.prob_bits; *(actual_base++) = config.backoff_bits; diff --git a/klm/lm/quantize.hh b/klm/lm/quantize.hh index 6d130a57..3e9153e3 100644 --- a/klm/lm/quantize.hh +++ b/klm/lm/quantize.hh @@ -3,6 +3,7 @@ #include "lm/blank.hh" #include "lm/config.hh" +#include "lm/max_order.hh" #include "lm/model_type.hh" #include "util/bit_packing.hh" @@ -27,37 +28,60 @@ class DontQuantize { static uint8_t MiddleBits(const Config &/*config*/) { return 63; } static uint8_t LongestBits(const Config &/*config*/) { return 31; } - struct Middle { - void Write(void *base, uint64_t bit_offset, float prob, float backoff) const { - util::WriteNonPositiveFloat31(base, bit_offset, prob); - util::WriteFloat32(base, bit_offset + 31, backoff); - } - void Read(const void *base, uint64_t bit_offset, float &prob, float &backoff) const { - prob = util::ReadNonPositiveFloat31(base, bit_offset); - backoff = util::ReadFloat32(base, bit_offset + 31); - } - void ReadProb(const void *base, uint64_t bit_offset, float &prob) const { - prob = util::ReadNonPositiveFloat31(base, bit_offset); - } - void ReadBackoff(const void *base, uint64_t bit_offset, float &backoff) const { - backoff = util::ReadFloat32(base, bit_offset + 31); - } - uint8_t TotalBits() const { return 63; } + class MiddlePointer { + public: + MiddlePointer(const DontQuantize & /*quant*/, unsigned char /*order_minus_2*/, util::BitAddress address) : address_(address) {} + + MiddlePointer() : address_(NULL, 0) {} + + bool Found() const { + return address_.base != NULL; + } + + float Prob() const { + return util::ReadNonPositiveFloat31(address_.base, address_.offset); + } + + float Backoff() const { + return util::ReadFloat32(address_.base, address_.offset + 31); + } + + float Rest() const { return Prob(); } + + void Write(float prob, float backoff) { + util::WriteNonPositiveFloat31(address_.base, address_.offset, prob); + util::WriteFloat32(address_.base, address_.offset + 31, backoff); + } + + 
private: + util::BitAddress address_; }; - struct Longest { - void Write(void *base, uint64_t bit_offset, float prob) const { - util::WriteNonPositiveFloat31(base, bit_offset, prob); - } - void Read(const void *base, uint64_t bit_offset, float &prob) const { - prob = util::ReadNonPositiveFloat31(base, bit_offset); - } - uint8_t TotalBits() const { return 31; } + class LongestPointer { + public: + explicit LongestPointer(const DontQuantize &/*quant*/, util::BitAddress address) : address_(address) {} + + LongestPointer() : address_(NULL, 0) {} + + bool Found() const { + return address_.base != NULL; + } + + float Prob() const { + return util::ReadNonPositiveFloat31(address_.base, address_.offset); + } + + void Write(float prob) { + util::WriteNonPositiveFloat31(address_.base, address_.offset, prob); + } + + private: + util::BitAddress address_; }; DontQuantize() {} - void SetupMemory(void * /*start*/, const Config & /*config*/) {} + void SetupMemory(void * /*start*/, unsigned char /*order*/, const Config & /*config*/) {} static const bool kTrain = false; // These should never be called because kTrain is false. @@ -65,9 +89,6 @@ class DontQuantize { void TrainProb(uint8_t, std::vector<float> &/*prob*/) {} void FinishedLoading(const Config &) {} - - Middle Mid(uint8_t /*order*/) const { return Middle(); } - Longest Long(uint8_t /*order*/) const { return Longest(); } }; class SeparatelyQuantize { @@ -77,7 +98,9 @@ class SeparatelyQuantize { // Sigh C++ default constructor Bins() {} - Bins(uint8_t bits, const float *const begin) : begin_(begin), end_(begin_ + (1ULL << bits)), bits_(bits), mask_((1ULL << bits) - 1) {} + Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + (1ULL << bits)), bits_(bits), mask_((1ULL << bits) - 1) {} + + float *Populate() { return begin_; } uint64_t EncodeProb(float value) const { return Encode(value, 0); @@ -98,13 +121,13 @@ class SeparatelyQuantize { private: uint64_t Encode(float value, size_t reserved) const { - const float *above = std::lower_bound(begin_ + reserved, end_, value); + const float *above = std::lower_bound(static_cast<const float*>(begin_) + reserved, end_, value); if (above == begin_ + reserved) return reserved; if (above == end_) return end_ - begin_ - 1; return above - begin_ - (value - *(above - 1) < *above - value); } - const float *begin_; + float *begin_; const float *end_; uint8_t bits_; uint64_t mask_; @@ -125,65 +148,61 @@ class SeparatelyQuantize { static uint8_t MiddleBits(const Config &config) { return config.prob_bits + config.backoff_bits; } static uint8_t LongestBits(const Config &config) { return config.prob_bits; } - class Middle { + class MiddlePointer { public: - Middle(uint8_t prob_bits, const float *prob_begin, uint8_t backoff_bits, const float *backoff_begin) : - total_bits_(prob_bits + backoff_bits), total_mask_((1ULL << total_bits_) - 1), prob_(prob_bits, prob_begin), backoff_(backoff_bits, backoff_begin) {} + MiddlePointer(const SeparatelyQuantize &quant, unsigned char order_minus_2, const util::BitAddress &address) : bins_(quant.GetTables(order_minus_2)), address_(address) {} - void Write(void *base, uint64_t bit_offset, float prob, float backoff) const { - util::WriteInt57(base, bit_offset, total_bits_, - (prob_.EncodeProb(prob) << backoff_.Bits()) | backoff_.EncodeBackoff(backoff)); - } + MiddlePointer() : address_(NULL, 0) {} - void ReadProb(const void *base, uint64_t bit_offset, float &prob) const { - prob = prob_.Decode(util::ReadInt25(base, bit_offset + backoff_.Bits(), prob_.Bits(), prob_.Mask())); - } + 
bool Found() const { return address_.base != NULL; } - void Read(const void *base, uint64_t bit_offset, float &prob, float &backoff) const { - uint64_t both = util::ReadInt57(base, bit_offset, total_bits_, total_mask_); - prob = prob_.Decode(both >> backoff_.Bits()); - backoff = backoff_.Decode(both & backoff_.Mask()); + float Prob() const { + return ProbBins().Decode(util::ReadInt25(address_.base, address_.offset + BackoffBins().Bits(), ProbBins().Bits(), ProbBins().Mask())); } - void ReadBackoff(const void *base, uint64_t bit_offset, float &backoff) const { - backoff = backoff_.Decode(util::ReadInt25(base, bit_offset, backoff_.Bits(), backoff_.Mask())); + float Backoff() const { + return BackoffBins().Decode(util::ReadInt25(address_.base, address_.offset, BackoffBins().Bits(), BackoffBins().Mask())); } - uint8_t TotalBits() const { - return total_bits_; + float Rest() const { return Prob(); } + + void Write(float prob, float backoff) const { + util::WriteInt57(address_.base, address_.offset, ProbBins().Bits() + BackoffBins().Bits(), + (ProbBins().EncodeProb(prob) << BackoffBins().Bits()) | BackoffBins().EncodeBackoff(backoff)); } private: - const uint8_t total_bits_; - const uint64_t total_mask_; - const Bins prob_; - const Bins backoff_; + const Bins &ProbBins() const { return bins_[0]; } + const Bins &BackoffBins() const { return bins_[1]; } + const Bins *bins_; + + util::BitAddress address_; }; - class Longest { + class LongestPointer { public: - // Sigh C++ default constructor - Longest() {} + LongestPointer(const SeparatelyQuantize &quant, const util::BitAddress &address) : table_(&quant.LongestTable()), address_(address) {} + + LongestPointer() : address_(NULL, 0) {} - Longest(uint8_t prob_bits, const float *prob_begin) : prob_(prob_bits, prob_begin) {} + bool Found() const { return address_.base != NULL; } - void Write(void *base, uint64_t bit_offset, float prob) const { - util::WriteInt25(base, bit_offset, prob_.Bits(), prob_.EncodeProb(prob)); + void Write(float prob) const { + util::WriteInt25(address_.base, address_.offset, table_->Bits(), table_->EncodeProb(prob)); } - void Read(const void *base, uint64_t bit_offset, float &prob) const { - prob = prob_.Decode(util::ReadInt25(base, bit_offset, prob_.Bits(), prob_.Mask())); + float Prob() const { + return table_->Decode(util::ReadInt25(address_.base, address_.offset, table_->Bits(), table_->Mask())); } - uint8_t TotalBits() const { return prob_.Bits(); } - private: - Bins prob_; + const Bins *table_; + util::BitAddress address_; }; SeparatelyQuantize() {} - void SetupMemory(void *start, const Config &config); + void SetupMemory(void *start, unsigned char order, const Config &config); static const bool kTrain = true; // Assumes 0.0 is removed from backoff. 
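The SeparatelyQuantize MiddlePointer above stores one quantized probability index and one quantized backoff index side by side in a single bit run, backoff in the low bits. A self-contained sketch of that packing arithmetic, reduced to a plain uint64_t instead of KenLM's bit-level ReadInt25/WriteInt57 stream:

#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  const unsigned backoff_bits = 8;
  const uint64_t backoff_mask = (1ULL << backoff_bits) - 1;

  uint64_t prob_index = 201;    // bucket chosen by Bins::EncodeProb
  uint64_t backoff_index = 17;  // bucket chosen by Bins::EncodeBackoff

  // As in MiddlePointer::Write(): (prob << backoff_bits) | backoff.
  uint64_t packed = (prob_index << backoff_bits) | backoff_index;

  // Prob() reads the high field, Backoff() the low field.
  assert((packed >> backoff_bits) == prob_index);
  assert((packed & backoff_mask) == backoff_index);
  std::cout << "packed value: " << packed << "\n";  // 201*256 + 17 = 51473
}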
@@ -193,18 +212,17 @@ class SeparatelyQuantize { void FinishedLoading(const Config &config); - Middle Mid(uint8_t order) const { - const float *table = start_ + TableStart(order); - return Middle(prob_bits_, table, backoff_bits_, table + ProbTableLength()); - } + const Bins *GetTables(unsigned char order_minus_2) const { return tables_[order_minus_2]; } - Longest Long(uint8_t order) const { return Longest(prob_bits_, start_ + TableStart(order)); } + const Bins &LongestTable() const { return longest_; } private: - size_t TableStart(uint8_t order) const { return ((1ULL << prob_bits_) + (1ULL << backoff_bits_)) * static_cast<uint64_t>(order - 2); } - size_t ProbTableLength() const { return (1ULL << prob_bits_); } + Bins tables_[kMaxOrder - 1][2]; + + Bins longest_; + + uint8_t *actual_base_; - float *start_; uint8_t prob_bits_, backoff_bits_; }; diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 05f761be..2d9a337d 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -83,7 +83,7 @@ void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) { } } -void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) { +void ReadBackoff(util::FilePiece &in, float &backoff) { // Always make zero negative. // Negative zero means that no (n+1)-gram has this n-gram as context. // Therefore the hypothesis state can be shorter. Of course, many n-grams @@ -91,12 +91,12 @@ void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) { // back and set the backoff to positive zero in these cases. switch (in.get()) { case '\t': - weights.backoff = in.ReadFloat(); - if (weights.backoff == ngram::kExtensionBackoff) weights.backoff = ngram::kNoExtensionBackoff; + backoff = in.ReadFloat(); + if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff; if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff"); break; case '\n': - weights.backoff = ngram::kNoExtensionBackoff; + backoff = ngram::kNoExtensionBackoff; break; default: UTIL_THROW(FormatLoadException, "Expected tab or newline for backoff"); diff --git a/klm/lm/read_arpa.hh b/klm/lm/read_arpa.hh index ab996bde..234d130c 100644 --- a/klm/lm/read_arpa.hh +++ b/klm/lm/read_arpa.hh @@ -16,7 +16,13 @@ void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number); void ReadNGramHeader(util::FilePiece &in, unsigned int length); void ReadBackoff(util::FilePiece &in, Prob &weights); -void ReadBackoff(util::FilePiece &in, ProbBackoff &weights); +void ReadBackoff(util::FilePiece &in, float &backoff); +inline void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) { + ReadBackoff(in, weights.backoff); +} +inline void ReadBackoff(util::FilePiece &in, RestWeights &weights) { + ReadBackoff(in, weights.backoff); +} void ReadEnd(util::FilePiece &in); @@ -35,7 +41,7 @@ class PositiveProbWarn { WarningAction action_; }; -template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) { +template <class Voc, class Weights> void Read1Gram(util::FilePiece &f, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) { try { float prob = f.ReadFloat(); if (prob > 0.0) { @@ -43,7 +49,7 @@ template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff prob = 0.0; } if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability"); - ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))]; + Weights &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))]; value.prob = prob; ReadBackoff(f, value); } 
catch(util::Exception &e) { @@ -53,7 +59,7 @@ template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff } // Return true if a positive log probability came out. -template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) { +template <class Voc, class Weights> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) { ReadNGramHeader(f, 1); for (std::size_t i = 0; i < count; ++i) { Read1Gram(f, vocab, unigrams, warn); diff --git a/klm/lm/return.hh b/klm/lm/return.hh index 1b55091b..622320ce 100644 --- a/klm/lm/return.hh +++ b/klm/lm/return.hh @@ -33,6 +33,9 @@ struct FullScoreReturn { */ bool independent_left; uint64_t extend_left; // Defined only if independent_left + + // Rest cost for extension to the left. + float rest; }; } // namespace lm diff --git a/klm/lm/search_hashed.cc b/klm/lm/search_hashed.cc index 1d6fb5be..13942309 100644 --- a/klm/lm/search_hashed.cc +++ b/klm/lm/search_hashed.cc @@ -3,7 +3,9 @@ #include "lm/binary_format.hh" #include "lm/blank.hh" #include "lm/lm_exception.hh" +#include "lm/model.hh" #include "lm/read_arpa.hh" +#include "lm/value.hh" #include "lm/vocab.hh" #include "util/bit_packing.hh" @@ -14,6 +16,8 @@ namespace lm { namespace ngram { +class ProbingModel; + namespace { /* These are passed to ReadNGrams so that n-grams with zero backoff that appear as context will still be used in state. */ @@ -37,9 +41,9 @@ template <class Middle> class ActivateLowerMiddle { Middle &modify_; }; -class ActivateUnigram { +template <class Weights> class ActivateUnigram { public: - explicit ActivateUnigram(ProbBackoff *unigram) : modify_(unigram) {} + explicit ActivateUnigram(Weights *unigram) : modify_(unigram) {} void operator()(const WordIndex *vocab_ids, const unsigned int /*n*/) { // assert(n == 2); @@ -47,43 +51,124 @@ class ActivateUnigram { } private: - ProbBackoff *modify_; + Weights *modify_; }; -template <class Middle> void FixSRI(int lower, float negative_lower_prob, unsigned int n, const uint64_t *keys, const WordIndex *vocab_ids, ProbBackoff *unigrams, std::vector<Middle> &middle) { - ProbBackoff blank; - blank.backoff = kNoExtensionBackoff; - // Fix SRI's stupidity. - // Note that negative_lower_prob is the negative of the probability (so it's currently >= 0). We still want the sign bit off to indicate left extension, so I just do -= on the backoffs. - blank.prob = negative_lower_prob; - // An entry was found at lower (order lower + 2). - // We need to insert blanks starting at lower + 1 (order lower + 3). - unsigned int fix = static_cast<unsigned int>(lower + 1); - uint64_t backoff_hash = detail::CombineWordHash(static_cast<uint64_t>(vocab_ids[1]), vocab_ids[2]); - if (fix == 0) { - // Insert a missing bigram. - blank.prob -= unigrams[vocab_ids[1]].backoff; - SetExtension(unigrams[vocab_ids[1]].backoff); - // Bigram including a unigram's backoff - middle[0].Insert(detail::ProbBackoffEntry::Make(keys[0], blank)); - fix = 1; - } else { - for (unsigned int i = 3; i < fix + 2; ++i) backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[i]); +// Find the lower order entry, inserting blanks along the way as necessary. 
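FindLower, defined next, collects the chain of entries below a newly read n-gram, inserting blank placeholders for anything SRI pruned away. The same walk in miniature, with std::unordered_map standing in for the probing hash table and invented toy types; only the control flow mirrors the real function:

#include <cstdint>
#include <unordered_map>
#include <vector>

struct ToyWeights { float prob; float backoff; };
typedef std::unordered_map<uint64_t, ToyWeights> ToyMiddle;

void ToyFindLower(const std::vector<uint64_t> &keys, ToyWeights &unigram,
                  std::vector<ToyMiddle> &middle,
                  std::vector<ToyWeights *> &between) {
  ToyWeights blank = { 0.0f, -0.0f };  // backoff -0.0: no right extension yet
  for (int lower = static_cast<int>(keys.size()) - 2; ; --lower) {
    if (lower == -1) { between.push_back(&unigram); return; }  // hit the unigram
    // insert() plays the role of FindOrInsert: second is false if it existed.
    std::pair<ToyMiddle::iterator, bool> result =
        middle[lower].insert(std::make_pair(keys[lower], blank));
    between.push_back(&result.first->second);
    if (!result.second) return;  // an existing entry ends the walk
  }
}

between comes back ordered from the (n-1)-gram context down to the first entry that already existed, which is exactly where AdjustLower, defined after it, reads its basis: between.back().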
+template <class Value> void FindLower(
+ const std::vector<uint64_t> &keys,
+ typename Value::Weights &unigram,
+ std::vector<util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> > &middle,
+ std::vector<typename Value::Weights *> &between) {
+ typename util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
+ typename Value::ProbingEntry entry;
+ // Backoff will always be 0.0. We'll get the probability and rest in another pass.
+ entry.value.backoff = kNoExtensionBackoff;
+ // Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
+ for (int lower = keys.size() - 2; ; --lower) {
+ if (lower == -1) {
+ between.push_back(&unigram);
+ return;
+ }
+ entry.key = keys[lower];
+ bool found = middle[lower].FindOrInsert(entry, iter);
+ between.push_back(&iter->value);
+ if (found) return;
+ }
+}
+
+// Between usually has a single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has uninitialized blank values to be set here.
+template <class Added, class Build> void AdjustLower(
+ const Added &added,
+ const Build &build,
+ std::vector<typename Build::Value::Weights *> &between,
+ const unsigned int n,
+ const std::vector<WordIndex> &vocab_ids,
+ typename Build::Value::Weights *unigrams,
+ std::vector<util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash> > &middle) {
+ typedef typename Build::Value Value;
+ if (between.size() == 1) {
+ build.MarkExtends(*between.front(), added);
+ return;
+ }
+ typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
+ float prob = -fabs(between.back()->prob);
+ // Order of the n-gram on which probabilities are based.
+ unsigned char basis = n - between.size();
+ assert(basis != 0);
+ typename Build::Value::Weights **change = &between.back();
+ // Skip the basis.
+ --change;
+ if (basis == 1) {
+ // Hallucinate a bigram based on a unigram's backoff and a unigram probability.
+ float &backoff = unigrams[vocab_ids[1]].backoff;
+ SetExtension(backoff);
+ prob += backoff;
+ (*change)->prob = prob;
+ build.SetRest(&*vocab_ids.begin(), 2, **change);
+ basis = 2;
+ --change;
}
- // fix >= 1. Insert trigrams and above.
- for (; fix <= n - 3; ++fix) {
+ uint64_t backoff_hash = static_cast<uint64_t>(vocab_ids[1]);
+ for (unsigned char i = 2; i <= basis; ++i) {
+ backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[i]);
+ }
+ for (; basis < n - 1; ++basis, --change) {
typename Middle::MutableIterator gotit;
- if (middle[fix - 1].UnsafeMutableFind(backoff_hash, gotit)) {
+ if (middle[basis - 2].UnsafeMutableFind(backoff_hash, gotit)) {
float &backoff = gotit->value.backoff;
SetExtension(backoff);
- blank.prob -= backoff;
+ prob += backoff;
}
- middle[fix].Insert(detail::ProbBackoffEntry::Make(keys[fix], blank));
- backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[fix + 2]);
+ (*change)->prob = prob;
+ build.SetRest(&*vocab_ids.begin(), basis + 1, **change);
+ backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[basis+1]);
+ }
+
+ typename std::vector<typename Value::Weights *>::const_iterator i(between.begin());
+ build.MarkExtends(**i, added);
+ const typename Value::Weights *longer = *i;
+ // Everything has probability but is not marked as extending.
+ for (++i; i != between.end(); ++i) {
+ build.MarkExtends(**i, *longer);
+ longer = *i;
}
}
-template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(util::FilePiece &f, const unsigned int n, const size_t count, const Voc &vocab, ProbBackoff *unigrams, std::vector<Middle> &middle, Activate activate, Store &store, PositiveProbWarn &warn) {
+// Continue marking lower entries even when they know that they extend left. This is used for upper/lower bounds.
+template <class Build> void MarkLower(
+ const std::vector<uint64_t> &keys,
+ const Build &build,
+ typename Build::Value::Weights &unigram,
+ std::vector<util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash> > &middle,
+ int start_order,
+ const typename Build::Value::Weights &longer) {
+ if (start_order == 0) return;
+ typename util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
+ // Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
+ for (int even_lower = start_order - 2 /* index in middle */; ; --even_lower) {
+ if (even_lower == -1) {
+ build.MarkExtends(unigram, longer);
+ return;
+ }
+ middle[even_lower].UnsafeMutableFind(keys[even_lower], iter);
+ if (!build.MarkExtends(iter->value, longer)) return;
+ }
+}
+
+template <class Build, class Activate, class Store> void ReadNGrams(
+ util::FilePiece &f,
+ const unsigned int n,
+ const size_t count,
+ const ProbingVocabulary &vocab,
+ const Build &build,
+ typename Build::Value::Weights *unigrams,
+ std::vector<util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash> > &middle,
+ Activate activate,
+ Store &store,
+ PositiveProbWarn &warn) {
+ typedef typename Build::Value Value;
+ typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
assert(n >= 2);
ReadNGramHeader(f, n);
@@ -91,38 +176,25 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(
// vocab ids of words in reverse order.
std::vector<WordIndex> vocab_ids(n);
std::vector<uint64_t> keys(n-1);
- typename Store::Entry::Value value;
- typename Middle::MutableIterator found;
+ typename Store::Entry entry;
+ std::vector<typename Value::Weights *> between;
for (size_t i = 0; i < count; ++i) {
- ReadNGram(f, n, vocab, &*vocab_ids.begin(), value, warn);
+ ReadNGram(f, n, vocab, &*vocab_ids.begin(), entry.value, warn);
+ build.SetRest(&*vocab_ids.begin(), n, entry.value);
keys[0] = detail::CombineWordHash(static_cast<uint64_t>(vocab_ids.front()), vocab_ids[1]);
for (unsigned int h = 1; h < n - 1; ++h) {
keys[h] = detail::CombineWordHash(keys[h-1], vocab_ids[h+1]);
}
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might be +0.0.
- util::SetSign(value.prob);
- store.Insert(Store::Entry::Make(keys[n-2], value));
- // Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
- int lower;
- util::FloatEnc fix_prob;
- for (lower = n - 3; ; --lower) {
- if (lower == -1) {
- fix_prob.f = unigrams[vocab_ids.front()].prob;
- fix_prob.i &= ~util::kSignBit;
- unigrams[vocab_ids.front()].prob = fix_prob.f;
- break;
- }
- if (middle[lower].UnsafeMutableFind(keys[lower], found)) {
- // Turn off sign bit to indicate that it extends left.
- fix_prob.f = found->value.prob; - fix_prob.i &= ~util::kSignBit; - found->value.prob = fix_prob.f; - // We don't need to recurse further down because this entry already set the bits for lower entries. - break; - } - } - if (lower != static_cast<int>(n) - 3) FixSRI(lower, fix_prob.f, n, &*keys.begin(), &*vocab_ids.begin(), unigrams, middle); + util::SetSign(entry.value.prob); + entry.key = keys[n-2]; + + store.Insert(entry); + between.clear(); + FindLower<Value>(keys, unigrams[vocab_ids.front()], middle, between); + AdjustLower<typename Store::Entry::Value, Build>(entry.value, build, between, n, vocab_ids, unigrams, middle); + if (Build::kMarkEvenLower) MarkLower<Build>(keys, build, unigrams[vocab_ids.front()], middle, n - between.size() - 1, *between.back()); activate(&*vocab_ids.begin(), n); } @@ -132,9 +204,9 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams( } // namespace namespace detail { -template <class MiddleT, class LongestT> uint8_t *TemplateHashedSearch<MiddleT, LongestT>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { +template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { std::size_t allocated = Unigram::Size(counts[0]); - unigram = Unigram(start, allocated); + unigram_ = Unigram(start, counts[0], allocated); start += allocated; for (unsigned int n = 2; n < counts.size(); ++n) { allocated = Middle::Size(counts[n - 1], config.probing_multiplier); @@ -142,31 +214,63 @@ template <class MiddleT, class LongestT> uint8_t *TemplateHashedSearch<MiddleT, start += allocated; } allocated = Longest::Size(counts.back(), config.probing_multiplier); - longest = Longest(start, allocated); + longest_ = Longest(start, allocated); start += allocated; return start; } -template <class MiddleT, class LongestT> template <class Voc> void TemplateHashedSearch<MiddleT, LongestT>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, Voc &vocab, Backing &backing) { +template <class Value> void HashedSearch<Value>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, Backing &backing) { // TODO: fix sorted. 
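The blank probabilities AdjustLower fills in follow the standard backoff identity: a missing n-gram scores as the next-shorter entry's probability plus the backoffs of the skipped contexts, all in log space. Worked once, with invented numbers, for a single pruned trigram:

#include <cmath>
#include <iostream>

int main() {
  // Suppose SRI pruned "a b c" but kept "a b c d", so a blank trigram must
  // be hallucinated from the bigram basis "b c". All values invented.
  float bigram_prob = -1.2f;      // log10 p(c | b)
  float context_backoff = -0.4f;  // log10 backoff(a b)
  float blank = -std::fabs(bigram_prob) + context_backoff;
  std::cout << "p(c | a b) = " << blank << "\n";  // -1.6
  return 0;
}

The fabs mirrors the line float prob = -fabs(between.back()->prob) in AdjustLower above: stored probabilities carry the extends-left flag in their sign bit, so the magnitude has to be recovered before any arithmetic.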
SetupMemory(GrowForSearch(config, vocab.UnkCountChangePadding(), Size(counts, config), backing), counts, config); PositiveProbWarn warn(config.positive_log_probability); - - Read1Grams(f, counts[0], vocab, unigram.Raw(), warn); + Read1Grams(f, counts[0], vocab, unigram_.Raw(), warn); CheckSpecials(config, vocab); + DispatchBuild(f, counts, config, vocab, warn); +} + +template <> void HashedSearch<BackoffValue>::DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn) { + NoRestBuild build; + ApplyBuild(f, counts, config, vocab, warn, build); +} + +template <> void HashedSearch<RestValue>::DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn) { + switch (config.rest_function) { + case Config::REST_MAX: + { + MaxRestBuild build; + ApplyBuild(f, counts, config, vocab, warn, build); + } + break; + case Config::REST_LOWER: + { + LowerRestBuild<ProbingModel> build(config, counts.size(), vocab); + ApplyBuild(f, counts, config, vocab, warn, build); + } + break; + } +} + +template <class Value> template <class Build> void HashedSearch<Value>::ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build) { + for (WordIndex i = 0; i < counts[0]; ++i) { + build.SetRest(&i, (unsigned int)1, unigram_.Raw()[i]); + } try { if (counts.size() > 2) { - ReadNGrams(f, 2, counts[1], vocab, unigram.Raw(), middle_, ActivateUnigram(unigram.Raw()), middle_[0], warn); + ReadNGrams<Build, ActivateUnigram<typename Value::Weights>, Middle>( + f, 2, counts[1], vocab, build, unigram_.Raw(), middle_, ActivateUnigram<typename Value::Weights>(unigram_.Raw()), middle_[0], warn); } for (unsigned int n = 3; n < counts.size(); ++n) { - ReadNGrams(f, n, counts[n-1], vocab, unigram.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_[n-3]), middle_[n-2], warn); + ReadNGrams<Build, ActivateLowerMiddle<Middle>, Middle>( + f, n, counts[n-1], vocab, build, unigram_.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_[n-3]), middle_[n-2], warn); } if (counts.size() > 2) { - ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, unigram.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_.back()), longest, warn); + ReadNGrams<Build, ActivateLowerMiddle<Middle>, Longest>( + f, counts.size(), counts[counts.size() - 1], vocab, build, unigram_.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_.back()), longest_, warn); } else { - ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, unigram.Raw(), middle_, ActivateUnigram(unigram.Raw()), longest, warn); + ReadNGrams<Build, ActivateUnigram<typename Value::Weights>, Longest>( + f, counts.size(), counts[counts.size() - 1], vocab, build, unigram_.Raw(), middle_, ActivateUnigram<typename Value::Weights>(unigram_.Raw()), longest_, warn); } } catch (util::ProbingSizeException &e) { UTIL_THROW(util::ProbingSizeException, "Avoid pruning n-grams like \"bar baz quux\" when \"foo bar baz quux\" is still in the model. KenLM will work when this pruning happens, but the probing model assumes these events are rare enough that using blank space in the probing hash table will cover all of them. 
Increase probing_multiplier (-p to build_binary) to add more blank spaces.\n"); @@ -174,17 +278,16 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe ReadEnd(f); } -template <class MiddleT, class LongestT> void TemplateHashedSearch<MiddleT, LongestT>::LoadedBinary() { - unigram.LoadedBinary(); +template <class Value> void HashedSearch<Value>::LoadedBinary() { + unigram_.LoadedBinary(); for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) { i->LoadedBinary(); } - longest.LoadedBinary(); + longest_.LoadedBinary(); } -template class TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>; - -template void TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, ProbingVocabulary &vocab, Backing &backing); +template class HashedSearch<BackoffValue>; +template class HashedSearch<RestValue>; } // namespace detail } // namespace ngram diff --git a/klm/lm/search_hashed.hh b/klm/lm/search_hashed.hh index 4352c72d..7e8c1220 100644 --- a/klm/lm/search_hashed.hh +++ b/klm/lm/search_hashed.hh @@ -19,6 +19,7 @@ namespace util { class FilePiece; } namespace lm { namespace ngram { struct Backing; +class ProbingVocabulary; namespace detail { inline uint64_t CombineWordHash(uint64_t current, const WordIndex next) { @@ -26,54 +27,48 @@ inline uint64_t CombineWordHash(uint64_t current, const WordIndex next) { return ret; } -struct HashedSearch { - typedef uint64_t Node; - - class Unigram { - public: - Unigram() {} - - Unigram(void *start, std::size_t /*allocated*/) : unigram_(static_cast<ProbBackoff*>(start)) {} - - static std::size_t Size(uint64_t count) { - return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate <unk> - } - - const ProbBackoff &Lookup(WordIndex index) const { return unigram_[index]; } +#pragma pack(push) +#pragma pack(4) +struct ProbEntry { + uint64_t key; + Prob value; + typedef uint64_t Key; + typedef Prob Value; + uint64_t GetKey() const { + return key; + } +}; - ProbBackoff &Unknown() { return unigram_[0]; } +#pragma pack(pop) - void LoadedBinary() {} +class LongestPointer { + public: + explicit LongestPointer(const float &to) : to_(&to) {} - // For building. 
- ProbBackoff *Raw() { return unigram_; } + LongestPointer() : to_(NULL) {} - private: - ProbBackoff *unigram_; - }; + bool Found() const { + return to_ != NULL; + } - Unigram unigram; + float Prob() const { + return *to_; + } - void LookupUnigram(WordIndex word, float &backoff, Node &next, FullScoreReturn &ret) const { - const ProbBackoff &entry = unigram.Lookup(word); - util::FloatEnc val; - val.f = entry.prob; - ret.independent_left = (val.i & util::kSignBit); - ret.extend_left = static_cast<uint64_t>(word); - val.i |= util::kSignBit; - ret.prob = val.f; - backoff = entry.backoff; - next = static_cast<Node>(word); - } + private: + const float *to_; }; -template <class MiddleT, class LongestT> class TemplateHashedSearch : public HashedSearch { +template <class Value> class HashedSearch { public: - typedef MiddleT Middle; + typedef uint64_t Node; - typedef LongestT Longest; - Longest longest; + typedef typename Value::ProbingProxy UnigramPointer; + typedef typename Value::ProbingProxy MiddlePointer; + typedef ::lm::ngram::detail::LongestPointer LongestPointer; + static const ModelType kModelType = Value::kProbingModelType; + static const bool kDifferentRest = Value::kDifferentRest; static const unsigned int kVersion = 0; // TODO: move probing_multiplier here with next binary file format update. @@ -89,64 +84,55 @@ template <class MiddleT, class LongestT> class TemplateHashedSearch : public Has uint8_t *SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config); - template <class Voc> void InitializeFromARPA(const char *file, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, Voc &vocab, Backing &backing); + void InitializeFromARPA(const char *file, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, Backing &backing); - typedef typename std::vector<Middle>::const_iterator MiddleIter; + void LoadedBinary(); - MiddleIter MiddleBegin() const { return middle_.begin(); } - MiddleIter MiddleEnd() const { return middle_.end(); } + unsigned char Order() const { + return middle_.size() + 2; + } - Node Unpack(uint64_t extend_pointer, unsigned char extend_length, float &prob) const { - util::FloatEnc val; - if (extend_length == 1) { - val.f = unigram.Lookup(static_cast<uint64_t>(extend_pointer)).prob; - } else { - typename Middle::ConstIterator found; - if (!middle_[extend_length - 2].Find(extend_pointer, found)) { - std::cerr << "Extend pointer " << extend_pointer << " should have been found for length " << (unsigned) extend_length << std::endl; - abort(); - } - val.f = found->value.prob; - } - val.i |= util::kSignBit; - prob = val.f; - return extend_pointer; + typename Value::Weights &UnknownUnigram() { return unigram_.Unknown(); } + + UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &independent_left, uint64_t &extend_left) const { + extend_left = static_cast<uint64_t>(word); + next = extend_left; + UnigramPointer ret(unigram_.Lookup(word)); + independent_left = ret.IndependentLeft(); + return ret; } - bool LookupMiddle(const Middle &middle, WordIndex word, float &backoff, Node &node, FullScoreReturn &ret) const { - node = CombineWordHash(node, word); +#pragma GCC diagnostic ignored "-Wuninitialized" + MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const { + node = extend_pointer; typename Middle::ConstIterator found; - if (!middle.Find(node, found)) return false; - util::FloatEnc enc; - enc.f = found->value.prob; - ret.independent_left 
= (enc.i & util::kSignBit); - ret.extend_left = node; - enc.i |= util::kSignBit; - ret.prob = enc.f; - backoff = found->value.backoff; - return true; + bool got = middle_[extend_length - 2].Find(extend_pointer, found); + assert(got); + (void)got; + return MiddlePointer(found->value); } - void LoadedBinary(); - - bool LookupMiddleNoProb(const Middle &middle, WordIndex word, float &backoff, Node &node) const { + MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_pointer) const { node = CombineWordHash(node, word); typename Middle::ConstIterator found; - if (!middle.Find(node, found)) return false; - backoff = found->value.backoff; - return true; + if (!middle_[order_minus_2].Find(node, found)) { + independent_left = true; + return MiddlePointer(); + } + extend_pointer = node; + MiddlePointer ret(found->value); + independent_left = ret.IndependentLeft(); + return ret; } - bool LookupLongest(WordIndex word, float &prob, Node &node) const { + LongestPointer LookupLongest(WordIndex word, const Node &node) const { // Sign bit is always on because longest n-grams do not extend left. - node = CombineWordHash(node, word); typename Longest::ConstIterator found; - if (!longest.Find(node, found)) return false; - prob = found->value.prob; - return true; + if (!longest_.Find(CombineWordHash(node, word), found)) return LongestPointer(); + return LongestPointer(found->value.prob); } - // Geenrate a node without necessarily checking that it actually exists. + // Generate a node without necessarily checking that it actually exists. // Optionally return false if it's know to not exist. bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const { assert(begin != end); @@ -158,55 +144,54 @@ template <class MiddleT, class LongestT> class TemplateHashedSearch : public Has } private: - std::vector<Middle> middle_; -}; + // Interpret config's rest cost build policy and pass the right template argument to ApplyBuild. + void DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn); -/* These look like perfect candidates for a template, right? Ancient gcc (4.1 - * on RedHat stale linux) doesn't pack templates correctly. ProbBackoffEntry - * is a multiple of 8 bytes anyway. ProbEntry is 12 bytes so it's set to pack. 
- */ -struct ProbBackoffEntry { - uint64_t key; - ProbBackoff value; - typedef uint64_t Key; - typedef ProbBackoff Value; - uint64_t GetKey() const { - return key; - } - static ProbBackoffEntry Make(uint64_t key, ProbBackoff value) { - ProbBackoffEntry ret; - ret.key = key; - ret.value = value; - return ret; - } -}; + template <class Build> void ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build); -#pragma pack(push) -#pragma pack(4) -struct ProbEntry { - uint64_t key; - Prob value; - typedef uint64_t Key; - typedef Prob Value; - uint64_t GetKey() const { - return key; - } - static ProbEntry Make(uint64_t key, Prob value) { - ProbEntry ret; - ret.key = key; - ret.value = value; - return ret; - } -}; + class Unigram { + public: + Unigram() {} -#pragma pack(pop) + Unigram(void *start, uint64_t count, std::size_t /*allocated*/) : + unigram_(static_cast<typename Value::Weights*>(start)) +#ifdef DEBUG + , count_(count) +#endif + {} + + static std::size_t Size(uint64_t count) { + return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate <unk> + } + + const typename Value::Weights &Lookup(WordIndex index) const { +#ifdef DEBUG + assert(index < count_); +#endif + return unigram_[index]; + } + + typename Value::Weights &Unknown() { return unigram_[0]; } + void LoadedBinary() {} -struct ProbingHashedSearch : public TemplateHashedSearch< - util::ProbingHashTable<ProbBackoffEntry, util::IdentityHash>, - util::ProbingHashTable<ProbEntry, util::IdentityHash> > { + // For building. + typename Value::Weights *Raw() { return unigram_; } + + private: + typename Value::Weights *unigram_; +#ifdef DEBUG + uint64_t count_; +#endif + }; + + Unigram unigram_; + + typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle; + std::vector<Middle> middle_; - static const ModelType kModelType = HASH_PROBING; + typedef util::ProbingHashTable<ProbEntry, util::IdentityHash> Longest; + Longest longest_; }; } // namespace detail diff --git a/klm/lm/search_trie.cc b/klm/lm/search_trie.cc index ffadfa94..18e80d5a 100644 --- a/klm/lm/search_trie.cc +++ b/klm/lm/search_trie.cc @@ -273,8 +273,9 @@ class FindBlanks { // Phase to actually write n-grams to the trie. 
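WriteEntries, rewritten next, no longer lets each bit-packed structure quantize for itself: Insert now hands back the raw bit address of the new record and a quantizer-aware pointer proxy (Quant::MiddlePointer or Quant::LongestPointer) does the encoding there. The division of labor in toy form, with byte-granular slots instead of bit offsets and invented names throughout:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// The packed structure only knows where a record's weight field lives.
struct ToyAddress { std::vector<uint16_t> *base; std::size_t slot; };

struct ToyMiddle {
  std::vector<uint16_t> slots;
  ToyAddress Insert() {
    slots.push_back(0);
    ToyAddress a = { &slots, slots.size() - 1 };
    return a;
  }
};

// The quantizer-aware pointer owns encoding and decoding at that address.
// Negative step so non-positive log probs map to non-negative codes.
struct ToyQuantPointer {
  float step;
  ToyAddress at;
  void Write(float prob) const {
    (*at.base)[at.slot] = static_cast<uint16_t>(prob / step + 0.5f);
  }
  float Read() const { return (*at.base)[at.slot] * step; }
};

int main() {
  ToyMiddle middle;
  ToyQuantPointer p = { -0.01f, middle.Insert() };
  p.Write(-1.234f);
  std::cout << p.Read() << "\n";  // -1.23: rounded to the 0.01 grid
  return 0;
}

This split is what lets BitPackedMiddle::Insert and BitPackedLongest::Insert, later in this diff, drop their float parameters and return a util::BitAddress instead.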
template <class Quant, class Bhiksha> class WriteEntries { public: - WriteEntries(RecordReader *contexts, UnigramValue *unigrams, BitPackedMiddle<typename Quant::Middle, Bhiksha> *middle, BitPackedLongest<typename Quant::Longest> &longest, unsigned char order, SRISucks &sri) : + WriteEntries(RecordReader *contexts, const Quant &quant, UnigramValue *unigrams, BitPackedMiddle<Bhiksha> *middle, BitPackedLongest &longest, unsigned char order, SRISucks &sri) : contexts_(contexts), + quant_(quant), unigrams_(unigrams), middle_(middle), longest_(longest), @@ -290,7 +291,7 @@ template <class Quant, class Bhiksha> class WriteEntries { void MiddleBlank(const unsigned char order, const WordIndex *indices, unsigned char /*lower*/, float /*prob_base*/) { ProbBackoff weights = sri_.GetBlank(order_, order, indices); - middle_[order - 2].Insert(indices[order - 1], weights.prob, weights.backoff); + typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(indices[order - 1])).Write(weights.prob, weights.backoff); } void Middle(const unsigned char order, const void *data) { @@ -301,21 +302,22 @@ template <class Quant, class Bhiksha> class WriteEntries { SetExtension(weights.backoff); ++context; } - middle_[order - 2].Insert(words[order - 1], weights.prob, weights.backoff); + typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(words[order - 1])).Write(weights.prob, weights.backoff); } void Longest(const void *data) { const WordIndex *words = reinterpret_cast<const WordIndex*>(data); - longest_.Insert(words[order_ - 1], reinterpret_cast<const Prob*>(words + order_)->prob); + typename Quant::LongestPointer(quant_, longest_.Insert(words[order_ - 1])).Write(reinterpret_cast<const Prob*>(words + order_)->prob); } void Cleanup() {} private: RecordReader *contexts_; + const Quant &quant_; UnigramValue *const unigrams_; - BitPackedMiddle<typename Quant::Middle, Bhiksha> *const middle_; - BitPackedLongest<typename Quant::Longest> &longest_; + BitPackedMiddle<Bhiksha> *const middle_; + BitPackedLongest &longest_; BitPacked &bigram_pack_; const unsigned char order_; SRISucks &sri_; @@ -380,7 +382,7 @@ template <class Doing> class BlankManager { }; template <class Doing> void RecursiveInsert(const unsigned char total_order, const WordIndex unigram_count, RecordReader *input, std::ostream *progress_out, const char *message, Doing &doing) { - util::ErsatzProgress progress(progress_out, message, unigram_count + 1); + util::ErsatzProgress progress(unigram_count + 1, progress_out, message); WordIndex unigram = 0; std::priority_queue<Gram> grams; grams.push(Gram(&unigram, 1)); @@ -502,7 +504,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve inputs[i-2].Rewind(); } if (Quant::kTrain) { - util::ErsatzProgress progress(config.messages, "Quantizing", std::accumulate(counts.begin() + 1, counts.end(), 0)); + util::ErsatzProgress progress(std::accumulate(counts.begin() + 1, counts.end(), 0), config.messages, "Quantizing"); for (unsigned char i = 2; i < counts.size(); ++i) { TrainQuantizer(i, counts[i-1], sri.Values(i), inputs[i-2], progress, quant); } @@ -510,7 +512,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve quant.FinishedLoading(config); } - UnigramValue *unigrams = out.unigram.Raw(); + UnigramValue *unigrams = out.unigram_.Raw(); PopulateUnigramWeights(unigram_file.get(), counts[0], contexts[0], unigrams); unigram_file.reset(); @@ -519,7 +521,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles 
&files, std::ve } // Fill entries except unigram probabilities. { - WriteEntries<Quant, Bhiksha> writer(contexts, unigrams, out.middle_begin_, out.longest, counts.size(), sri); + WriteEntries<Quant, Bhiksha> writer(contexts, quant, unigrams, out.middle_begin_, out.longest_, counts.size(), sri); RecursiveInsert(counts.size(), counts[0], inputs, config.messages, "Writing trie", writer); } @@ -544,14 +546,14 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve for (typename TrieSearch<Quant, Bhiksha>::Middle *i = out.middle_begin_; i != out.middle_end_ - 1; ++i) { i->FinishedLoading((i+1)->InsertIndex(), config); } - (out.middle_end_ - 1)->FinishedLoading(out.longest.InsertIndex(), config); + (out.middle_end_ - 1)->FinishedLoading(out.longest_.InsertIndex(), config); } } template <class Quant, class Bhiksha> uint8_t *TrieSearch<Quant, Bhiksha>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { - quant_.SetupMemory(start, config); + quant_.SetupMemory(start, counts.size(), config); start += Quant::Size(counts.size(), config); - unigram.Init(start); + unigram_.Init(start); start += Unigram::Size(counts[0]); FreeMiddles(); middle_begin_ = static_cast<Middle*>(malloc(sizeof(Middle) * (counts.size() - 2))); @@ -565,23 +567,23 @@ template <class Quant, class Bhiksha> uint8_t *TrieSearch<Quant, Bhiksha>::Setup for (unsigned char i = counts.size() - 1; i >= 2; --i) { new (middle_begin_ + i - 2) Middle( middle_starts[i-2], - quant_.Mid(i), + quant_.MiddleBits(config), counts[i-1], counts[0], counts[i], - (i == counts.size() - 1) ? static_cast<const BitPacked&>(longest) : static_cast<const BitPacked &>(middle_begin_[i-1]), + (i == counts.size() - 1) ? static_cast<const BitPacked&>(longest_) : static_cast<const BitPacked &>(middle_begin_[i-1]), config); } - longest.Init(start, quant_.Long(counts.size()), counts[0]); + longest_.Init(start, quant_.LongestBits(config), counts[0]); return start + Longest::Size(Quant::LongestBits(config), counts.back(), counts[0]); } template <class Quant, class Bhiksha> void TrieSearch<Quant, Bhiksha>::LoadedBinary() { - unigram.LoadedBinary(); + unigram_.LoadedBinary(); for (Middle *i = middle_begin_; i != middle_end_; ++i) { i->LoadedBinary(); } - longest.LoadedBinary(); + longest_.LoadedBinary(); } template <class Quant, class Bhiksha> void TrieSearch<Quant, Bhiksha>::InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing) { diff --git a/klm/lm/search_trie.hh b/klm/lm/search_trie.hh index 5155ca02..10b22ab1 100644 --- a/klm/lm/search_trie.hh +++ b/klm/lm/search_trie.hh @@ -28,13 +28,11 @@ template <class Quant, class Bhiksha> class TrieSearch { public: typedef NodeRange Node; - typedef ::lm::ngram::trie::Unigram Unigram; - Unigram unigram; - - typedef trie::BitPackedMiddle<typename Quant::Middle, Bhiksha> Middle; + typedef ::lm::ngram::trie::UnigramPointer UnigramPointer; + typedef typename Quant::MiddlePointer MiddlePointer; + typedef typename Quant::LongestPointer LongestPointer; - typedef trie::BitPackedLongest<typename Quant::Longest> Longest; - Longest longest; + static const bool kDifferentRest = false; static const ModelType kModelType = static_cast<ModelType>(TRIE_SORTED + Quant::kModelTypeAdd + Bhiksha::kModelTypeAdd); @@ -62,55 +60,46 @@ template <class Quant, class Bhiksha> class TrieSearch { void LoadedBinary(); - typedef const Middle *MiddleIter; + void InitializeFromARPA(const char *file, 
util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing); - const Middle *MiddleBegin() const { return middle_begin_; } - const Middle *MiddleEnd() const { return middle_end_; } + unsigned char Order() const { + return middle_end_ - middle_begin_ + 2; + } - void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing); + ProbBackoff &UnknownUnigram() { return unigram_.Unknown(); } - void LookupUnigram(WordIndex word, float &backoff, Node &node, FullScoreReturn &ret) const { - unigram.Find(word, ret.prob, backoff, node); - ret.independent_left = (node.begin == node.end); - ret.extend_left = static_cast<uint64_t>(word); + UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &independent_left, uint64_t &extend_left) const { + extend_left = static_cast<uint64_t>(word); + UnigramPointer ret(unigram_.Find(word, next)); + independent_left = (next.begin == next.end); + return ret; } - bool LookupMiddle(const Middle &mid, WordIndex word, float &backoff, Node &node, FullScoreReturn &ret) const { - if (!mid.Find(word, ret.prob, backoff, node, ret.extend_left)) return false; - ret.independent_left = (node.begin == node.end); - return true; + MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const { + return MiddlePointer(quant_, extend_length - 2, middle_begin_[extend_length - 2].ReadEntry(extend_pointer, node)); } - bool LookupMiddleNoProb(const Middle &mid, WordIndex word, float &backoff, Node &node) const { - return mid.FindNoProb(word, backoff, node); + MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_left) const { + util::BitAddress address(middle_begin_[order_minus_2].Find(word, node, extend_left)); + independent_left = (address.base == NULL) || (node.begin == node.end); + return MiddlePointer(quant_, order_minus_2, address); } - bool LookupLongest(WordIndex word, float &prob, const Node &node) const { - return longest.Find(word, prob, node); + LongestPointer LookupLongest(WordIndex word, const Node &node) const { + return LongestPointer(quant_, longest_.Find(word, node)); } bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const { - // TODO: don't decode backoff. 
assert(begin != end); - FullScoreReturn ignored; - float ignored_backoff; - LookupUnigram(*begin, ignored_backoff, node, ignored); + bool independent_left; + uint64_t ignored; + LookupUnigram(*begin, node, independent_left, ignored); for (const WordIndex *i = begin + 1; i < end; ++i) { - if (!LookupMiddleNoProb(middle_begin_[i - begin - 1], *i, ignored_backoff, node)) return false; + if (independent_left || !LookupMiddle(i - begin - 1, *i, node, independent_left, ignored).Found()) return false; } return true; } - Node Unpack(uint64_t extend_pointer, unsigned char extend_length, float &prob) const { - if (extend_length == 1) { - float ignored; - Node ret; - unigram.Find(static_cast<WordIndex>(extend_pointer), prob, ignored, ret); - return ret; - } - return middle_begin_[extend_length - 2].ReadEntry(extend_pointer, prob); - } - private: friend void BuildTrie<Quant, Bhiksha>(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing); @@ -122,8 +111,16 @@ template <class Quant, class Bhiksha> class TrieSearch { free(middle_begin_); } + typedef trie::BitPackedMiddle<Bhiksha> Middle; + + typedef trie::BitPackedLongest Longest; + Longest longest_; + Middle *middle_begin_, *middle_end_; Quant quant_; + + typedef ::lm::ngram::trie::Unigram Unigram; + Unigram unigram_; }; } // namespace trie diff --git a/klm/lm/state.hh b/klm/lm/state.hh new file mode 100644 index 00000000..c7438414 --- /dev/null +++ b/klm/lm/state.hh @@ -0,0 +1,123 @@ +#ifndef LM_STATE__ +#define LM_STATE__ + +#include "lm/max_order.hh" +#include "lm/word_index.hh" +#include "util/murmur_hash.hh" + +#include <string.h> + +namespace lm { +namespace ngram { + +// This is a POD but if you want memcmp to return the same as operator==, call +// ZeroRemaining first. +class State { + public: + bool operator==(const State &other) const { + if (length != other.length) return false; + return !memcmp(words, other.words, length * sizeof(WordIndex)); + } + + // Three way comparison function. + int Compare(const State &other) const { + if (length != other.length) return length < other.length ? -1 : 1; + return memcmp(words, other.words, length * sizeof(WordIndex)); + } + + bool operator<(const State &other) const { + if (length != other.length) return length < other.length; + return memcmp(words, other.words, length * sizeof(WordIndex)) < 0; + } + + // Call this before using raw memcmp. + void ZeroRemaining() { + for (unsigned char i = length; i < kMaxOrder - 1; ++i) { + words[i] = 0; + backoff[i] = 0.0; + } + } + + unsigned char Length() const { return length; } + + // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD. + // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit. 
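The layout claim in that comment is easy to check. A quick test under its own hypothesis, a 64-bit WordIndex and 32-bit float, with kMaxOrder fixed at 6 purely for illustration:

#include <cstdint>
#include <iostream>

static const unsigned kMaxOrder = 6;  // illustrative value, not the header's

struct LengthLast {
  uint64_t words[kMaxOrder - 1];
  float backoff[kMaxOrder - 1];
  unsigned char length;
};
struct LengthFirst {
  unsigned char length;
  uint64_t words[kMaxOrder - 1];
  float backoff[kMaxOrder - 1];
};

int main() {
  // On a typical 64-bit ABI: 64 versus 72. Tail padding absorbs the single
  // length byte when it comes last; leading, it costs 7 bytes of padding.
  std::cout << sizeof(LengthLast) << " " << sizeof(LengthFirst) << "\n";
  return 0;
}

With a 32-bit WordIndex the two orderings happen to tie at 44 bytes, which is why the comment is careful to spell out its assumptions.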
+ WordIndex words[kMaxOrder - 1]; + float backoff[kMaxOrder - 1]; + unsigned char length; +}; + +inline uint64_t hash_value(const State &state, uint64_t seed = 0) { + return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length, seed); +} + +struct Left { + bool operator==(const Left &other) const { + return + (length == other.length) && + pointers[length - 1] == other.pointers[length - 1] && + full == other.full; + } + + int Compare(const Left &other) const { + if (length < other.length) return -1; + if (length > other.length) return 1; + if (pointers[length - 1] > other.pointers[length - 1]) return 1; + if (pointers[length - 1] < other.pointers[length - 1]) return -1; + return (int)full - (int)other.full; + } + + bool operator<(const Left &other) const { + return Compare(other) == -1; + } + + void ZeroRemaining() { + for (uint64_t * i = pointers + length; i < pointers + kMaxOrder - 1; ++i) + *i = 0; + } + + uint64_t pointers[kMaxOrder - 1]; + unsigned char length; + bool full; +}; + +inline uint64_t hash_value(const Left &left) { + unsigned char add[2]; + add[0] = left.length; + add[1] = left.full; + return util::MurmurHashNative(add, 2, left.length ? left.pointers[left.length - 1] : 0); +} + +struct ChartState { + bool operator==(const ChartState &other) { + return (right == other.right) && (left == other.left); + } + + int Compare(const ChartState &other) const { + int lres = left.Compare(other.left); + if (lres) return lres; + return right.Compare(other.right); + } + + bool operator<(const ChartState &other) const { + return Compare(other) == -1; + } + + void ZeroRemaining() { + left.ZeroRemaining(); + right.ZeroRemaining(); + } + + Left left; + State right; +}; + +inline uint64_t hash_value(const ChartState &state) { + return hash_value(state.right, hash_value(state.left)); +} + + +} // namespace ngram +} // namespace lm + +#endif // LM_STATE__ diff --git a/klm/lm/trie.cc b/klm/lm/trie.cc index 20075bb8..0f1ca574 100644 --- a/klm/lm/trie.cc +++ b/klm/lm/trie.cc @@ -1,7 +1,6 @@ #include "lm/trie.hh" #include "lm/bhiksha.hh" -#include "lm/quantize.hh" #include "util/bit_packing.hh" #include "util/exception.hh" #include "util/sorted_uniform.hh" @@ -58,91 +57,71 @@ void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits) max_vocab_ = max_vocab; } -template <class Quant, class Bhiksha> std::size_t BitPackedMiddle<Quant, Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) { +template <class Bhiksha> std::size_t BitPackedMiddle<Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) { return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config)); } -template <class Quant, class Bhiksha> BitPackedMiddle<Quant, Bhiksha>::BitPackedMiddle(void *base, const Quant &quant, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config) : +template <class Bhiksha> BitPackedMiddle<Bhiksha>::BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config) : BitPacked(), - quant_(quant), + quant_bits_(quant_bits), // If the offset of the method changes, also change TrieSearch::UpdateConfigFromBinary. 
bhiksha_(base, entries + 1, max_next, config), next_source_(&next_source) { if (entries + 1 >= (1ULL << 57) || (max_next >= (1ULL << 57))) UTIL_THROW(util::Exception, "Sorry, this does not support more than " << (1ULL << 57) << " n-grams of a particular order. Edit util/bit_packing.hh and fix the bit packing functions."); - BaseInit(reinterpret_cast<uint8_t*>(base) + Bhiksha::Size(entries + 1, max_next, config), max_vocab, quant.TotalBits() + bhiksha_.InlineBits()); + BaseInit(reinterpret_cast<uint8_t*>(base) + Bhiksha::Size(entries + 1, max_next, config), max_vocab, quant_bits_ + bhiksha_.InlineBits()); } -template <class Quant, class Bhiksha> void BitPackedMiddle<Quant, Bhiksha>::Insert(WordIndex word, float prob, float backoff) { +template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Insert(WordIndex word) { assert(word <= word_mask_); uint64_t at_pointer = insert_index_ * total_bits_; util::WriteInt57(base_, at_pointer, word_bits_, word); at_pointer += word_bits_; - quant_.Write(base_, at_pointer, prob, backoff); - at_pointer += quant_.TotalBits(); + util::BitAddress ret(base_, at_pointer); + at_pointer += quant_bits_; uint64_t next = next_source_->InsertIndex(); bhiksha_.WriteNext(base_, at_pointer, insert_index_, next); - ++insert_index_; + return ret; } -template <class Quant, class Bhiksha> bool BitPackedMiddle<Quant, Bhiksha>::Find(WordIndex word, float &prob, float &backoff, NodeRange &range, uint64_t &pointer) const { +template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Find(WordIndex word, NodeRange &range, uint64_t &pointer) const { uint64_t at_pointer; if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) { - return false; + return util::BitAddress(NULL, 0); } pointer = at_pointer; at_pointer *= total_bits_; at_pointer += word_bits_; + bhiksha_.ReadNext(base_, at_pointer + quant_bits_, pointer, total_bits_, range); - quant_.Read(base_, at_pointer, prob, backoff); - at_pointer += quant_.TotalBits(); - - bhiksha_.ReadNext(base_, at_pointer, pointer, total_bits_, range); - - return true; + return util::BitAddress(base_, at_pointer); } -template <class Quant, class Bhiksha> bool BitPackedMiddle<Quant, Bhiksha>::FindNoProb(WordIndex word, float &backoff, NodeRange &range) const { - uint64_t index; - if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, index)) return false; - uint64_t at_pointer = index * total_bits_; - at_pointer += word_bits_; - quant_.ReadBackoff(base_, at_pointer, backoff); - at_pointer += quant_.TotalBits(); - bhiksha_.ReadNext(base_, at_pointer, index, total_bits_, range); - return true; -} - -template <class Quant, class Bhiksha> void BitPackedMiddle<Quant, Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) { +template <class Bhiksha> void BitPackedMiddle<Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) { uint64_t last_next_write = (insert_index_ + 1) * total_bits_ - bhiksha_.InlineBits(); bhiksha_.WriteNext(base_, last_next_write, insert_index_ + 1, next_end); bhiksha_.FinishedLoading(config); } -template <class Quant> void BitPackedLongest<Quant>::Insert(WordIndex index, float prob) { +util::BitAddress BitPackedLongest::Insert(WordIndex index) { assert(index <= word_mask_); uint64_t at_pointer = insert_index_ * total_bits_; util::WriteInt57(base_, at_pointer, word_bits_, index); at_pointer += word_bits_; - quant_.Write(base_, at_pointer, prob); ++insert_index_; + return 
util::BitAddress(base_, at_pointer); } -template <class Quant> bool BitPackedLongest<Quant>::Find(WordIndex word, float &prob, const NodeRange &range) const { +util::BitAddress BitPackedLongest::Find(WordIndex word, const NodeRange &range) const { uint64_t at_pointer; - if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) return false; + if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) return util::BitAddress(NULL, 0); at_pointer = at_pointer * total_bits_ + word_bits_; - quant_.Read(base_, at_pointer, prob); - return true; + return util::BitAddress(base_, at_pointer); } -template class BitPackedMiddle<DontQuantize::Middle, DontBhiksha>; -template class BitPackedMiddle<DontQuantize::Middle, ArrayBhiksha>; -template class BitPackedMiddle<SeparatelyQuantize::Middle, DontBhiksha>; -template class BitPackedMiddle<SeparatelyQuantize::Middle, ArrayBhiksha>; -template class BitPackedLongest<DontQuantize::Longest>; -template class BitPackedLongest<SeparatelyQuantize::Longest>; +template class BitPackedMiddle<DontBhiksha>; +template class BitPackedMiddle<ArrayBhiksha>; } // namespace trie } // namespace ngram diff --git a/klm/lm/trie.hh b/klm/lm/trie.hh index ebe9910f..eff93292 100644 --- a/klm/lm/trie.hh +++ b/klm/lm/trie.hh @@ -1,12 +1,13 @@ #ifndef LM_TRIE__ #define LM_TRIE__ -#include <stdint.h> +#include "lm/weights.hh" +#include "lm/word_index.hh" +#include "util/bit_packing.hh" #include <cstddef> -#include "lm/word_index.hh" -#include "lm/weights.hh" +#include <stdint.h> namespace lm { namespace ngram { @@ -24,6 +25,22 @@ struct UnigramValue { uint64_t Next() const { return next; } }; +class UnigramPointer { + public: + explicit UnigramPointer(const ProbBackoff &to) : to_(&to) {} + + UnigramPointer() : to_(NULL) {} + + bool Found() const { return to_ != NULL; } + + float Prob() const { return to_->prob; } + float Backoff() const { return to_->backoff; } + float Rest() const { return Prob(); } + + private: + const ProbBackoff *to_; +}; + class Unigram { public: Unigram() {} @@ -47,12 +64,11 @@ class Unigram { void LoadedBinary() {} - void Find(WordIndex word, float &prob, float &backoff, NodeRange &next) const { + UnigramPointer Find(WordIndex word, NodeRange &next) const { UnigramValue *val = unigram_ + word; - prob = val->weights.prob; - backoff = val->weights.backoff; next.begin = val->next; next.end = (val+1)->next; + return UnigramPointer(val->weights); } private: @@ -81,40 +97,36 @@ class BitPacked { uint64_t insert_index_, max_vocab_; }; -template <class Quant, class Bhiksha> class BitPackedMiddle : public BitPacked { +template <class Bhiksha> class BitPackedMiddle : public BitPacked { public: static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config); // next_source need not be initialized. 
- BitPackedMiddle(void *base, const Quant &quant, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config); + BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config); - void Insert(WordIndex word, float prob, float backoff); + util::BitAddress Insert(WordIndex word); void FinishedLoading(uint64_t next_end, const Config &config); void LoadedBinary() { bhiksha_.LoadedBinary(); } - bool Find(WordIndex word, float &prob, float &backoff, NodeRange &range, uint64_t &pointer) const; - - bool FindNoProb(WordIndex word, float &backoff, NodeRange &range) const; + util::BitAddress Find(WordIndex word, NodeRange &range, uint64_t &pointer) const; - NodeRange ReadEntry(uint64_t pointer, float &prob) { + util::BitAddress ReadEntry(uint64_t pointer, NodeRange &range) { uint64_t addr = pointer * total_bits_; addr += word_bits_; - quant_.ReadProb(base_, addr, prob); - NodeRange ret; - bhiksha_.ReadNext(base_, addr + quant_.TotalBits(), pointer, total_bits_, ret); - return ret; + bhiksha_.ReadNext(base_, addr + quant_bits_, pointer, total_bits_, range); + return util::BitAddress(base_, addr); } private: - Quant quant_; + uint8_t quant_bits_; Bhiksha bhiksha_; const BitPacked *next_source_; }; -template <class Quant> class BitPackedLongest : public BitPacked { +class BitPackedLongest : public BitPacked { public: static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) { return BaseSize(entries, max_vocab, quant_bits); @@ -122,19 +134,18 @@ template <class Quant> class BitPackedLongest : public BitPacked { BitPackedLongest() {} - void Init(void *base, const Quant &quant, uint64_t max_vocab) { - quant_ = quant; - BaseInit(base, max_vocab, quant_.TotalBits()); + void Init(void *base, uint8_t quant_bits, uint64_t max_vocab) { + BaseInit(base, max_vocab, quant_bits); } void LoadedBinary() {} - void Insert(WordIndex word, float prob); + util::BitAddress Insert(WordIndex word); - bool Find(WordIndex word, float &prob, const NodeRange &node) const; + util::BitAddress Find(WordIndex word, const NodeRange &node) const; private: - Quant quant_; + uint8_t quant_bits_; }; } // namespace trie diff --git a/klm/lm/value.hh b/klm/lm/value.hh new file mode 100644 index 00000000..85e53f14 --- /dev/null +++ b/klm/lm/value.hh @@ -0,0 +1,157 @@ +#ifndef LM_VALUE__ +#define LM_VALUE__ + +#include "lm/model_type.hh" +#include "lm/value_build.hh" +#include "lm/weights.hh" +#include "util/bit_packing.hh" + +#include <inttypes.h> + +namespace lm { +namespace ngram { + +// Template proxy for probing unigrams and middle. +template <class Weights> class GenericProbingProxy { + public: + explicit GenericProbingProxy(const Weights &to) : to_(&to) {} + + GenericProbingProxy() : to_(0) {} + + bool Found() const { return to_ != 0; } + + float Prob() const { + util::FloatEnc enc; + enc.f = to_->prob; + enc.i |= util::kSignBit; + return enc.f; + } + + float Backoff() const { return to_->backoff; } + + bool IndependentLeft() const { + util::FloatEnc enc; + enc.f = to_->prob; + return enc.i & util::kSignBit; + } + + protected: + const Weights *to_; +}; + +// Basic proxy for trie unigrams. 
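GenericProbingProxy above is where the sign-bit convention running through this diff gets undone for readers: probabilities are stored with the sign bit doubling as the independent-left flag, so Prob() forces the bit back on and IndependentLeft() merely tests it. The trick in isolation, punning through a union the way util::FloatEnc does (the mask assumes IEEE 754 single precision):

#include <cstdint>
#include <iostream>

union FloatEnc { float f; uint32_t i; };        // mimics util::FloatEnc
static const uint32_t kSignBit = 0x80000000u;

int main() {
  FloatEnc enc;
  enc.f = -2.5f;       // a log prob; sign bit on means independent left
  enc.i &= ~kSignBit;  // the loader clears it: this entry extends left
  std::cout << ((enc.i & kSignBit) ? "independent" : "extends left") << "\n";
  enc.i |= kSignBit;   // any reader restores the true value unconditionally
  std::cout << enc.f << "\n";  // -2.5
  return 0;
}

Backoffs play the same game with plus and minus zero, as the read_arpa.cc hunk earlier spells out: the two zeros compare equal as floats but differ in the sign bit. The trie-side unigram proxy below needs none of this because its weights are stored as plain structs.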
+template <class Weights> class GenericTrieUnigramProxy {
+ public:
+ explicit GenericTrieUnigramProxy(const Weights &to) : to_(&to) {}
+
+ GenericTrieUnigramProxy() : to_(0) {}
+
+ bool Found() const { return to_ != 0; }
+ float Prob() const { return to_->prob; }
+ float Backoff() const { return to_->backoff; }
+ float Rest() const { return Prob(); }
+
+ protected:
+ const Weights *to_;
+};
+
+struct BackoffValue {
+ typedef ProbBackoff Weights;
+ static const ModelType kProbingModelType = PROBING;
+
+ class ProbingProxy : public GenericProbingProxy<Weights> {
+ public:
+ explicit ProbingProxy(const Weights &to) : GenericProbingProxy<Weights>(to) {}
+ ProbingProxy() {}
+ float Rest() const { return Prob(); }
+ };
+
+ class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
+ public:
+ explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
+ TrieUnigramProxy() {}
+ float Rest() const { return Prob(); }
+ };
+
+ struct ProbingEntry {
+ typedef uint64_t Key;
+ typedef Weights Value;
+ uint64_t key;
+ ProbBackoff value;
+ uint64_t GetKey() const { return key; }
+ };
+
+ struct TrieUnigramValue {
+ Weights weights;
+ uint64_t next;
+ uint64_t Next() const { return next; }
+ };
+
+ const static bool kDifferentRest = false;
+
+ template <class Model, class C> void Callback(const Config &, unsigned int, typename Model::Vocabulary &, C &callback) {
+ NoRestBuild build;
+ callback(build);
+ }
+};
+
+struct RestValue {
+ typedef RestWeights Weights;
+ static const ModelType kProbingModelType = REST_PROBING;
+
+ class ProbingProxy : public GenericProbingProxy<RestWeights> {
+ public:
+ explicit ProbingProxy(const Weights &to) : GenericProbingProxy<RestWeights>(to) {}
+ ProbingProxy() {}
+ float Rest() const { return to_->rest; }
+ };
+
+ class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
+ public:
+ explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
+ TrieUnigramProxy() {}
+ float Rest() const { return to_->rest; }
+ };
+
+// gcc 4.1 doesn't properly pack dependent types :-(.
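The pragma that follows is what the gcc remark is about: RestValue's ProbingEntry pairs an 8-byte key with the 12-byte RestWeights, and without packing each hash table slot would be padded out to the key's alignment. The effect under typical 64-bit alignment rules, with invented struct names:

#include <cstdint>
#include <iostream>

struct ToyRestWeights { float prob, backoff, rest; };  // 12 bytes

struct DefaultAligned { uint64_t key; ToyRestWeights value; };

#pragma pack(push, 4)
struct PackedTo4 { uint64_t key; ToyRestWeights value; };
#pragma pack(pop)

int main() {
  // Typically 24 versus 20 bytes; multiplied by every n-gram in the table.
  std::cout << sizeof(DefaultAligned) << " " << sizeof(PackedTo4) << "\n";
  return 0;
}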
+#pragma pack(push) +#pragma pack(4) + struct ProbingEntry { + typedef uint64_t Key; + typedef Weights Value; + Key key; + Value value; + Key GetKey() const { return key; } + }; + + struct TrieUnigramValue { + Weights weights; + uint64_t next; + uint64_t Next() const { return next; } + }; +#pragma pack(pop) + + const static bool kDifferentRest = true; + + template <class Model, class C> void Callback(const Config &config, unsigned int order, typename Model::Vocabulary &vocab, C &callback) { + switch (config.rest_function) { + case Config::REST_MAX: + { + MaxRestBuild build; + callback(build); + } + break; + case Config::REST_LOWER: + { + LowerRestBuild<Model> build(config, order, vocab); + callback(build); + } + break; + } + } +}; + +} // namespace ngram +} // namespace lm + +#endif // LM_VALUE__ diff --git a/klm/lm/value_build.cc b/klm/lm/value_build.cc new file mode 100644 index 00000000..6124f8da --- /dev/null +++ b/klm/lm/value_build.cc @@ -0,0 +1,58 @@ +#include "lm/value_build.hh" + +#include "lm/model.hh" +#include "lm/read_arpa.hh" + +namespace lm { +namespace ngram { + +template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) { + UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes."); + Config for_lower = config; + for_lower.rest_lower_files.clear(); + + // Unigram models aren't supported, so this is a custom loader. + // TODO: optimize the unigram loading? + { + util::FilePiece uni(config.rest_lower_files[0].c_str()); + std::vector<uint64_t> number; + ReadARPACounts(uni, number); + UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size()); + ReadNGramHeader(uni, 1); + unigrams_.resize(number[0]); + unigrams_[0] = config.unknown_missing_logprob; + PositiveProbWarn warn; + for (uint64_t i = 0; i < number[0]; ++i) { + WordIndex w; + Prob entry; + ReadNGram(uni, 1, vocab, &w, entry, warn); + unigrams_[w] = entry.prob; + } + } + + try { + for (unsigned int i = 2; i < order; ++i) { + models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower)); + UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i); + } + } catch (...) { + for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) { + delete *i; + } + models_.clear(); + throw; + } + + // TODO: force/check same vocab. 
+} + +template <class Model> LowerRestBuild<Model>::~LowerRestBuild() { + for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) { + delete *i; + } +} + +template class LowerRestBuild<ProbingModel>; + +} // namespace ngram +} // namespace lm diff --git a/klm/lm/value_build.hh b/klm/lm/value_build.hh new file mode 100644 index 00000000..687a41a0 --- /dev/null +++ b/klm/lm/value_build.hh @@ -0,0 +1,97 @@ +#ifndef LM_VALUE_BUILD__ +#define LM_VALUE_BUILD__ + +#include "lm/weights.hh" +#include "lm/word_index.hh" +#include "util/bit_packing.hh" + +#include <vector> + +namespace lm { +namespace ngram { + +class Config; +class BackoffValue; +class RestValue; + +class NoRestBuild { + public: + typedef BackoffValue Value; + + NoRestBuild() {} + + void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {} + void SetRest(const WordIndex *, unsigned int, const ProbBackoff &) const {} + + template <class Second> bool MarkExtends(ProbBackoff &weights, const Second &) const { + util::UnsetSign(weights.prob); + return false; + } + + // Probing doesn't need to go back to unigram. + const static bool kMarkEvenLower = false; +}; + +class MaxRestBuild { + public: + typedef RestValue Value; + + MaxRestBuild() {} + + void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {} + void SetRest(const WordIndex *, unsigned int, RestWeights &weights) const { + weights.rest = weights.prob; + util::SetSign(weights.rest); + } + + bool MarkExtends(RestWeights &weights, const RestWeights &to) const { + util::UnsetSign(weights.prob); + if (weights.rest >= to.rest) return false; + weights.rest = to.rest; + return true; + } + bool MarkExtends(RestWeights &weights, const Prob &to) const { + util::UnsetSign(weights.prob); + if (weights.rest >= to.prob) return false; + weights.rest = to.prob; + return true; + } + + // Probing does need to go back to unigram. 
+ const static bool kMarkEvenLower = true; +}; + +template <class Model> class LowerRestBuild { + public: + typedef RestValue Value; + + LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab); + + ~LowerRestBuild(); + + void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {} + void SetRest(const WordIndex *vocab_ids, unsigned int n, RestWeights &weights) const { + typename Model::State ignored; + if (n == 1) { + weights.rest = unigrams_[*vocab_ids]; + } else { + weights.rest = models_[n-2]->FullScoreForgotState(vocab_ids + 1, vocab_ids + n, *vocab_ids, ignored).prob; + } + } + + template <class Second> bool MarkExtends(RestWeights &weights, const Second &) const { + util::UnsetSign(weights.prob); + return false; + } + + const static bool kMarkEvenLower = false; + + std::vector<float> unigrams_; + + std::vector<const Model*> models_; +}; + +} // namespace ngram +} // namespace lm + +#endif // LM_VALUE_BUILD__ diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc index 9fd698bb..5de68f16 100644 --- a/klm/lm/vocab.cc +++ b/klm/lm/vocab.cc @@ -196,7 +196,7 @@ WordIndex ProbingVocabulary::Insert(const StringPiece &str) { } } -void ProbingVocabulary::FinishedLoading(ProbBackoff * /*reorder_vocab*/) { +void ProbingVocabulary::InternalFinishedLoading() { lookup_.FinishedInserting(); header_->bound = bound_; header_->version = kProbingVocabularyVersion; diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh index 06fdefe4..c3efcb4a 100644 --- a/klm/lm/vocab.hh +++ b/klm/lm/vocab.hh @@ -141,7 +141,9 @@ class ProbingVocabulary : public base::Vocabulary { WordIndex Insert(const StringPiece &str); - void FinishedLoading(ProbBackoff *reorder_vocab); + template <class Weights> void FinishedLoading(Weights * /*reorder_vocab*/) { + InternalFinishedLoading(); + } std::size_t UnkCountChangePadding() const { return 0; } @@ -150,6 +152,8 @@ class ProbingVocabulary : public base::Vocabulary { void LoadedBinary(bool have_words, int fd, EnumerateVocab *to); private: + void InternalFinishedLoading(); + typedef util::ProbingHashTable<ProbingVocabuaryEntry, util::IdentityHash> Lookup; Lookup lookup_; diff --git a/klm/lm/weights.hh b/klm/lm/weights.hh index 1f38cf5e..bd5d8034 100644 --- a/klm/lm/weights.hh +++ b/klm/lm/weights.hh @@ -12,6 +12,11 @@ struct ProbBackoff { float prob; float backoff; }; +struct RestWeights { + float prob; + float backoff; + float rest; +}; } // namespace lm #endif // LM_WEIGHTS__ diff --git a/klm/util/Jamfile b/klm/util/Jamfile index b8c14347..3ee2c2c2 100644 --- a/klm/util/Jamfile +++ b/klm/util/Jamfile @@ -1,4 +1,4 @@ -lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ../..//z : <include>.. : : <include>.. ; +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : <include>.. : : <include>.. ; import testing ; diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index a8d6299b..5ceccf2c 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -25,6 +25,7 @@ libklm_util_a_SOURCES = \ file.cc \ file_piece.cc \ mmap.cc \ - murmur_hash.cc + murmur_hash.cc \ + usage.cc AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. 
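The value.hh / value_build.hh additions above pick a rest-cost builder (MaxRestBuild, LowerRestBuild<Model>) from the runtime value of Config::rest_function and hand the chosen builder to a templated callback, so everything downstream is specialized per builder type at compile time instead of going through a virtual interface. Below is a minimal, self-contained sketch of that enum-to-template dispatch pattern; it is not cdec/kenlm code, and RestFunction, MaxBuild, LowerBuild, Dispatch, and PrintName are hypothetical stand-ins for the real identifiers.

#include <iostream>

// Hypothetical stand-ins for MaxRestBuild / LowerRestBuild<Model>.
struct MaxBuild { static const char *Name() { return "max"; } };
struct LowerBuild { static const char *Name() { return "lower"; } };

// Stand-in for Config::rest_function.
enum RestFunction { REST_MAX, REST_LOWER };

// The callback sees the concrete builder type, so the code it calls is
// instantiated once per builder, as in the Callback above.
template <class C> void Dispatch(RestFunction f, C &callback) {
  switch (f) {
    case REST_MAX: { MaxBuild build; callback(build); } break;
    case REST_LOWER: { LowerBuild build; callback(build); } break;
  }
}

// A functor with a templated operator() plays the role of the model loader.
struct PrintName {
  template <class Build> void operator()(Build &build) const {
    std::cout << "building with " << Build::Name() << " rest costs\n";
  }
};

int main() {
  PrintName loader;
  Dispatch(REST_LOWER, loader); // prints: building with lower rest costs
  return 0;
}

The point of the pattern is that the branch on the runtime enum happens once, up front, rather than per n-gram while the model is being read.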
diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh index 73a5cb22..dcbd814c 100644 --- a/klm/util/bit_packing.hh +++ b/klm/util/bit_packing.hh @@ -174,6 +174,13 @@ struct BitsMask { uint64_t mask; }; +struct BitAddress { + BitAddress(void *in_base, uint64_t in_offset) : base(in_base), offset(in_offset) {} + + void *base; + uint64_t offset; +}; + } // namespace util #endif // UTIL_BIT_PACKING__ diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc index a82ce672..07b14e26 100644 --- a/klm/util/ersatz_progress.cc +++ b/klm/util/ersatz_progress.cc @@ -12,17 +12,17 @@ namespace { const unsigned char kWidth = 100; } ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<std::size_t>::max()), complete_(next_), out_(NULL) {} ErsatzProgress::~ErsatzProgress() { - if (!out_) return; - Finished(); + if (out_) Finished(); } -ErsatzProgress::ErsatzProgress(std::ostream *to, const std::string &message, std::size_t complete) +ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message) : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) { if (!out_) { next_ = std::numeric_limits<std::size_t>::max(); return; } - *out_ << message << "\n----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n"; + if (!message.empty()) *out_ << message << '\n'; + *out_ << "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n"; } void ErsatzProgress::Milestone() { diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh index 92c345fe..f709dc51 100644 --- a/klm/util/ersatz_progress.hh +++ b/klm/util/ersatz_progress.hh @@ -1,7 +1,7 @@ #ifndef UTIL_ERSATZ_PROGRESS__ #define UTIL_ERSATZ_PROGRESS__ -#include <iosfwd> +#include <iostream> #include <string> // Ersatz version of boost::progress so core language model doesn't depend on @@ -14,7 +14,7 @@ class ErsatzProgress { ErsatzProgress(); // Null means no output. The null value is useful for passing along the ostream pointer from another caller. - ErsatzProgress(std::ostream *to, const std::string &message, std::size_t complete); + explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); ~ErsatzProgress(); diff --git a/klm/util/file.cc b/klm/util/file.cc index de206bc8..6a3885a7 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -10,11 +10,12 @@ #include <sys/stat.h> #include <fcntl.h> #include <stdint.h> -#include <unistd.h> #if defined(_WIN32) || defined(_WIN64) #include <windows.h> #include <io.h> +#else +#include <unistd.h> #endif namespace util { @@ -43,16 +44,6 @@ int OpenReadOrThrow(const char *name) { return ret; } -int CreateOrThrow(const char *name) { - int ret; -#if defined(_WIN32) || defined(_WIN64) - UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); -#else - UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); -#endif - return ret; -} - uint64_t SizeFile(int fd) { #if defined(_WIN32) || defined(_WIN64) __int64 ret = _filelengthi64(fd); diff --git a/klm/util/file.hh b/klm/util/file.hh index 72c8ea76..5c57e2a9 100644 --- a/klm/util/file.hh +++ b/klm/util/file.hh @@ -65,10 +65,7 @@ class scoped_FILE { std::FILE *file_; }; -// Open for read only. 
int OpenReadOrThrow(const char *name); -// Create file if it doesn't exist, truncate if it does. Opened for write. -int CreateOrThrow(const char *name); // Return value for SizeFile when it can't size properly. const uint64_t kBadSize = (uint64_t)-1; diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 081e662b..a205995a 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -36,13 +36,13 @@ const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0 FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { + progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { Initialize(name, show_progress, min_buffer); } FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { + progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { Initialize(name, show_progress, min_buffer); } diff --git a/klm/util/have.hh b/klm/util/have.hh index f2f0cf90..b8181e99 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -3,13 +3,21 @@ #define UTIL_HAVE__ #ifndef HAVE_ZLIB +#if !defined(_WIN32) && !defined(_WIN64) #define HAVE_ZLIB #endif +#endif -// #define HAVE_ICU +#ifndef HAVE_ICU +//#define HAVE_ICU +#endif #ifndef HAVE_BOOST #define HAVE_BOOST #endif +#ifndef HAVE_THREADS +//#define HAVE_THREADS +#endif + #endif // UTIL_HAVE__ diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index 2db35b56..576fd4cc 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -14,12 +14,12 @@ #include <sys/types.h> #include <sys/stat.h> #include <stdlib.h> -#include <unistd.h> #if defined(_WIN32) || defined(_WIN64) #include <windows.h> #include <io.h> #else +#include <unistd.h> #include <sys/mman.h> #endif @@ -171,6 +171,20 @@ void *MapZeroedWrite(int fd, std::size_t size) { return MapOrThrow(size, true, kFileFlags, false, fd, 0); } +namespace { + +int CreateOrThrow(const char *name) { + int ret; +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); +#else + UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); +#endif + return ret; +} + +} // namespace + void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) { file.reset(CreateOrThrow(name)); try { diff --git a/klm/util/murmur_hash.cc b/klm/util/murmur_hash.cc index 6accc21a..4f519312 100644 --- a/klm/util/murmur_hash.cc +++ b/klm/util/murmur_hash.cc @@ -23,7 +23,7 @@ namespace util { // 64-bit hash for 64-bit platforms -uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed ) +uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t seed ) { const uint64_t m = 0xc6a4a7935bd1e995ULL; const int r = 47; @@ -81,7 +81,7 @@ uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed ) // 64-bit hash for 32-bit platforms -uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed ) +uint64_t 
MurmurHash64B ( const void * key, std::size_t len, uint64_t seed ) { const unsigned int m = 0x5bd1e995; const int r = 24; @@ -150,17 +150,18 @@ uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed ) return h; } + // Trick to test for 64-bit architecture at compile time. namespace { -template <unsigned L> uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, unsigned int seed) { +template <unsigned L> inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, uint64_t seed) { return MurmurHash64A(key, len, seed); } -template <> uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, unsigned int seed) { +template <> inline uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, uint64_t seed) { return MurmurHash64B(key, len, seed); } } // namespace -uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) { +uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed) { return MurmurHashNativeBackend<sizeof(void*)>(key, len, seed); } diff --git a/klm/util/murmur_hash.hh b/klm/util/murmur_hash.hh index 638aaeb2..ae7e88de 100644 --- a/klm/util/murmur_hash.hh +++ b/klm/util/murmur_hash.hh @@ -5,9 +5,9 @@ namespace util { -uint64_t MurmurHash64A(const void * key, std::size_t len, unsigned int seed = 0); -uint64_t MurmurHash64B(const void * key, std::size_t len, unsigned int seed = 0); -uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed = 0); +uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0); +uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0); +uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0); } // namespace util diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index f466cebc..3354b68e 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -78,12 +78,33 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry } } + // Return true if the value was found (and not inserted). This is consistent with Find but the opposite of hash_map!
+ template <class T> bool FindOrInsert(const T &t, MutableIterator &out) { +#ifdef DEBUG + assert(initialized_); +#endif + for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) { + Key got(i->GetKey()); + if (equal_(got, t.GetKey())) { out = i; return true; } + if (equal_(got, invalid_)) { + UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full."); + *i = t; + out = i; + return false; + } + if (++i == end_) i = begin_; + } + } + void FinishedInserting() {} void LoadedBinary() {} // Don't change anything related to GetKey, template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) { +#ifdef DEBUG + assert(initialized_); +#endif for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) { Key got(i->GetKey()); if (equal_(got, key)) { out = i; return true; } diff --git a/klm/util/usage.cc b/klm/util/usage.cc new file mode 100644 index 00000000..e5cf76f0 --- /dev/null +++ b/klm/util/usage.cc @@ -0,0 +1,46 @@ +#include "util/usage.hh" + +#include <fstream> +#include <ostream> + +#include <string.h> +#include <ctype.h> +#if !defined(_WIN32) && !defined(_WIN64) +#include <sys/resource.h> +#include <sys/time.h> +#endif + +namespace util { + +namespace { +#if !defined(_WIN32) && !defined(_WIN64) +float FloatSec(const struct timeval &tv) { + return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0); +} +#endif +} // namespace + +void PrintUsage(std::ostream &out) { +#if !defined(_WIN32) && !defined(_WIN64) + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage)) { + perror("getrusage"); + return; + } + out << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n'; + + // Linux doesn't set memory usage :-(. 
+ std::ifstream status("/proc/self/status", std::ios::in); + std::string line; + while (getline(status, line)) { + if (!strncmp(line.c_str(), "VmRSS:\t", 7)) { + out << "VmRSS: " << (line.c_str() + 7) << '\n'; + break; + } else if (!strncmp(line.c_str(), "VmPeak:\t", 8)) { + out << "VmPeak: " << (line.c_str() + 8) << '\n'; + } + } +#endif +} + +} // namespace util diff --git a/klm/util/usage.hh b/klm/util/usage.hh new file mode 100644 index 00000000..d331ff74 --- /dev/null +++ b/klm/util/usage.hh @@ -0,0 +1,8 @@ +#ifndef UTIL_USAGE__ +#define UTIL_USAGE__ +#include <iosfwd> + +namespace util { +void PrintUsage(std::ostream &to); +} // namespace util +#endif // UTIL_USAGE__ diff --git a/mira/kbest_mira.cc b/mira/kbest_mira.cc index dc0200d6..8b7993dd 100644 --- a/mira/kbest_mira.cc +++ b/mira/kbest_mira.cc @@ -5,8 +5,6 @@ #include <cmath> #include <tr1/memory> -#include "config.h" - #include <boost/program_options.hpp> #include <boost/program_options/variables_map.hpp> diff --git a/mteval/ns.h b/mteval/ns.h index 4e4c6975..ac7b0a23 100644 --- a/mteval/ns.h +++ b/mteval/ns.h @@ -56,6 +56,11 @@ class SufficientStats { } void Encode(std::string* out) const; + void swap(SufficientStats& other) { + id_.swap(other.id_); + fields.swap(other.fields); + } + std::string id_; std::vector<float> fields; }; diff --git a/phrasinator/Makefile.am b/phrasinator/Makefile.am index aba98601..3ddd1934 100644 --- a/phrasinator/Makefile.am +++ b/phrasinator/Makefile.am @@ -11,4 +11,4 @@ gibbs_train_plm_LDADD = $(top_srcdir)/utils/libutils.a -lz #head_bigram_model_SOURCES = head_bigram_model.cc #head_bigram_model_LDADD = $(top_srcdir)/utils/libutils.a -lz -AM_CPPFLAGS = -funroll-loops -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval +AM_CPPFLAGS = -funroll-loops -ffast-math -W -Wall -I$(top_srcdir)/utils diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h deleted file mode 100644 index 811bce73..00000000 --- a/phrasinator/ccrp_nt.h +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef _CCRP_NT_H_ -#define _CCRP_NT_H_ - -#include <numeric> -#include <cassert> -#include <cmath> -#include <list> -#include <iostream> -#include <vector> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. 
- -template <typename Dish, typename DishHash = boost::hash<Dish> > -class CCRP_NoTable { - public: - explicit CCRP_NoTable(double conc) : - num_customers_(), - concentration_(conc), - concentration_prior_shape_(std::numeric_limits<double>::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {} - - CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) : - num_customers_(), - concentration_(c), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} - - double concentration() const { return concentration_; } - - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); - } - - void clear() { - num_customers_ = 0; - custs_.clear(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.find(dish); - if (it == custs_.end()) return 0; - return it->second; - } - - int increment(const Dish& dish) { - int table_diff = 0; - if (++custs_[dish] == 1) - table_diff = 1; - ++num_customers_; - return table_diff; - } - - int decrement(const Dish& dish) { - int table_diff = 0; - int nc = --custs_[dish]; - if (nc == 0) { - custs_.erase(dish); - table_diff = -1; - } else if (nc < 0) { - std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; - abort(); - } - --num_customers_; - return table_diff; - } - - double prob(const Dish& dish, const double& p0) const { - const unsigned at_table = num_customers(dish); - return (at_table + p0 * concentration_) / (num_customers_ + concentration_); - } - - double logprob(const Dish& dish, const double& logp0) const { - const unsigned at_table = num_customers(dish); - return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); - } - - double log_crp_prob() const { - return log_crp_prob(concentration_); - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& concentration) const { - double lp = 0.0; - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - lp += lgamma(concentration) - lgamma(concentration + num_customers_) + - custs_.size() * log(concentration); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - lp += lgamma(it->second); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_concentration_prior()); - ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); - } - } - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {} - const CCRP_NoTable& crp_; - double operator()(const double& proposed_concentration) const { - return 
crp_.log_crp_prob(proposed_concentration); - } - }; - - void Print(std::ostream* out) const { - (*out) << "DP(alpha=" << concentration_ << ") customers=" << num_customers_ << std::endl; - int cc = 0; - for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - (*out) << " " << it->first << "(" << it->second << " eating)"; - ++cc; - if (cc > 10) { (*out) << " ..."; break; } - } - (*out) << std::endl; - } - - unsigned num_customers_; - std::tr1::unordered_map<Dish, unsigned, DishHash> custs_; - - typedef typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator const_iterator; - const_iterator begin() const { - return custs_.begin(); - } - const_iterator end() const { - return custs_.end(); - } - - double concentration_; - - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; -}; - -template <typename T,typename H> -std::ostream& operator<<(std::ostream& o, const CCRP_NoTable<T,H>& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc index 86fd7865..7847a460 100644 --- a/phrasinator/gibbs_train_plm.cc +++ b/phrasinator/gibbs_train_plm.cc @@ -18,7 +18,7 @@ Dict d; // global dictionary string Join(char joiner, const vector<int>& phrase) { ostringstream os; - for (int i = 0; i < phrase.size(); ++i) { + for (unsigned i = 0; i < phrase.size(); ++i) { if (i > 0) os << joiner; os << d.Convert(phrase[i]); } @@ -26,7 +26,7 @@ string Join(char joiner, const vector<int>& phrase) { } ostream& operator<<(ostream& os, const vector<int>& phrase) { - for (int i = 0; i < phrase.size(); ++i) + for (unsigned i = 0; i < phrase.size(); ++i) os << (i == 0 ? 
"" : " ") << d.Convert(phrase[i]); return os; } @@ -37,7 +37,7 @@ struct UnigramLM { assert(in); } - double logprob(int word) const { + double logprob(unsigned word) const { assert(word < freqs_.size()); return freqs_[word]; } @@ -91,7 +91,7 @@ void ReadCorpus(const string& filename, vector<vector<int> >* c, set<int>* vocab c->push_back(vector<int>()); vector<int>& v = c->back(); d.ConvertWhitespaceDelimitedLine(line, &v); - for (int i = 0; i < v.size(); ++i) vocab->insert(v[i]); + for (unsigned i = 0; i < v.size(); ++i) vocab->insert(v[i]); } if (in != &cin) delete in; } @@ -151,7 +151,7 @@ struct UniphraseLM { cerr << "Initializing...\n"; z_.resize(corpus_.size()); int tc = 0; - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -177,7 +177,7 @@ struct UniphraseLM { cerr << "Initial LLH: " << llh() << endl; cerr << "Sampling...\n"; cerr << gen_ << endl; - for (int s = 1; s < samples; ++s) { + for (unsigned s = 1; s < samples; ++s) { cerr << '.'; if (s % 10 == 0) { cerr << " [" << s; @@ -187,7 +187,7 @@ struct UniphraseLM { //for (int j = 0; j < z.size(); ++j) z[j] = z_[0][j]; //SegCorpus::Write(corpus_[0], z, d); } - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -286,7 +286,7 @@ int main(int argc, char** argv) { ulm.Sample(conf["samples"].as<unsigned>(), conf.count("no_hyperparameter_inference") == 0, &rng); cerr << "OOV unigram prob: " << ulm.OOVUnigramLogProb() << endl; - for (int i = 0; i < corpus.size(); ++i) + for (unsigned i = 0; i < corpus.size(); ++i) // SegCorpus::Write(corpus[i], shmmlm.z_[i], d); ; if (conf.count("write_cdec_grammar")) { @@ -304,8 +304,6 @@ int main(int argc, char** argv) { os << "# make C smaller to use more phrases\nP 1\nPassThrough " << ulm.OOVUnigramLogProb() << "\nC -3\n"; } - - return 0; } diff --git a/phrasinator/gibbs_train_plm.notables.cc b/phrasinator/gibbs_train_plm.notables.cc index 9dca9e8d..4526eaa6 100644 --- a/phrasinator/gibbs_train_plm.notables.cc +++ b/phrasinator/gibbs_train_plm.notables.cc @@ -18,7 +18,7 @@ Dict d; // global dictionary string Join(char joiner, const vector<int>& phrase) { ostringstream os; - for (int i = 0; i < phrase.size(); ++i) { + for (unsigned i = 0; i < phrase.size(); ++i) { if (i > 0) os << joiner; os << d.Convert(phrase[i]); } @@ -29,13 +29,13 @@ template <typename BType> void WriteSeg(const vector<int>& line, const vector<BType>& label, const Dict& d) { assert(line.size() == label.size()); assert(label.back()); - int prev = 0; - int cur = 0; + unsigned prev = 0; + unsigned cur = 0; while (cur < line.size()) { if (label[cur]) { if (prev) cout << ' '; cout << "{{"; - for (int i = prev; i <= cur; ++i) + for (unsigned i = prev; i <= cur; ++i) cout << (i == prev ? "" : " ") << d.Convert(line[i]); cout << "}}:" << label[cur]; prev = cur + 1; @@ -46,7 +46,7 @@ void WriteSeg(const vector<int>& line, const vector<BType>& label, const Dict& d } ostream& operator<<(ostream& os, const vector<int>& phrase) { - for (int i = 0; i < phrase.size(); ++i) + for (unsigned i = 0; i < phrase.size(); ++i) os << (i == 0 ? 
"" : " ") << d.Convert(phrase[i]); return os; } @@ -57,7 +57,7 @@ struct UnigramLM { assert(in); } - double logprob(int word) const { + double logprob(unsigned word) const { assert(word < freqs_.size()); return freqs_[word]; } @@ -111,7 +111,7 @@ void ReadCorpus(const string& filename, vector<vector<int> >* c, set<int>* vocab c->push_back(vector<int>()); vector<int>& v = c->back(); d.ConvertWhitespaceDelimitedLine(line, &v); - for (int i = 0; i < v.size(); ++i) vocab->insert(v[i]); + for (unsigned i = 0; i < v.size(); ++i) vocab->insert(v[i]); } if (in != &cin) delete in; } @@ -175,7 +175,7 @@ struct UniphraseLM { cerr << "Initializing...\n"; z_.resize(corpus_.size()); int tc = 0; - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -201,7 +201,7 @@ struct UniphraseLM { cerr << "Initial LLH: " << llh() << endl; cerr << "Sampling...\n"; cerr << gen_ << endl; - for (int s = 1; s < samples; ++s) { + for (unsigned s = 1; s < samples; ++s) { cerr << '.'; if (s % 10 == 0) { cerr << " [" << s; @@ -211,7 +211,7 @@ struct UniphraseLM { //for (int j = 0; j < z.size(); ++j) z[j] = z_[0][j]; //SegCorpus::Write(corpus_[0], z, d); } - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -276,7 +276,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.concentration(); + cerr << " " << phrases_.alpha(); } CCRP_NoTable<vector<int> > phrases_; @@ -310,7 +310,7 @@ int main(int argc, char** argv) { ulm.Sample(conf["samples"].as<unsigned>(), conf.count("no_hyperparameter_inference") == 0, &rng); cerr << "OOV unigram prob: " << ulm.OOVUnigramLogProb() << endl; - for (int i = 0; i < corpus.size(); ++i) + for (unsigned i = 0; i < corpus.size(); ++i) WriteSeg(corpus[i], ulm.z_[i], d); if (conf.count("write_cdec_grammar")) { diff --git a/pro-train/Makefile.am b/pro-train/Makefile.am index 11d26211..1e9d46b0 100644 --- a/pro-train/Makefile.am +++ b/pro-train/Makefile.am @@ -2,12 +2,10 @@ bin_PROGRAMS = \ mr_pro_map \ mr_pro_reduce -TESTS = lo_test - mr_pro_map_SOURCES = mr_pro_map.cc -mr_pro_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz mr_pro_reduce_SOURCES = mr_pro_reduce.cc -mr_pro_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/training/optimize.o $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc index 52b67f32..eef40b8a 100644 --- a/pro-train/mr_pro_map.cc +++ b/pro-train/mr_pro_map.cc @@ -9,14 +9,13 @@ #include <boost/program_options.hpp> #include 
<boost/program_options/variables_map.hpp> +#include "candidate_set.h" #include "sampler.h" #include "filelib.h" #include "stringlib.h" #include "weights.h" #include "inside_outside.h" #include "hg_io.h" -#include "kbest.h" -#include "viterbi.h" #include "ns.h" #include "ns_docscorer.h" @@ -25,52 +24,6 @@ using namespace std; namespace po = boost::program_options; -struct ApproxVectorHasher { - static const size_t MASK = 0xFFFFFFFFull; - union UType { - double f; // leave as double - size_t i; - }; - static inline double round(const double x) { - UType t; - t.f = x; - size_t r = t.i & MASK; - if ((r << 1) > MASK) - t.i += MASK - r + 1; - else - t.i &= (1ull - MASK); - return t.f; - } - size_t operator()(const SparseVector<weight_t>& x) const { - size_t h = 0x573915839; - for (SparseVector<weight_t>::const_iterator it = x.begin(); it != x.end(); ++it) { - UType t; - t.f = it->second; - if (t.f) { - size_t z = (t.i >> 32); - boost::hash_combine(h, it->first); - boost::hash_combine(h, z); - } - } - return h; - } -}; - -struct ApproxVectorEquals { - bool operator()(const SparseVector<weight_t>& a, const SparseVector<weight_t>& b) const { - SparseVector<weight_t>::const_iterator bit = b.begin(); - for (SparseVector<weight_t>::const_iterator ait = a.begin(); ait != a.end(); ++ait) { - if (bit == b.end() || - ait->first != bit->first || - ApproxVectorHasher::round(ait->second) != ApproxVectorHasher::round(bit->second)) - return false; - ++bit; - } - if (bit != b.end()) return false; - return true; - } -}; - boost::shared_ptr<MT19937> rng; void InitCommandLine(int argc, char** argv, po::variables_map* conf) { @@ -105,107 +58,6 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } -struct HypInfo { - HypInfo() : g_(-100.0f) {} - HypInfo(const vector<WordID>& h, const SparseVector<weight_t>& feats) : hyp(h), g_(-100.0f), x(feats) {} - - // lazy evaluation - double g(const SegmentEvaluator& scorer, const EvaluationMetric* metric) const { - if (g_ == -100.0f) { - SufficientStats ss; - scorer.Evaluate(hyp, &ss); - g_ = metric->ComputeScore(ss); - } - return g_; - } - vector<WordID> hyp; - mutable float g_; - SparseVector<weight_t> x; -}; - -struct HypInfoCompare { - bool operator()(const HypInfo& a, const HypInfo& b) const { - ApproxVectorEquals comp; - return (a.hyp == b.hyp && comp(a.x,b.x)); - } -}; - -struct HypInfoHasher { - size_t operator()(const HypInfo& x) const { - boost::hash<vector<WordID> > hhasher; - ApproxVectorHasher vhasher; - size_t ha = hhasher(x.hyp); - boost::hash_combine(ha, vhasher(x.x)); - return ha; - } -}; - -void WriteKBest(const string& file, const vector<HypInfo>& kbest) { - WriteFile wf(file); - ostream& out = *wf.stream(); - out.precision(10); - for (int i = 0; i < kbest.size(); ++i) { - out << TD::GetString(kbest[i].hyp) << endl; - out << kbest[i].x << endl; - } -} - -void ParseSparseVector(string& line, size_t cur, SparseVector<weight_t>* out) { - SparseVector<weight_t>& x = *out; - size_t last_start = cur; - size_t last_comma = string::npos; - while(cur <= line.size()) { - if (line[cur] == ' ' || cur == line.size()) { - if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { - cerr << "[ERROR] " << line << endl << " position = " << cur << endl; - exit(1); - } - const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); - if (cur < line.size()) line[cur] = 0; - const double val = strtod(&line[last_comma + 1], NULL); - x.set_value(fid, val); - - last_comma = string::npos; - last_start = cur+1; - } else { - 
if (line[cur] == '=') - last_comma = cur; - } - ++cur; - } -} - -void ReadKBest(const string& file, vector<HypInfo>* kbest) { - cerr << "Reading from " << file << endl; - ReadFile rf(file); - istream& in = *rf.stream(); - string cand; - string feats; - while(getline(in, cand)) { - getline(in, feats); - assert(in); - kbest->push_back(HypInfo()); - TD::ConvertSentence(cand, &kbest->back().hyp); - ParseSparseVector(feats, 0, &kbest->back().x); - } - cerr << " read " << kbest->size() << " hypotheses\n"; -} - -void Dedup(vector<HypInfo>* h) { - cerr << "Dedup in=" << h->size(); - tr1::unordered_set<HypInfo, HypInfoHasher, HypInfoCompare> u; - while(h->size() > 0) { - u.insert(h->back()); - h->pop_back(); - } - tr1::unordered_set<HypInfo, HypInfoHasher, HypInfoCompare>::iterator it = u.begin(); - while (it != u.end()) { - h->push_back(*it); - it = u.erase(it); - } - cerr << " out=" << h->size() << endl; -} - struct ThresholdAlpha { explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} double operator()(double mag) const { @@ -239,8 +91,7 @@ struct DiffOrder { void Sample(const unsigned gamma, const unsigned xi, - const vector<HypInfo>& J_i, - const SegmentEvaluator& scorer, + const training::CandidateSet& J_i, const EvaluationMetric* metric, vector<TrainingInstance>* pv) { const bool invert_score = metric->IsErrorMetric(); @@ -250,17 +101,17 @@ void Sample(const unsigned gamma, const size_t a = rng->inclusive(0, J_i.size() - 1)(); const size_t b = rng->inclusive(0, J_i.size() - 1)(); if (a == b) continue; - float ga = J_i[a].g(scorer, metric); - float gb = J_i[b].g(scorer, metric); + float ga = metric->ComputeScore(J_i[a].eval_feats); + float gb = metric->ComputeScore(J_i[b].eval_feats); bool positive = gb < ga; if (invert_score) positive = !positive; const float gdiff = fabs(ga - gb); if (!gdiff) continue; avg_diff += gdiff; - SparseVector<weight_t> xdiff = (J_i[a].x - J_i[b].x).erase_zeros(); + SparseVector<weight_t> xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); if (xdiff.empty()) { - cerr << "Empty diff:\n " << TD::GetString(J_i[a].hyp) << endl << "x=" << J_i[a].x << endl; - cerr << " " << TD::GetString(J_i[b].hyp) << endl << "x=" << J_i[b].x << endl; + cerr << "Empty diff:\n " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl; + cerr << " " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl; continue; } v1.push_back(TrainingInstance(xdiff, positive, gdiff)); @@ -328,25 +179,17 @@ int main(int argc, char** argv) { is >> file >> sent_id; ReadFile rf(file); ostringstream os; - vector<HypInfo> J_i; + training::CandidateSet J_i; os << kbest_repo << "/kbest." 
<< sent_id << ".txt.gz"; const string kbest_file = os.str(); if (FileExists(kbest_file)) - ReadKBest(kbest_file, &J_i); + J_i.ReadFromFile(kbest_file); HypergraphIO::ReadFromJSON(rf.stream(), &hg); hg.Reweight(weights); - KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, kbest_size); - - for (int i = 0; i < kbest_size; ++i) { - const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - J_i.push_back(HypInfo(d->yield, d->feature_values)); - } - Dedup(&J_i); - WriteKBest(kbest_file, J_i); + J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + J_i.WriteToFile(kbest_file); - Sample(gamma, xi, J_i, *ds[sent_id], metric, &v); + Sample(gamma, xi, J_i, metric, &v); for (unsigned i = 0; i < v.size(); ++i) { const TrainingInstance& vi = v[i]; cout << vi.y << "\t" << vi.x << endl; diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc index d3fb8026..5ef9b470 100644 --- a/pro-train/mr_pro_reduce.cc +++ b/pro-train/mr_pro_reduce.cc @@ -25,6 +25,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { opts.add_options() ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation)") ("regularization_strength,C",po::value<double>()->default_value(500.0), "l2 regularization strength") + ("l1",po::value<double>()->default_value(0.0), "l1 regularization strength") ("regularize_to_weights,y",po::value<double>()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") ("memory_buffers,m",po::value<unsigned>()->default_value(100), "Number of memory buffers (LBFGS)") ("min_reg,r",po::value<double>()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strength") @@ -180,12 +181,14 @@ struct ProLoss { double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& training, const vector<pair<bool, SparseVector<weight_t> > >& testing, const double C, + const double C1, const double T, const unsigned memory_buffers, const vector<weight_t>& prev_x, vector<weight_t>* px) { + assert(px->size() == prev_x.size()); ProLoss loss(training, testing, C, T, prev_x); - LBFGS<ProLoss> lbfgs(px, loss, 0.0, memory_buffers); + LBFGS<ProLoss> lbfgs(px, loss, memory_buffers, C1); lbfgs.MinimizeFunction(); return loss.tppl; } @@ -203,6 +206,7 @@ int main(int argc, char** argv) { const double min_reg = conf["min_reg"].as<double>(); const double max_reg = conf["max_reg"].as<double>(); double C = conf["regularization_strength"].as<double>(); // will be overridden if parameter is tuned + double C1 = conf["l1"].as<double>(); // will be overridden if parameter is tuned const double T = conf["regularize_to_weights"].as<double>(); assert(C >= 0.0); assert(min_reg >= 0.0); @@ -239,7 +243,7 @@ int main(int argc, char** argv) { cerr << "SWEEP FACTOR: " << sweep_factor << endl; while(C < max_reg) { cerr << "C=" << C << "\tT=" <<T << endl; - tppl = LearnParameters(training, testing, C, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x); + tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x); sp.push_back(make_pair(C, tppl)); C *= sweep_factor; } @@ -262,7 +266,7 @@ int main(int argc, char** argv) { } C = sp[best_i].first; } // tune regularizer - tppl = LearnParameters(training, testing, C, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x); + tppl =
LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x); if (conf.count("weights")) { for (int i = 1; i < x.size(); ++i) { x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); diff --git a/rampion/Makefile.am b/rampion/Makefile.am index 12df39c2..f4dbb7cc 100644 --- a/rampion/Makefile.am +++ b/rampion/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = rampion_cccp rampion_cccp_SOURCES = rampion_cccp.cc -rampion_cccp_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +rampion_cccp_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training +AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/rampion/rampion.pl b/rampion/rampion.pl index 9884f453..55f7b3f1 100755 --- a/rampion/rampion.pl +++ b/rampion/rampion.pl @@ -65,12 +65,14 @@ my $cpbin=1; my $tune_regularizer = 0; my $reg = 500; my $reg_previous = 5000; +my $dont_accum = 0; # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); if (GetOptions( "jobs=i" => \$jobs, "dont-clean" => \$disable_clean, + "dont-accumulate" => \$dont_accum, "pass-suffix=s" => \$pass_suffix, "qsub" => \$useqsub, "dry-run" => \$dryrun, @@ -163,8 +165,6 @@ my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; my $newIniFile = "$dir/$decoderBase.ini"; my $inputFileName = "$dir/input"; my $user = $ENV{"USER"}; - - # process ini file -e $iniFile || die "Error: could not open $iniFile for reading\n"; open(INI, $iniFile); @@ -229,6 +229,13 @@ close F; unless($best_weights){ $best_weights = $weights; } unless($projected_score){ $projected_score = 0.0; } $seen_weights{$weights} = 1; +my $kbest = "$dir/kbest"; +if ($dont_accum) { + $kbest = ''; +} else { + check_call("mkdir -p $kbest"); + $kbest = "--kbest_repository $kbest"; +} my $random_seed = int(time / 1000); my $lastWeightsFile; @@ -305,7 +312,7 @@ while (1){ $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; print STDERR "COMMAND:\n$cmd\n"; check_call($cmd); - $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 -w $inweights > $outweights"; + $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights"; check_call($cmd); $lastWeightsFile = $outweights; $iteration++; @@ -445,6 +452,9 @@ General options: --help Print this message and exit. + --dont-accumulate + Don't accumulate k-best lists from multiple iterations. + --max-iterations <M> Maximum number of iterations to run. If not specified, defaults to $default_max_iter. 
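The rampion_cccp.cc diff below swaps the file-local HypInfo machinery for training::CandidateSet and moves metric computation into GainFunction, but the optimization itself is unchanged. As a sketch of what one pass computes (assuming the conventional hope/fear signs; the goodsign/badsign constants are set elsewhere in the file and do not appear in this hunk), with \ell(y) the GainFunction output (1 - score for gain metrics, so smaller is better), K_i the candidate set for sentence i, \alpha the cost-augmentation weight, \eta the step size eta, \lambda the regularizer reg, and N = goals.size():

\begin{align*}
y_i^{+} &= \operatorname*{arg\,max}_{y \in K_i} \; \mathbf{w}^{\top}\mathbf{f}(y) - \alpha\,\ell(y) && \text{(hope; fixed once per CCCP iteration)} \\
y_i^{-} &= \operatorname*{arg\,max}_{y \in K_i} \; \mathbf{w}^{\top}\mathbf{f}(y) + \alpha\,\ell(y) && \text{(fear; recomputed inside the SSD loop)} \\
\mathbf{w} &\leftarrow \Bigl(1 - \tfrac{\eta\lambda}{N}\Bigr)\,\mathbf{w} + \eta\,\bigl(\mathbf{f}(y_i^{+}) - \mathbf{f}(y_i^{-})\bigr)
\end{align*}

With the new --kbest_repository option (driven from rampion.pl above unless --dont-accumulate is given), K_i accumulates candidates across tuning iterations, so the hope/fear search ranges over every hypothesis seen so far rather than only the current k-best list.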
diff --git a/rampion/rampion_cccp.cc b/rampion/rampion_cccp.cc index 7a6f1f0c..1e36dc51 100644 --- a/rampion/rampion_cccp.cc +++ b/rampion/rampion_cccp.cc @@ -14,6 +14,7 @@ #include "viterbi.h" #include "ns.h" #include "ns_docscorer.h" +#include "candidate_set.h" using namespace std; namespace po = boost::program_options; @@ -25,6 +26,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations") ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)") ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("kbest_repository,R",po::value<string>(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") ("kbest_size,k",po::value<unsigned>()->default_value(500u), "Top k-hypotheses to extract") ("cccp_iterations,I", po::value<unsigned>()->default_value(10u), "CCCP iterations (T')") ("ssd_iterations,J", po::value<unsigned>()->default_value(5u), "Stochastic subgradient iterations (T'')") @@ -50,38 +52,36 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } -struct HypInfo { - HypInfo() : g(-100.0f) {} - HypInfo(const vector<WordID>& h, - const SparseVector<weight_t>& feats, - const SegmentEvaluator& scorer, const EvaluationMetric* metric) : hyp(h), x(feats) { - SufficientStats ss; - scorer.Evaluate(hyp, &ss); - g = metric->ComputeScore(ss); +struct GainFunction { + explicit GainFunction(const EvaluationMetric* m) : metric(m) {} + float operator()(const SufficientStats& eval_feats) const { + float g = metric->ComputeScore(eval_feats); if (!metric->IsErrorMetric()) g = 1 - g; + return g; } - - vector<WordID> hyp; - float g; - SparseVector<weight_t> x; + const EvaluationMetric* metric; }; -void CostAugmentedSearch(const vector<HypInfo>& kbest, +template <typename GainFunc> +void CostAugmentedSearch(const GainFunc& gain, + const training::CandidateSet& cs, const SparseVector<double>& w, double alpha, SparseVector<double>* fmap) { unsigned best_i = 0; double best = -numeric_limits<double>::infinity(); - for (unsigned i = 0; i < kbest.size(); ++i) { - double s = kbest[i].x.dot(w) + alpha * kbest[i].g; + for (unsigned i = 0; i < cs.size(); ++i) { + double s = cs[i].fmap.dot(w) + alpha * gain(cs[i].eval_feats); if (s > best) { best = s; best_i = i; } } - *fmap = kbest[best_i].x; + *fmap = cs[best_i].fmap; } + + // runs lines 4--15 of rampion algorithm int main(int argc, char** argv) { po::variables_map conf; @@ -97,6 +97,11 @@ int main(int argc, char** argv) { Hypergraph hg; string last_file; ReadFile in_read(conf["input"].as<string>()); + string kbest_repo; + if (conf.count("kbest_repository")) { + kbest_repo = conf["kbest_repository"].as<string>(); + MkDirP(kbest_repo); + } istream &in=*in_read.stream(); const unsigned kbest_size = conf["kbest_size"].as<unsigned>(); const unsigned tp = conf["cccp_iterations"].as<unsigned>(); @@ -112,40 +117,44 @@ int main(int argc, char** argv) { Weights::InitSparseVector(vweights, &weights); } string line, file; - vector<vector<HypInfo> > kis; + vector<training::CandidateSet> kis; cerr << "Loading hypergraphs...\n"; while(getline(in, line)) { istringstream is(line); int sent_id; kis.resize(kis.size() + 1); - vector<HypInfo>& curkbest = kis.back(); + training::CandidateSet& curkbest = kis.back(); + string kbest_file; + if (kbest_repo.size()) { + ostringstream os; + os << kbest_repo << "/kbest." 
<< sent_id << ".txt.gz"; + kbest_file = os.str(); + if (FileExists(kbest_file)) + curkbest.ReadFromFile(kbest_file); + } is >> file >> sent_id; ReadFile rf(file); if (kis.size() % 5 == 0) { cerr << '.'; } if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } HypergraphIO::ReadFromJSON(rf.stream(), &hg); hg.Reweight(weights); - KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, kbest_size); - - for (int i = 0; i < kbest_size; ++i) { - const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - curkbest.push_back(HypInfo(d->yield, d->feature_values, *ds[sent_id], metric)); - } + curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + if (kbest_file.size()) + curkbest.WriteToFile(kbest_file); } cerr << "\nHypergraphs loaded.\n"; vector<SparseVector<weight_t> > goals(kis.size()); // f(x_i,y+,h+) SparseVector<weight_t> fear; // f(x,y-,h-) + const GainFunction gain(metric); for (unsigned iterp = 1; iterp <= tp; ++iterp) { cerr << "CCCP Iteration " << iterp << endl; - for (int i = 0; i < goals.size(); ++i) - CostAugmentedSearch(kis[i], weights, goodsign * alpha, &goals[i]); + for (unsigned i = 0; i < goals.size(); ++i) + CostAugmentedSearch(gain, kis[i], weights, goodsign * alpha, &goals[i]); for (unsigned iterpp = 1; iterpp <= tpp; ++iterpp) { cerr << " SSD Iteration " << iterpp << endl; - for (int i = 0; i < goals.size(); ++i) { - CostAugmentedSearch(kis[i], weights, badsign * alpha, &fear); + for (unsigned i = 0; i < goals.size(); ++i) { + CostAugmentedSearch(gain, kis[i], weights, badsign * alpha, &fear); weights -= weights * (eta * reg / goals.size()); weights += (goals[i] - fear) * eta; } diff --git a/sa-extract/sa2cdec.py b/sa-extract/sa2cdec.py new file mode 100755 index 00000000..55fb19f3 --- /dev/null +++ b/sa-extract/sa2cdec.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +import sys + +featNames = [ line.strip() for line in open(sys.argv[1]) if not line.startswith('#') ] + +for line in sys.stdin: + try: + (lhs, src, tgt, feats, align) = line.strip("\n").split(' ||| ') + except: + print >>sys.stderr, 'WARNING: No alignments:', line + try: + (lhs, src, tgt, feats) = line.strip().split(' ||| ') + align = '' + except: + print >>sys.stderr, "ERROR: Malformed line:", line + raise + featValues = feats.split(' ') + namedFeats = ' '.join( name+"="+value for (name, value) in zip(featNames, featValues) ) + print " ||| ".join( (lhs, src, tgt, namedFeats, align) ) diff --git a/sa-extract/sa_feat_names.txt b/sa-extract/sa_feat_names.txt new file mode 100644 index 00000000..02c137d7 --- /dev/null +++ b/sa-extract/sa_feat_names.txt @@ -0,0 +1,7 @@ +EGivenFCoherent +SampleCountF +CountEF +MaxLexFGivenE +MaxLexEGivenF +IsSingletonF +IsSingletonFE diff --git a/sa-extract/wrap_input.py b/sa-extract/wrap_input.py new file mode 100755 index 00000000..e859a4fd --- /dev/null +++ b/sa-extract/wrap_input.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +import sys +import codecs +import os +import os.path +from xml.sax.saxutils import escape + +graPrefix = sys.argv[1] + +# Second argument can be a file with observable sentence-level features, +# one set of features per line (parallel with source sentences). Features are space-delimited indicator features. 
+obsFeatsFile = None +if len(sys.argv) == 3: + obsFeatsFilename = sys.argv[2] + obsFeatsFile = open(obsFeatsFilename) + +sys.stdin = codecs.getreader("utf-8")(sys.stdin) +sys.stdout = codecs.getwriter("utf-8")(sys.stdout) + +i = 0 +for line in sys.stdin: + filename = "%s%d"%(graPrefix,i) + if not os.path.exists(filename): + filenameGz = filename + ".gz" + if not os.path.exists(filenameGz): + print >>sys.stderr, "Grammar file not found: ", filename, filenameGz + sys.exit(1) + else: + filename = filenameGz + + if obsFeatsFile: + obsFeats = obsFeatsFile.next().strip() + print '<seg id="%d" features="%s" grammar="%s"> '%(i,obsFeats,filename) + escape(line.strip()) + " </seg>" + else: + print '<seg id="%d" grammar="%s"> '%(i,filename) + escape(line.strip()) + " </seg>" + i+=1 + diff --git a/tests/issues/5/mapoutput.abj.gz b/tests/issues/5/mapoutput.abj.gz new file mode 100644 index 00000000..30649475 --- /dev/null +++ b/tests/issues/5/mapoutput.abj.gz @@ -0,0 +1,1600 @@ +0 WordPenalty=-3.47436 LanguageModel=-9.88234 MaxLexFGivenE=-4.35121 MaxLexEGivenF=17.5289 +1 WordPenalty=3.47436 LanguageModel=9.88234 MaxLexFGivenE=4.35121 MaxLexEGivenF=-17.5289 +0 Glue=-2 WordPenalty=-3.47436 EGivenFCoherent=1.85733 LanguageModel=-9.44422 CountEF=-1.35218 MaxLexFGivenE=-4.18947 MaxLexEGivenF=17.7033 IsSingletonFE=2 +1 Glue=2 WordPenalty=3.47436 EGivenFCoherent=-1.85733 LanguageModel=9.44422 CountEF=1.35218 MaxLexFGivenE=4.18947 MaxLexEGivenF=-17.7033 IsSingletonFE=-2 +0 WordPenalty=-3.47436 LanguageModel=-9.74205 MaxLexFGivenE=-4.35121 MaxLexEGivenF=17.1096 +1 WordPenalty=3.47436 LanguageModel=9.74205 MaxLexFGivenE=4.35121 MaxLexEGivenF=-17.1096 +0 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-10.6534 CountEF=-0.352182 MaxLexFGivenE=-4.94741 MaxLexEGivenF=17.8873 IsSingletonFE=2 +1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=10.6534 CountEF=0.352182 MaxLexFGivenE=4.94741 MaxLexEGivenF=-17.8873 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.495 CountEF=-0.176091 MaxLexFGivenE=-3.19065 MaxLexEGivenF=20.0434 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.495 CountEF=0.176091 MaxLexFGivenE=3.19065 MaxLexEGivenF=-20.0434 IsSingletonFE=-1 +1 WordPenalty=3.47436 EGivenFCoherent=-0.95424 LanguageModel=9.55791 CountEF=0.69897 MaxLexFGivenE=2.54581 MaxLexEGivenF=-19.8082 IsSingletonFE=-1 +0 WordPenalty=-3.47436 EGivenFCoherent=0.95424 LanguageModel=-9.55791 CountEF=-0.69897 MaxLexFGivenE=-2.54581 MaxLexEGivenF=19.8082 IsSingletonFE=1 +0 WordPenalty=-3.47436 EGivenFCoherent=0.90309 LanguageModel=-9.8296 CountEF=-0.574031 MaxLexFGivenE=-3.43842 MaxLexEGivenF=17.3905 IsSingletonFE=2 +1 WordPenalty=3.47436 EGivenFCoherent=-0.90309 LanguageModel=9.8296 CountEF=0.574031 MaxLexFGivenE=3.43842 MaxLexEGivenF=-17.3905 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.47712 LanguageModel=-9.75285 CountEF=-0.30103 MaxLexFGivenE=-2.54581 MaxLexEGivenF=20.2274 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.47712 LanguageModel=9.75285 CountEF=0.30103 MaxLexFGivenE=2.54581 MaxLexEGivenF=-20.2274 IsSingletonFE=-1 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.50515 LanguageModel=9.67998 CountEF=1.05115 MaxLexFGivenE=3.41509 MaxLexEGivenF=-17.3303 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.50515 LanguageModel=-9.67998 CountEF=-1.05115 MaxLexFGivenE=-3.41509 MaxLexEGivenF=17.3303 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 
LanguageModel=10.6702 CountEF=0.176091 MaxLexFGivenE=3.02471 MaxLexEGivenF=-19.7399 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.6702 CountEF=-0.176091 MaxLexFGivenE=-3.02471 MaxLexEGivenF=19.7399 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=9.59982 CountEF=0.176091 MaxLexFGivenE=4.17044 MaxLexEGivenF=-17.1177 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-9.59982 CountEF=-0.176091 MaxLexFGivenE=-4.17044 MaxLexEGivenF=17.1177 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=8.9494 CountEF=0.176091 MaxLexFGivenE=3.09171 MaxLexEGivenF=-16.4181 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-8.9494 CountEF=-0.176091 MaxLexFGivenE=-3.09171 MaxLexEGivenF=16.4181 IsSingletonFE=1
+0 WordPenalty=-3.47436 EGivenFCoherent=1.60206 LanguageModel=-9.35636 CountEF=-1.19728 MaxLexFGivenE=-2.85968 MaxLexEGivenF=18.4017 IsSingletonFE=2
+1 WordPenalty=3.47436 EGivenFCoherent=-1.60206 LanguageModel=9.35636 CountEF=1.19728 MaxLexFGivenE=2.85968 MaxLexEGivenF=-18.4017 IsSingletonFE=-2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.43136 LanguageModel=9.91874 CountEF=1 MaxLexFGivenE=4.17044 MaxLexEGivenF=-16.86 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.43136 LanguageModel=-9.91874 CountEF=-1 MaxLexFGivenE=-4.17044 MaxLexEGivenF=16.86 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.8192 CountEF=0.176091 MaxLexFGivenE=2.97607 MaxLexEGivenF=-19.6054 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.8192 CountEF=-0.176091 MaxLexFGivenE=-2.97607 MaxLexEGivenF=19.6054 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.07918 LanguageModel=9.80737 CountEF=0.69897 MaxLexFGivenE=3.39606 MaxLexEGivenF=-16.4871 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.07918 LanguageModel=-9.80737 CountEF=-0.69897 MaxLexFGivenE=-3.39606 MaxLexEGivenF=16.4871 IsSingletonFE=2
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.7169 CountEF=0.176091 MaxLexFGivenE=4.6007 MaxLexEGivenF=-16.238 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.7169 CountEF=-0.176091 MaxLexFGivenE=-4.6007 MaxLexEGivenF=16.238 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.7734 CountEF=-0.176091 MaxLexFGivenE=-4.94741 MaxLexEGivenF=17.2104 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.7734 CountEF=0.176091 MaxLexFGivenE=4.94741 MaxLexEGivenF=-17.2104 IsSingletonFE=-1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.95424 LanguageModel=9.44422 CountEF=0.69897 MaxLexFGivenE=4.18947 MaxLexEGivenF=-17.7033 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.95424 LanguageModel=-9.44422 CountEF=-0.69897 MaxLexFGivenE=-4.18947 MaxLexEGivenF=17.7033 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=10.6534 CountEF=0.352182 MaxLexFGivenE=4.94741 MaxLexEGivenF=-17.8873 IsSingletonFE=-2
+0 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-10.6534 CountEF=-0.352182 MaxLexFGivenE=-4.94741 MaxLexEGivenF=17.8873 IsSingletonFE=2
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-8.9494 CountEF=-0.176091 MaxLexFGivenE=-3.09171 MaxLexEGivenF=16.4181 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=8.9494 CountEF=0.176091 MaxLexFGivenE=3.09171 MaxLexEGivenF=-16.4181 IsSingletonFE=-1
+1 WordPenalty=3.47436 LanguageModel=9.61256 MaxLexFGivenE=2.54581 MaxLexEGivenF=-19.8082
+0 WordPenalty=-3.47436 LanguageModel=-9.61256 MaxLexFGivenE=-2.54581 MaxLexEGivenF=19.8082
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.43136 LanguageModel=9.65634 CountEF=1 MaxLexFGivenE=4.51715 MaxLexEGivenF=-18.0901 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.43136 LanguageModel=-9.65634 CountEF=-1 MaxLexFGivenE=-4.51715 MaxLexEGivenF=18.0901 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=12.2993 CountEF=0.352182 MaxLexFGivenE=4.43476 MaxLexEGivenF=-16.3074 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-12.2993 CountEF=-0.352182 MaxLexFGivenE=-4.43476 MaxLexEGivenF=16.3074 IsSingletonFE=2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.60206 LanguageModel=-9.78678 CountEF=-1.19728 MaxLexFGivenE=-0.888336 MaxLexEGivenF=20.4928 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.60206 LanguageModel=9.78678 CountEF=1.19728 MaxLexFGivenE=0.888336 MaxLexEGivenF=-20.4928 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.90309 LanguageModel=-9.66186 CountEF=-0.653213 MaxLexFGivenE=-2.38407 MaxLexEGivenF=20.4018 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.90309 LanguageModel=9.66186 CountEF=0.653213 MaxLexFGivenE=2.38407 MaxLexEGivenF=-20.4018 IsSingletonFE=-1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.95424 LanguageModel=9.69821 CountEF=0.69897 MaxLexFGivenE=2.54581 MaxLexEGivenF=-20.2274 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.95424 LanguageModel=-9.69821 CountEF=-0.69897 MaxLexFGivenE=-2.54581 MaxLexEGivenF=20.2274 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=11.2687 CountEF=0.176091 MaxLexFGivenE=3.83856 MaxLexEGivenF=-16.2066 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-11.2687 CountEF=-0.176091 MaxLexFGivenE=-3.83856 MaxLexEGivenF=16.2066 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.95424 LanguageModel=9.44422 CountEF=0.69897 MaxLexFGivenE=4.18947 MaxLexEGivenF=-17.7033 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.95424 LanguageModel=-9.44422 CountEF=-0.69897 MaxLexFGivenE=-4.18947 MaxLexEGivenF=17.7033 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=12.2993 CountEF=0.352182 MaxLexFGivenE=4.43476 MaxLexEGivenF=-16.3074 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-12.2993 CountEF=-0.352182 MaxLexFGivenE=-4.43476 MaxLexEGivenF=16.3074 IsSingletonFE=2
+1 WordPenalty=3.47436 LanguageModel=9.82582 MaxLexFGivenE=4.0045 MaxLexEGivenF=-16.5564
+0 WordPenalty=-3.47436 LanguageModel=-9.82582 MaxLexFGivenE=-4.0045 MaxLexEGivenF=16.5564
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.90309 LanguageModel=9.94376 CountEF=0.653213 MaxLexFGivenE=4.66837 MaxLexEGivenF=-17.2619 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.90309 LanguageModel=-9.94376 CountEF=-0.653213 MaxLexFGivenE=-4.66837 MaxLexEGivenF=17.2619 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-1.30103 LanguageModel=9.37731 CountEF=1.02119 MaxLexFGivenE=2.85968 MaxLexEGivenF=-17.7248 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=1.30103 LanguageModel=-9.37731 CountEF=-1.02119 MaxLexFGivenE=-2.85968 MaxLexEGivenF=17.7248 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.85733 LanguageModel=-9.79135 CountEF=-1.35218 MaxLexFGivenE=-4.18947 MaxLexEGivenF=17.7033 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.85733 LanguageModel=9.79135 CountEF=1.35218 MaxLexFGivenE=4.18947 MaxLexEGivenF=-17.7033 IsSingletonFE=-2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.90309 LanguageModel=13.1315 CountEF=0.653213 MaxLexFGivenE=3.94114 MaxLexEGivenF=-15.9208 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.90309 LanguageModel=-13.1315 CountEF=-0.653213 MaxLexFGivenE=-3.94114 MaxLexEGivenF=15.9208 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-3.47436 EGivenFCoherent=1.85733 LanguageModel=-10.5896 CountEF=-1.35218 MaxLexFGivenE=-3.67682 MaxLexEGivenF=16.4273 IsSingletonFE=2
+1 Glue=2 WordPenalty=3.47436 EGivenFCoherent=-1.85733 LanguageModel=10.5896 CountEF=1.35218 MaxLexFGivenE=3.67682 MaxLexEGivenF=-16.4273 IsSingletonFE=-2
+0 WordPenalty=-3.47436 LanguageModel=-12.0318 MaxLexFGivenE=-2.48245 MaxLexEGivenF=19.5918
+1 WordPenalty=3.47436 LanguageModel=12.0318 MaxLexFGivenE=2.48245 MaxLexEGivenF=-19.5918
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.7734 CountEF=-0.176091 MaxLexFGivenE=-4.94741 MaxLexEGivenF=17.2104 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.7734 CountEF=0.176091 MaxLexFGivenE=4.94741 MaxLexEGivenF=-17.2104 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.6245 CountEF=-0.176091 MaxLexFGivenE=-4.99605 MaxLexEGivenF=17.3449 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.6245 CountEF=0.176091 MaxLexFGivenE=4.99605 MaxLexEGivenF=-17.3449 IsSingletonFE=-1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.7169 CountEF=0.176091 MaxLexFGivenE=4.6007 MaxLexEGivenF=-16.238 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.7169 CountEF=-0.176091 MaxLexFGivenE=-4.6007 MaxLexEGivenF=16.238 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=10.2581 CountEF=0.352182 MaxLexFGivenE=2.83404 MaxLexEGivenF=-19.6554 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-10.2581 CountEF=-0.352182 MaxLexFGivenE=-2.83404 MaxLexEGivenF=19.6554 IsSingletonFE=2
+1 WordPenalty=3.47436 LanguageModel=9.88234 MaxLexFGivenE=4.35121 MaxLexEGivenF=-17.5289
+0 WordPenalty=-3.47436 LanguageModel=-9.88234 MaxLexFGivenE=-4.35121 MaxLexEGivenF=17.5289
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.47712 LanguageModel=-9.84577 CountEF=-0.30103 MaxLexFGivenE=-2.71175 MaxLexEGivenF=20.531 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.47712 LanguageModel=9.84577 CountEF=0.30103 MaxLexFGivenE=2.71175 MaxLexEGivenF=-20.531 IsSingletonFE=-1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=10.8192 CountEF=0.176091 MaxLexFGivenE=2.97607 MaxLexEGivenF=-19.6054 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-10.8192 CountEF=-0.176091 MaxLexFGivenE=-2.97607 MaxLexEGivenF=19.6054 IsSingletonFE=1
+1 WordPenalty=3.04006 LanguageModel=8.37075 MaxLexFGivenE=3.96229 MaxLexEGivenF=-12.6273
+0 WordPenalty=-3.04006 LanguageModel=-8.37075 MaxLexFGivenE=-3.96229 MaxLexEGivenF=12.6273
+1 WordPenalty=3.04006 EGivenFCoherent=0.30103 LanguageModel=11.6764 CountEF=-0.221849 MaxLexFGivenE=3.46655 MaxLexEGivenF=-20.9663
+0 WordPenalty=-3.04006 EGivenFCoherent=-0.30103 LanguageModel=-11.6764 CountEF=0.221849 MaxLexFGivenE=-3.46655 MaxLexEGivenF=20.9663
+1 WordPenalty=3.04006 LanguageModel=9.46571 MaxLexFGivenE=3.9961 MaxLexEGivenF=-12.5049
+0 WordPenalty=-3.04006 LanguageModel=-9.46571 MaxLexFGivenE=-3.9961 MaxLexEGivenF=12.5049
+0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=0.60206 LanguageModel=-8.88469 CountEF=-0.352182 MaxLexFGivenE=-4.74414 MaxLexEGivenF=12.4881 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-0.60206 LanguageModel=8.88469 CountEF=0.352182 MaxLexFGivenE=4.74414 MaxLexEGivenF=-12.4881 IsSingletonFE=-2
+1 WordPenalty=3.04006 LanguageModel=8.81224 MaxLexFGivenE=6.70492 MaxLexEGivenF=-12.4548
+0 WordPenalty=-3.04006 LanguageModel=-8.81224 MaxLexFGivenE=-6.70492 MaxLexEGivenF=12.4548
+0 WordPenalty=-3.04006 EGivenFCoherent=-0.65321 LanguageModel=-10.6521 CountEF=0.522879 MaxLexFGivenE=-5.91753 MaxLexEGivenF=12.0189
+1 WordPenalty=3.04006 EGivenFCoherent=0.65321 LanguageModel=10.6521 CountEF=-0.522879 MaxLexFGivenE=5.91753 MaxLexEGivenF=-12.0189
+1 Glue=-1 WordPenalty=-0.434291 EGivenFCoherent=1.24159 PassThrough=-1 LanguageModel=0.467719 SampleCountF=2.47857 CountEF=1.4624 MaxLexFGivenE=0.363092 MaxLexEGivenF=0.713101 IsSingletonFE=1
+0 Glue=1 WordPenalty=0.434291 EGivenFCoherent=-1.24159 PassThrough=1 LanguageModel=-0.467719 SampleCountF=-2.47857 CountEF=-1.4624 MaxLexFGivenE=-0.363092 MaxLexEGivenF=-0.713101 IsSingletonFE=-1
+1 Glue=2 WordPenalty=-0.868586 EGivenFCoherent=-1.6163 LanguageModel=1.42495 CountEF=1.49485 MaxLexFGivenE=-2.17171 MaxLexEGivenF=-1.3008
+0 Glue=-2 WordPenalty=0.868586 EGivenFCoherent=1.6163 LanguageModel=-1.42495 CountEF=-1.49485 MaxLexFGivenE=2.17171 MaxLexEGivenF=1.3008
+1 Glue=-1 WordPenalty=2.17148 LanguageModel=4.55715 MaxLexFGivenE=-0.63546 MaxLexEGivenF=-18.5444
+0 Glue=1 WordPenalty=-2.17148 LanguageModel=-4.55715 MaxLexFGivenE=0.63546 MaxLexEGivenF=18.5444
+1 Glue=1 WordPenalty=-0.434291 LanguageModel=-1.86066 MaxLexEGivenF=0.40229
+0 Glue=-1 WordPenalty=0.434291 LanguageModel=1.86066 MaxLexEGivenF=-0.40229
+0 Glue=-1 WordPenalty=0.434291 LanguageModel=1.09802 MaxLexFGivenE=0.206486 MaxLexEGivenF=5.05234
+1 Glue=1 WordPenalty=-0.434291 LanguageModel=-1.09802 MaxLexFGivenE=-0.206486 MaxLexEGivenF=-5.05234
+1 Glue=-1 WordPenalty=-1.30288 EGivenFCoherent=0.47712 LanguageModel=-5.94193 CountEF=-0.30103 MaxLexFGivenE=-1.07281 MaxLexEGivenF=4.88722 IsSingletonFE=1
+0 Glue=1 WordPenalty=1.30288 EGivenFCoherent=-0.47712 LanguageModel=5.94193 CountEF=0.30103 MaxLexFGivenE=1.07281 MaxLexEGivenF=-4.88722 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-0.4343 LanguageModel=0.935858 MaxLexEGivenF=7.08786
+1 Glue=1 WordPenalty=0.4343 LanguageModel=-0.935858 MaxLexEGivenF=-7.08786
+0 WordPenalty=-1.73719 LanguageModel=-7.38139 MaxLexEGivenF=14.2225
+1 WordPenalty=1.73719 LanguageModel=7.38139 MaxLexEGivenF=-14.2225
+1 EGivenFCoherent=0.47712 LanguageModel=-0.927247 CountEF=-0.30103 MaxLexFGivenE=-1.07281 MaxLexEGivenF=-7.18061 IsSingletonFE=1
+0 EGivenFCoherent=-0.47712 LanguageModel=0.927247 CountEF=0.30103 MaxLexFGivenE=1.07281 MaxLexEGivenF=7.18061 IsSingletonFE=-1
+0 Glue=1 WordPenalty=0.434291 EGivenFCoherent=-1.73239 LanguageModel=4.76805 CountEF=1.24304 MaxLexFGivenE=0.206486 MaxLexEGivenF=0.0781129 IsSingletonFE=-2
+1 Glue=-1 WordPenalty=-0.434291 EGivenFCoherent=1.73239 LanguageModel=-4.76805 CountEF=-1.24304 MaxLexFGivenE=-0.206486 MaxLexEGivenF=-0.0781129 IsSingletonFE=2
+0 Glue=1 WordPenalty=0.434291 EGivenFCoherent=-2.48572 LanguageModel=4.81573 CountEF=1.94201 MaxLexEGivenF=-0.434406 IsSingletonFE=-2
+1 Glue=-1 WordPenalty=-0.434291 EGivenFCoherent=2.48572 LanguageModel=-4.81573 CountEF=-1.94201 MaxLexEGivenF=0.434406 IsSingletonFE=2
+0 WordPenalty=0.434291 EGivenFCoherent=-1.51851 LanguageModel=2.82814 CountEF=1.23045 MaxLexEGivenF=1.71042 IsSingletonFE=-1
+1 WordPenalty=-0.434291 EGivenFCoherent=1.51851 LanguageModel=-2.82814 CountEF=-1.23045 MaxLexEGivenF=-1.71042 IsSingletonFE=1
+0 Glue=-1 WordPenalty=0.868586 EGivenFCoherent=1.9345 LanguageModel=0.48756 CountEF=-1.63849 MaxLexEGivenF=-1.69957 IsSingletonFE=1
+1 Glue=1 WordPenalty=-0.868586 EGivenFCoherent=-1.9345 LanguageModel=-0.48756 CountEF=1.63849 MaxLexEGivenF=1.69957 IsSingletonFE=-1
+0 WordPenalty=0.868586 EGivenFCoherent=-1.53148 LanguageModel=2.33353 CountEF=1.24304 MaxLexEGivenF=-1.69957 IsSingletonFE=-1
+1 WordPenalty=-0.868586 EGivenFCoherent=1.53148 LanguageModel=-2.33353 CountEF=-1.24304 MaxLexEGivenF=1.69957 IsSingletonFE=1
+1 WordPenalty=-0.434291 LanguageModel=1.18841 MaxLexFGivenE=-0.433793 MaxLexEGivenF=-3.47033
+0 WordPenalty=0.434291 LanguageModel=-1.18841 MaxLexFGivenE=0.433793 MaxLexEGivenF=3.47033
+1 Glue=-1 WordPenalty=-0.434295 LanguageModel=-0.631861 MaxLexFGivenE=-0.63546 MaxLexEGivenF=-3.14772
+0 Glue=1 WordPenalty=0.434295 LanguageModel=0.631861 MaxLexFGivenE=0.63546 MaxLexEGivenF=3.14772
+1 Glue=1 WordPenalty=-0.868586 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=2.01716 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=0.045537 MaxLexEGivenF=0.03711 IsSingletonFE=1
+0 Glue=-1 WordPenalty=0.868586 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=-2.01716 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=-0.045537 MaxLexEGivenF=-0.03711 IsSingletonFE=-1
+1 WordPenalty=1.73718 LanguageModel=5.70925 MaxLexFGivenE=-1.07281 MaxLexEGivenF=-16.4622
+0 WordPenalty=-1.73718 LanguageModel=-5.70925 MaxLexFGivenE=1.07281 MaxLexEGivenF=16.4622
+1 WordPenalty=1.73718 EGivenFCoherent=-0.47712 LanguageModel=8.55235 CountEF=0.30103 MaxLexFGivenE=-0.206486 MaxLexEGivenF=-20.4976 IsSingletonFE=-1
+0 WordPenalty=-1.73718 EGivenFCoherent=0.47712 LanguageModel=-8.55235 CountEF=-0.30103 MaxLexFGivenE=0.206486 MaxLexEGivenF=20.4976 IsSingletonFE=1
+1 Glue=-1 EGivenFCoherent=-0.47712 LanguageModel=7.07913 CountEF=0.30103 MaxLexEGivenF=-6.76147 IsSingletonFE=-1
+0 Glue=1 EGivenFCoherent=0.47712 LanguageModel=-7.07913 CountEF=-0.30103 MaxLexEGivenF=6.76147 IsSingletonFE=1
+0 Glue=-2 EGivenFCoherent=0.95424 LanguageModel=-3.11065 CountEF=-0.69897 MaxLexEGivenF=8.80423 IsSingletonFE=1
+1 Glue=2 EGivenFCoherent=-0.95424 LanguageModel=3.11065 CountEF=0.69897 MaxLexEGivenF=-8.80423 IsSingletonFE=-1
+0 Glue=-1 EGivenFCoherent=0.47712 LanguageModel=-4.99904 CountEF=-0.39794 MaxLexEGivenF=4.33937
+1 Glue=1 EGivenFCoherent=-0.47712 LanguageModel=4.99904 CountEF=0.39794 MaxLexEGivenF=-4.33937
+1 WordPenalty=-0.434291 LanguageModel=0.26946 MaxLexEGivenF=-1.30366
+0 WordPenalty=0.434291 LanguageModel=-0.26946 MaxLexEGivenF=1.30366
+0 Glue=-1 WordPenalty=0.434291 EGivenFCoherent=0.95424 LanguageModel=-3.08599 CountEF=-0.69897 MaxLexEGivenF=3.87493 IsSingletonFE=1
+1 Glue=1 WordPenalty=-0.434291 EGivenFCoherent=-0.95424 LanguageModel=3.08599 CountEF=0.69897 MaxLexEGivenF=-3.87493 IsSingletonFE=-1
+1 Glue=1 WordPenalty=-0.868586 EGivenFCoherent=1.96848 LanguageModel=-7.36914 CountEF=-1.50515 MaxLexEGivenF=4.46341 IsSingletonFE=2
+0 Glue=-1 WordPenalty=0.868586 EGivenFCoherent=-1.96848 LanguageModel=7.36914 CountEF=1.50515 MaxLexEGivenF=-4.46341 IsSingletonFE=-2
+0 Glue=-2 WordPenalty=0.868586 LanguageModel=4.0613 CountEF=-0.09691 MaxLexEGivenF=-1.69957 IsSingletonFE=-1
+1 Glue=2 WordPenalty=-0.868586 LanguageModel=-4.0613 CountEF=0.09691 MaxLexEGivenF=1.69957 IsSingletonFE=1
+0 WordPenalty=1.30288 EGivenFCoherent=-3.08063 LanguageModel=6.1006 CountEF=2.51355 MaxLexEGivenF=-6.68167 IsSingletonFE=-2
+1 WordPenalty=-1.30288 EGivenFCoherent=3.08063 LanguageModel=-6.1006 CountEF=-2.51355 MaxLexEGivenF=6.68167 IsSingletonFE=2
+1 WordPenalty=-1.30288 EGivenFCoherent=1.51851 LanguageModel=-2.65175 CountEF=-1.23045 MaxLexEGivenF=6.26949 IsSingletonFE=1
+0 WordPenalty=1.30288 EGivenFCoherent=-1.51851 LanguageModel=2.65175 CountEF=1.23045 MaxLexEGivenF=-6.26949 IsSingletonFE=-1
+1 WordPenalty=-1.30288 EGivenFCoherent=1.9345 LanguageModel=-5.42804 CountEF=-1.63849 MaxLexFGivenE=-1.07281 MaxLexEGivenF=4.57963 IsSingletonFE=1
+0 WordPenalty=1.30288 EGivenFCoherent=-1.9345 LanguageModel=5.42804 CountEF=1.63849 MaxLexFGivenE=1.07281 MaxLexEGivenF=-4.57963 IsSingletonFE=-1
+1 Glue=1 WordPenalty=2.60577 LanguageModel=6.10726 MaxLexEGivenF=-15.626
+0 Glue=-1 WordPenalty=-2.60577 LanguageModel=-6.10726 MaxLexEGivenF=15.626
+1 Glue=1 WordPenalty=0.868591 EGivenFCoherent=-0.95424 LanguageModel=3.02212 CountEF=0.69897 MaxLexEGivenF=-11.2976 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-0.868591 EGivenFCoherent=0.95424 LanguageModel=-3.02212 CountEF=-0.69897 MaxLexEGivenF=11.2976 IsSingletonFE=1
+1 WordPenalty=0.868591 LanguageModel=1.72398 MaxLexFGivenE=-1.07281 MaxLexEGivenF=-10.4733
+0 WordPenalty=-0.868591 LanguageModel=-1.72398 MaxLexFGivenE=1.07281 MaxLexEGivenF=10.4733
+1 Glue=1 WordPenalty=0.434291 LanguageModel=2.81964 MaxLexEGivenF=-2.11273
+0 Glue=-1 WordPenalty=-0.434291 LanguageModel=-2.81964 MaxLexEGivenF=2.11273
+0 WordPenalty=0.434295 EGivenFCoherent=-2.09342 LanguageModel=2.85966 CountEF=1.79588 MaxLexEGivenF=-3.03903 IsSingletonFE=-1
+1 WordPenalty=-0.434295 EGivenFCoherent=2.09342 LanguageModel=-2.85966 CountEF=-1.79588 MaxLexEGivenF=3.03903 IsSingletonFE=1
+1 Glue=1 WordPenalty=-1.73717 EGivenFCoherent=0.90309 LanguageModel=-4.27407 CountEF=-0.653213 MaxLexFGivenE=-2.17171 MaxLexEGivenF=6.94972 IsSingletonFE=1
+0 Glue=-1 WordPenalty=1.73717 EGivenFCoherent=-0.90309 LanguageModel=4.27407 CountEF=0.653213 MaxLexFGivenE=2.17171 MaxLexEGivenF=-6.94972 IsSingletonFE=-1
+0 WordPenalty=-1.73719 EGivenFCoherent=-0.47712 LanguageModel=-4.98084 CountEF=0.30103 MaxLexEGivenF=9.89451 IsSingletonFE=-1
+1 WordPenalty=1.73719 EGivenFCoherent=0.47712 LanguageModel=4.98084 CountEF=-0.30103 MaxLexEGivenF=-9.89451 IsSingletonFE=1
+0 WordPenalty=-0.8686 LanguageModel=-1.532 MaxLexEGivenF=8.36687
+1 WordPenalty=0.8686 LanguageModel=1.532 MaxLexEGivenF=-8.36687
+1 Glue=-1 WordPenalty=-0.868582 EGivenFCoherent=0.95424 LanguageModel=-1.09436 CountEF=-0.69897 MaxLexFGivenE=-0.206486 MaxLexEGivenF=2.05538 IsSingletonFE=1
+0 Glue=1 WordPenalty=0.868582 EGivenFCoherent=-0.95424 LanguageModel=1.09436 CountEF=0.69897 MaxLexFGivenE=0.206486 MaxLexEGivenF=-2.05538 IsSingletonFE=-1
+1 WordPenalty=-1.30288 EGivenFCoherent=0.90309 LanguageModel=-5.36093 CountEF=-0.653213 MaxLexEGivenF=5.92109 IsSingletonFE=1
+0 WordPenalty=1.30288 EGivenFCoherent=-0.90309 LanguageModel=5.36093 CountEF=0.653213 MaxLexEGivenF=-5.92109 IsSingletonFE=-1
+0 WordPenalty=1.30288 EGivenFCoherent=-0.30103 LanguageModel=3.92178 CountEF=0.176091 MaxLexFGivenE=1.07281 MaxLexEGivenF=-3.52541 IsSingletonFE=-1
+1 WordPenalty=-1.30288 EGivenFCoherent=0.30103 LanguageModel=-3.92178 CountEF=-0.176091 MaxLexFGivenE=-1.07281 MaxLexEGivenF=3.52541 IsSingletonFE=1
+0 WordPenalty=1.73717 EGivenFCoherent=-3.45301 LanguageModel=6.4798 CountEF=2.86894 MaxLexEGivenF=-9.95009 IsSingletonFE=-2
+1 WordPenalty=-1.73717 EGivenFCoherent=3.45301 LanguageModel=-6.4798 CountEF=-2.86894 MaxLexEGivenF=9.95009 IsSingletonFE=2
+0 Glue=-1 WordPenalty=1.73717 EGivenFCoherent=-2.23553 LanguageModel=8.24972 CountEF=1.81458 MaxLexFGivenE=0.206486 MaxLexEGivenF=-8.21247 IsSingletonFE=-2
+1 Glue=1 WordPenalty=-1.73717 EGivenFCoherent=2.23553 LanguageModel=-8.24972 CountEF=-1.81458 MaxLexFGivenE=-0.206486 MaxLexEGivenF=8.21247 IsSingletonFE=2
+1 Glue=-1 WordPenalty=-1.73717 EGivenFCoherent=1.9345 LanguageModel=-5.78038 CountEF=-1.63849 MaxLexFGivenE=-1.07281 MaxLexEGivenF=7.74268 IsSingletonFE=1
+0 Glue=1 WordPenalty=1.73717 EGivenFCoherent=-1.9345 LanguageModel=5.78038 CountEF=1.63849 MaxLexFGivenE=1.07281 MaxLexEGivenF=-7.74268 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=-0.434291 EGivenFCoherent=0.47712 LanguageModel=-3.89992 CountEF=-0.30103 MaxLexEGivenF=4.81104 IsSingletonFE=1
+0 Glue=1 WordPenalty=0.434291 EGivenFCoherent=-0.47712 LanguageModel=3.89992 CountEF=0.30103 MaxLexEGivenF=-4.81104 IsSingletonFE=-1
+1 Glue=1 WordPenalty=-0.434291 LanguageModel=-1.10684 MaxLexFGivenE=-0.206486 MaxLexEGivenF=2.31052
+0 Glue=-1 WordPenalty=0.434291 LanguageModel=1.10684 MaxLexFGivenE=0.206486 MaxLexEGivenF=-2.31052
+0 Glue=1 WordPenalty=0.868586 LanguageModel=2.83572 MaxLexFGivenE=-1.0989 MaxLexEGivenF=-3.46279
+1 Glue=-1 WordPenalty=-0.868586 LanguageModel=-2.83572 MaxLexFGivenE=1.0989 MaxLexEGivenF=3.46279
+1 WordPenalty=0.434295 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=4.02939 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=-0.252023 MaxLexEGivenF=-11.5703 IsSingletonFE=-1
+0 WordPenalty=-0.434295 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=-4.02939 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=0.252023 MaxLexEGivenF=11.5703 IsSingletonFE=1
+1 Glue=1 WordPenalty=0.4343 EGivenFCoherent=1.32222 LanguageModel=0.04898 CountEF=-1.04139 MaxLexFGivenE=2.17171 MaxLexEGivenF=-6.06872 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-0.4343 EGivenFCoherent=-1.32222 LanguageModel=-0.04898 CountEF=1.04139 MaxLexFGivenE=-2.17171 MaxLexEGivenF=6.06872 IsSingletonFE=-1
+0 LanguageModel=-6.65588 MaxLexEGivenF=3.73443
+1 LanguageModel=6.65588 MaxLexEGivenF=-3.73443
+1 Glue=1 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=0.377606 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=-0.252023 MaxLexEGivenF=-3.60523 IsSingletonFE=-1
+0 Glue=-1 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=-0.377606 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=0.252023 MaxLexEGivenF=3.60523 IsSingletonFE=1
+0 Glue=3 WordPenalty=2.17148 EGivenFCoherent=-1.63347 LanguageModel=6.19555 CountEF=1.33746 MaxLexFGivenE=-0.435638 MaxLexEGivenF=-12.8449 IsSingletonFE=-2
+1 Glue=-3 WordPenalty=-2.17148 EGivenFCoherent=1.63347 LanguageModel=-6.19555 CountEF=-1.33746 MaxLexFGivenE=0.435638 MaxLexEGivenF=12.8449 IsSingletonFE=2
+0 EGivenFCoherent=0.95424 LanguageModel=0.639115 CountEF=-0.761758 MaxLexFGivenE=0.021639 MaxLexEGivenF=2.36562
+1 EGivenFCoherent=-0.95424 LanguageModel=-0.639115 CountEF=0.761758 MaxLexFGivenE=-0.021639 MaxLexEGivenF=-2.36562
+1 WordPenalty=-0.4343 EGivenFCoherent=-0.210073 LanguageModel=-1.48685 CountEF=0.058546 MaxLexFGivenE=0.435638 MaxLexEGivenF=5.21168
+0 WordPenalty=0.4343 EGivenFCoherent=0.210073 LanguageModel=1.48685 CountEF=-0.058546 MaxLexFGivenE=-0.435638 MaxLexEGivenF=-5.21168
+0 WordPenalty=-0.86859 EGivenFCoherent=1.06201 LanguageModel=-1.96126 CountEF=-0.810603 MaxLexFGivenE=0.881201 MaxLexEGivenF=10.5318 IsSingletonFE=1
+1 WordPenalty=0.86859 EGivenFCoherent=-1.06201 LanguageModel=1.96126 CountEF=0.810603 MaxLexFGivenE=-0.881201 MaxLexEGivenF=-10.5318 IsSingletonFE=-1
+0 EGivenFCoherent=0.47712 LanguageModel=0.306705 CountEF=-0.128399 MaxLexFGivenE=-0.384298 MaxLexEGivenF=1.99031 IsSingletonFE=2
+1 EGivenFCoherent=-0.47712 LanguageModel=-0.306705 CountEF=0.128399 MaxLexFGivenE=0.384298 MaxLexEGivenF=-1.99031 IsSingletonFE=-2
+0 Glue=1 EGivenFCoherent=0.8893 LanguageModel=0.61116 CountEF=-0.851938 MaxLexFGivenE=0.021639 MaxLexEGivenF=1.40384 IsSingletonFE=-1
+1 Glue=-1 EGivenFCoherent=-0.8893 LanguageModel=-0.61116 CountEF=0.851938 MaxLexFGivenE=-0.021639 MaxLexEGivenF=-1.40384 IsSingletonFE=1
+0 Glue=2 WordPenalty=-1.73718 EGivenFCoherent=-1.01424 LanguageModel=-5.99377 CountEF=1.01773 MaxLexFGivenE=0.239077 MaxLexEGivenF=10.7083
+1 Glue=-2 WordPenalty=1.73718 EGivenFCoherent=1.01424 LanguageModel=5.99377 CountEF=-1.01773 MaxLexFGivenE=-0.239077 MaxLexEGivenF=-10.7083
+1 Glue=-1 WordPenalty=1.73718 EGivenFCoherent=0.378197 LanguageModel=6.18858 CountEF=-0.559308 MaxLexFGivenE=-0.021639 MaxLexEGivenF=-10.1644 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-1.73718 EGivenFCoherent=-0.378197 LanguageModel=-6.18858 CountEF=0.559308 MaxLexFGivenE=0.021639 MaxLexEGivenF=10.1644 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.52489 LanguageModel=-7.46705 CountEF=-0.848822 MaxLexFGivenE=-0.384298 MaxLexEGivenF=20.0654 IsSingletonFE=3
+1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.52489 LanguageModel=7.46705 CountEF=0.848822 MaxLexFGivenE=0.384298 MaxLexEGivenF=-20.0654 IsSingletonFE=-3
+0 WordPenalty=-2.60577 EGivenFCoherent=0.255833 LanguageModel=-8.64392 CountEF=-0.042751 MaxLexFGivenE=-0.384298 MaxLexEGivenF=20.0654 IsSingletonFE=1
+1 WordPenalty=2.60577 EGivenFCoherent=-0.255833 LanguageModel=8.64392 CountEF=0.042751 MaxLexFGivenE=0.384298 MaxLexEGivenF=-20.0654 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.81213 LanguageModel=-7.39625 CountEF=-1.22364 MaxLexFGivenE=-0.435638 MaxLexEGivenF=20.1552 IsSingletonFE=2
+1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.81213 LanguageModel=7.39625 CountEF=1.22364 MaxLexFGivenE=0.435638 MaxLexEGivenF=-20.1552 IsSingletonFE=-2
+0 WordPenalty=-0.868586 EGivenFCoherent=1.66745 LanguageModel=-1.56546 CountEF=-1.39601 MaxLexFGivenE=-0.435638 MaxLexEGivenF=4.29963
+1 WordPenalty=0.868586 EGivenFCoherent=-1.66745 LanguageModel=1.56546 CountEF=1.39601 MaxLexFGivenE=0.435638 MaxLexEGivenF=-4.29963
+0 Glue=-1 WordPenalty=-0.868586 EGivenFCoherent=1.34925 LanguageModel=-2.26013 CountEF=-1.18542 MaxLexFGivenE=-0.254628 MaxLexEGivenF=5.22699
+1 Glue=1 WordPenalty=0.868586 EGivenFCoherent=-1.34925 LanguageModel=2.26013 CountEF=1.18542 MaxLexFGivenE=0.254628 MaxLexEGivenF=-5.22699
+0 WordPenalty=-0.868586 EGivenFCoherent=1.58827 LanguageModel=-1.59452 CountEF=-1.32906 MaxLexFGivenE=-0.435638 MaxLexEGivenF=5.82018
+1 WordPenalty=0.868586 EGivenFCoherent=-1.58827 LanguageModel=1.59452 CountEF=1.32906 MaxLexFGivenE=0.435638 MaxLexEGivenF=-5.82018
+1 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=2.8238 LanguageModel=-7.46237 CountEF=-2.36549 MaxLexFGivenE=-0.674715 MaxLexEGivenF=16.022 IsSingletonFE=2
+0 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-2.8238 LanguageModel=7.46237 CountEF=2.36549 MaxLexFGivenE=0.674715 MaxLexEGivenF=-16.022 IsSingletonFE=-2
+1 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=2.9345 LanguageModel=-6.78322 CountEF=-2.2917 MaxLexFGivenE=-0.674715 MaxLexEGivenF=15.9056 IsSingletonFE=3
+0 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-2.9345 LanguageModel=6.78322 CountEF=2.2917 MaxLexFGivenE=0.674715 MaxLexEGivenF=-15.9056 IsSingletonFE=-3
+0 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-2.68169 LanguageModel=7.22701 CountEF=2.34679 MaxLexFGivenE=0.674715 MaxLexEGivenF=-16.0759 IsSingletonFE=-1
+1 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=2.68169 LanguageModel=-7.22701 CountEF=-2.34679 MaxLexFGivenE=-0.674715 MaxLexEGivenF=16.0759 IsSingletonFE=1
+1 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=3.12483 LanguageModel=-6.95196 CountEF=-2.54158 MaxLexFGivenE=-0.413335 MaxLexEGivenF=15.7558 IsSingletonFE=3
+0 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-3.12483 LanguageModel=6.95196 CountEF=2.54158 MaxLexFGivenE=0.413335 MaxLexEGivenF=-15.7558 IsSingletonFE=-3
+0 Glue=3 WordPenalty=2.60577 EGivenFCoherent=-2.98272 LanguageModel=7.46237 CountEF=2.52288 MaxLexFGivenE=0.674715 MaxLexEGivenF=-16.022 IsSingletonFE=-2
+1 Glue=-3 WordPenalty=-2.60577 EGivenFCoherent=2.98272 LanguageModel=-7.46237 CountEF=-2.52288 MaxLexFGivenE=-0.674715 MaxLexEGivenF=16.022 IsSingletonFE=2
+1 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=2.09342 LanguageModel=-7.2267 CountEF=-1.79588 MaxLexFGivenE=-0.674715 MaxLexEGivenF=16.0759 IsSingletonFE=1
+0 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-2.09342 LanguageModel=7.2267 CountEF=1.79588 MaxLexFGivenE=0.674715 MaxLexEGivenF=-16.0759 IsSingletonFE=-1
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.04565 LanguageModel=8.12633 CountEF=1.72867 MaxLexFGivenE=-0.26138 MaxLexEGivenF=-17.8081 IsSingletonFE=-2
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.04565 LanguageModel=-8.12633 CountEF=-1.72867 MaxLexFGivenE=0.26138 MaxLexEGivenF=17.8081 IsSingletonFE=2
+0 Glue=-1 WordPenalty=3.04007 EGivenFCoherent=-1.45738 LanguageModel=8.40107 CountEF=1.22352 MaxLexEGivenF=-18.1282 IsSingletonFE=-1
+1 Glue=1 WordPenalty=-3.04007 EGivenFCoherent=1.45738 LanguageModel=-8.40107 CountEF=-1.22352 MaxLexEGivenF=18.1282 IsSingletonFE=1
+1 Glue=2 WordPenalty=-3.04007 EGivenFCoherent=0.401798 LanguageModel=-8.92729 CountEF=-0.38091 MaxLexEGivenF=17.9579
+0 Glue=-2 WordPenalty=3.04007 EGivenFCoherent=-0.401798 LanguageModel=8.92729 CountEF=0.38091 MaxLexEGivenF=-17.9579
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.06585 LanguageModel=-8.84421 CountEF=-1.76646 MaxLexFGivenE=0.21004 MaxLexEGivenF=17.8979 IsSingletonFE=1
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.06585 LanguageModel=8.84421 CountEF=1.76646 MaxLexFGivenE=-0.21004 MaxLexEGivenF=-17.8979 IsSingletonFE=-1
+1 Glue=2 WordPenalty=-3.04007 EGivenFCoherent=0.401747 LanguageModel=-9.13444 CountEF=-0.274674 MaxLexEGivenF=17.9579 IsSingletonFE=1
+0 Glue=-2 WordPenalty=3.04007 EGivenFCoherent=-0.401747 LanguageModel=9.13444 CountEF=0.274674 MaxLexEGivenF=-17.9579 IsSingletonFE=-1
+1 WordPenalty=-3.04007 EGivenFCoherent=2.17059 LanguageModel=-8.62074 CountEF=-1.98528 MaxLexFGivenE=-0.457277 MaxLexEGivenF=18.3523
+0 WordPenalty=3.04007 EGivenFCoherent=-2.17059 LanguageModel=8.62074 CountEF=1.98528 MaxLexFGivenE=0.457277 MaxLexEGivenF=-18.3523
+0 WordPenalty=3.04007 EGivenFCoherent=-2.17014 LanguageModel=8.09417 CountEF=1.66161 MaxLexFGivenE=0.247237 MaxLexEGivenF=-18.122 IsSingletonFE=-2
+1 WordPenalty=-3.04007 EGivenFCoherent=2.17014 LanguageModel=-8.09417 CountEF=-1.66161 MaxLexFGivenE=-0.247237 MaxLexEGivenF=18.122 IsSingletonFE=2
+1 WordPenalty=-3.04007 EGivenFCoherent=2.42541 LanguageModel=-8.06665 CountEF=-1.80774 MaxLexEGivenF=17.9898 IsSingletonFE=3
+0 WordPenalty=3.04007 EGivenFCoherent=-2.42541 LanguageModel=8.06665 CountEF=1.80774 MaxLexEGivenF=-17.9898 IsSingletonFE=-3
+0 Glue=3 WordPenalty=3.04007 EGivenFCoherent=-3.12145 LanguageModel=8.90177 CountEF=2.4077 MaxLexFGivenE=-0.649959 MaxLexEGivenF=-18.566 IsSingletonFE=-3
+1 Glue=-3 WordPenalty=-3.04007 EGivenFCoherent=3.12145 LanguageModel=-8.90177 CountEF=-2.4077 MaxLexFGivenE=0.649959 MaxLexEGivenF=18.566 IsSingletonFE=3
+1 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.44359 LanguageModel=-7.85518 CountEF=-2.11685 MaxLexFGivenE=0.457277 MaxLexEGivenF=17.9041 IsSingletonFE=1
+0 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.44359 LanguageModel=7.85518 CountEF=2.11685 MaxLexFGivenE=-0.457277 MaxLexEGivenF=-17.9041 IsSingletonFE=-1
+0 Glue=6 WordPenalty=3.04007 EGivenFCoherent=-3.63593 LanguageModel=9.10895 CountEF=3.06279 MaxLexFGivenE=-0.674715 MaxLexEGivenF=-18.448 IsSingletonFE=-2
+1 Glue=-6 WordPenalty=-3.04007 EGivenFCoherent=3.63593 LanguageModel=-9.10895 CountEF=-3.06279 MaxLexFGivenE=0.674715 MaxLexEGivenF=18.448 IsSingletonFE=2
+0 WordPenalty=3.04007 EGivenFCoherent=-1.45738 LanguageModel=8.43673 CountEF=1.33746 MaxLexFGivenE=0.457277 MaxLexEGivenF=-17.5304
+1 WordPenalty=-3.04007 EGivenFCoherent=1.45738 LanguageModel=-8.43673 CountEF=-1.33746 MaxLexFGivenE=-0.457277 MaxLexEGivenF=17.5304
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-1.8152 LanguageModel=8.82487 CountEF=1.52702 MaxLexFGivenE=-0.457277 MaxLexEGivenF=-17.9041 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=1.8152 LanguageModel=-8.82487 CountEF=-1.52702 MaxLexFGivenE=0.457277 MaxLexEGivenF=17.9041 IsSingletonFE=1
+0 Glue=3 WordPenalty=3.04007 EGivenFCoherent=-2.57054 LanguageModel=9.05136 CountEF=2.1181 MaxLexFGivenE=-0.21004 MaxLexEGivenF=-17.8979 IsSingletonFE=-2
+1 Glue=-3 WordPenalty=-3.04007 EGivenFCoherent=2.57054 LanguageModel=-9.05136 CountEF=-2.1181 MaxLexFGivenE=0.21004 MaxLexEGivenF=17.8979 IsSingletonFE=2
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.34668 LanguageModel=-7.95759 CountEF=-2.06446 MaxLexEGivenF=17.9579 IsSingletonFE=1
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.34668 LanguageModel=7.95759 CountEF=2.06446 MaxLexEGivenF=-17.9579 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=1.9345 LanguageModel=-8.40106 CountEF=-1.63849 MaxLexEGivenF=18.1282 IsSingletonFE=1
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-1.9345 LanguageModel=8.40106 CountEF=1.63849 MaxLexEGivenF=-18.1282 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=3.04007 EGivenFCoherent=-1.45738 LanguageModel=8.53249 CountEF=1.33746 MaxLexEGivenF=-16.3003
+1 Glue=1 WordPenalty=-3.04007 EGivenFCoherent=1.45738 LanguageModel=-8.53249 CountEF=-1.33746 MaxLexEGivenF=16.3003
+1 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.99651 LanguageModel=-8.44281 CountEF=-2.43686 MaxLexFGivenE=0.05523 MaxLexEGivenF=18.316 IsSingletonFE=2
+0 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.99651 LanguageModel=8.44281 CountEF=2.43686 MaxLexFGivenE=-0.05523 MaxLexEGivenF=-18.316 IsSingletonFE=-2
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-1.78544 LanguageModel=9.59043 CountEF=1.54364 MaxLexFGivenE=0.457277 MaxLexEGivenF=-18.3523 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=1.78544 LanguageModel=-9.59043 CountEF=-1.54364 MaxLexFGivenE=-0.457277 MaxLexEGivenF=18.3523 IsSingletonFE=1
+1 Glue=1 WordPenalty=-3.04007 EGivenFCoherent=0.267047 LanguageModel=-9.32342 CountEF=-0.228915 MaxLexFGivenE=0.05134 MaxLexEGivenF=17.8682 IsSingletonFE=1
+0 Glue=-1 WordPenalty=3.04007 EGivenFCoherent=-0.267047 LanguageModel=9.32342 CountEF=0.228915 MaxLexFGivenE=-0.05134 MaxLexEGivenF=-17.8682 IsSingletonFE=-1
+0 Glue=4 WordPenalty=3.04007 EGivenFCoherent=-3.40186 LanguageModel=8.90178 CountEF=2.63052 MaxLexFGivenE=-0.649959 MaxLexEGivenF=-18.566 IsSingletonFE=-3
+1 Glue=-4 WordPenalty=-3.04007 EGivenFCoherent=3.40186 LanguageModel=-8.90178 CountEF=-2.63052 MaxLexFGivenE=0.649959 MaxLexEGivenF=18.566 IsSingletonFE=3
+1 WordPenalty=-3.04007 EGivenFCoherent=1.03825 LanguageModel=-9.37075 CountEF=-0.949283 MaxLexEGivenF=18.1282
+0 WordPenalty=3.04007 EGivenFCoherent=-1.03825 LanguageModel=9.37075 CountEF=0.949283 MaxLexEGivenF=-18.1282
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.04565 LanguageModel=8.28634 CountEF=1.79562 MaxLexFGivenE=0.457277 MaxLexEGivenF=-18.2139 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.04565 LanguageModel=-8.28634 CountEF=-1.79562 MaxLexFGivenE=-0.457277 MaxLexEGivenF=18.2139 IsSingletonFE=1
+0 WordPenalty=3.04007 EGivenFCoherent=-1.75841 LanguageModel=8.08043 CountEF=1.5805 MaxLexFGivenE=0.272377 MaxLexEGivenF=-18.0367
+1 WordPenalty=-3.04007 EGivenFCoherent=1.75841 LanguageModel=-8.08043 CountEF=-1.5805 MaxLexFGivenE=-0.272377 MaxLexEGivenF=18.0367
+1 WordPenalty=-3.04007 EGivenFCoherent=1.45738 LanguageModel=-8.62074 CountEF=-1.33746 MaxLexFGivenE=-0.457277 MaxLexEGivenF=18.3523
+0 WordPenalty=3.04007 EGivenFCoherent=-1.45738 LanguageModel=8.62074 CountEF=1.33746 MaxLexFGivenE=0.457277 MaxLexEGivenF=-18.3523
+0 Glue=3 WordPenalty=3.04007 EGivenFCoherent=-3.52277 LanguageModel=7.87452 CountEF=2.84261 MaxLexFGivenE=-0.21004 MaxLexEGivenF=-17.8979 IsSingletonFE=-3
+1 Glue=-3 WordPenalty=-3.04007 EGivenFCoherent=3.52277 LanguageModel=-7.87452 CountEF=-2.84261 MaxLexFGivenE=0.21004 MaxLexEGivenF=17.8979 IsSingletonFE=3
+1 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=1.91733 LanguageModel=-8.90061 CountEF=-1.67094 MaxLexFGivenE=0.244022 MaxLexEGivenF=18.7003 IsSingletonFE=1
+0 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-1.91733 LanguageModel=8.90061 CountEF=1.67094 MaxLexFGivenE=-0.244022 MaxLexEGivenF=-18.7003 IsSingletonFE=-1
+0 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.03141 LanguageModel=9.03762 CountEF=1.71767 MaxLexFGivenE=-0.1849 MaxLexEGivenF=-17.8126 IsSingletonFE=-1
+1 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.03141 LanguageModel=-9.03762 CountEF=-1.71767 MaxLexFGivenE=0.1849 MaxLexEGivenF=17.8126 IsSingletonFE=1
+0 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.60251 LanguageModel=8.09418 CountEF=2.25404 MaxLexFGivenE=0.247237 MaxLexEGivenF=-18.122 IsSingletonFE=-1
+1 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.60251 LanguageModel=-8.09418 CountEF=-2.25404 MaxLexFGivenE=-0.247237 MaxLexEGivenF=18.122 IsSingletonFE=1
+1 Glue=2 WordPenalty=-3.04007 EGivenFCoherent=0.378197 LanguageModel=-9.10863 CountEF=-0.457603 MaxLexFGivenE=0.21004 MaxLexEGivenF=17.8979
+0 Glue=-2 WordPenalty=3.04007 EGivenFCoherent=-0.378197 LanguageModel=9.10863 CountEF=0.457603 MaxLexFGivenE=-0.21004 MaxLexEGivenF=-17.8979
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=4.17609 LanguageModel=-5.97721 SampleCountF=2.47857 CountEF=-1.15294 MaxLexFGivenE=-0.717941 MaxLexEGivenF=10.3904 IsSingletonFE=2
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-4.17609 LanguageModel=5.97721 SampleCountF=-2.47857 CountEF=1.15294 MaxLexFGivenE=0.717941 MaxLexEGivenF=-10.3904 IsSingletonFE=-2
+0 Glue=1 WordPenalty=-2.60576 EGivenFCoherent=1.63202 LanguageModel=-7.05903 SampleCountF=2.47857 CountEF=0.90309 MaxLexFGivenE=-1.94972 MaxLexEGivenF=10.0185
+1 Glue=-1 WordPenalty=2.60576 EGivenFCoherent=-1.63202 LanguageModel=7.05903 SampleCountF=-2.47857 CountEF=-0.90309 MaxLexFGivenE=1.94972 MaxLexEGivenF=-10.0185
+1 Glue=3 WordPenalty=2.60576 EGivenFCoherent=-4.51255 LanguageModel=5.97678 SampleCountF=-2.47857 CountEF=1.41125 MaxLexFGivenE=0.717941 MaxLexEGivenF=-10.3904 IsSingletonFE=-2
+0 Glue=-3 WordPenalty=-2.60576 EGivenFCoherent=4.51255 LanguageModel=-5.97678 SampleCountF=2.47857 CountEF=-1.41125 MaxLexFGivenE=-0.717941 MaxLexEGivenF=10.3904 IsSingletonFE=2
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-2.78692 LanguageModel=7.05903 SampleCountF=-2.47857 CountEF=0.077481 MaxLexFGivenE=1.94972 MaxLexEGivenF=-10.0185 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=2.78692 LanguageModel=-7.05903 SampleCountF=2.47857 CountEF=-0.077481 MaxLexFGivenE=-1.94972 MaxLexEGivenF=10.0185 IsSingletonFE=1
+0 WordPenalty=-2.17147 EGivenFCoherent=2.75415 LanguageModel=-4.8198 SampleCountF=2.47857 CountEF=0.092267 MaxLexFGivenE=-0.866996 MaxLexEGivenF=8.65418 IsSingletonFE=1
+1 WordPenalty=2.17147 EGivenFCoherent=-2.75415 LanguageModel=4.8198 SampleCountF=-2.47857 CountEF=-0.092267 MaxLexFGivenE=0.866996 MaxLexEGivenF=-8.65418 IsSingletonFE=-1
+1 Glue=1 WordPenalty=2.17147 EGivenFCoherent=-2.30103 LanguageModel=5.31095 SampleCountF=-2.47857 CountEF=-0.425969 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.17147 EGivenFCoherent=2.30103 LanguageModel=-5.31095 SampleCountF=2.47857 CountEF=0.425969 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444 IsSingletonFE=1
+1 Glue=-2 WordPenalty=2.17147 EGivenFCoherent=-1.63202 LanguageModel=5.31137 SampleCountF=-2.47857 CountEF=-0.90309 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444
+0 Glue=2 WordPenalty=-2.17147 EGivenFCoherent=1.63202 LanguageModel=-5.31137 SampleCountF=2.47857 CountEF=0.90309 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444
+1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-3.12338 LanguageModel=5.31095 SampleCountF=-2.47857 CountEF=0.335788 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=3.12338 LanguageModel=-5.31095 SampleCountF=2.47857 CountEF=-0.335788 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=2.81012 LanguageModel=-5.51952 SampleCountF=2.47857 CountEF=-0.124938 MaxLexFGivenE=-1.487 MaxLexEGivenF=8.60598
+1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-2.81012 LanguageModel=5.51952 SampleCountF=-2.47857 CountEF=0.124938 MaxLexFGivenE=1.487 MaxLexEGivenF=-8.60598
+1 Glue=1 WordPenalty=2.17147 EGivenFCoherent=-2.30103 LanguageModel=5.31094 SampleCountF=-2.47857 CountEF=-0.425969 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.17147 EGivenFCoherent=2.30103 LanguageModel=-5.31094 SampleCountF=2.47857 CountEF=0.425969 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=3.26605 LanguageModel=-5.31137 SampleCountF=2.47857 CountEF=-0.531478 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-3.26605 LanguageModel=5.31137 SampleCountF=-2.47857 CountEF=0.531478 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-2.17147 EGivenFCoherent=2.11115 LanguageModel=-5.31137 SampleCountF=2.47857 CountEF=0.449093 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444
+1 Glue=-1 WordPenalty=2.17147 EGivenFCoherent=-2.11115 LanguageModel=5.31137 SampleCountF=-2.47857 CountEF=-0.449093 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444
+1 WordPenalty=2.17147 EGivenFCoherent=-2.64481 LanguageModel=5.31139 SampleCountF=-2.47857 CountEF=0.05878 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444
+0 WordPenalty=-2.17147 EGivenFCoherent=2.64481 LanguageModel=-5.31139 SampleCountF=2.47857 CountEF=-0.05878 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444
+1 WordPenalty=2.17147 EGivenFCoherent=-2.93024 LanguageModel=4.82023 SampleCountF=-2.47857 CountEF=0.032672 MaxLexFGivenE=0.866996 MaxLexEGivenF=-8.65418 IsSingletonFE=-1
+0 WordPenalty=-2.17147 EGivenFCoherent=2.93024 LanguageModel=-4.82023 SampleCountF=2.47857 CountEF=-0.032672 MaxLexFGivenE=-0.866996 MaxLexEGivenF=8.65418 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-2.17147 EGivenFCoherent=2.77871 LanguageModel=-5.31096 SampleCountF=2.47857 CountEF=-0.14896 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444
+1 Glue=1 WordPenalty=2.17147 EGivenFCoherent=-2.77871 LanguageModel=5.31096 SampleCountF=-2.47857 CountEF=0.14896 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444
+1 Glue=5 WordPenalty=2.17147 EGivenFCoherent=-4.8893 LanguageModel=4.92886 SampleCountF=-2.47857 CountEF=1.9147 MaxLexFGivenE=0.8881 MaxLexEGivenF=-8.6681 IsSingletonFE=-1
+0 Glue=-5 WordPenalty=-2.17147 EGivenFCoherent=4.8893 LanguageModel=-4.92886 SampleCountF=2.47857 CountEF=-1.9147 MaxLexFGivenE=-0.8881 MaxLexEGivenF=8.6681 IsSingletonFE=1
+1 Glue=5 WordPenalty=2.17147 EGivenFCoherent=-3.98283 LanguageModel=5.6286 SampleCountF=-2.47857 CountEF=1.22015 MaxLexFGivenE=1.5081 MaxLexEGivenF=-8.6199
+0 Glue=-5 WordPenalty=-2.17147 EGivenFCoherent=3.98283 LanguageModel=-5.6286 SampleCountF=2.47857 CountEF=-1.22015 MaxLexFGivenE=-1.5081 MaxLexEGivenF=8.6199
+1 Glue=1 WordPenalty=2.17147 EGivenFCoherent=-2.56909 LanguageModel=5.31138 SampleCountF=-2.47857 CountEF=-0.201632 MaxLexFGivenE=1.49988 MaxLexEGivenF=-8.3444 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.17147 EGivenFCoherent=2.56909 LanguageModel=-5.31138 SampleCountF=2.47857 CountEF=0.201632 MaxLexFGivenE=-1.49988 MaxLexEGivenF=8.3444 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=3.2381 LanguageModel=-4.9293 SampleCountF=2.47857 CountEF=-0.41567 MaxLexFGivenE=-0.8881 MaxLexEGivenF=8.6681 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-3.2381 LanguageModel=4.9293 SampleCountF=-2.47857 CountEF=0.41567 MaxLexFGivenE=0.8881 MaxLexEGivenF=-8.6681 IsSingletonFE=-1
+1 Glue=3 WordPenalty=2.17147 EGivenFCoherent=-4.61923 LanguageModel=4.22913 SampleCountF=-2.47857 CountEF=1.50743 MaxLexFGivenE=0.268101 MaxLexEGivenF=-8.71631 IsSingletonFE=-2
+0 Glue=-3 WordPenalty=-2.17147 EGivenFCoherent=4.61923 LanguageModel=-4.22913 SampleCountF=2.47857 CountEF=-1.50743 MaxLexFGivenE=-0.268101 MaxLexEGivenF=8.71631 IsSingletonFE=2
+1 WordPenalty=2.17147 EGivenFCoherent=-2.86776 LanguageModel=4.82023 SampleCountF=-2.47857 CountEF=0.061427 MaxLexFGivenE=0.866996 MaxLexEGivenF=-8.65418 IsSingletonFE=-1
+0 WordPenalty=-2.17147 EGivenFCoherent=2.86776 LanguageModel=-4.82023 SampleCountF=2.47857 CountEF=-0.061427 MaxLexFGivenE=-0.866996 MaxLexEGivenF=8.65418 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=3.92428 LanguageModel=-4.22913 SampleCountF=2.47857 CountEF=-0.86034 MaxLexFGivenE=-0.268101 MaxLexEGivenF=8.71631 IsSingletonFE=2
+1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-3.92428 LanguageModel=4.22913 SampleCountF=-2.47857 CountEF=0.86034 MaxLexFGivenE=0.268101 MaxLexEGivenF=-8.71631 IsSingletonFE=-2
+0 Glue=-5 WordPenalty=-2.17147 EGivenFCoherent=4.14852 LanguageModel=-5.51951 SampleCountF=2.47857 CountEF=-1.35561 MaxLexFGivenE=-1.487 MaxLexEGivenF=8.60598
+1 Glue=5 WordPenalty=2.17147 EGivenFCoherent=-4.14852 LanguageModel=5.51951 SampleCountF=-2.47857 CountEF=1.35561 MaxLexFGivenE=1.487 MaxLexEGivenF=-8.60598
+1 Glue=-2 WordPenalty=3.04006 EGivenFCoherent=-2.51256 LanguageModel=7.45859 SampleCountF=-2.47857 CountEF=-0.258257 MaxLexFGivenE=-0.104584 MaxLexEGivenF=-12.5766 IsSingletonFE=-1
+0 Glue=2 WordPenalty=-3.04006 EGivenFCoherent=2.51256 LanguageModel=-7.45859 SampleCountF=2.47857 CountEF=0.258257 MaxLexFGivenE=0.104584 MaxLexEGivenF=12.5766 IsSingletonFE=1
+0 Glue=2 WordPenalty=-3.04006 EGivenFCoherent=1.91733 LanguageModel=-7.91345 SampleCountF=2.47857 CountEF=0.61066 MaxLexFGivenE=-1.02217 MaxLexEGivenF=12.4223
+1 Glue=-2 WordPenalty=3.04006 EGivenFCoherent=-1.91733 LanguageModel=7.91345 SampleCountF=-2.47857 CountEF=-0.61066 MaxLexFGivenE=1.02217 MaxLexEGivenF=-12.4223
+0 WordPenalty=-3.04006 EGivenFCoherent=4.10485 LanguageModel=-6.75844 SampleCountF=2.47857 CountEF=-1.0521 MaxLexFGivenE=0.724583 MaxLexEGivenF=12.6248 IsSingletonFE=2
+1 WordPenalty=3.04006 EGivenFCoherent=-4.10485 LanguageModel=6.75844 SampleCountF=-2.47857 CountEF=1.0521 MaxLexFGivenE=-0.724583 MaxLexEGivenF=-12.6248 IsSingletonFE=-2
+0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=3.06346 LanguageModel=-7.24959 SampleCountF=2.47857 CountEF=-0.167491 MaxLexFGivenE=0.091695 MaxLexEGivenF=12.315 IsSingletonFE=2
+1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-3.06346 LanguageModel=7.24959 SampleCountF=-2.47857 CountEF=0.167491 MaxLexFGivenE=-0.091695 MaxLexEGivenF=-12.315 IsSingletonFE=-2
+1 Glue=3 WordPenalty=1.73718 EGivenFCoherent=-2.58634 LanguageModel=4.56866 CountEF=2.25828 MaxLexFGivenE=-0.690146 MaxLexEGivenF=-6.6147 IsSingletonFE=-1
+0 Glue=-3 WordPenalty=-1.73718 EGivenFCoherent=2.58634 LanguageModel=-4.56866 CountEF=-2.25828 MaxLexFGivenE=0.690146 MaxLexEGivenF=6.6147 IsSingletonFE=1
+1 Glue=3 WordPenalty=1.73717 EGivenFCoherent=-4.14211 LanguageModel=3.18121 SampleCountF=-2.47857 CountEF=1.2064 MaxLexFGivenE=0.43826 MaxLexEGivenF=-6.99406 IsSingletonFE=-1
+0 Glue=-3 WordPenalty=-1.73717 EGivenFCoherent=4.14211 LanguageModel=-3.18121 SampleCountF=2.47857 CountEF=-1.2064 MaxLexFGivenE=-0.43826 MaxLexEGivenF=6.99406 IsSingletonFE=1
+0 Glue=-3 WordPenalty=-2.60576 EGivenFCoherent=3.85231 LanguageModel=-6.50698 SampleCountF=2.47857 CountEF=-0.929418 MaxLexFGivenE=-1.4465 MaxLexEGivenF=11.0581 IsSingletonFE=2
+1 Glue=3 WordPenalty=2.60576 EGivenFCoherent=-3.85231 LanguageModel=6.50698 SampleCountF=-2.47857 CountEF=0.929418 MaxLexFGivenE=1.4465 MaxLexEGivenF=-11.0581 IsSingletonFE=-2
+1 Glue=1 WordPenalty=2.60576 EGivenFCoherent=-2.70243 LanguageModel=6.86049 SampleCountF=-2.47857 CountEF=-0.14826 MaxLexFGivenE=0.96084 MaxLexEGivenF=-11.1781 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.60576 EGivenFCoherent=2.70243 LanguageModel=-6.86049 SampleCountF=2.47857 CountEF=0.14826 MaxLexFGivenE=-0.96084 MaxLexEGivenF=11.1781 IsSingletonFE=1
+1 Glue=1 WordPenalty=2.60576 EGivenFCoherent=-2.59329 LanguageModel=7.05447 SampleCountF=-2.47857 CountEF=-0.24517 MaxLexFGivenE=-0.08348 MaxLexEGivenF=-11.3253 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.60576 EGivenFCoherent=2.59329 LanguageModel=-7.05447 SampleCountF=2.47857 CountEF=0.24517 MaxLexFGivenE=0.08348 MaxLexEGivenF=11.3253 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-3.40379 LanguageModel=6.71555 SampleCountF=-2.47857 CountEF=0.551127 MaxLexFGivenE=1.43361 MaxLexEGivenF=-11.3196 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=3.40379 LanguageModel=-6.71555 SampleCountF=2.47857 CountEF=-0.551127 MaxLexFGivenE=-1.43361 MaxLexEGivenF=11.3196 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=3.04766 LanguageModel=-6.86093 SampleCountF=2.47857 CountEF=-0.215557 MaxLexFGivenE=-0.96084 MaxLexEGivenF=11.1781 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-3.04766 LanguageModel=6.86093 SampleCountF=-2.47857 CountEF=0.215557 MaxLexFGivenE=0.96084 MaxLexEGivenF=-11.1781 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=2.60576 EGivenFCoherent=-1.34724 LanguageModel=7.24301 SampleCountF=-2.47857 CountEF=-1.27718 MaxLexFGivenE=1.57262 MaxLexEGivenF=-10.8544 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-2.60576 EGivenFCoherent=1.34724 LanguageModel=-7.24301 SampleCountF=2.47857 CountEF=1.27718 MaxLexFGivenE=-1.57262 MaxLexEGivenF=10.8544 IsSingletonFE=1
+1 Glue=4 WordPenalty=2.60576 EGivenFCoherent=-3.93305 LanguageModel=6.82464 SampleCountF=-2.47857 CountEF=1.06276 MaxLexFGivenE=1.45471 MaxLexEGivenF=-11.3336 IsSingletonFE=-1
+0 Glue=-4 WordPenalty=-2.60576 EGivenFCoherent=3.93305 LanguageModel=-6.82464 SampleCountF=2.47857 CountEF=-1.06276 MaxLexFGivenE=-1.45471 MaxLexEGivenF=11.3336 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-2.92026 LanguageModel=6.86091 SampleCountF=-2.47857 CountEF=0.10089 MaxLexFGivenE=0.96084 MaxLexEGivenF=-11.1781 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=2.92026 LanguageModel=-6.86091 SampleCountF=2.47857 CountEF=-0.10089 MaxLexFGivenE=-0.96084 MaxLexEGivenF=11.1781 IsSingletonFE=1
+0 Glue=-3 WordPenalty=-2.60576 EGivenFCoherent=4.41017 LanguageModel=-6.12491 SampleCountF=2.47857 CountEF=-1.36379 MaxLexFGivenE=-0.834711 MaxLexEGivenF=11.3818 IsSingletonFE=2
+1 Glue=3 WordPenalty=2.60576 EGivenFCoherent=-4.41017 LanguageModel=6.12491 SampleCountF=-2.47857 CountEF=1.36379 MaxLexFGivenE=0.834711 MaxLexEGivenF=-11.3818 IsSingletonFE=-2
+1 Glue=5 WordPenalty=2.60576 EGivenFCoherent=-3.74317 LanguageModel=7.24258 SampleCountF=-2.47857 CountEF=0.929418 MaxLexFGivenE=1.57262 MaxLexEGivenF=-10.8544 IsSingletonFE=-1
+0 Glue=-5 WordPenalty=-2.60576 EGivenFCoherent=3.74317 LanguageModel=-7.24258 SampleCountF=2.47857 CountEF=-0.929418 MaxLexFGivenE=-1.57262 MaxLexEGivenF=10.8544 IsSingletonFE=1
+1 Glue=3 WordPenalty=2.60576 EGivenFCoherent=-3.44716 LanguageModel=6.8605 SampleCountF=-2.47857 CountEF=0.55931 MaxLexFGivenE=0.96084 MaxLexEGivenF=-11.1781 IsSingletonFE=-1
+0 Glue=-3 WordPenalty=-2.60576 EGivenFCoherent=3.44716 LanguageModel=-6.8605 SampleCountF=2.47857 CountEF=-0.55931 MaxLexFGivenE=-0.96084 MaxLexEGivenF=11.1781 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-2.69495 LanguageModel=7.40026 SampleCountF=-2.47857 CountEF=0.04503 MaxLexFGivenE=1.02217 MaxLexEGivenF=-11.1571
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=2.69495 LanguageModel=-7.40026 SampleCountF=2.47857 CountEF=-0.04503 MaxLexFGivenE=-1.02217 MaxLexEGivenF=11.1571
+0 Glue=-1 WordPenalty=-2.60576 EGivenFCoherent=2.70243 LanguageModel=-7.05447 SampleCountF=2.47857 CountEF=0.14826 MaxLexFGivenE=0.08348 MaxLexEGivenF=11.3253 IsSingletonFE=1
+1 Glue=1 WordPenalty=2.60576 EGivenFCoherent=-2.70243 LanguageModel=7.05447 SampleCountF=-2.47857 CountEF=-0.14826 MaxLexFGivenE=-0.08348 MaxLexEGivenF=-11.3253 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-2.60576 EGivenFCoherent=2.81552 LanguageModel=-6.90953 SampleCountF=2.47857 CountEF=-0.00022 MaxLexFGivenE=-0.389286 MaxLexEGivenF=11.4669 IsSingletonFE=1
+1 Glue=1 WordPenalty=2.60576 EGivenFCoherent=-2.81552 LanguageModel=6.90953 SampleCountF=-2.47857 CountEF=0.00022 MaxLexFGivenE=0.389286 MaxLexEGivenF=-11.4669 IsSingletonFE=-1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-2.62169 LanguageModel=7.20627 SampleCountF=-2.47857 CountEF=-0.214655 MaxLexFGivenE=2.06649 MaxLexEGivenF=-11.0099 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=2.62169 LanguageModel=-7.20627 SampleCountF=2.47857 CountEF=0.214655 MaxLexFGivenE=-2.06649 MaxLexEGivenF=11.0099 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=2.99598 LanguageModel=-6.75142 SampleCountF=2.47857 CountEF=-0.221121 MaxLexFGivenE=-0.939736 MaxLexEGivenF=11.1642 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-2.99598 LanguageModel=6.75142 SampleCountF=-2.47857 CountEF=0.221121 MaxLexFGivenE=0.939736 MaxLexEGivenF=-11.1642 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-2.60576 EGivenFCoherent=3.28824 LanguageModel=-7.01817 SampleCountF=2.47857 CountEF=-0.40192 MaxLexFGivenE=-0.41039 MaxLexEGivenF=11.4808 IsSingletonFE=1
+1 Glue=2 WordPenalty=2.60576 EGivenFCoherent=-3.28824 LanguageModel=7.01817 SampleCountF=-2.47857 CountEF=0.40192 MaxLexFGivenE=0.41039 MaxLexEGivenF=-11.4808 IsSingletonFE=-1
+0 Glue=-5 WordPenalty=-2.60576 EGivenFCoherent=4.30103 LanguageModel=-7.05447 SampleCountF=2.47857 CountEF=-1.36379 MaxLexFGivenE=0.08348 MaxLexEGivenF=11.3253 IsSingletonFE=1
+1 Glue=5 WordPenalty=2.60576 EGivenFCoherent=-4.30103 LanguageModel=7.05447 SampleCountF=-2.47857 CountEF=1.36379 MaxLexFGivenE=-0.08348 MaxLexEGivenF=-11.3253 IsSingletonFE=-1
+0 Glue=-3 WordPenalty=-2.60576 EGivenFCoherent=4.03096 LanguageModel=-6.16078 SampleCountF=2.47857 CountEF=-0.956523 MaxLexFGivenE=-0.340841 MaxLexEGivenF=11.2263 IsSingletonFE=2
+1 Glue=3 WordPenalty=2.60576 EGivenFCoherent=-4.03096 LanguageModel=6.16078 SampleCountF=-2.47857 CountEF=0.956523 MaxLexFGivenE=0.340841 MaxLexEGivenF=-11.2263 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-2.60576 EGivenFCoherent=3.00596 LanguageModel=-6.75186 SampleCountF=2.47857 CountEF=-0.200333 MaxLexFGivenE=-0.939736 MaxLexEGivenF=11.1642 IsSingletonFE=1
+1 Glue=1 WordPenalty=2.60576 EGivenFCoherent=-3.00596 LanguageModel=6.75186 SampleCountF=-2.47857 CountEF=0.200333 MaxLexFGivenE=0.939736 MaxLexEGivenF=-11.1642 IsSingletonFE=-1
+1 WordPenalty=2.60576 EGivenFCoherent=-2.22531 LanguageModel=6.86092 SampleCountF=-2.47857 CountEF=-0.5462 MaxLexFGivenE=0.96084 MaxLexEGivenF=-11.1781 IsSingletonFE=-1
+0 WordPenalty=-2.60576 EGivenFCoherent=2.22531 LanguageModel=-6.86092 SampleCountF=2.47857 CountEF=0.5462 MaxLexFGivenE=-0.96084 MaxLexEGivenF=11.1781 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-4.77724 EGivenFCoherent=1.64345 LanguageModel=-14.57 CountEF=-1.35218 MaxLexFGivenE=-1.37604 MaxLexEGivenF=25.6246 IsSingletonFE=1
+1 Glue=1 WordPenalty=4.77724 EGivenFCoherent=-1.64345 LanguageModel=14.57 CountEF=1.35218 MaxLexFGivenE=1.37604 MaxLexEGivenF=-25.6246 IsSingletonFE=-1
+0 WordPenalty=-3.90865 EGivenFCoherent=1.20412 LanguageModel=-8.8811 CountEF=-1.13034 MaxLexFGivenE=-0.36554 MaxLexEGivenF=17.503
+1 WordPenalty=3.90865 EGivenFCoherent=-1.20412 LanguageModel=8.8811 CountEF=1.13034 MaxLexFGivenE=0.36554 MaxLexEGivenF=-17.503
+1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-0.30103 LanguageModel=11.6601 CountEF=0.176091 MaxLexFGivenE=1.21718 MaxLexEGivenF=-19.341 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=0.30103 LanguageModel=-11.6601 CountEF=-0.176091 MaxLexFGivenE=-1.21718 MaxLexEGivenF=19.341 IsSingletonFE=1
+1 Glue=2 WordPenalty=3.90865 LanguageModel=12.2796 MaxLexFGivenE=2.1581 MaxLexEGivenF=-15.1156
+0 Glue=-2 WordPenalty=-3.90865 LanguageModel=-12.2796 MaxLexFGivenE=-2.1581 MaxLexEGivenF=15.1156
+0 WordPenalty=-3.90865 EGivenFCoherent=1.90309 LanguageModel=-10.3854 CountEF=-1.60746 MaxLexFGivenE=-1.42419 MaxLexEGivenF=20.0701 IsSingletonFE=1
+1 WordPenalty=3.90865 EGivenFCoherent=-1.90309 LanguageModel=10.3854 CountEF=1.60746 MaxLexFGivenE=1.42419 MaxLexEGivenF=-20.0701 IsSingletonFE=-1
+0 WordPenalty=-3.90865 EGivenFCoherent=1.46736 LanguageModel=-9.23377 CountEF=-1.22724 MaxLexFGivenE=-0.731324 MaxLexEGivenF=18.6943 IsSingletonFE=1
+1 WordPenalty=3.90865 EGivenFCoherent=-1.46736 LanguageModel=9.23377 CountEF=1.22724 MaxLexFGivenE=0.731324 MaxLexEGivenF=-18.6943 IsSingletonFE=-1
+1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-0.47712 LanguageModel=13.3261 CountEF=0.30103 MaxLexFGivenE=2.80023 MaxLexEGivenF=-18.3881 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=0.47712 LanguageModel=-13.3261 CountEF=-0.30103 MaxLexFGivenE=-2.80023 MaxLexEGivenF=18.3881 IsSingletonFE=1
+1 WordPenalty=3.90865 EGivenFCoherent=-1.90309 LanguageModel=13.4812 CountEF=1.60746 MaxLexFGivenE=1.21718 MaxLexEGivenF=-19.4273 IsSingletonFE=-1
+0 WordPenalty=-3.90865 EGivenFCoherent=1.90309 LanguageModel=-13.4812 CountEF=-1.60746 MaxLexFGivenE=-1.21718 MaxLexEGivenF=19.4273 IsSingletonFE=1
+0 Glue=-2 WordPenalty=-3.90865 EGivenFCoherent=2.63246 LanguageModel=-7.62715 CountEF=-2.15836 MaxLexFGivenE=-0.888714 MaxLexEGivenF=19.5791 IsSingletonFE=2
+1 Glue=2 WordPenalty=3.90865 EGivenFCoherent=-2.63246 LanguageModel=7.62715 CountEF=2.15836 MaxLexFGivenE=0.888714 MaxLexEGivenF=-19.5791 IsSingletonFE=-2
+1 Glue=1 WordPenalty=3.90865 LanguageModel=14.7907 MaxLexFGivenE=2.59323 MaxLexEGivenF=-17.8071
+0 Glue=-1 WordPenalty=-3.90865 LanguageModel=-14.7907 MaxLexFGivenE=-2.59323 MaxLexEGivenF=17.8071
+1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-0.47712 LanguageModel=10.7772 CountEF=0.30103 MaxLexFGivenE=1.37604 MaxLexEGivenF=-19.8349 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=0.47712 LanguageModel=-10.7772 CountEF=-0.30103 MaxLexFGivenE=-1.37604 MaxLexEGivenF=19.8349 IsSingletonFE=1
+0 Glue=1 WordPenalty=-3.47436 EGivenFCoherent=-1.83569 LanguageModel=-10.5653 CountEF=1.66276 MaxLexFGivenE=-1.42419 MaxLexEGivenF=18.1384
+1 Glue=-1 WordPenalty=3.47436 EGivenFCoherent=1.83569 LanguageModel=10.5653 CountEF=-1.66276 MaxLexFGivenE=1.42419 MaxLexEGivenF=-18.1384
+1 Glue=2 WordPenalty=3.47435 EGivenFCoherent=-0.60206 LanguageModel=3.98242 CountEF=0.39794 MaxLexFGivenE=1.23817 MaxLexEGivenF=-13.5763 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-3.47435 EGivenFCoherent=0.60206 LanguageModel=-3.98242 CountEF=-0.39794 MaxLexFGivenE=-1.23817 MaxLexEGivenF=13.5763 IsSingletonFE=1
+1 Glue=-1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=9.94915 CountEF=0.176091 MaxLexEGivenF=-19.3064 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-9.94915 CountEF=-0.176091 MaxLexEGivenF=19.3064 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.78533 LanguageModel=10.9772 CountEF=1.49136 MaxLexFGivenE=1.20105 MaxLexEGivenF=-17.656 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.78533 LanguageModel=-10.9772 CountEF=-1.49136 MaxLexFGivenE=-1.20105 MaxLexEGivenF=17.656 IsSingletonFE=1
+1 Glue=2 WordPenalty=3.47436 EGivenFCoherent=-2.13672 LanguageModel=11.6978 CountEF=1.83885 MaxLexFGivenE=0.744931 MaxLexEGivenF=-16.4844 IsSingletonFE=-1
+0 Glue=-2 WordPenalty=-3.47436 EGivenFCoherent=2.13672 LanguageModel=-11.6978 CountEF=-1.83885 MaxLexFGivenE=-0.744931 MaxLexEGivenF=16.4844 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=1.4843 LanguageModel=-13.8895 CountEF=-1.31527 MaxLexFGivenE=-1.37604 MaxLexEGivenF=17.3123
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-1.4843 LanguageModel=13.8895 CountEF=1.31527 MaxLexFGivenE=1.37604 MaxLexEGivenF=-17.3123
+1 Glue=-1 WordPenalty=3.47436 EGivenFCoherent=-1.16633 LanguageModel=11.0299 CountEF=1.05115 MaxLexFGivenE=-0.13787 MaxLexEGivenF=-17.1624
+0 Glue=1 WordPenalty=-3.47436 EGivenFCoherent=1.16633 LanguageModel=-11.0299 CountEF=-1.05115 MaxLexFGivenE=0.13787 MaxLexEGivenF=17.1624
+0 Glue=1 WordPenalty=-3.47436 EGivenFCoherent=1.34242 LanguageModel=-13.4653 CountEF=-1.13033 MaxLexEGivenF=19.4761 IsSingletonFE=1
+1 Glue=-1 WordPenalty=3.47436 EGivenFCoherent=-1.34242 LanguageModel=13.4653 CountEF=1.13033 MaxLexEGivenF=-19.4761 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-3.47436 LanguageModel=-10.3538 MaxLexEGivenF=16.6671
+1 Glue=-1 WordPenalty=3.47436 LanguageModel=10.3538 MaxLexEGivenF=-16.6671
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=-10.9358 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=1.6872 MaxLexEGivenF=18.7711 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=10.9358 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=-1.6872 MaxLexEGivenF=-18.7711 IsSingletonFE=-1
+1 Glue=1 WordPenalty=3.47435 EGivenFCoherent=-3.68842 LanguageModel=10.795 CountEF=3.09882 MaxLexEGivenF=-19.2465 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47435 EGivenFCoherent=3.68842 LanguageModel=-10.795 CountEF=-3.09882 MaxLexEGivenF=19.2465 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.90309 LanguageModel=14.046 CountEF=0.574031 MaxLexEGivenF=-19.3916 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.90309 LanguageModel=-14.046 CountEF=-0.574031 MaxLexEGivenF=19.3916 IsSingletonFE=2
+0 Glue=-2 WordPenalty=-3.47435 LanguageModel=-4.78229 MaxLexFGivenE=-1.23817 MaxLexEGivenF=15.5172
+1 Glue=2 WordPenalty=3.47435 LanguageModel=4.78229 MaxLexFGivenE=1.23817 MaxLexEGivenF=-15.5172
+0 WordPenalty=-3.47436 EGivenFCoherent=0.30103 LanguageModel=-13.2337 CountEF=-0.176091 MaxLexFGivenE=1.37604 MaxLexEGivenF=20.3525 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.30103 LanguageModel=13.2337 CountEF=0.176091 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-20.3525 IsSingletonFE=-1
+0 WordPenalty=-3.47436 LanguageModel=-8.24641 MaxLexEGivenF=16.4085
+1 WordPenalty=3.47436 LanguageModel=8.24641 MaxLexEGivenF=-16.4085
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=2.77815 PassThrough=-1 LanguageModel=-10.7896 SampleCountF=2.47857 CountEF=0.124939 MaxLexFGivenE=1.72432 MaxLexEGivenF=20.5985 IsSingletonFE=2
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-2.77815 PassThrough=1 LanguageModel=10.7896 SampleCountF=-2.47857 CountEF=-0.124939 MaxLexFGivenE=-1.72432 MaxLexEGivenF=-20.5985 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-12.8683 CountEF=-0.39794 MaxLexEGivenF=19.1047 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=12.8683 CountEF=0.39794 MaxLexEGivenF=-19.1047 IsSingletonFE=-1
+1 Glue=-1 WordPenalty=3.47436 EGivenFCoherent=-1.78533 LanguageModel=16.5126 CountEF=1.49136 MaxLexEGivenF=-20.0747 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-3.47436 EGivenFCoherent=1.78533 LanguageModel=-16.5126 CountEF=-1.49136 MaxLexEGivenF=20.0747 IsSingletonFE=1
+1 Glue=2 WordPenalty=3.47435 LanguageModel=8.58618 MaxLexFGivenE=2.1581 MaxLexEGivenF=-13.2649
+0 Glue=-2 WordPenalty=-3.47435 LanguageModel=-8.58618 MaxLexFGivenE=-2.1581 MaxLexEGivenF=13.2649
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=10.4136 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=0.990645 MaxLexEGivenF=-18.772 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=-10.4136 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=-0.990645 MaxLexEGivenF=18.772 IsSingletonFE=1
+0 WordPenalty=-3.47436 LanguageModel=-8.24641 MaxLexEGivenF=16.4085
+1 WordPenalty=3.47436 LanguageModel=8.24641 MaxLexEGivenF=-16.4085
+0 Glue=1 WordPenalty=-3.47435 EGivenFCoherent=1.64345 LanguageModel=-5.03587 CountEF=-1.35218 MaxLexFGivenE=-1.37604 MaxLexEGivenF=14.5632 IsSingletonFE=1
+1 Glue=-1 WordPenalty=3.47435 EGivenFCoherent=-1.64345 LanguageModel=5.03587 CountEF=1.35218 MaxLexFGivenE=1.37604 MaxLexEGivenF=-14.5632 IsSingletonFE=-1
+0 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-13.5196 CountEF=-0.39794 MaxLexEGivenF=19.2025 IsSingletonFE=1
+1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=13.5196 CountEF=0.39794 MaxLexEGivenF=-19.2025 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.47436 LanguageModel=-11.5442 MaxLexFGivenE=-1.42419 MaxLexEGivenF=17.8515
+1 Glue=1 WordPenalty=3.47436 LanguageModel=11.5442 MaxLexFGivenE=1.42419 MaxLexEGivenF=-17.8515
+0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=-10.6875 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=0.348279 MaxLexEGivenF=20.5765 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=10.6875 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=-0.348279 MaxLexEGivenF=-20.5765 IsSingletonFE=-1
+1 WordPenalty=5.64583 LanguageModel=13.3889 MaxLexFGivenE=1.37604 MaxLexEGivenF=-27.3278
+0 WordPenalty=-5.64583 LanguageModel=-13.3889 MaxLexFGivenE=-1.37604 MaxLexEGivenF=27.3278
+0 WordPenalty=-3.04006 LanguageModel=-9.29247 MaxLexFGivenE=-2.80023 MaxLexEGivenF=13.8257
+1 WordPenalty=3.04006 LanguageModel=9.29247 MaxLexFGivenE=2.80023 MaxLexEGivenF=-13.8257
+0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=0.30103 LanguageModel=-7.57065 CountEF=-0.176091 MaxLexFGivenE=-1.46131 MaxLexEGivenF=16.0771 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-0.30103 LanguageModel=7.57065 CountEF=0.176091 MaxLexFGivenE=1.46131 MaxLexEGivenF=-16.0771 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=1.81954 LanguageModel=-5.0223 CountEF=-1.52504 MaxLexFGivenE=-1.37604 MaxLexEGivenF=13.2341 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-1.81954 LanguageModel=5.0223 CountEF=1.52504 MaxLexFGivenE=1.37604 MaxLexEGivenF=-13.2341 IsSingletonFE=-1
+1 Glue=2 WordPenalty=3.04006 LanguageModel=3.5952 MaxLexFGivenE=2.59323 MaxLexEGivenF=-10.2983
+0 Glue=-2 WordPenalty=-3.04006 LanguageModel=-3.5952 MaxLexFGivenE=-2.59323 MaxLexEGivenF=10.2983
+1 WordPenalty=3.04006 LanguageModel=5.03891 MaxLexFGivenE=1.37604 MaxLexEGivenF=-14.2147
+0 WordPenalty=-3.04006 LanguageModel=-5.03891 MaxLexFGivenE=-1.37604 MaxLexEGivenF=14.2147
+1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-0.60206 LanguageModel=5.5201 CountEF=0.39794 MaxLexFGivenE=2.80023 MaxLexEGivenF=-13.6524 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=0.60206 LanguageModel=-5.5201 CountEF=-0.39794 MaxLexFGivenE=-2.80023 MaxLexEGivenF=13.6524 IsSingletonFE=1
+0 Glue=-1 WordPenalty=-3.04006 LanguageModel=-6.76778 MaxLexFGivenE=-1.74158 MaxLexEGivenF=13.9499
+1 Glue=1 WordPenalty=3.04006 LanguageModel=6.76778 MaxLexFGivenE=1.74158 MaxLexEGivenF=-13.9499
+1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-0.30103 LanguageModel=3.32538 CountEF=0.176091 MaxLexFGivenE=2.3129 MaxLexEGivenF=-11.7714 IsSingletonFE=-1
+0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=0.30103 LanguageModel=-3.32538 CountEF=-0.176091 MaxLexFGivenE=-2.3129 MaxLexEGivenF=11.7714 IsSingletonFE=1
+0 WordPenalty=-3.04006 LanguageModel=-5.02229 MaxLexFGivenE=-1.37604 MaxLexEGivenF=13.2341
+1 WordPenalty=3.04006 LanguageModel=5.02229 MaxLexFGivenE=1.37604 MaxLexEGivenF=-13.2341
+1 Glue=2 WordPenalty=3.04006 LanguageModel=5.42833 MaxLexFGivenE=0.660474 MaxLexEGivenF=-13.5402
+0 Glue=-2 WordPenalty=-3.04006 LanguageModel=-5.42833 MaxLexFGivenE=-0.660474 MaxLexEGivenF=13.5402
+0 Glue=-1 WordPenalty=-3.04006 LanguageModel=-5.33359 MaxLexFGivenE=-1.23223 MaxLexEGivenF=15.3875
+1 Glue=1 WordPenalty=3.04006 LanguageModel=5.33359 MaxLexFGivenE=1.23223 MaxLexEGivenF=-15.3875
+1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-2.73878 LanguageModel=8.68678 CountEF=2.23679 MaxLexEGivenF=-18.4448 IsSingletonFE=-2
+0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=2.73878 LanguageModel=-8.68678 CountEF=-2.23679 MaxLexEGivenF=18.4448 IsSingletonFE=2
+1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-1.94448 LanguageModel=5.66253 CountEF=1.52827 MaxLexFGivenE=1.37604 MaxLexEGivenF=-14.6187 IsSingletonFE=-2
+0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=1.94448 LanguageModel=-5.66253 CountEF=-1.52827 MaxLexFGivenE=-1.37604 MaxLexEGivenF=14.6187 IsSingletonFE=2
+1 WordPenalty=3.47436 EGivenFCoherent=-0.441705 LanguageModel=11.1432 CountEF=0.141332 MaxLexFGivenE=-0.021104 MaxLexEGivenF=-19.5595 IsSingletonFE=-2
+0 WordPenalty=-3.47436 EGivenFCoherent=0.441705 LanguageModel=-11.1432 CountEF=-0.141332 MaxLexFGivenE=0.021104 MaxLexEGivenF=19.5595 IsSingletonFE=2
+0 Glue=1 WordPenalty=-3.47436 EGivenFCoherent=0.411618 LanguageModel=-10.6634 CountEF=-0.23195 MaxLexFGivenE=-1.22257 MaxLexEGivenF=18.5544 IsSingletonFE=1
+1 Glue=-1 WordPenalty=3.47436 EGivenFCoherent=-0.411618 LanguageModel=10.6634 CountEF=0.23195 MaxLexFGivenE=1.22257 MaxLexEGivenF=-18.5544 IsSingletonFE=-1
+1 Glue=-3 WordPenalty=3.90865 EGivenFCoherent=0.120657 LanguageModel=11.7364 CountEF=-0.32816 MaxLexFGivenE=0.022098 MaxLexEGivenF=-23.5563 IsSingletonFE=-2
+0 Glue=3 WordPenalty=-3.90865 EGivenFCoherent=-0.120657 LanguageModel=-11.7364 CountEF=0.32816 MaxLexFGivenE=-0.022098 MaxLexEGivenF=23.5563 IsSingletonFE=2
+0 WordPenalty=-3.90865 EGivenFCoherent=3.10788 LanguageModel=-10.505 CountEF=-2.63679 MaxLexFGivenE=0.61079 MaxLexEGivenF=23.8661 IsSingletonFE=2
+1 WordPenalty=3.90865 EGivenFCoherent=-3.10788 LanguageModel=10.505 CountEF=2.63679 MaxLexFGivenE=-0.61079 MaxLexEGivenF=-23.8661 IsSingletonFE=-2
+0 Glue=-3 WordPenalty=-3.90865 EGivenFCoherent=2.45314 LanguageModel=-11.9247 CountEF=-2.05192 MaxLexFGivenE=0.747261 MaxLexEGivenF=24.0026 IsSingletonFE=2
+1 Glue=3 WordPenalty=3.90865 EGivenFCoherent=-2.45314 LanguageModel=11.9247 CountEF=2.05192 MaxLexFGivenE=-0.747261 MaxLexEGivenF=-24.0026 IsSingletonFE=-2
+1 WordPenalty=3.90865 EGivenFCoherent=-0.976252 LanguageModel=11.6283 CountEF=0.618289 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-2
+0 WordPenalty=-3.90865 EGivenFCoherent=0.976252 LanguageModel=-11.6283 CountEF=-0.618289 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=2
+0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=2.18375 LanguageModel=-11.7479 CountEF=-1.56277 MaxLexFGivenE=0.178176 MaxLexEGivenF=23.9043 IsSingletonFE=3
+1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-2.18375 LanguageModel=11.7479 CountEF=1.56277 MaxLexFGivenE=-0.178176 MaxLexEGivenF=-23.9043 IsSingletonFE=-3
+1 WordPenalty=3.90865 EGivenFCoherent=-1.51253 LanguageModel=11.8154 CountEF=1.46903 MaxLexFGivenE=0.719001 MaxLexEGivenF=-23.0208
+0 WordPenalty=-3.90865 EGivenFCoherent=1.51253 LanguageModel=-11.8154 CountEF=-1.46903 MaxLexFGivenE=-0.719001 MaxLexEGivenF=23.0208
+0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=3.05464 LanguageModel=-10.512 CountEF=-2.69478 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=1
+1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-3.05464 LanguageModel=10.512 CountEF=2.69478 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-1
+1 Glue=4 WordPenalty=3.90865 EGivenFCoherent=-3.67298 LanguageModel=11.9165 CountEF=3.22112 MaxLexFGivenE=0.900203 MaxLexEGivenF=-22.3331 IsSingletonFE=-1
+0 Glue=-4 WordPenalty=-3.90865 EGivenFCoherent=3.67298 LanguageModel=-11.9165 CountEF=-3.22112 MaxLexFGivenE=-0.900203 MaxLexEGivenF=22.3331 IsSingletonFE=1
+0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=0.709982 LanguageModel=-12.0245 CountEF=-0.366479 MaxLexFGivenE=-0.289413 MaxLexEGivenF=23.0284 IsSingletonFE=2
+1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-0.709982 LanguageModel=12.0245 CountEF=0.366479 MaxLexFGivenE=0.289413 MaxLexEGivenF=-23.0284 IsSingletonFE=-2
+1 Glue=2 WordPenalty=3.90865 EGivenFCoherent=-2.61849 LanguageModel=11.6283 CountEF=2.16313 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-2
+0 Glue=-2 WordPenalty=-3.90865 EGivenFCoherent=2.61849 LanguageModel=-11.6283 CountEF=-2.16313 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=2
+1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-0.315212 LanguageModel=12.3962 CountEF=0.089163 MaxLexFGivenE=-0.343475 MaxLexEGivenF=-23.3382 IsSingletonFE=-2
+0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=0.315212 LanguageModel=-12.3962 CountEF=-0.089163 MaxLexFGivenE=0.343475 MaxLexEGivenF=23.3382 IsSingletonFE=2
+0 WordPenalty=-3.90865 EGivenFCoherent=0.909065 LanguageModel=-12 CountEF=-0.575612 MaxLexEGivenF=23.1708 IsSingletonFE=2
+1 WordPenalty=3.90865 EGivenFCoherent=-0.909065 LanguageModel=12 CountEF=0.575612 MaxLexEGivenF=-23.1708 IsSingletonFE=-2
+1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-2.24304 LanguageModel=10.7662 SampleCountF=-2.47857 CountEF=-0.51188 MaxLexFGivenE=0.632895 MaxLexEGivenF=-22.861 IsSingletonFE=-1
+0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=2.24304 LanguageModel=-10.7662 SampleCountF=2.47857 CountEF=0.51188 MaxLexFGivenE=-0.632895 MaxLexEGivenF=22.861 IsSingletonFE=1
+1 Glue=-3 WordPenalty=3.90865 EGivenFCoherent=-0.521885 LanguageModel=11.6497 SampleCountF=-2.47857 CountEF=-1.96221 MaxLexFGivenE=0.715771 MaxLexEGivenF=-23.7306
+0 Glue=3 WordPenalty=-3.90865 EGivenFCoherent=0.521885 LanguageModel=-11.6497 SampleCountF=2.47857 CountEF=1.96221 MaxLexFGivenE=-0.715771 MaxLexEGivenF=23.7306
+1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-1.39309 LanguageModel=12.0112 CountEF=1.33806 MaxLexFGivenE=-0.114373 MaxLexEGivenF=-23.6928
+0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=1.39309 LanguageModel=-12.0112 CountEF=-1.33806 MaxLexFGivenE=0.114373 MaxLexEGivenF=23.6928
+1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-0.847935 LanguageModel=12.0536 CountEF=0.548871 MaxLexFGivenE=0.715764 MaxLexEGivenF=-23.7306 IsSingletonFE=-2
+0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=0.847935 LanguageModel=-12.0536 CountEF=-0.548871 MaxLexFGivenE=-0.715764 MaxLexEGivenF=23.7306 IsSingletonFE=2
+1 WordPenalty=3.90865
EGivenFCoherent=-2.85798 LanguageModel=11.5529 CountEF=2.53375 MaxLexFGivenE=-0.114373 MaxLexEGivenF=-23.6928 IsSingletonFE=-1 +0 WordPenalty=-3.90865 EGivenFCoherent=2.85798 LanguageModel=-11.5529 CountEF=-2.53375 MaxLexFGivenE=0.114373 MaxLexEGivenF=23.6928 IsSingletonFE=1 +1 WordPenalty=3.90865 EGivenFCoherent=-2.81146 LanguageModel=10.3969 SampleCountF=-2.47857 CountEF=0.00613 MaxLexEGivenF=-23.1708 IsSingletonFE=-1 +0 WordPenalty=-3.90865 EGivenFCoherent=2.81146 LanguageModel=-10.3969 SampleCountF=2.47857 CountEF=-0.00613 MaxLexEGivenF=23.1708 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-1.62198 LanguageModel=11.5202 CountEF=1.21555 MaxLexFGivenE=1.24368 MaxLexEGivenF=-22.1657 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=1.62198 LanguageModel=-11.5202 CountEF=-1.21555 MaxLexFGivenE=-1.24368 MaxLexEGivenF=22.1657 IsSingletonFE=2 +0 WordPenalty=-3.90865 EGivenFCoherent=2.17609 LanguageModel=-11.2244 SampleCountF=2.47857 CountEF=0.477121 MaxLexFGivenE=-0.632895 MaxLexEGivenF=22.861 +1 WordPenalty=3.90865 EGivenFCoherent=-2.17609 LanguageModel=11.2244 SampleCountF=-2.47857 CountEF=-0.477121 MaxLexFGivenE=0.632895 MaxLexEGivenF=-22.861 +0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=1.80912 LanguageModel=-11.2452 CountEF=-1.31053 MaxLexFGivenE=0.61079 MaxLexEGivenF=23.8661 IsSingletonFE=2 +1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-1.80912 LanguageModel=11.2452 CountEF=1.31053 MaxLexFGivenE=-0.61079 MaxLexEGivenF=-23.8661 IsSingletonFE=-2 +1 Glue=-4 WordPenalty=3.90865 EGivenFCoherent=0.412727 LanguageModel=11.6283 CountEF=-0.681041 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-2 +0 Glue=4 WordPenalty=-3.90865 EGivenFCoherent=-0.412727 LanguageModel=-11.6283 CountEF=0.681041 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=2 +1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-2.00145 LanguageModel=11.17 CountEF=1.60373 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=2.00145 LanguageModel=-11.17 CountEF=-1.60373 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=2 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=2.01022 LanguageModel=-11.7364 CountEF=-1.80615 MaxLexFGivenE=-0.022098 MaxLexEGivenF=23.5563 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-2.01022 LanguageModel=11.7364 CountEF=1.80615 MaxLexFGivenE=0.022098 MaxLexEGivenF=-23.5563 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-3.90865 EGivenFCoherent=2.54674 LanguageModel=-10.3215 SampleCountF=2.47857 CountEF=0.21025 MaxLexFGivenE=0.74726 MaxLexEGivenF=24.0026 IsSingletonFE=1 +1 Glue=-2 WordPenalty=3.90865 EGivenFCoherent=-2.54674 LanguageModel=10.3215 SampleCountF=-2.47857 CountEF=-0.21025 MaxLexFGivenE=-0.74726 MaxLexEGivenF=-24.0026 IsSingletonFE=-1 +1 WordPenalty=3.90865 EGivenFCoherent=-2.23553 LanguageModel=11.6398 CountEF=1.86637 MaxLexFGivenE=0.432614 MaxLexEGivenF=-23.2091 IsSingletonFE=-1 +0 WordPenalty=-3.90865 EGivenFCoherent=2.23553 LanguageModel=-11.6398 CountEF=-1.86637 MaxLexFGivenE=-0.432614 MaxLexEGivenF=23.2091 IsSingletonFE=1 +1 Glue=-3 WordPenalty=3.90865 EGivenFCoherent=0.260538 LanguageModel=12.1947 CountEF=-0.346951 MaxLexFGivenE=0.022098 MaxLexEGivenF=-23.5563 IsSingletonFE=-1 +0 Glue=3 WordPenalty=-3.90865 EGivenFCoherent=-0.260538 LanguageModel=-12.1947 CountEF=0.346951 MaxLexFGivenE=-0.022098 MaxLexEGivenF=23.5563 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-3.1236 LanguageModel=10.8221 SampleCountF=-2.47857 CountEF=0.286569 MaxLexFGivenE=0.082877 
MaxLexEGivenF=-24.0404 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=3.1236 LanguageModel=-10.8221 SampleCountF=2.47857 CountEF=-0.286569 MaxLexFGivenE=-0.082877 MaxLexEGivenF=24.0404 IsSingletonFE=1 +1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-1.65749 LanguageModel=11.5202 CountEF=1.1386 MaxLexFGivenE=1.24368 MaxLexEGivenF=-22.1657 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=1.65749 LanguageModel=-11.5202 CountEF=-1.1386 MaxLexFGivenE=-1.24368 MaxLexEGivenF=22.1657 IsSingletonFE=2 +0 WordPenalty=-3.90865 EGivenFCoherent=1.26505 LanguageModel=-11.6283 CountEF=-0.884561 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=2 +1 WordPenalty=3.90865 EGivenFCoherent=-1.26505 LanguageModel=11.6283 CountEF=0.884561 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=1.63347 LanguageModel=-12.0245 CountEF=-1.33746 MaxLexFGivenE=-0.289413 MaxLexEGivenF=23.0284 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-1.63347 LanguageModel=12.0245 CountEF=1.33746 MaxLexFGivenE=0.289413 MaxLexEGivenF=-23.0284 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=3.90865 EGivenFCoherent=0.169878 LanguageModel=12.5119 CountEF=-0.267771 MaxLexFGivenE=0.715764 MaxLexEGivenF=-23.7306 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-3.90865 EGivenFCoherent=-0.169878 LanguageModel=-12.5119 CountEF=0.267771 MaxLexFGivenE=-0.715764 MaxLexEGivenF=23.7306 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=1.6105 LanguageModel=-11.9455 CountEF=-1.18816 MaxLexFGivenE=-1.32655 MaxLexEGivenF=23.0354 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-1.6105 LanguageModel=11.9455 CountEF=1.18816 MaxLexFGivenE=1.32655 MaxLexEGivenF=-23.0354 IsSingletonFE=-2 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-1.94327 LanguageModel=11.6283 CountEF=1.84091 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=1.94327 LanguageModel=-11.6283 CountEF=-1.84091 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 +1 Glue=-2 WordPenalty=3.90865 EGivenFCoherent=0.573983 LanguageModel=12.319 SampleCountF=-2.47857 CountEF=-2.79018 MaxLexFGivenE=2.7262 MaxLexEGivenF=-21.4383 IsSingletonFE=1 +0 Glue=2 WordPenalty=-3.90865 EGivenFCoherent=-0.573983 LanguageModel=-12.319 SampleCountF=2.47857 CountEF=2.79018 MaxLexFGivenE=-2.7262 MaxLexEGivenF=21.4383 IsSingletonFE=-1 +0 WordPenalty=-3.90865 EGivenFCoherent=1.87934 LanguageModel=-12.0245 CountEF=-1.41417 MaxLexFGivenE=-0.289413 MaxLexEGivenF=23.0284 IsSingletonFE=2 +1 WordPenalty=3.90865 EGivenFCoherent=-1.87934 LanguageModel=12.0245 CountEF=1.41417 MaxLexFGivenE=0.289413 MaxLexEGivenF=-23.0284 IsSingletonFE=-2 +1 WordPenalty=3.90865 EGivenFCoherent=-2.90309 LanguageModel=10.3969 SampleCountF=-2.47857 CountEF=0.09691 MaxLexEGivenF=-23.1708 IsSingletonFE=-1 +0 WordPenalty=-3.90865 EGivenFCoherent=2.90309 LanguageModel=-10.3969 SampleCountF=2.47857 CountEF=-0.09691 MaxLexEGivenF=23.1708 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=3.01368 LanguageModel=-11.5202 CountEF=-2.4514 MaxLexFGivenE=-1.24368 MaxLexEGivenF=22.1657 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-3.01368 LanguageModel=11.5202 CountEF=2.4514 MaxLexFGivenE=1.24368 MaxLexEGivenF=-22.1657 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=0.675222 LanguageModel=-11.5202 CountEF=-0.334289 MaxLexFGivenE=-1.24368 MaxLexEGivenF=22.1657 IsSingletonFE=2 +1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-0.675222 LanguageModel=11.5202 
CountEF=0.334289 MaxLexFGivenE=1.24368 MaxLexEGivenF=-22.1657 IsSingletonFE=-2 +0 WordPenalty=-3.90865 EGivenFCoherent=1.7093 LanguageModel=-11.6283 CountEF=-1.42824 MaxLexFGivenE=-0.632888 MaxLexEGivenF=22.861 IsSingletonFE=1 +1 WordPenalty=3.90865 EGivenFCoherent=-1.7093 LanguageModel=11.6283 CountEF=1.42824 MaxLexFGivenE=0.632888 MaxLexEGivenF=-22.861 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-3.90865 EGivenFCoherent=1.69412 LanguageModel=-11.4448 CountEF=-1.51415 MaxLexFGivenE=-0.496417 MaxLexEGivenF=22.9975 IsSingletonFE=1 +1 Glue=-1 WordPenalty=3.90865 EGivenFCoherent=-1.69412 LanguageModel=11.4448 CountEF=1.51415 MaxLexFGivenE=0.496417 MaxLexEGivenF=-22.9975 IsSingletonFE=-1 +0 WordPenalty=-3.90865 EGivenFCoherent=2.13717 LanguageModel=-11.5529 CountEF=-1.7042 MaxLexFGivenE=0.114373 MaxLexEGivenF=23.6928 IsSingletonFE=2 +1 WordPenalty=3.90865 EGivenFCoherent=-2.13717 LanguageModel=11.5529 CountEF=1.7042 MaxLexFGivenE=-0.114373 MaxLexEGivenF=-23.6928 IsSingletonFE=-2 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-2.31471 LanguageModel=12.0536 CountEF=1.97428 MaxLexFGivenE=0.715764 MaxLexEGivenF=-23.7306 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=2.31471 LanguageModel=-12.0536 CountEF=-1.97428 MaxLexFGivenE=-0.715764 MaxLexEGivenF=23.7306 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=2.08673 LanguageModel=-12.0536 CountEF=-1.64808 MaxLexFGivenE=-0.715764 MaxLexEGivenF=23.7306 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-2.08673 LanguageModel=12.0536 CountEF=1.64808 MaxLexFGivenE=0.715764 MaxLexEGivenF=-23.7306 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.90866 EGivenFCoherent=2.47712 LanguageModel=-11.9403 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=-0.611791 MaxLexEGivenF=20.462 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.90866 EGivenFCoherent=-2.47712 LanguageModel=11.9403 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=0.611791 MaxLexEGivenF=-20.462 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-3.90866 EGivenFCoherent=-0.572535 LanguageModel=-12.9267 SampleCountF=2.47857 CountEF=2.95121 MaxLexFGivenE=-3.31588 MaxLexEGivenF=18.3439 +1 Glue=-2 WordPenalty=3.90866 EGivenFCoherent=0.572535 LanguageModel=12.9267 SampleCountF=-2.47857 CountEF=-2.95121 MaxLexFGivenE=3.31588 MaxLexEGivenF=-18.3439 +1 Glue=-4 WordPenalty=3.90865 EGivenFCoherent=1.49335 LanguageModel=13.0348 SampleCountF=-2.47857 CountEF=-3.76669 MaxLexFGivenE=2.70509 MaxLexEGivenF=-19.0392 +0 Glue=4 WordPenalty=-3.90865 EGivenFCoherent=-1.49335 LanguageModel=-13.0348 SampleCountF=2.47857 CountEF=3.76669 MaxLexFGivenE=-2.70509 MaxLexEGivenF=19.0392 +1 Glue=-2 WordPenalty=3.90866 EGivenFCoherent=-1.17566 LanguageModel=11.8583 SampleCountF=-2.47857 CountEF=-1.53624 MaxLexFGivenE=1.35905 MaxLexEGivenF=-19.6302 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-3.90866 EGivenFCoherent=1.17566 LanguageModel=-11.8583 SampleCountF=2.47857 CountEF=1.53624 MaxLexFGivenE=-1.35905 MaxLexEGivenF=19.6302 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-4.34295 LanguageModel=-14.2862 MaxLexFGivenE=-1.42419 MaxLexEGivenF=24.3226 +1 Glue=1 WordPenalty=4.34295 LanguageModel=14.2862 MaxLexFGivenE=1.42419 MaxLexEGivenF=-24.3226 +1 Glue=-1 WordPenalty=5.21154 LanguageModel=9.78165 MaxLexFGivenE=1.37604 MaxLexEGivenF=-24.8841 +0 Glue=1 WordPenalty=-5.21154 LanguageModel=-9.78165 MaxLexFGivenE=-1.37604 MaxLexEGivenF=24.8841 +1 Glue=2 WordPenalty=5.64583 LanguageModel=15.1286 MaxLexFGivenE=1.37604 MaxLexEGivenF=-28.1931 +0 Glue=-2 WordPenalty=-5.64583 LanguageModel=-15.1286 MaxLexFGivenE=-1.37604 
MaxLexEGivenF=28.1931 +1 Glue=2 WordPenalty=5.64583 EGivenFCoherent=-0.90309 LanguageModel=15.635 CountEF=0.574031 MaxLexFGivenE=1.42419 MaxLexEGivenF=-31.1737 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-5.64583 EGivenFCoherent=0.90309 LanguageModel=-15.635 CountEF=-0.574031 MaxLexFGivenE=-1.42419 MaxLexEGivenF=31.1737 IsSingletonFE=2 +1 Glue=1 WordPenalty=5.64583 LanguageModel=14.0495 MaxLexFGivenE=1.37604 MaxLexEGivenF=-25.6992 +0 Glue=-1 WordPenalty=-5.64583 LanguageModel=-14.0495 MaxLexFGivenE=-1.37604 MaxLexEGivenF=25.6992 +1 Glue=1 WordPenalty=5.64583 EGivenFCoherent=-2.47712 PassThrough=1 LanguageModel=12.6584 SampleCountF=-2.47857 CountEF=-0.30103 MaxLexFGivenE=-0.355566 MaxLexEGivenF=-30.5538 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-5.64583 EGivenFCoherent=2.47712 PassThrough=-1 LanguageModel=-12.6584 SampleCountF=2.47857 CountEF=0.30103 MaxLexFGivenE=0.355566 MaxLexEGivenF=30.5538 IsSingletonFE=1 +1 Glue=2 WordPenalty=3.90865 EGivenFCoherent=-0.69897 LanguageModel=13.8527 CountEF=0.477121 MaxLexEGivenF=-21.0924 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-3.90865 EGivenFCoherent=0.69897 LanguageModel=-13.8527 CountEF=-0.477121 MaxLexEGivenF=21.0924 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=0.60206 LanguageModel=-2.73296 CountEF=-0.39794 MaxLexFGivenE=0.048147 MaxLexEGivenF=14.8263 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-0.60206 LanguageModel=2.73296 CountEF=0.39794 MaxLexFGivenE=-0.048147 MaxLexEGivenF=-14.8263 IsSingletonFE=-1 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-0.60206 LanguageModel=2.59288 CountEF=0.39794 MaxLexFGivenE=-0.048147 MaxLexEGivenF=-13.4959 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=0.60206 LanguageModel=-2.59288 CountEF=-0.39794 MaxLexFGivenE=0.048147 MaxLexEGivenF=13.4959 IsSingletonFE=1 +1 WordPenalty=4.34294 EGivenFCoherent=-1.64345 LanguageModel=15.8963 CountEF=1.35218 MaxLexFGivenE=1.37604 MaxLexEGivenF=-26.2043 IsSingletonFE=-1 +0 WordPenalty=-4.34294 EGivenFCoherent=1.64345 LanguageModel=-15.8963 CountEF=-1.35218 MaxLexFGivenE=-1.37604 MaxLexEGivenF=26.2043 IsSingletonFE=1 +0 WordPenalty=-3.90865 EGivenFCoherent=1.90309 LanguageModel=-16.26 CountEF=-1.60746 MaxLexEGivenF=24.0659 IsSingletonFE=1 +1 WordPenalty=3.90865 EGivenFCoherent=-1.90309 LanguageModel=16.26 CountEF=1.60746 MaxLexEGivenF=-24.0659 IsSingletonFE=-1 +1 WordPenalty=3.47435 EGivenFCoherent=-0.90309 LanguageModel=7.96248 CountEF=0.574031 MaxLexFGivenE=1.37604 MaxLexEGivenF=-17.4611 IsSingletonFE=-2 +0 WordPenalty=-3.47435 EGivenFCoherent=0.90309 LanguageModel=-7.96248 CountEF=-0.574031 MaxLexFGivenE=-1.37604 MaxLexEGivenF=17.4611 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.47435 EGivenFCoherent=-1.64345 LanguageModel=6.04554 CountEF=1.35218 MaxLexFGivenE=1.37604 MaxLexEGivenF=-15.9378 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.47435 EGivenFCoherent=1.64345 LanguageModel=-6.04554 CountEF=-1.35218 MaxLexFGivenE=-1.37604 MaxLexEGivenF=15.9378 IsSingletonFE=1 +1 WordPenalty=3.47436 EGivenFCoherent=-3.47914 LanguageModel=14.9314 CountEF=3.06286 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-20.4386 IsSingletonFE=-1 +0 WordPenalty=-3.47436 EGivenFCoherent=3.47914 LanguageModel=-14.9314 CountEF=-3.06286 MaxLexFGivenE=1.37604 MaxLexEGivenF=20.4386 IsSingletonFE=1 +0 Glue=-2 WordPenalty=-3.47435 LanguageModel=-8.79805 MaxLexFGivenE=-1.37604 MaxLexEGivenF=18.9183 +1 Glue=2 WordPenalty=3.47435 LanguageModel=8.79805 MaxLexFGivenE=1.37604 MaxLexEGivenF=-18.9183 +0 Glue=2 WordPenalty=-2.60577 LanguageModel=-10.9537 MaxLexFGivenE=1.37604 
MaxLexEGivenF=19.1167 +1 Glue=-2 WordPenalty=2.60577 LanguageModel=10.9537 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-19.1167 +1 WordPenalty=3.04006 EGivenFCoherent=-1.81954 LanguageModel=13.7702 CountEF=1.52504 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-18.36 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=1.81954 LanguageModel=-13.7702 CountEF=-1.52504 MaxLexFGivenE=1.37604 MaxLexEGivenF=18.36 IsSingletonFE=1 +0 WordPenalty=-3.04006 EGivenFCoherent=0.30103 LanguageModel=-12.1602 CountEF=-0.176091 MaxLexFGivenE=-0.048147 MaxLexEGivenF=18.6953 IsSingletonFE=1 +1 WordPenalty=3.04006 EGivenFCoherent=-0.30103 LanguageModel=12.1602 CountEF=0.176091 MaxLexFGivenE=0.048147 MaxLexEGivenF=-18.6953 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=1.83569 LanguageModel=-12.293 CountEF=-1.66276 MaxLexFGivenE=1.37604 MaxLexEGivenF=19.1573 +1 WordPenalty=3.04006 EGivenFCoherent=-1.83569 LanguageModel=12.293 CountEF=1.66276 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-19.1573 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-1.18327 LanguageModel=13.1894 CountEF=1.09342 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-18.4444 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=1.18327 LanguageModel=-13.1894 CountEF=-1.09342 MaxLexFGivenE=1.37604 MaxLexEGivenF=18.4444 +1 WordPenalty=3.04006 EGivenFCoherent=-1.90309 LanguageModel=11.7306 CountEF=1.60746 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-18.8465 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=1.90309 LanguageModel=-11.7306 CountEF=-1.60746 MaxLexFGivenE=1.37604 MaxLexEGivenF=18.8465 IsSingletonFE=1 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-0.60206 LanguageModel=6.65037 CountEF=0.39794 MaxLexEGivenF=-15.3117 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=0.60206 LanguageModel=-6.65037 CountEF=-0.39794 MaxLexEGivenF=15.3117 IsSingletonFE=1 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=0.30103 LanguageModel=-11.5437 CountEF=-0.176091 MaxLexEGivenF=16.3628 IsSingletonFE=1 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-0.30103 LanguageModel=11.5437 CountEF=0.176091 MaxLexEGivenF=-16.3628 IsSingletonFE=-1 +1 WordPenalty=3.04006 EGivenFCoherent=-2.24551 LanguageModel=10.3564 CountEF=1.75012 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-20.3873 IsSingletonFE=-2 +0 WordPenalty=-3.04006 EGivenFCoherent=2.24551 LanguageModel=-10.3564 CountEF=-1.75012 MaxLexFGivenE=1.37604 MaxLexEGivenF=20.3873 IsSingletonFE=2 +0 Glue=1 WordPenalty=-3.04006 LanguageModel=-11.1294 MaxLexEGivenF=19.6878 +1 Glue=-1 WordPenalty=3.04006 LanguageModel=11.1294 MaxLexEGivenF=-19.6878 +1 WordPenalty=3.04006 EGivenFCoherent=-1.90309 LanguageModel=11.5292 CountEF=1.60746 MaxLexFGivenE=-0.888714 MaxLexEGivenF=-15.4038 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=1.90309 LanguageModel=-11.5292 CountEF=-1.60746 MaxLexFGivenE=0.888714 MaxLexEGivenF=15.4038 IsSingletonFE=1 +1 Glue=3 WordPenalty=3.04006 EGivenFCoherent=-2.85431 LanguageModel=10.8505 CountEF=2.33445 MaxLexEGivenF=-17.4612 IsSingletonFE=-2 +0 Glue=-3 WordPenalty=-3.04006 EGivenFCoherent=2.85431 LanguageModel=-10.8505 CountEF=-2.33445 MaxLexEGivenF=17.4612 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-0.30103 LanguageModel=9.77483 CountEF=0.176091 MaxLexFGivenE=1.33892 MaxLexEGivenF=-16.9356 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=0.30103 LanguageModel=-9.77483 CountEF=-0.176091 MaxLexFGivenE=-1.33892 MaxLexEGivenF=16.9356 IsSingletonFE=1 +0 WordPenalty=-2.17147 EGivenFCoherent=0.60206 LanguageModel=0.125239 CountEF=-0.39794 MaxLexFGivenE=-0.158863 MaxLexEGivenF=9.71589 
IsSingletonFE=1 +1 WordPenalty=2.17147 EGivenFCoherent=-0.60206 LanguageModel=-0.125239 CountEF=0.39794 MaxLexFGivenE=0.158863 MaxLexEGivenF=-9.71589 IsSingletonFE=-1 +1 WordPenalty=3.04006 LanguageModel=11.5655 MaxLexEGivenF=-17.8251 +0 WordPenalty=-3.04006 LanguageModel=-11.5655 MaxLexEGivenF=17.8251 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-9.16033 CountEF=-0.39794 MaxLexFGivenE=-1.37604 MaxLexEGivenF=14.0292 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=9.16033 CountEF=0.39794 MaxLexFGivenE=1.37604 MaxLexEGivenF=-14.0292 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=0.60206 LanguageModel=-10.6237 CountEF=-0.39794 MaxLexFGivenE=-1.37604 MaxLexEGivenF=18.8567 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-0.60206 LanguageModel=10.6237 CountEF=0.39794 MaxLexFGivenE=1.37604 MaxLexEGivenF=-18.8567 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-3.47436 EGivenFCoherent=0.47712 LanguageModel=-15.4303 CountEF=-0.30103 MaxLexEGivenF=19.3982 IsSingletonFE=1 +1 Glue=-2 WordPenalty=3.47436 EGivenFCoherent=-0.47712 LanguageModel=15.4303 CountEF=0.30103 MaxLexEGivenF=-19.3982 IsSingletonFE=-1 +1 WordPenalty=3.47436 EGivenFCoherent=-2.26245 LanguageModel=10.5834 CountEF=1.79239 MaxLexEGivenF=-19.2511 IsSingletonFE=-2 +0 WordPenalty=-3.47436 EGivenFCoherent=2.26245 LanguageModel=-10.5834 CountEF=-1.79239 MaxLexEGivenF=19.2511 IsSingletonFE=2 +1 WordPenalty=3.04006 EGivenFCoherent=-0.30103 LanguageModel=7.02647 CountEF=0.176091 MaxLexFGivenE=1.37604 MaxLexEGivenF=-15.9491 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=0.30103 LanguageModel=-7.02647 CountEF=-0.176091 MaxLexFGivenE=-1.37604 MaxLexEGivenF=15.9491 IsSingletonFE=1 +0 Glue=2 WordPenalty=-2.17147 LanguageModel=-3.16095 MaxLexEGivenF=14.7894 +1 Glue=-2 WordPenalty=2.17147 LanguageModel=3.16095 MaxLexEGivenF=-14.7894 +0 WordPenalty=-2.60577 EGivenFCoherent=1.64345 LanguageModel=-2.7904 CountEF=-1.35218 MaxLexEGivenF=17.0299 IsSingletonFE=1 +1 WordPenalty=2.60577 EGivenFCoherent=-1.64345 LanguageModel=2.7904 CountEF=1.35218 MaxLexEGivenF=-17.0299 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=2.15534 LanguageModel=-1.65618 CountEF=-1.85733 MaxLexEGivenF=15.1242 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-2.15534 LanguageModel=1.65618 CountEF=1.85733 MaxLexEGivenF=-15.1242 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=0.30103 LanguageModel=-2.77025 CountEF=-0.176091 MaxLexFGivenE=-0.414525 MaxLexEGivenF=11.7913 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-0.30103 LanguageModel=2.77025 CountEF=0.176091 MaxLexFGivenE=0.414525 MaxLexEGivenF=-11.7913 IsSingletonFE=-1 +1 WordPenalty=2.60577 EGivenFCoherent=-0.30103 LanguageModel=4.60866 CountEF=0.176091 MaxLexEGivenF=-17.5364 IsSingletonFE=-1 +0 WordPenalty=-2.60577 EGivenFCoherent=0.30103 LanguageModel=-4.60866 CountEF=-0.176091 MaxLexEGivenF=17.5364 IsSingletonFE=1 +0 WordPenalty=-2.60577 LanguageModel=-1.41116 MaxLexEGivenF=13.9436 +1 WordPenalty=2.60577 LanguageModel=1.41116 MaxLexEGivenF=-13.9436 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-2.51851 LanguageModel=7.8033 CountEF=2.00216 MaxLexEGivenF=-15.496 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=2.51851 LanguageModel=-7.8033 CountEF=-2.00216 MaxLexEGivenF=15.496 IsSingletonFE=2 +0 Glue=-1 WordPenalty=-3.47435 EGivenFCoherent=0.60206 LanguageModel=-8.3605 CountEF=-0.39794 MaxLexEGivenF=21.8127 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.47435 
EGivenFCoherent=-0.60206 LanguageModel=8.3605 CountEF=0.39794 MaxLexEGivenF=-21.8127 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=-0.259638 LanguageModel=-11.2443 CountEF=0.25528 MaxLexEGivenF=17.1122 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=0.259638 LanguageModel=11.2443 CountEF=-0.25528 MaxLexEGivenF=-17.1122 +0 Glue=-1 WordPenalty=-3.47436 LanguageModel=-9.28868 MaxLexFGivenE=-1.37604 MaxLexEGivenF=17.953 +1 Glue=1 WordPenalty=3.47436 LanguageModel=9.28868 MaxLexFGivenE=1.37604 MaxLexEGivenF=-17.953 +1 Glue=-2 WordPenalty=2.17147 EGivenFCoherent=0.30103 LanguageModel=7.3105 CountEF=-0.176091 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-12.9753 IsSingletonFE=1 +0 Glue=2 WordPenalty=-2.17147 EGivenFCoherent=-0.30103 LanguageModel=-7.3105 CountEF=0.176091 MaxLexFGivenE=1.37604 MaxLexEGivenF=12.9753 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-2.60577 LanguageModel=-3.2592 MaxLexFGivenE=0.048147 MaxLexEGivenF=13.6615 +1 Glue=1 WordPenalty=2.60577 LanguageModel=3.2592 MaxLexFGivenE=-0.048147 MaxLexEGivenF=-13.6615 +0 WordPenalty=-2.60577 EGivenFCoherent=0.09691 LanguageModel=-12.7025 CountEF=-0.079181 MaxLexFGivenE=1.37604 MaxLexEGivenF=16.5627 +1 WordPenalty=2.60577 EGivenFCoherent=-0.09691 LanguageModel=12.7025 CountEF=0.079181 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-16.5627 +0 Glue=2 WordPenalty=-2.60577 EGivenFCoherent=-0.30103 LanguageModel=-10.671 CountEF=0.176091 MaxLexFGivenE=1.37604 MaxLexEGivenF=17.1648 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=2.60577 EGivenFCoherent=0.30103 LanguageModel=10.671 CountEF=-0.176091 MaxLexFGivenE=-1.37604 MaxLexEGivenF=-17.1648 IsSingletonFE=1 +1 WordPenalty=2.60577 EGivenFCoherent=-1.90309 LanguageModel=5.26399 CountEF=1.60746 MaxLexEGivenF=-17.6076 IsSingletonFE=-1 +0 WordPenalty=-2.60577 EGivenFCoherent=1.90309 LanguageModel=-5.26399 CountEF=-1.60746 MaxLexEGivenF=17.6076 IsSingletonFE=1 +1 WordPenalty=4.77725 EGivenFCoherent=-2.7796 LanguageModel=13.2353 CountEF=2.37885 MaxLexFGivenE=0.443473 MaxLexEGivenF=-29.3492 IsSingletonFE=-1 +0 WordPenalty=-4.77725 EGivenFCoherent=2.7796 LanguageModel=-13.2353 CountEF=-2.37885 MaxLexFGivenE=-0.443473 MaxLexEGivenF=29.3492 IsSingletonFE=1 +1 Glue=-1 WordPenalty=1.73718 EGivenFCoherent=-1.45738 LanguageModel=4.35907 CountEF=1.33746 MaxLexFGivenE=0.639109 MaxLexEGivenF=-14.0284 +0 Glue=1 WordPenalty=-1.73718 EGivenFCoherent=1.45738 LanguageModel=-4.35907 CountEF=-1.33746 MaxLexFGivenE=-0.639109 MaxLexEGivenF=14.0284 +1 Glue=3 WordPenalty=4.34296 EGivenFCoherent=-3.10766 LanguageModel=11.2713 CountEF=2.69897 MaxLexFGivenE=-0.41375 MaxLexEGivenF=-30.1204 IsSingletonFE=-1 +0 Glue=-3 WordPenalty=-4.34296 EGivenFCoherent=3.10766 LanguageModel=-11.2713 CountEF=-2.69897 MaxLexFGivenE=0.41375 MaxLexEGivenF=30.1204 IsSingletonFE=1 +1 Glue=4 WordPenalty=2.60577 EGivenFCoherent=-2.90719 LanguageModel=6.03872 CountEF=2.56864 MaxLexFGivenE=-0.391386 MaxLexEGivenF=-14.0414 IsSingletonFE=-1 +0 Glue=-4 WordPenalty=-2.60577 EGivenFCoherent=2.90719 LanguageModel=-6.03872 CountEF=-2.56864 MaxLexFGivenE=0.391386 MaxLexEGivenF=14.0414 IsSingletonFE=1 +0 Glue=1 WordPenalty=-2.60577 EGivenFCoherent=1.36463 LanguageModel=-6.08939 CountEF=-1.24852 MaxLexFGivenE=0.307186 MaxLexEGivenF=14.6151 +1 Glue=-1 WordPenalty=2.60577 EGivenFCoherent=-1.36463 LanguageModel=6.08939 CountEF=1.24852 MaxLexFGivenE=-0.307186 MaxLexEGivenF=-14.6151 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=3.38509 LanguageModel=-5.83422 CountEF=-3.0187 MaxLexFGivenE=0.206486 MaxLexEGivenF=14.1868 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.60577 
EGivenFCoherent=-3.38509 LanguageModel=5.83422 CountEF=3.0187 MaxLexFGivenE=-0.206486 MaxLexEGivenF=-14.1868 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-2.60577 EGivenFCoherent=1.9345 LanguageModel=-6.26137 CountEF=-1.77679 MaxLexFGivenE=0.607749 MaxLexEGivenF=14.3283 +1 Glue=-1 WordPenalty=2.60577 EGivenFCoherent=-1.9345 LanguageModel=6.26137 CountEF=1.77679 MaxLexFGivenE=-0.607749 MaxLexEGivenF=-14.3283 +0 Glue=2 WordPenalty=-2.60577 EGivenFCoherent=1.0636 LanguageModel=-6.30604 CountEF=-1.07243 MaxLexFGivenE=0.391386 MaxLexEGivenF=14.1798 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=2.60577 EGivenFCoherent=-1.0636 LanguageModel=6.30604 CountEF=1.07243 MaxLexFGivenE=-0.391386 MaxLexEGivenF=-14.1798 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=2.64771 LanguageModel=-6.30604 CountEF=-2.36549 MaxLexFGivenE=0.391386 MaxLexEGivenF=14.1798 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-2.64771 LanguageModel=6.30604 CountEF=2.36549 MaxLexFGivenE=-0.391386 MaxLexEGivenF=-14.1798 +1 Glue=-1 WordPenalty=2.60577 EGivenFCoherent=-2.7796 LanguageModel=5.84308 CountEF=2.37885 MaxLexFGivenE=-0.689288 MaxLexEGivenF=-16.1094 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-2.60577 EGivenFCoherent=2.7796 LanguageModel=-5.84308 CountEF=-2.37885 MaxLexFGivenE=0.689288 MaxLexEGivenF=16.1094 IsSingletonFE=1 +0 WordPenalty=-2.60577 EGivenFCoherent=2.7796 LanguageModel=-6.47481 CountEF=-2.37885 MaxLexFGivenE=0.989028 MaxLexEGivenF=16.4516 IsSingletonFE=1 +1 WordPenalty=2.60577 EGivenFCoherent=-2.7796 LanguageModel=6.47481 CountEF=2.37885 MaxLexFGivenE=-0.989028 MaxLexEGivenF=-16.4516 IsSingletonFE=-1 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-3.27692 LanguageModel=6.64545 CountEF=2.78976 MaxLexFGivenE=-0.391386 MaxLexEGivenF=-14.7522 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=3.27692 LanguageModel=-6.64545 CountEF=-2.78976 MaxLexFGivenE=0.391386 MaxLexEGivenF=14.7522 IsSingletonFE=1 +1 WordPenalty=2.60577 EGivenFCoherent=-2.7796 LanguageModel=6.18918 CountEF=2.37885 MaxLexFGivenE=-0.804128 MaxLexEGivenF=-16.597 IsSingletonFE=-1 +0 WordPenalty=-2.60577 EGivenFCoherent=2.7796 LanguageModel=-6.18918 CountEF=-2.37885 MaxLexFGivenE=0.804128 MaxLexEGivenF=16.597 IsSingletonFE=1 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-2.51174 LanguageModel=6.0894 CountEF=2.25828 MaxLexFGivenE=-0.307186 MaxLexEGivenF=-14.6151 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=2.51174 LanguageModel=-6.0894 CountEF=-2.25828 MaxLexFGivenE=0.307186 MaxLexEGivenF=14.6151 +0 WordPenalty=-2.60577 EGivenFCoherent=3.07368 LanguageModel=-5.84862 CountEF=-2.63849 MaxLexFGivenE=1.25771 MaxLexEGivenF=14.7661 IsSingletonFE=1 +1 WordPenalty=2.60577 EGivenFCoherent=-3.07368 LanguageModel=5.84862 CountEF=2.63849 MaxLexFGivenE=-1.25771 MaxLexEGivenF=-14.7661 IsSingletonFE=-1 +1 WordPenalty=4.77725 EGivenFCoherent=-2.97589 LanguageModel=11.748 CountEF=2.56791 MaxLexFGivenE=-0.252476 MaxLexEGivenF=-29.1286 IsSingletonFE=-1 +0 WordPenalty=-4.77725 EGivenFCoherent=2.97589 LanguageModel=-11.748 CountEF=-2.56791 MaxLexFGivenE=0.252476 MaxLexEGivenF=29.1286 IsSingletonFE=1 +0 WordPenalty=-4.77725 EGivenFCoherent=3.28989 LanguageModel=-12.7195 CountEF=-2.75659 MaxLexFGivenE=0.336676 MaxLexEGivenF=29.3769 IsSingletonFE=2 +1 WordPenalty=4.77725 EGivenFCoherent=-3.28989 LanguageModel=12.7195 CountEF=2.75659 MaxLexFGivenE=-0.336676 MaxLexEGivenF=-29.3769 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-4.77725 EGivenFCoherent=4.79928 LanguageModel=-12.5268 CountEF=-4.11394 MaxLexFGivenE=-0.122413 MaxLexEGivenF=28.6725 IsSingletonFE=2 +1 
Glue=2 WordPenalty=4.77725 EGivenFCoherent=-4.79928 LanguageModel=12.5268 CountEF=4.11394 MaxLexFGivenE=0.122413 MaxLexEGivenF=-28.6725 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-4.77725 EGivenFCoherent=4.10525 LanguageModel=-13.0072 CountEF=-3.56961 MaxLexFGivenE=0.856445 MaxLexEGivenF=30.1158 IsSingletonFE=1 +1 Glue=2 WordPenalty=4.77725 EGivenFCoherent=-4.10525 LanguageModel=13.0072 CountEF=3.56961 MaxLexFGivenE=-0.856445 MaxLexEGivenF=-30.1158 IsSingletonFE=-1 +1 Glue=2 WordPenalty=4.77725 EGivenFCoherent=-3.86221 LanguageModel=11.8174 CountEF=3.31973 MaxLexFGivenE=-0.443346 MaxLexEGivenF=-29.0931 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-4.77725 EGivenFCoherent=3.86221 LanguageModel=-11.8174 CountEF=-3.31973 MaxLexFGivenE=0.443346 MaxLexEGivenF=29.0931 IsSingletonFE=2 +1 Glue=1 WordPenalty=4.77725 EGivenFCoherent=-3.28989 LanguageModel=12.789 CountEF=2.75659 MaxLexFGivenE=-0.527546 MaxLexEGivenF=-29.3414 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-4.77725 EGivenFCoherent=3.28989 LanguageModel=-12.789 CountEF=-2.75659 MaxLexFGivenE=0.527546 MaxLexEGivenF=29.3414 IsSingletonFE=2 +0 Glue=-1 WordPenalty=-4.77725 EGivenFCoherent=3.08063 LanguageModel=-13.3804 CountEF=-2.55494 MaxLexFGivenE=0.231626 MaxLexEGivenF=30.0425 IsSingletonFE=2 +1 Glue=1 WordPenalty=4.77725 EGivenFCoherent=-3.08063 LanguageModel=13.3804 CountEF=2.55494 MaxLexFGivenE=-0.231626 MaxLexEGivenF=-30.0425 IsSingletonFE=-2 +1 Glue=2 WordPenalty=3.90866 EGivenFCoherent=-2.93852 LanguageModel=10.7816 CountEF=2.53624 MaxLexFGivenE=-0.79727 MaxLexEGivenF=-26.0981 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-3.90866 EGivenFCoherent=2.93852 LanguageModel=-10.7816 CountEF=-2.53624 MaxLexFGivenE=0.79727 MaxLexEGivenF=26.0981 IsSingletonFE=1 +0 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=3.08406 LanguageModel=-7.5847 CountEF=-2.84261 MaxLexFGivenE=0.1007 MaxLexEGivenF=14.8938 +1 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-3.08406 LanguageModel=7.5847 CountEF=2.84261 MaxLexFGivenE=-0.1007 MaxLexEGivenF=-14.8938 +1 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.59656 LanguageModel=7.53403 CountEF=2.33746 MaxLexFGivenE=-0.1849 MaxLexEGivenF=-14.3201 +0 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.59656 LanguageModel=-7.53403 CountEF=-2.33746 MaxLexFGivenE=0.1849 MaxLexEGivenF=14.3201 +0 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.7796 LanguageModel=-7.70635 CountEF=-2.37885 MaxLexEGivenF=15.4491 IsSingletonFE=1 +1 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.7796 LanguageModel=7.70635 CountEF=2.37885 MaxLexEGivenF=-15.4491 IsSingletonFE=-1 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.21071 LanguageModel=7.29908 CountEF=2.03643 MaxLexFGivenE=0.0842 MaxLexEGivenF=-15.0391 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.21071 LanguageModel=-7.29908 CountEF=-2.03643 MaxLexFGivenE=-0.0842 MaxLexEGivenF=15.0391 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.90454 LanguageModel=-7.99198 CountEF=-2.47576 MaxLexFGivenE=0.1849 MaxLexEGivenF=15.3038 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.90454 LanguageModel=7.99198 CountEF=2.47576 MaxLexFGivenE=-0.1849 MaxLexEGivenF=-15.3038 IsSingletonFE=-1 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-3.56118 LanguageModel=8.94687 CountEF=3.14364 MaxLexFGivenE=0.0842 MaxLexEGivenF=-17.4784 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=3.56118 LanguageModel=-8.94687 CountEF=-3.14364 MaxLexFGivenE=-0.0842 MaxLexEGivenF=17.4784 IsSingletonFE=1 +1 Glue=3 WordPenalty=3.47436 EGivenFCoherent=-3.38166 LanguageModel=9.63979 CountEF=2.77679 MaxLexFGivenE=-0.1849 
MaxLexEGivenF=-17.743 IsSingletonFE=-2 +0 Glue=-3 WordPenalty=-3.47436 EGivenFCoherent=3.38166 LanguageModel=-9.63979 CountEF=-2.77679 MaxLexFGivenE=0.1849 MaxLexEGivenF=17.743 IsSingletonFE=2 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=3.85878 LanguageModel=-6.36636 CountEF=-3.19176 MaxLexFGivenE=1.01035 MaxLexEGivenF=19.079 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-3.85878 LanguageModel=6.36636 CountEF=3.19176 MaxLexFGivenE=-1.01035 MaxLexEGivenF=-19.079 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=1.15635 LanguageModel=-9.66262 CountEF=-1.00167 MaxLexFGivenE=0.07985 MaxLexEGivenF=19.2462 IsSingletonFE=1 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-1.15635 LanguageModel=9.66262 CountEF=1.00167 MaxLexFGivenE=-0.07985 MaxLexEGivenF=-19.2462 IsSingletonFE=-1 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-4.06804 LanguageModel=6.73998 CountEF=3.39341 MaxLexFGivenE=0.046844 MaxLexEGivenF=-18.3898 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=4.06804 LanguageModel=-6.73998 CountEF=-3.39341 MaxLexFGivenE=-0.046844 MaxLexEGivenF=18.3898 IsSingletonFE=2 +0 Glue=-4 WordPenalty=-3.04007 EGivenFCoherent=3.56118 LanguageModel=-7.35454 CountEF=-3.14364 MaxLexFGivenE=0.033006 MaxLexEGivenF=18.971 IsSingletonFE=1 +1 Glue=4 WordPenalty=3.04007 EGivenFCoherent=-3.56118 LanguageModel=7.35454 CountEF=3.14364 MaxLexFGivenE=-0.033006 MaxLexEGivenF=-18.971 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-4.34295 EGivenFCoherent=2.7796 LanguageModel=-11.6132 CountEF=-2.37885 MaxLexFGivenE=0.206486 MaxLexEGivenF=26.4667 IsSingletonFE=1 +1 Glue=-2 WordPenalty=4.34295 EGivenFCoherent=-2.7796 LanguageModel=11.6132 CountEF=2.37885 MaxLexFGivenE=-0.206486 MaxLexEGivenF=-26.4667 IsSingletonFE=-1 +1 WordPenalty=4.34295 EGivenFCoherent=-2.70243 LanguageModel=12.2835 CountEF=2.03039 MaxLexFGivenE=-0.336676 MaxLexEGivenF=-28.7574 IsSingletonFE=-3 +0 WordPenalty=-4.34295 EGivenFCoherent=2.70243 LanguageModel=-12.2835 CountEF=-2.03039 MaxLexFGivenE=0.336676 MaxLexEGivenF=28.7574 IsSingletonFE=3 +1 Glue=3 WordPenalty=4.34295 EGivenFCoherent=-2.40483 LanguageModel=12.2531 CountEF=1.98227 MaxLexFGivenE=0.062194 MaxLexEGivenF=-29.2634 IsSingletonFE=-2 +0 Glue=-3 WordPenalty=-4.34295 EGivenFCoherent=2.40483 LanguageModel=-12.2531 CountEF=-1.98227 MaxLexFGivenE=-0.062194 MaxLexEGivenF=29.2634 IsSingletonFE=2 +0 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=2.30248 LanguageModel=-4.39492 CountEF=-1.93952 MaxLexFGivenE=0.01085 MaxLexEGivenF=14.6365 IsSingletonFE=1 +1 Glue=1 WordPenalty=1.73718 EGivenFCoherent=-2.30248 LanguageModel=4.39492 CountEF=1.93952 MaxLexFGivenE=-0.01085 MaxLexEGivenF=-14.6365 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-2.60577 EGivenFCoherent=1.15635 LanguageModel=-5.80442 CountEF=-1.20276 MaxLexEGivenF=13.6743 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=2.60577 EGivenFCoherent=-1.15635 LanguageModel=5.80442 CountEF=1.20276 MaxLexEGivenF=-13.6743 IsSingletonFE=1 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=2.43182 LanguageModel=-7.11223 CountEF=-2.09079 MaxLexFGivenE=0.1849 MaxLexEGivenF=14.3508 IsSingletonFE=1 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-2.43182 LanguageModel=7.11223 CountEF=2.09079 MaxLexFGivenE=-0.1849 MaxLexEGivenF=-14.3508 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.56211 LanguageModel=-7.01733 CountEF=-1.32449 MaxLexFGivenE=-0.649959 MaxLexEGivenF=13.2365 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.56211 LanguageModel=7.01733 CountEF=1.32449 MaxLexFGivenE=0.649959 MaxLexEGivenF=-13.2365 
IsSingletonFE=-1 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-2.98886 LanguageModel=5.70542 CountEF=2.5805 MaxLexEGivenF=-13.6743 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=2.98886 LanguageModel=-5.70542 CountEF=-2.5805 MaxLexEGivenF=13.6743 IsSingletonFE=1 +0 Glue=-3 WordPenalty=-2.60577 EGivenFCoherent=2.44479 LanguageModel=-7.11224 CountEF=-2.10338 MaxLexFGivenE=0.1849 MaxLexEGivenF=14.3508 IsSingletonFE=1 +1 Glue=3 WordPenalty=2.60577 EGivenFCoherent=-2.44479 LanguageModel=7.11224 CountEF=2.10338 MaxLexFGivenE=-0.1849 MaxLexEGivenF=-14.3508 IsSingletonFE=-1 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-2.40467 LanguageModel=7.06249 CountEF=2.16137 MaxLexFGivenE=-0.607749 MaxLexEGivenF=-13.7208 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=2.40467 LanguageModel=-7.06249 CountEF=-2.16137 MaxLexFGivenE=0.607749 MaxLexEGivenF=13.7208 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.66663 LanguageModel=7.17863 CountEF=1.53911 MaxLexEGivenF=-14.5511 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.66663 LanguageModel=-7.17863 CountEF=-1.53911 MaxLexEGivenF=14.5511 +0 Glue=2 WordPenalty=-2.60577 EGivenFCoherent=-0.169305 LanguageModel=-6.40379 CountEF=0.02939 MaxLexFGivenE=-0.465059 MaxLexEGivenF=13.0911 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=2.60577 EGivenFCoherent=0.169305 LanguageModel=6.40379 CountEF=-0.02939 MaxLexFGivenE=0.465059 MaxLexEGivenF=-13.0911 IsSingletonFE=1 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-2.51174 LanguageModel=7.31712 CountEF=2.25828 MaxLexFGivenE=-0.391386 MaxLexEGivenF=-14.2117 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=2.51174 LanguageModel=-7.31712 CountEF=-2.25828 MaxLexFGivenE=0.391386 MaxLexEGivenF=14.2117 +0 Glue=-3 WordPenalty=-2.60577 EGivenFCoherent=2.21836 LanguageModel=-6.04172 CountEF=-1.89279 MaxLexFGivenE=0.1007 MaxLexEGivenF=14.1025 IsSingletonFE=1 +1 Glue=3 WordPenalty=2.60577 EGivenFCoherent=-2.21836 LanguageModel=6.04172 CountEF=1.89279 MaxLexFGivenE=-0.1007 MaxLexEGivenF=-14.1025 IsSingletonFE=-1 +1 WordPenalty=3.47437 EGivenFCoherent=-1.54407 LanguageModel=9.62792 CountEF=1.21748 MaxLexFGivenE=-0.79727 MaxLexEGivenF=-25.4205 IsSingletonFE=-1 +0 WordPenalty=-3.47437 EGivenFCoherent=1.54407 LanguageModel=-9.62792 CountEF=-1.21748 MaxLexFGivenE=0.79727 MaxLexEGivenF=25.4205 IsSingletonFE=1 +1 Glue=3 WordPenalty=2.17148 EGivenFCoherent=-3.56118 LanguageModel=5.67568 CountEF=3.14364 MaxLexFGivenE=-1.28007 MaxLexEGivenF=-15.6976 IsSingletonFE=-1 +0 Glue=-3 WordPenalty=-2.17148 EGivenFCoherent=3.56118 LanguageModel=-5.67568 CountEF=-3.14364 MaxLexFGivenE=1.28007 MaxLexEGivenF=15.6976 IsSingletonFE=1 +1 WordPenalty=3.47436 EGivenFCoherent=-3.71667 LanguageModel=8.55013 CountEF=2.97197 MaxLexFGivenE=-0.788657 MaxLexEGivenF=-20.817 IsSingletonFE=-3 +0 WordPenalty=-3.47436 EGivenFCoherent=3.71667 LanguageModel=-8.55013 CountEF=-2.97197 MaxLexFGivenE=0.788657 MaxLexEGivenF=20.817 IsSingletonFE=3 +1 Glue=1 WordPenalty=3.47436 EGivenFCoherent=-3.13862 LanguageModel=9.56812 CountEF=2.46779 MaxLexFGivenE=-0.983757 MaxLexEGivenF=-20.6784 IsSingletonFE=-3 +0 Glue=-1 WordPenalty=-3.47436 EGivenFCoherent=3.13862 LanguageModel=-9.56812 CountEF=-2.46779 MaxLexFGivenE=0.983757 MaxLexEGivenF=20.6784 IsSingletonFE=3 +0 WordPenalty=-3.47436 EGivenFCoherent=2.99651 LanguageModel=-9.09955 CountEF=-2.36991 MaxLexFGivenE=0.140415 MaxLexEGivenF=20.5555 IsSingletonFE=3 +1 WordPenalty=3.47436 EGivenFCoherent=-2.99651 LanguageModel=9.09955 CountEF=2.36991 MaxLexFGivenE=-0.140415 MaxLexEGivenF=-20.5555 IsSingletonFE=-3 +0 Glue=-3 
WordPenalty=-3.04007 EGivenFCoherent=1.79239 LanguageModel=-9.89725 CountEF=-1.61979 MaxLexFGivenE=0.983757 MaxLexEGivenF=19.0142 +1 Glue=3 WordPenalty=3.04007 EGivenFCoherent=-1.79239 LanguageModel=9.89725 CountEF=1.61979 MaxLexFGivenE=-0.983757 MaxLexEGivenF=-19.0142 +0 Glue=2 WordPenalty=-3.04007 EGivenFCoherent=2.7796 LanguageModel=-7.11709 CountEF=-2.24055 MaxLexFGivenE=0.802007 MaxLexEGivenF=18.8816 IsSingletonFE=2 +1 Glue=-2 WordPenalty=3.04007 EGivenFCoherent=-2.7796 LanguageModel=7.11709 CountEF=2.24055 MaxLexFGivenE=-0.802007 MaxLexEGivenF=-18.8816 IsSingletonFE=-2 +1 Glue=-2 WordPenalty=3.04007 EGivenFCoherent=-2.74741 LanguageModel=6.3009 CountEF=2.26174 MaxLexFGivenE=-0.950072 MaxLexEGivenF=-18.7001 IsSingletonFE=-2 +0 Glue=2 WordPenalty=-3.04007 EGivenFCoherent=2.74741 LanguageModel=-6.3009 CountEF=-2.26174 MaxLexFGivenE=0.950072 MaxLexEGivenF=18.7001 IsSingletonFE=2 +0 WordPenalty=-3.04007 EGivenFCoherent=2.93852 LanguageModel=-7.45049 CountEF=-2.4279 MaxLexFGivenE=0.983757 MaxLexEGivenF=18.7103 IsSingletonFE=2 +1 WordPenalty=3.04007 EGivenFCoherent=-2.93852 LanguageModel=7.45049 CountEF=2.4279 MaxLexFGivenE=-0.983757 MaxLexEGivenF=-18.7103 IsSingletonFE=-2 +1 WordPenalty=3.04007 EGivenFCoherent=-3.01368 LanguageModel=7.45047 CountEF=2.33746 MaxLexFGivenE=-0.983757 MaxLexEGivenF=-18.7103 IsSingletonFE=-3 +0 WordPenalty=-3.04007 EGivenFCoherent=3.01368 LanguageModel=-7.45047 CountEF=-2.33746 MaxLexFGivenE=0.983757 MaxLexEGivenF=18.7103 IsSingletonFE=3 +1 Glue=1 WordPenalty=3.90865 EGivenFCoherent=-4.56177 LanguageModel=9.4037 CountEF=3.71233 MaxLexEGivenF=-23.3408 IsSingletonFE=-3 +0 Glue=-1 WordPenalty=-3.90865 EGivenFCoherent=4.56177 LanguageModel=-9.4037 CountEF=-3.71233 MaxLexEGivenF=23.3408 IsSingletonFE=3 +1 Glue=-1 WordPenalty=3.04007 EGivenFCoherent=-0.16085 LanguageModel=9.76234 CountEF=-0.096909 MaxLexFGivenE=-0.047035 MaxLexEGivenF=-19.2626 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-3.04007 EGivenFCoherent=0.16085 LanguageModel=-9.76234 CountEF=0.096909 MaxLexFGivenE=0.047035 MaxLexEGivenF=19.2626 IsSingletonFE=2 +1 Glue=3 WordPenalty=3.04007 EGivenFCoherent=-2.3856 LanguageModel=9.19023 CountEF=1.74819 MaxLexFGivenE=-0.060385 MaxLexEGivenF=-17.7833 IsSingletonFE=-3 +0 Glue=-3 WordPenalty=-3.04007 EGivenFCoherent=2.3856 LanguageModel=-9.19023 CountEF=-1.74819 MaxLexFGivenE=0.060385 MaxLexEGivenF=17.7833 IsSingletonFE=3 +1 Glue=2 WordPenalty=3.04007 EGivenFCoherent=-2.24055 LanguageModel=8.97071 CountEF=1.65321 MaxLexFGivenE=-0.10673 MaxLexEGivenF=-18.9762 IsSingletonFE=-3 +0 Glue=-2 WordPenalty=-3.04007 EGivenFCoherent=2.24055 LanguageModel=-8.97071 CountEF=-1.65321 MaxLexFGivenE=0.10673 MaxLexEGivenF=18.9762 IsSingletonFE=3 +0 WordPenalty=-3.04007 EGivenFCoherent=2.10037 LanguageModel=-8.41201 CountEF=-1.47712 MaxLexFGivenE=0.788657 MaxLexEGivenF=20.9653 IsSingletonFE=3 +1 WordPenalty=3.04007 EGivenFCoherent=-2.10037 LanguageModel=8.41201 CountEF=1.47712 MaxLexFGivenE=-0.788657 MaxLexEGivenF=-20.9653 IsSingletonFE=-3 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-2.32222 LanguageModel=8.53409 CountEF=1.65321 MaxLexFGivenE=-0.047035 MaxLexEGivenF=-18.2029 IsSingletonFE=-3 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=2.32222 LanguageModel=-8.53409 CountEF=-1.65321 MaxLexFGivenE=0.047035 MaxLexEGivenF=18.2029 IsSingletonFE=3 +0 Glue=-1 WordPenalty=-3.04007 EGivenFCoherent=1.25527 LanguageModel=-9.14251 CountEF=-0.875061 MaxLexFGivenE=0.047035 MaxLexEGivenF=18.9587 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.04007 EGivenFCoherent=-1.25527 LanguageModel=9.14251 
CountEF=0.875061 MaxLexFGivenE=-0.047035 MaxLexEGivenF=-18.9587 IsSingletonFE=-2 +0 WordPenalty=-3.04007 EGivenFCoherent=1.92428 LanguageModel=-8.33793 CountEF=-1.35218 MaxLexFGivenE=0.857337 MaxLexEGivenF=18.8877 IsSingletonFE=3 +1 WordPenalty=3.04007 EGivenFCoherent=-1.92428 LanguageModel=8.33793 CountEF=1.35218 MaxLexFGivenE=-0.857337 MaxLexEGivenF=-18.8877 IsSingletonFE=-3 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.43136 LanguageModel=-7.15382 CountEF=-1.14613 MaxLexFGivenE=0.047035 MaxLexEGivenF=16.924 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.43136 LanguageModel=7.15382 CountEF=1.14613 MaxLexFGivenE=-0.047035 MaxLexEGivenF=-16.924 IsSingletonFE=-1 +1 Glue=3 WordPenalty=2.60577 EGivenFCoherent=-2.75358 LanguageModel=7.00613 CountEF=2.04922 MaxLexFGivenE=-1.60881 MaxLexEGivenF=-17.1785 IsSingletonFE=-3 +0 Glue=-3 WordPenalty=-2.60577 EGivenFCoherent=2.75358 LanguageModel=-7.00613 CountEF=-2.04922 MaxLexFGivenE=1.60881 MaxLexEGivenF=17.1785 IsSingletonFE=3 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.73239 LanguageModel=-7.9155 CountEF=-1.22185 MaxLexFGivenE=0.708907 MaxLexEGivenF=18.4587 IsSingletonFE=2 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.73239 LanguageModel=7.9155 CountEF=1.22185 MaxLexFGivenE=-0.708907 MaxLexEGivenF=-18.4587 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-2.60577 EGivenFCoherent=1.32222 LanguageModel=-6.56583 CountEF=-0.90309 MaxLexFGivenE=0.983757 MaxLexEGivenF=16.8261 IsSingletonFE=2 +1 Glue=-1 WordPenalty=2.60577 EGivenFCoherent=-1.32222 LanguageModel=6.56583 CountEF=0.90309 MaxLexFGivenE=-0.983757 MaxLexEGivenF=-16.8261 IsSingletonFE=-2 +1 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-1.38021 LanguageModel=7.52091 CountEF=0.954243 MaxLexFGivenE=-0.24574 MaxLexEGivenF=-17.7804 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=1.38021 LanguageModel=-7.52091 CountEF=-0.954243 MaxLexFGivenE=0.24574 MaxLexEGivenF=17.7804 IsSingletonFE=2 +0 Glue=1 WordPenalty=-2.60577 EGivenFCoherent=0.77815 LanguageModel=-8.02285 CountEF=-0.477121 MaxLexFGivenE=0.888299 MaxLexEGivenF=17.1292 IsSingletonFE=2 +1 Glue=-1 WordPenalty=2.60577 EGivenFCoherent=-0.77815 LanguageModel=8.02285 CountEF=0.477121 MaxLexFGivenE=-0.888299 MaxLexEGivenF=-17.1292 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=1.85733 LanguageModel=-7.32163 CountEF=-1.25527 MaxLexFGivenE=0.950072 MaxLexEGivenF=17.131 IsSingletonFE=3 +1 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-1.85733 LanguageModel=7.32163 CountEF=1.25527 MaxLexFGivenE=-0.950072 MaxLexEGivenF=-17.131 IsSingletonFE=-3 +1 Glue=-2 WordPenalty=2.60577 EGivenFCoherent=-0.30103 LanguageModel=7.0223 CountEF=0.176091 MaxLexFGivenE=-0.060385 MaxLexEGivenF=-16.8749 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-2.60577 EGivenFCoherent=0.30103 LanguageModel=-7.0223 CountEF=-0.176091 MaxLexFGivenE=0.060385 MaxLexEGivenF=16.8749 IsSingletonFE=1 +0 WordPenalty=-2.60577 EGivenFCoherent=1.62325 LanguageModel=-6.98359 CountEF=-1.07918 MaxLexFGivenE=1.45794 MaxLexEGivenF=18.5018 IsSingletonFE=3 +1 WordPenalty=2.60577 EGivenFCoherent=-1.62325 LanguageModel=6.98359 CountEF=1.07918 MaxLexFGivenE=-1.45794 MaxLexEGivenF=-18.5018 IsSingletonFE=-3 +1 WordPenalty=2.60577 EGivenFCoherent=-0.52827 LanguageModel=8.70655 CountEF=0.425969 MaxLexFGivenE=-0.983757 MaxLexEGivenF=-17.749 +0 WordPenalty=-2.60577 EGivenFCoherent=0.52827 LanguageModel=-8.70655 CountEF=-0.425969 MaxLexFGivenE=0.983757 MaxLexEGivenF=17.749 +0 WordPenalty=-2.60577 EGivenFCoherent=1.38021 LanguageModel=-7.34127 CountEF=-0.954243 
MaxLexFGivenE=0.777587 MaxLexEGivenF=17.7946 IsSingletonFE=2
+1 WordPenalty=2.60577 EGivenFCoherent=-1.38021 LanguageModel=7.34127 CountEF=0.954243 MaxLexFGivenE=-0.777587 MaxLexEGivenF=-17.7946 IsSingletonFE=-2
+0 WordPenalty=-2.60577 EGivenFCoherent=0.65321 LanguageModel=-8.0118 CountEF=-0.425969 MaxLexFGivenE=-0.724667 MaxLexEGivenF=17.3822 IsSingletonFE=1
+1 WordPenalty=2.60577 EGivenFCoherent=-0.65321 LanguageModel=8.0118 CountEF=0.425969 MaxLexFGivenE=0.724667 MaxLexEGivenF=-17.3822 IsSingletonFE=-1
[several hundred further records of the same form elided: each added line is a binary label (1 or 0) followed by sparse feature=value deltas drawn from Glue, WordPenalty, PassThrough, EGivenFCoherent, LanguageModel, LanguageModel_OOV, SampleCountF, CountEF, MaxLexFGivenE, MaxLexEGivenF, IsSingletonF, and IsSingletonFE; consecutive records form sign-flipped mirror pairs. The hunk's remaining records resume after the parsing sketch below.]
CountEF=1.32222 MaxLexFGivenE=-3.35007 MaxLexEGivenF=-1.09865 IsSingletonFE=-1 +0 WordPenalty=1.30288 EGivenFCoherent=1.61278 LanguageModel=0.910922 CountEF=-1.32222 MaxLexFGivenE=3.35007 MaxLexEGivenF=1.09865 IsSingletonFE=1 +1 Glue=1 WordPenalty=-1.30288 EGivenFCoherent=-0.47712 LanguageModel=-3.85474 CountEF=0.30103 MaxLexFGivenE=-2.19532 MaxLexEGivenF=1.11321 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=1.30288 EGivenFCoherent=0.47712 LanguageModel=3.85474 CountEF=-0.30103 MaxLexFGivenE=2.19532 MaxLexEGivenF=-1.11321 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-1.30288 EGivenFCoherent=0.47712 LanguageModel=-5.52163 CountEF=-0.30103 MaxLexFGivenE=-3.02499 MaxLexEGivenF=1.35775 IsSingletonFE=1 +0 Glue=1 WordPenalty=1.30288 EGivenFCoherent=-0.47712 LanguageModel=5.52163 CountEF=0.30103 MaxLexFGivenE=3.02499 MaxLexEGivenF=-1.35775 IsSingletonFE=-1 +1 WordPenalty=-1.30289 LanguageModel=-5.43188 MaxLexFGivenE=-0.634791 MaxLexEGivenF=3.29473 +0 WordPenalty=1.30289 LanguageModel=5.43188 MaxLexFGivenE=0.634791 MaxLexEGivenF=-3.29473 +1 Glue=-1 WordPenalty=-1.30289 LanguageModel=-2.4283 MaxLexFGivenE=-0.622941 MaxLexEGivenF=0.64701 +0 Glue=1 WordPenalty=1.30289 LanguageModel=2.4283 MaxLexFGivenE=0.622941 MaxLexEGivenF=-0.64701 +1 Glue=-1 WordPenalty=-1.30289 EGivenFCoherent=-0.47712 LanguageModel=-6.88346 CountEF=0.30103 MaxLexFGivenE=-3.35007 MaxLexEGivenF=1.11531 IsSingletonFE=-1 +0 Glue=1 WordPenalty=1.30289 EGivenFCoherent=0.47712 LanguageModel=6.88346 CountEF=-0.30103 MaxLexFGivenE=3.35007 MaxLexEGivenF=-1.11531 IsSingletonFE=1 +0 Glue=1 WordPenalty=1.30288 EGivenFCoherent=1.5563 LanguageModel=-0.504382 CountEF=-1.26717 MaxLexFGivenE=1.57938 MaxLexEGivenF=1.11568 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-1.30288 EGivenFCoherent=-1.5563 LanguageModel=0.504382 CountEF=1.26717 MaxLexFGivenE=-1.57938 MaxLexEGivenF=-1.11568 IsSingletonFE=-1 +0 WordPenalty=1.30288 LanguageModel=5.64813 MaxLexFGivenE=3.02499 MaxLexEGivenF=-1.85686 +1 WordPenalty=-1.30288 LanguageModel=-5.64813 MaxLexFGivenE=-3.02499 MaxLexEGivenF=1.85686 +1 WordPenalty=-1.30288 EGivenFCoherent=-2.16732 LanguageModel=-4.91619 CountEF=1.86923 MaxLexFGivenE=-5.45426 MaxLexEGivenF=-0.961444 IsSingletonFE=-1 +0 WordPenalty=1.30288 EGivenFCoherent=2.16732 LanguageModel=4.91619 CountEF=-1.86923 MaxLexFGivenE=5.45426 MaxLexEGivenF=0.961444 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-1.30288 EGivenFCoherent=0.47712 LanguageModel=-7.18337 CountEF=-0.30103 MaxLexFGivenE=-5.45426 MaxLexEGivenF=-0.548094 IsSingletonFE=1 +0 Glue=1 WordPenalty=1.30288 EGivenFCoherent=-0.47712 LanguageModel=7.18337 CountEF=0.30103 MaxLexFGivenE=5.45426 MaxLexEGivenF=0.548094 IsSingletonFE=-1 +0 Glue=1 WordPenalty=0.86859 EGivenFCoherent=3.4908 LanguageModel=-0.912144 CountEF=-3.0281 MaxLexFGivenE=3.51682 MaxLexEGivenF=2.74995 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-0.86859 EGivenFCoherent=-3.4908 LanguageModel=0.912144 CountEF=3.0281 MaxLexFGivenE=-3.51682 MaxLexEGivenF=-2.74995 IsSingletonFE=-1 +0 Glue=2 WordPenalty=0.86859 LanguageModel=2.55505 MaxLexFGivenE=3.67172 MaxLexEGivenF=3.14789 +1 Glue=-2 WordPenalty=-0.86859 LanguageModel=-2.55505 MaxLexFGivenE=-3.67172 MaxLexEGivenF=-3.14789 +0 WordPenalty=0.86859 EGivenFCoherent=1.61278 LanguageModel=4.99518 CountEF=-1.32222 MaxLexFGivenE=1.71408 MaxLexEGivenF=-1.34338 IsSingletonFE=1 +1 WordPenalty=-0.86859 EGivenFCoherent=-1.61278 LanguageModel=-4.99518 CountEF=1.32222 MaxLexFGivenE=-1.71408 MaxLexEGivenF=1.34338 IsSingletonFE=-1 +1 Glue=1 WordPenalty=-0.868586 EGivenFCoherent=-0.30103 LanguageModel=-1.36053 CountEF=0.176091 
MaxLexFGivenE=-2.19532 MaxLexEGivenF=-1.02856 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=0.868586 EGivenFCoherent=0.30103 LanguageModel=1.36053 CountEF=-0.176091 MaxLexFGivenE=2.19532 MaxLexEGivenF=1.02856 IsSingletonFE=1 +0 Glue=-1 WordPenalty=0.86859 EGivenFCoherent=2.71265 LanguageModel=0.634946 CountEF=-2.23805 MaxLexFGivenE=2.36207 MaxLexEGivenF=0.951434 IsSingletonFE=2 +1 Glue=1 WordPenalty=-0.86859 EGivenFCoherent=-2.71265 LanguageModel=-0.634946 CountEF=2.23805 MaxLexFGivenE=-2.36207 MaxLexEGivenF=-0.951434 IsSingletonFE=-2 +0 Glue=1 WordPenalty=0.86859 EGivenFCoherent=1 LanguageModel=7.95872 CountEF=-0.74036 MaxLexFGivenE=4.96243 MaxLexEGivenF=-0.222596 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-0.86859 EGivenFCoherent=-1 LanguageModel=-7.95872 CountEF=0.74036 MaxLexFGivenE=-4.96243 MaxLexEGivenF=0.222596 IsSingletonFE=-1 +0 Glue=2 WordPenalty=0.86859 EGivenFCoherent=1 LanguageModel=6.23639 CountEF=-0.74036 MaxLexFGivenE=4.79568 MaxLexEGivenF=-0.162506 IsSingletonFE=1 +1 Glue=-2 WordPenalty=-0.86859 EGivenFCoherent=-1 LanguageModel=-6.23639 CountEF=0.74036 MaxLexFGivenE=-4.79568 MaxLexEGivenF=0.162506 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=-0.86859 EGivenFCoherent=-1.78887 LanguageModel=-0.634936 CountEF=1.44716 MaxLexFGivenE=-2.36207 MaxLexEGivenF=-0.951434 IsSingletonFE=-1 +0 Glue=1 WordPenalty=0.86859 EGivenFCoherent=1.78887 LanguageModel=0.634936 CountEF=-1.44716 MaxLexFGivenE=2.36207 MaxLexEGivenF=0.951434 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=0.47712 LanguageModel=-12.5027 CountEF=-0.30103 MaxLexFGivenE=-3.02499 MaxLexEGivenF=10.8686 IsSingletonFE=1 +0 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-0.47712 LanguageModel=12.5027 CountEF=0.30103 MaxLexFGivenE=3.02499 MaxLexEGivenF=-10.8686 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=-3.04006 EGivenFCoherent=-1.6902 LanguageModel=-8.34192 CountEF=1.5682 MaxLexFGivenE=-3.51682 MaxLexEGivenF=7.97323 +0 Glue=2 WordPenalty=3.04006 EGivenFCoherent=1.6902 LanguageModel=8.34192 CountEF=-1.5682 MaxLexFGivenE=3.51682 MaxLexEGivenF=-7.97323 +1 Glue=-1 WordPenalty=-3.04006 LanguageModel=-12.5269 MaxLexFGivenE=-1.54733 MaxLexEGivenF=12.0065 +0 Glue=1 WordPenalty=3.04006 LanguageModel=12.5269 MaxLexFGivenE=1.54733 MaxLexEGivenF=-12.0065 +0 WordPenalty=0.434295 EGivenFCoherent=2.23553 LanguageModel=-0.027803 CountEF=-1.93702 MaxLexFGivenE=4.13276 MaxLexEGivenF=3.07617 IsSingletonFE=1 +1 WordPenalty=-0.434295 EGivenFCoherent=-2.23553 LanguageModel=0.027803 CountEF=1.93702 MaxLexFGivenE=-4.13276 MaxLexEGivenF=-3.07617 IsSingletonFE=-1 +1 WordPenalty=-0.434295 EGivenFCoherent=-0.12494 LanguageModel=-3.28236 CountEF=0.09691 MaxLexFGivenE=-1.71408 MaxLexEGivenF=-2.23502 +0 WordPenalty=0.434295 EGivenFCoherent=0.12494 LanguageModel=3.28236 CountEF=-0.09691 MaxLexFGivenE=1.71408 MaxLexEGivenF=2.23502 +0 Glue=1 WordPenalty=1.73718 EGivenFCoherent=1.63347 LanguageModel=5.99815 CountEF=-1.53908 MaxLexFGivenE=4.52987 MaxLexEGivenF=-2.45899 +1 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=-1.63347 LanguageModel=-5.99815 CountEF=1.53908 MaxLexFGivenE=-4.52987 MaxLexEGivenF=2.45899 +1 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=0.60206 LanguageModel=-9.71352 CountEF=-0.39794 MaxLexFGivenE=-2.72713 MaxLexEGivenF=4.36262 IsSingletonFE=1 +0 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-0.60206 LanguageModel=9.71352 CountEF=0.39794 MaxLexFGivenE=2.72713 MaxLexEGivenF=-4.36262 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=-0.47712 LanguageModel=-7.2765 CountEF=0.30103 MaxLexFGivenE=-0.789691 MaxLexEGivenF=5.03856 IsSingletonFE=-1 
+0 Glue=1 WordPenalty=1.73718 EGivenFCoherent=0.47712 LanguageModel=7.2765 CountEF=-0.30103 MaxLexFGivenE=0.789691 MaxLexEGivenF=-5.03856 IsSingletonFE=1 +0 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-0.47712 LanguageModel=6.61398 CountEF=0.30103 MaxLexFGivenE=-1.1798 MaxLexEGivenF=-5.12531 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=0.47712 LanguageModel=-6.61398 CountEF=-0.30103 MaxLexFGivenE=1.1798 MaxLexEGivenF=5.12531 IsSingletonFE=1 +0 WordPenalty=1.73718 EGivenFCoherent=-0.47712 LanguageModel=11.7101 CountEF=0.30103 MaxLexFGivenE=3.39005 MaxLexEGivenF=-4.0229 IsSingletonFE=-1 +1 WordPenalty=-1.73718 EGivenFCoherent=0.47712 LanguageModel=-11.7101 CountEF=-0.30103 MaxLexFGivenE=-3.39005 MaxLexEGivenF=4.0229 IsSingletonFE=1 +1 WordPenalty=-1.73718 EGivenFCoherent=-1.45738 LanguageModel=-3.84963 CountEF=1.39295 MaxLexFGivenE=-2.56038 MaxLexEGivenF=1.10911 +0 WordPenalty=1.73718 EGivenFCoherent=1.45738 LanguageModel=3.84963 CountEF=-1.39295 MaxLexFGivenE=2.56038 MaxLexEGivenF=-1.10911 +0 Glue=1 WordPenalty=1.73718 EGivenFCoherent=-0.88461 LanguageModel=5.03044 CountEF=0.77815 MaxLexFGivenE=-1.15475 MaxLexEGivenF=-6.87879 +1 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=0.88461 LanguageModel=-5.03044 CountEF=-0.77815 MaxLexFGivenE=1.15475 MaxLexEGivenF=6.87879 +1 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=-2.16732 LanguageModel=-5.82706 CountEF=1.86923 MaxLexFGivenE=-3.71513 MaxLexEGivenF=1.06738 IsSingletonFE=-1 +0 Glue=1 WordPenalty=1.73718 EGivenFCoherent=2.16732 LanguageModel=5.82706 CountEF=-1.86923 MaxLexFGivenE=3.71513 MaxLexEGivenF=-1.06738 IsSingletonFE=1 +1 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=-1.86629 LanguageModel=-8.00702 CountEF=1.69314 MaxLexFGivenE=-3.88188 MaxLexEGivenF=3.31716 +0 Glue=2 WordPenalty=1.73718 EGivenFCoherent=1.86629 LanguageModel=8.00702 CountEF=-1.69314 MaxLexFGivenE=3.88188 MaxLexEGivenF=-3.31716 +1 Glue=-2 WordPenalty=-2.17147 LanguageModel=-6.73356 MaxLexFGivenE=-2.56038 MaxLexEGivenF=2.82049 +0 Glue=2 WordPenalty=2.17147 LanguageModel=6.73356 MaxLexFGivenE=2.56038 MaxLexEGivenF=-2.82049 +0 Glue=3 WordPenalty=2.17147 EGivenFCoherent=-1.20412 LanguageModel=10.314 CountEF=0.79588 MaxLexFGivenE=7.48036 MaxLexEGivenF=-3.36976 IsSingletonFE=-2 +1 Glue=-3 WordPenalty=-2.17147 EGivenFCoherent=1.20412 LanguageModel=-10.314 CountEF=-0.79588 MaxLexFGivenE=-7.48036 MaxLexEGivenF=3.36976 IsSingletonFE=2 +1 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=1.83885 LanguageModel=-6.64673 CountEF=-1.38021 MaxLexEGivenF=6.79166 IsSingletonFE=2 +0 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-1.83885 LanguageModel=6.64673 CountEF=1.38021 MaxLexEGivenF=-6.79166 IsSingletonFE=-2 +0 WordPenalty=2.17148 EGivenFCoherent=-0.17609 LanguageModel=5.04813 CountEF=0.124939 MaxLexFGivenE=1.40563 MaxLexEGivenF=-5.06667 +1 WordPenalty=-2.17148 EGivenFCoherent=0.17609 LanguageModel=-5.04813 CountEF=-0.124939 MaxLexFGivenE=-1.40563 MaxLexEGivenF=5.06667 +0 Glue=1 WordPenalty=2.17147 LanguageModel=7.25136 MaxLexFGivenE=4.52987 MaxLexEGivenF=-1.98068 +1 Glue=-1 WordPenalty=-2.17147 LanguageModel=-7.25136 MaxLexFGivenE=-4.52987 MaxLexEGivenF=1.98068 +0 WordPenalty=1.30289 EGivenFCoherent=-1.0607 LanguageModel=3.2329 CountEF=0.903089 MaxLexFGivenE=-1.15475 MaxLexEGivenF=-4.73702 +1 WordPenalty=-1.30289 EGivenFCoherent=1.0607 LanguageModel=-3.2329 CountEF=-0.903089 MaxLexFGivenE=1.15475 MaxLexEGivenF=4.73702 +1 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=0.60206 LanguageModel=-12.2654 CountEF=-0.39794 MaxLexFGivenE=-6.53577 MaxLexEGivenF=11.659 IsSingletonFE=1 +0 
Glue=1 WordPenalty=2.60577 EGivenFCoherent=-0.60206 LanguageModel=12.2654 CountEF=0.39794 MaxLexFGivenE=6.53577 MaxLexEGivenF=-11.659 IsSingletonFE=-1 +0 Glue=2 WordPenalty=2.60577 EGivenFCoherent=-0.47712 LanguageModel=12.8424 CountEF=0.30103 MaxLexFGivenE=2.72713 MaxLexEGivenF=-11.2378 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=-2.60577 EGivenFCoherent=0.47712 LanguageModel=-12.8424 CountEF=-0.30103 MaxLexFGivenE=-2.72713 MaxLexEGivenF=11.2378 IsSingletonFE=1 +0 WordPenalty=2.60577 LanguageModel=6.70002 MaxLexFGivenE=-1.77069 MaxLexEGivenF=-11.0911 +1 WordPenalty=-2.60577 LanguageModel=-6.70002 MaxLexFGivenE=1.77069 MaxLexEGivenF=11.0911 +0 Glue=2 WordPenalty=3.47435 EGivenFCoherent=-2.53403 LanguageModel=11.666 CountEF=2.11059 MaxLexFGivenE=0.198705 MaxLexEGivenF=-15.2887 IsSingletonFE=-2 +1 Glue=-2 WordPenalty=-3.47435 EGivenFCoherent=2.53403 LanguageModel=-11.666 CountEF=-2.11059 MaxLexFGivenE=-0.198705 MaxLexEGivenF=15.2887 IsSingletonFE=2 +1 WordPenalty=-1.30288 EGivenFCoherent=0.8451 LanguageModel=-6.54828 CountEF=-0.60206 MaxLexEGivenF=0.700088 IsSingletonFE=1 +0 WordPenalty=1.30288 EGivenFCoherent=-0.8451 LanguageModel=6.54828 CountEF=0.60206 MaxLexEGivenF=-0.700088 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=-1.30288 LanguageModel=-4.74249 MaxLexFGivenE=-0.048423 MaxLexEGivenF=2.7166 +0 Glue=1 WordPenalty=1.30288 LanguageModel=4.74249 MaxLexFGivenE=0.048423 MaxLexEGivenF=-2.7166 +0 Glue=2 WordPenalty=1.30288 EGivenFCoherent=-1.08686 LanguageModel=4.44729 CountEF=1.05944 MaxLexFGivenE=0.704982 MaxLexEGivenF=-1.81831 +1 Glue=-2 WordPenalty=-1.30288 EGivenFCoherent=1.08686 LanguageModel=-4.44729 CountEF=-1.05944 MaxLexFGivenE=-0.704982 MaxLexEGivenF=1.81831 +0 WordPenalty=2.60577 EGivenFCoherent=-2.233 LanguageModel=8.39943 CountEF=1.9345 MaxLexFGivenE=0.10356 MaxLexEGivenF=-17.1899 IsSingletonFE=-1 +1 WordPenalty=-2.60577 EGivenFCoherent=2.233 LanguageModel=-8.39943 CountEF=-1.9345 MaxLexFGivenE=-0.10356 MaxLexEGivenF=17.1899 IsSingletonFE=1 +0 WordPenalty=2.17148 EGivenFCoherent=-0.47712 LanguageModel=7.20171 CountEF=0.30103 MaxLexFGivenE=-0.431541 MaxLexEGivenF=-15.3186 IsSingletonFE=-1 +1 WordPenalty=-2.17148 EGivenFCoherent=0.47712 LanguageModel=-7.20171 CountEF=-0.30103 MaxLexFGivenE=0.431541 MaxLexEGivenF=15.3186 IsSingletonFE=1 +1 WordPenalty=-0.868586 EGivenFCoherent=-0.139575 LanguageModel=-4.16827 CountEF=0.260099 MaxLexFGivenE=-0.661872 MaxLexEGivenF=0.91017 IsSingletonFE=1 +0 WordPenalty=0.868586 EGivenFCoherent=0.139575 LanguageModel=4.16827 CountEF=-0.260099 MaxLexFGivenE=0.661872 MaxLexEGivenF=-0.91017 IsSingletonFE=-1 +0 Glue=3 WordPenalty=0.868586 EGivenFCoherent=-0.440605 LanguageModel=3.58391 CountEF=0.43619 MaxLexFGivenE=0.936722 MaxLexEGivenF=-0.42316 +1 Glue=-3 WordPenalty=-0.868586 EGivenFCoherent=0.440605 LanguageModel=-3.58391 CountEF=-0.43619 MaxLexFGivenE=-0.936722 MaxLexEGivenF=0.42316 +0 Glue=1 WordPenalty=-0.868594 EGivenFCoherent=3.32222 LanguageModel=-0.308193 SampleCountF=2.47857 CountEF=-0.397939 MaxLexFGivenE=1.45859 MaxLexEGivenF=11.5139 IsSingletonFE=1 +1 Glue=-1 WordPenalty=0.868594 EGivenFCoherent=-3.32222 LanguageModel=0.308193 SampleCountF=-2.47857 CountEF=0.397939 MaxLexFGivenE=-1.45859 MaxLexEGivenF=-11.5139 IsSingletonFE=-1 +0 Glue=1 WordPenalty=0.434295 LanguageModel=1.80157 MaxLexFGivenE=0.29866 MaxLexEGivenF=0.79921 +1 Glue=-1 WordPenalty=-0.434295 LanguageModel=-1.80157 MaxLexFGivenE=-0.29866 MaxLexEGivenF=-0.79921 +0 Glue=1 WordPenalty=0.434291 EGivenFCoherent=0.770595 LanguageModel=1.55462 CountEF=-0.75841 MaxLexEGivenF=0.780331 +1 
Glue=-1 WordPenalty=-0.434291 EGivenFCoherent=-0.770595 LanguageModel=-1.55462 CountEF=0.75841 MaxLexEGivenF=-0.780331 +1 Glue=2 WordPenalty=-0.434295 EGivenFCoherent=-0.66901 LanguageModel=-1.88269 CountEF=0.57403 MaxLexFGivenE=0.212633 MaxLexEGivenF=0.88933 +0 Glue=-2 WordPenalty=0.434295 EGivenFCoherent=0.66901 LanguageModel=1.88269 CountEF=-0.57403 MaxLexFGivenE=-0.212633 MaxLexEGivenF=-0.88933 +0 EGivenFCoherent=1.20828 LanguageModel=1.3515 SampleCountF=2.47857 CountEF=1.14613 MaxLexFGivenE=-0.638062 MaxLexEGivenF=-2.22609 IsSingletonFE=-1 +1 EGivenFCoherent=-1.20828 LanguageModel=-1.3515 SampleCountF=-2.47857 CountEF=-1.14613 MaxLexFGivenE=0.638062 MaxLexEGivenF=2.22609 IsSingletonFE=1 +0 Glue=2 EGivenFCoherent=0.90558 LanguageModel=1.80899 SampleCountF=2.47857 CountEF=1.47712 MaxLexFGivenE=0.078902 MaxLexEGivenF=-0.922168 IsSingletonFE=-1 +1 Glue=-2 EGivenFCoherent=-0.90558 LanguageModel=-1.80899 SampleCountF=-2.47857 CountEF=-1.47712 MaxLexFGivenE=-0.078902 MaxLexEGivenF=0.922168 IsSingletonFE=1 +1 Glue=2 WordPenalty=-0.434295 EGivenFCoherent=-0.440605 LanguageModel=-1.25416 CountEF=0.43619 MaxLexFGivenE=0.303513 MaxLexEGivenF=-0.82723 +0 Glue=-2 WordPenalty=0.434295 EGivenFCoherent=0.440605 LanguageModel=1.25416 CountEF=-0.43619 MaxLexFGivenE=-0.303513 MaxLexEGivenF=0.82723 +1 WordPenalty=0.868586 EGivenFCoherent=-1.31527 LanguageModel=0.051496 CountEF=1.19728 MaxLexFGivenE=0.685097 MaxLexEGivenF=-3.1609 +0 WordPenalty=-0.868586 EGivenFCoherent=1.31527 LanguageModel=-0.051496 CountEF=-1.19728 MaxLexFGivenE=-0.685097 MaxLexEGivenF=3.1609 +1 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-2.233 LanguageModel=2.2448 CountEF=1.9345 MaxLexFGivenE=0.685097 MaxLexEGivenF=-13.064 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=2.233 LanguageModel=-2.2448 CountEF=-1.9345 MaxLexFGivenE=-0.685097 MaxLexEGivenF=13.064 IsSingletonFE=1 +0 Glue=2 WordPenalty=3.04006 EGivenFCoherent=-2.53403 LanguageModel=9.65304 CountEF=2.11059 MaxLexFGivenE=0.661872 MaxLexEGivenF=-16.3551 IsSingletonFE=-2 +1 Glue=-2 WordPenalty=-3.04006 EGivenFCoherent=2.53403 LanguageModel=-9.65304 CountEF=-2.11059 MaxLexFGivenE=-0.661872 MaxLexEGivenF=16.3551 IsSingletonFE=2 +0 Glue=1 WordPenalty=2.60577 EGivenFCoherent=-0.4614 LanguageModel=7.6285 SampleCountF=2.47857 CountEF=2.69897 MaxLexFGivenE=0.669929 MaxLexEGivenF=-14.8194 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=-2.60577 EGivenFCoherent=0.4614 LanguageModel=-7.6285 SampleCountF=-2.47857 CountEF=-2.69897 MaxLexFGivenE=-0.669929 MaxLexEGivenF=14.8194 IsSingletonFE=1 +0 Glue=1 WordPenalty=0.434291 LanguageModel=2.87433 MaxLexFGivenE=-0.047035 MaxLexEGivenF=-0.4698 +1 Glue=-1 WordPenalty=-0.434291 LanguageModel=-2.87433 MaxLexFGivenE=0.047035 MaxLexEGivenF=0.4698 +0 Glue=-1 WordPenalty=2.17148 EGivenFCoherent=-0.302477 LanguageModel=6.37 SampleCountF=2.47857 CountEF=2.54158 MaxLexFGivenE=0.078902 MaxLexEGivenF=-16.7653 IsSingletonFE=-1 +1 Glue=1 WordPenalty=-2.17148 EGivenFCoherent=0.302477 LanguageModel=-6.37 SampleCountF=-2.47857 CountEF=-2.54158 MaxLexFGivenE=-0.078902 MaxLexEGivenF=16.7653 IsSingletonFE=1 +0 Glue=1 WordPenalty=-1.30289 EGivenFCoherent=0.54407 LanguageModel=-2.60551 CountEF=-0.47712 MaxLexFGivenE=0.198705 MaxLexEGivenF=11.5875 +1 Glue=-1 WordPenalty=1.30289 EGivenFCoherent=-0.54407 LanguageModel=2.60551 CountEF=0.47712 MaxLexFGivenE=-0.198705 MaxLexEGivenF=-11.5875 +0 EGivenFCoherent=1.01974 LanguageModel=1.18003 SampleCountF=2.47857 CountEF=1.63849 MaxLexFGivenE=0.669929 MaxLexEGivenF=3.65155 IsSingletonFE=1 +1 EGivenFCoherent=-1.01974 
LanguageModel=-1.18003 SampleCountF=-2.47857 CountEF=-1.63849 MaxLexFGivenE=-0.669929 MaxLexEGivenF=-3.65155 IsSingletonFE=-1 +0 Glue=-1 LanguageModel=0.354 MaxLexFGivenE=0.06692 MaxLexEGivenF=-2.33946 +1 Glue=1 LanguageModel=-0.354 MaxLexFGivenE=-0.06692 MaxLexEGivenF=2.33946 +1 WordPenalty=-0.434292 EGivenFCoherent=-2.47459 LanguageModel=-3.35474 SampleCountF=-2.47857 CountEF=-0.181111 MaxLexFGivenE=-1.65369 MaxLexEGivenF=0.359496 +0 WordPenalty=0.434292 EGivenFCoherent=2.47459 LanguageModel=3.35474 SampleCountF=2.47857 CountEF=0.181111 MaxLexFGivenE=1.65369 MaxLexEGivenF=-0.359496 +0 WordPenalty=-1.73718 EGivenFCoherent=1.49136 LanguageModel=-4.62883 CountEF=-1.32222 MaxLexFGivenE=0.048423 MaxLexEGivenF=12.4176 +1 WordPenalty=1.73718 EGivenFCoherent=-1.49136 LanguageModel=4.62883 CountEF=1.32222 MaxLexFGivenE=-0.048423 MaxLexEGivenF=-12.4176 +0 WordPenalty=-1.30289 EGivenFCoherent=1.14613 LanguageModel=-2.4337 CountEF=-0.87506 MaxLexEGivenF=13.2714 IsSingletonFE=1 +1 WordPenalty=1.30289 EGivenFCoherent=-1.14613 LanguageModel=2.4337 CountEF=0.87506 MaxLexEGivenF=-13.2714 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-1.30288 EGivenFCoherent=1.68875 LanguageModel=-2.26252 SampleCountF=2.47857 CountEF=1.06446 MaxLexFGivenE=0.031867 MaxLexEGivenF=7.51625 IsSingletonFE=1 +1 Glue=1 WordPenalty=1.30288 EGivenFCoherent=-1.68875 LanguageModel=2.26252 SampleCountF=-2.47857 CountEF=-1.06446 MaxLexFGivenE=-0.031867 MaxLexEGivenF=-7.51625 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-1.30288 EGivenFCoherent=2.77562 LanguageModel=-2.23728 SampleCountF=2.47857 CountEF=0.00502 MaxLexFGivenE=0.031867 MaxLexEGivenF=6.00387 IsSingletonFE=1 +1 Glue=1 WordPenalty=1.30288 EGivenFCoherent=-2.77562 LanguageModel=2.23728 SampleCountF=-2.47857 CountEF=-0.00502 MaxLexFGivenE=-0.031867 MaxLexEGivenF=-6.00387 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-0.434294 EGivenFCoherent=0.86082 LanguageModel=-0.466049 SampleCountF=2.47857 CountEF=1.79588 MaxLexFGivenE=0.669929 MaxLexEGivenF=4.55991 IsSingletonFE=1 +1 Glue=-1 WordPenalty=0.434294 EGivenFCoherent=-0.86082 LanguageModel=0.466049 SampleCountF=-2.47857 CountEF=-1.79588 MaxLexFGivenE=-0.669929 MaxLexEGivenF=-4.55991 IsSingletonFE=-1 +0 Glue=2 WordPenalty=-0.434294 EGivenFCoherent=2.17609 LanguageModel=0.885934 SampleCountF=2.47857 CountEF=0.5986 MaxLexFGivenE=1.65369 MaxLexEGivenF=5.26586 IsSingletonFE=1 +1 Glue=-2 WordPenalty=0.434294 EGivenFCoherent=-2.17609 LanguageModel=-0.885934 SampleCountF=-2.47857 CountEF=-0.5986 MaxLexFGivenE=-1.65369 MaxLexEGivenF=-5.26586 IsSingletonFE=-1 +0 Glue=2 WordPenalty=0.434296 LanguageModel=1.06428 MaxLexFGivenE=0.936722 MaxLexEGivenF=-0.9417 +1 Glue=-2 WordPenalty=-0.434296 LanguageModel=-1.06428 MaxLexFGivenE=-0.936722 MaxLexEGivenF=0.9417 +0 Glue=3 WordPenalty=1.30289 EGivenFCoherent=0.3837 LanguageModel=4.61236 SampleCountF=2.47857 CountEF=2.09691 MaxLexFGivenE=1.45859 MaxLexEGivenF=-8.37634 +1 Glue=-3 WordPenalty=-1.30289 EGivenFCoherent=-0.3837 LanguageModel=-4.61236 SampleCountF=-2.47857 CountEF=-2.09691 MaxLexFGivenE=-1.45859 MaxLexEGivenF=8.37634 +0 WordPenalty=1.73718 EGivenFCoherent=-4.31218 LanguageModel=3.16019 SampleCountF=-2.47857 CountEF=1.33244 MaxLexFGivenE=-1.60526 MaxLexEGivenF=-8.18409 IsSingletonFE=-2 +1 WordPenalty=-1.73718 EGivenFCoherent=4.31218 LanguageModel=-3.16019 SampleCountF=2.47857 CountEF=-1.33244 MaxLexFGivenE=1.60526 MaxLexEGivenF=8.18409 IsSingletonFE=2 +1 WordPenalty=-1.73717 EGivenFCoherent=3.57054 LanguageModel=-4.17727 SampleCountF=2.47857 CountEF=-0.720159 MaxLexFGivenE=0.912064 MaxLexEGivenF=7.86395 
IsSingletonFE=1 +0 WordPenalty=1.73717 EGivenFCoherent=-3.57054 LanguageModel=4.17727 SampleCountF=-2.47857 CountEF=0.720159 MaxLexFGivenE=-0.912064 MaxLexEGivenF=-7.86395 IsSingletonFE=-1 +0 Glue=2 WordPenalty=1.73717 EGivenFCoherent=-3.57054 LanguageModel=4.02723 SampleCountF=-2.47857 CountEF=0.720159 MaxLexFGivenE=-0.716964 MaxLexEGivenF=-5.79452 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=-1.73717 EGivenFCoherent=3.57054 LanguageModel=-4.02723 SampleCountF=2.47857 CountEF=-0.720159 MaxLexFGivenE=0.716964 MaxLexEGivenF=5.79452 IsSingletonFE=1 +1 Glue=-1 WordPenalty=-1.73717 EGivenFCoherent=2.6167 LanguageModel=-5.62476 SampleCountF=2.47857 CountEF=0.16241 MaxLexFGivenE=-0.266793 MaxLexEGivenF=6.22826 IsSingletonFE=1 +0 Glue=1 WordPenalty=1.73717 EGivenFCoherent=-2.6167 LanguageModel=5.62476 SampleCountF=-2.47857 CountEF=-0.16241 MaxLexFGivenE=0.266793 MaxLexEGivenF=-6.22826 IsSingletonFE=-1 +0 Glue=1 WordPenalty=1.73717 EGivenFCoherent=-3.57054 LanguageModel=3.66587 SampleCountF=-2.47857 CountEF=0.720159 MaxLexFGivenE=-0.716964 MaxLexEGivenF=-6.64236 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=-1.73717 EGivenFCoherent=3.57054 LanguageModel=-3.66587 SampleCountF=2.47857 CountEF=-0.720159 MaxLexFGivenE=0.716964 MaxLexEGivenF=6.64236 IsSingletonFE=1 +0 WordPenalty=1.73717 EGivenFCoherent=-2.77562 LanguageModel=4.33283 SampleCountF=-2.47857 CountEF=-0.00502 MaxLexFGivenE=-0.2445 MaxLexEGivenF=-6.30239 IsSingletonFE=-1 +1 WordPenalty=-1.73717 EGivenFCoherent=2.77562 LanguageModel=-4.33283 SampleCountF=2.47857 CountEF=0.00502 MaxLexFGivenE=0.2445 MaxLexEGivenF=6.30239 IsSingletonFE=1 +1 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=3.75588 LanguageModel=-3.1453 SampleCountF=2.47857 CountEF=-0.9345 MaxLexFGivenE=1.22826 MaxLexEGivenF=7.00834 IsSingletonFE=1 +0 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-3.75588 LanguageModel=3.1453 SampleCountF=-2.47857 CountEF=0.9345 MaxLexFGivenE=-1.22826 MaxLexEGivenF=-7.00834 IsSingletonFE=-1 +1 Glue=1 WordPenalty=-1.73717 EGivenFCoherent=3.31527 LanguageModel=-4.43724 SampleCountF=2.47857 CountEF=-0.49831 MaxLexFGivenE=0.716964 MaxLexEGivenF=6.93421 IsSingletonFE=1 +0 Glue=-1 WordPenalty=1.73717 EGivenFCoherent=-3.31527 LanguageModel=4.43724 SampleCountF=-2.47857 CountEF=0.49831 MaxLexFGivenE=-0.716964 MaxLexEGivenF=-6.93421 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=-1.73717 EGivenFCoherent=3.60745 LanguageModel=-5.69012 SampleCountF=2.47857 CountEF=-0.54753 MaxLexFGivenE=0.716964 MaxLexEGivenF=7.79405 IsSingletonFE=2 +0 Glue=1 WordPenalty=1.73717 EGivenFCoherent=-3.60745 LanguageModel=5.69012 SampleCountF=-2.47857 CountEF=0.54753 MaxLexFGivenE=-0.716964 MaxLexEGivenF=-7.79405 IsSingletonFE=-2 +1 WordPenalty=-1.73718 EGivenFCoherent=3.15382 LanguageModel=-3.02521 SampleCountF=2.47857 CountEF=-0.53656 MaxLexFGivenE=1.27198 MaxLexEGivenF=6.46593 +0 WordPenalty=1.73718 EGivenFCoherent=-3.15382 LanguageModel=3.02521 SampleCountF=-2.47857 CountEF=0.53656 MaxLexFGivenE=-1.27198 MaxLexEGivenF=-6.46593 +1 Glue=-1 WordPenalty=-2.17147 EGivenFCoherent=3.31527 LanguageModel=-5.27565 SampleCountF=2.47857 CountEF=-0.49831 MaxLexFGivenE=-0.266793 MaxLexEGivenF=7.50549 IsSingletonFE=1 +0 Glue=1 WordPenalty=2.17147 EGivenFCoherent=-3.31527 LanguageModel=5.27565 SampleCountF=-2.47857 CountEF=0.49831 MaxLexFGivenE=0.266793 MaxLexEGivenF=-7.50549 IsSingletonFE=-1 +1 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=4.86982 LanguageModel=-6.90259 SampleCountF=2.47857 CountEF=-1.7796 MaxLexFGivenE=0.669929 MaxLexEGivenF=9.16464 IsSingletonFE=2 +0 Glue=2 WordPenalty=2.17147 
EGivenFCoherent=-4.86982 LanguageModel=6.90259 SampleCountF=-2.47857 CountEF=1.7796 MaxLexFGivenE=-0.669929 MaxLexEGivenF=-9.16464 IsSingletonFE=-2 +0 WordPenalty=1.30288 EGivenFCoherent=-3.32222 LanguageModel=2.73793 SampleCountF=-2.47857 CountEF=0.27646 MaxLexFGivenE=-1.60526 MaxLexEGivenF=-6.61501 IsSingletonFE=-2 +1 WordPenalty=-1.30288 EGivenFCoherent=3.32222 LanguageModel=-2.73793 SampleCountF=2.47857 CountEF=-0.27646 MaxLexFGivenE=1.60526 MaxLexEGivenF=6.61501 IsSingletonFE=2 +1 WordPenalty=-1.30288 EGivenFCoherent=2.77562 LanguageModel=-3.09631 SampleCountF=2.47857 CountEF=0.00502 MaxLexFGivenE=0.912064 MaxLexEGivenF=6.59878 IsSingletonFE=1 +0 WordPenalty=1.30288 EGivenFCoherent=-2.77562 LanguageModel=3.09631 SampleCountF=-2.47857 CountEF=-0.00502 MaxLexFGivenE=-0.912064 MaxLexEGivenF=-6.59878 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=1.30288 EGivenFCoherent=-1.39794 LanguageModel=3.35668 SampleCountF=-2.47857 CountEF=-1.14267 MaxLexFGivenE=-0.716964 MaxLexEGivenF=-5.38511 +1 Glue=1 WordPenalty=-1.30288 EGivenFCoherent=1.39794 LanguageModel=-3.35668 SampleCountF=2.47857 CountEF=1.14267 MaxLexFGivenE=0.716964 MaxLexEGivenF=5.38511 +1 WordPenalty=-1.30288 EGivenFCoherent=1.83855 LanguageModel=-2.5783 SampleCountF=2.47857 CountEF=0.706478 MaxLexFGivenE=0.801704 MaxLexEGivenF=4.58219 +0 WordPenalty=1.30288 EGivenFCoherent=-1.83855 LanguageModel=2.5783 SampleCountF=-2.47857 CountEF=-0.706478 MaxLexFGivenE=-0.801704 MaxLexEGivenF=-4.58219 +1 Glue=-2 WordPenalty=-1.30288 EGivenFCoherent=2.77562 LanguageModel=-2.43421 SampleCountF=2.47857 CountEF=0.00502 MaxLexFGivenE=0.716964 MaxLexEGivenF=3.36624 IsSingletonFE=1 +0 Glue=2 WordPenalty=1.30288 EGivenFCoherent=-2.77562 LanguageModel=2.43421 SampleCountF=-2.47857 CountEF=-0.00502 MaxLexFGivenE=-0.716964 MaxLexEGivenF=-3.36624 IsSingletonFE=-1 +1 Glue=2 WordPenalty=4.77724 EGivenFCoherent=-3.38021 LanguageModel=13.8354 CountEF=2.60962 MaxLexFGivenE=1.2787 MaxLexEGivenF=-28.6474 IsSingletonFE=-4 +0 Glue=-2 WordPenalty=-4.77724 EGivenFCoherent=3.38021 LanguageModel=-13.8354 CountEF=-2.60962 MaxLexFGivenE=-1.2787 MaxLexEGivenF=28.6474 IsSingletonFE=4 +1 WordPenalty=4.34294 EGivenFCoherent=-2.5673 LanguageModel=12.2858 CountEF=1.95964 MaxLexFGivenE=1.2787 MaxLexEGivenF=-27.739 IsSingletonFE=-3 +0 WordPenalty=-4.34294 EGivenFCoherent=2.5673 LanguageModel=-12.2858 CountEF=-1.95964 MaxLexFGivenE=-1.2787 MaxLexEGivenF=27.739 IsSingletonFE=3 +0 Glue=2 WordPenalty=-4.34294 EGivenFCoherent=2.15915 LanguageModel=-11.9478 CountEF=-1.75028 MaxLexEGivenF=27.5591 IsSingletonFE=2 +1 Glue=-2 WordPenalty=4.34294 EGivenFCoherent=-2.15915 LanguageModel=11.9478 CountEF=1.75028 MaxLexEGivenF=-27.5591 IsSingletonFE=-2 +0 WordPenalty=-4.34294 EGivenFCoherent=2.93785 LanguageModel=-11.9478 CountEF=-2.41687 MaxLexEGivenF=27.5591 IsSingletonFE=2 +1 WordPenalty=4.34294 EGivenFCoherent=-2.93785 LanguageModel=11.9478 CountEF=2.41687 MaxLexEGivenF=-27.5591 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-4.34294 EGivenFCoherent=2.31069 LanguageModel=-12.2858 CountEF=-1.67634 MaxLexFGivenE=-1.2787 MaxLexEGivenF=27.739 IsSingletonFE=3 +1 Glue=-1 WordPenalty=4.34294 EGivenFCoherent=-2.31069 LanguageModel=12.2858 CountEF=1.67634 MaxLexFGivenE=1.2787 MaxLexEGivenF=-27.739 IsSingletonFE=-3 +1 Glue=1 WordPenalty=3.47435 EGivenFCoherent=-1.22386 LanguageModel=10.4707 CountEF=0.844031 MaxLexFGivenE=0.021104 MaxLexEGivenF=-17.7071 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.47435 EGivenFCoherent=1.22386 LanguageModel=-10.4707 CountEF=-0.844031 MaxLexFGivenE=-0.021104 MaxLexEGivenF=17.7071 
IsSingletonFE=2 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=0.264601 LanguageModel=-9.31313 CountEF=-0.079879 MaxLexFGivenE=-1.91159 MaxLexEGivenF=16.3974 IsSingletonFE=1 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-0.264601 LanguageModel=9.31313 CountEF=0.079879 MaxLexFGivenE=1.91159 MaxLexEGivenF=-16.3974 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=2.25862 LanguageModel=-8.39698 CountEF=-1.64014 MaxLexFGivenE=1.2787 MaxLexEGivenF=17.5719 IsSingletonFE=3 +1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-2.25862 LanguageModel=8.39698 CountEF=1.64014 MaxLexFGivenE=-1.2787 MaxLexEGivenF=-17.5719 IsSingletonFE=-3 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-0.297556 LanguageModel=8.97548 CountEF=0.080121 MaxLexFGivenE=0.021104 MaxLexEGivenF=-16.5411 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=0.297556 LanguageModel=-8.97548 CountEF=-0.080121 MaxLexFGivenE=-0.021104 MaxLexEGivenF=16.5411 IsSingletonFE=1 +1 WordPenalty=3.04006 EGivenFCoherent=0.193029 LanguageModel=10.4416 SampleCountF=2.47857 CountEF=2.12149 MaxLexFGivenE=-0.538455 MaxLexEGivenF=-17.0079 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=-0.193029 LanguageModel=-10.4416 SampleCountF=-2.47857 CountEF=-2.12149 MaxLexFGivenE=0.538455 MaxLexEGivenF=17.0079 IsSingletonFE=1 +0 WordPenalty=-3.04006 EGivenFCoherent=1.74752 LanguageModel=-8.67608 CountEF=-1.13703 MaxLexFGivenE=1.2787 MaxLexEGivenF=17.9583 IsSingletonFE=3 +1 WordPenalty=3.04006 EGivenFCoherent=-1.74752 LanguageModel=8.67608 CountEF=1.13703 MaxLexFGivenE=-1.2787 MaxLexEGivenF=-17.9583 IsSingletonFE=-3 +1 WordPenalty=3.04006 EGivenFCoherent=-1.54206 LanguageModel=8.84407 CountEF=1.05462 MaxLexFGivenE=0.021104 MaxLexEGivenF=-17.7658 IsSingletonFE=-2 +0 WordPenalty=-3.04006 EGivenFCoherent=1.54206 LanguageModel=-8.84407 CountEF=-1.05462 MaxLexFGivenE=-0.021104 MaxLexEGivenF=17.7658 IsSingletonFE=2 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=0.207609 LanguageModel=-9.10688 CountEF=-0.11261 MaxLexFGivenE=-0.021104 MaxLexEGivenF=15.3165 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-0.207609 LanguageModel=9.10688 CountEF=0.11261 MaxLexFGivenE=0.021104 MaxLexEGivenF=-15.3165 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=0.711978 LanguageModel=-8.86642 CountEF=-0.338881 MaxLexEGivenF=16.5272 IsSingletonFE=2 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-0.711978 LanguageModel=8.86642 CountEF=0.338881 MaxLexEGivenF=-16.5272 IsSingletonFE=-2 +0 Glue=-1 WordPenalty=-3.04006 EGivenFCoherent=1.321 LanguageModel=-8.97548 CountEF=-0.866449 MaxLexFGivenE=-0.021104 MaxLexEGivenF=16.5411 IsSingletonFE=2 +1 Glue=1 WordPenalty=3.04006 EGivenFCoherent=-1.321 LanguageModel=8.97548 CountEF=0.866449 MaxLexFGivenE=0.021104 MaxLexEGivenF=-16.5411 IsSingletonFE=-2 +1 Glue=-3 WordPenalty=3.04006 EGivenFCoherent=0.480363 LanguageModel=9.3135 CountEF=-0.553168 MaxLexFGivenE=1.29981 MaxLexEGivenF=-16.7211 IsSingletonFE=-1 +0 Glue=3 WordPenalty=-3.04006 EGivenFCoherent=-0.480363 LanguageModel=-9.3135 CountEF=0.553168 MaxLexFGivenE=-1.29981 MaxLexEGivenF=16.7211 IsSingletonFE=1 +0 Glue=-3 WordPenalty=-3.04006 EGivenFCoherent=2.41832 LanguageModel=-8.67608 CountEF=-1.79957 MaxLexFGivenE=1.2787 MaxLexEGivenF=17.9583 IsSingletonFE=3 +1 Glue=3 WordPenalty=3.04006 EGivenFCoherent=-2.41832 LanguageModel=8.67608 CountEF=1.79957 MaxLexFGivenE=-1.2787 MaxLexEGivenF=-17.9583 IsSingletonFE=-3 +1 Glue=3 WordPenalty=3.04006 EGivenFCoherent=-2.0997 LanguageModel=8.8664 CountEF=1.51401 MaxLexEGivenF=-16.5272 IsSingletonFE=-3 +0 Glue=-3 WordPenalty=-3.04006 
EGivenFCoherent=2.0997 LanguageModel=-8.8664 CountEF=-1.51401 MaxLexEGivenF=16.5272 IsSingletonFE=3 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-1.30384 LanguageModel=8.7957 CountEF=0.831953 MaxLexEGivenF=-17.7541 IsSingletonFE=-3 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=1.30384 LanguageModel=-8.7957 CountEF=-0.831953 MaxLexEGivenF=17.7541 IsSingletonFE=3 +1 Glue=-1 WordPenalty=3.04006 EGivenFCoherent=-1.04777 LanguageModel=9.01412 CountEF=0.671171 MaxLexEGivenF=-18.1383 IsSingletonFE=-2 +0 Glue=1 WordPenalty=-3.04006 EGivenFCoherent=1.04777 LanguageModel=-9.01412 CountEF=-0.671171 MaxLexEGivenF=18.1383 IsSingletonFE=2 +1 Glue=2 WordPenalty=3.04006 EGivenFCoherent=-2.08253 LanguageModel=8.96691 CountEF=1.46728 MaxLexFGivenE=-1.13327 MaxLexEGivenF=-18.9376 IsSingletonFE=-3 +0 Glue=-2 WordPenalty=-3.04006 EGivenFCoherent=2.08253 LanguageModel=-8.96691 CountEF=-1.46728 MaxLexFGivenE=1.13327 MaxLexEGivenF=18.9376 IsSingletonFE=3 +0 WordPenalty=-3.04006 EGivenFCoherent=0.204117 LanguageModel=-9.50052 SampleCountF=-2.47857 CountEF=-2.4346 MaxLexFGivenE=-0.021107 MaxLexEGivenF=18.9789 IsSingletonFE=1 +1 WordPenalty=3.04006 EGivenFCoherent=-0.204117 LanguageModel=9.50052 SampleCountF=2.47857 CountEF=2.4346 MaxLexFGivenE=0.021107 MaxLexEGivenF=-18.9789 IsSingletonFE=-1 +1 WordPenalty=3.04006 EGivenFCoherent=-1.76391 LanguageModel=8.73499 CountEF=1.18172 MaxLexEGivenF=-17.7519 IsSingletonFE=-3 +0 WordPenalty=-3.04006 EGivenFCoherent=1.76391 LanguageModel=-8.73499 CountEF=-1.18172 MaxLexEGivenF=17.7519 IsSingletonFE=3 +0 Glue=1 WordPenalty=-2.60576 EGivenFCoherent=-0.494059 LanguageModel=-7.50027 SampleCountF=-2.47857 CountEF=-1.9454 MaxLexEGivenF=15.5128 +1 Glue=-1 WordPenalty=2.60576 EGivenFCoherent=0.494059 LanguageModel=7.50027 SampleCountF=2.47857 CountEF=1.9454 MaxLexEGivenF=-15.5128 +0 WordPenalty=-2.60577 EGivenFCoherent=0.968592 LanguageModel=-8.17597 CountEF=-0.622182 MaxLexFGivenE=0.021104 MaxLexEGivenF=12.9159 IsSingletonFE=2 +1 WordPenalty=2.60577 EGivenFCoherent=-0.968592 LanguageModel=8.17597 CountEF=0.622182 MaxLexFGivenE=-0.021104 MaxLexEGivenF=-12.9159 IsSingletonFE=-2 +0 Glue=2 WordPenalty=-3.04006 EGivenFCoherent=0.156459 LanguageModel=-9.60688 CountEF=0.058087 MaxLexFGivenE=-1.36101 MaxLexEGivenF=16.5951 IsSingletonFE=2 +1 Glue=-2 WordPenalty=3.04006 EGivenFCoherent=-0.156459 LanguageModel=9.60688 CountEF=-0.058087 MaxLexFGivenE=1.36101 MaxLexEGivenF=-16.5951 IsSingletonFE=-2 +1 WordPenalty=3.04006 EGivenFCoherent=-0.508641 LanguageModel=9.71594 CountEF=0.306659 MaxLexFGivenE=1.38211 MaxLexEGivenF=-16.609 IsSingletonFE=-1 +0 WordPenalty=-3.04006 EGivenFCoherent=0.508641 LanguageModel=-9.71594 CountEF=-0.306659 MaxLexFGivenE=-1.38211 MaxLexEGivenF=16.609 IsSingletonFE=1 +0 Glue=4 WordPenalty=-1.73718 EGivenFCoherent=-1.25929 LanguageModel=-5.58463 CountEF=1.08025 MaxLexFGivenE=-1.29552 MaxLexEGivenF=10.6826 IsSingletonFE=-1 +1 Glue=-4 WordPenalty=1.73718 EGivenFCoherent=1.25929 LanguageModel=5.58463 CountEF=-1.08025 MaxLexFGivenE=1.29552 MaxLexEGivenF=-10.6826 IsSingletonFE=1 +1 Glue=1 WordPenalty=2.17147 EGivenFCoherent=-1.00335 LanguageModel=6.71352 CountEF=0.778381 MaxLexEGivenF=-12.0215 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-2.17147 EGivenFCoherent=1.00335 LanguageModel=-6.71352 CountEF=-0.778381 MaxLexEGivenF=12.0215 IsSingletonFE=1 +1 Glue=-2 WordPenalty=2.17147 EGivenFCoherent=0.315271 LanguageModel=6.8226 CountEF=-0.209609 MaxLexFGivenE=0.021104 MaxLexEGivenF=-12.0354 IsSingletonFE=1 +0 Glue=2 WordPenalty=-2.17147 EGivenFCoherent=-0.315271 LanguageModel=-6.8226 
CountEF=0.209609 MaxLexFGivenE=-0.021104 MaxLexEGivenF=12.0354 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=0.922832 LanguageModel=-6.99262 CountEF=-0.66794 MaxLexEGivenF=12.4079 IsSingletonFE=1 +1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-0.922832 LanguageModel=6.99262 CountEF=0.66794 MaxLexEGivenF=-12.4079 IsSingletonFE=-1 +1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-1.46288 LanguageModel=6.71351 CountEF=1.00563 MaxLexEGivenF=-12.0215 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=1.46288 LanguageModel=-6.71351 CountEF=-1.00563 MaxLexEGivenF=12.0215 IsSingletonFE=2 +0 Glue=1 WordPenalty=-2.17147 EGivenFCoherent=-0.3672 LanguageModel=-7.16062 CountEF=0.35002 MaxLexFGivenE=-1.29981 MaxLexEGivenF=12.2153 +1 Glue=-1 WordPenalty=2.17147 EGivenFCoherent=0.3672 LanguageModel=7.16062 CountEF=-0.35002 MaxLexFGivenE=1.29981 MaxLexEGivenF=-12.2153 +0 WordPenalty=-1.30288 EGivenFCoherent=0.76839 LanguageModel=-3.10234 SampleCountF=-2.47857 CountEF=-3.10577 MaxLexFGivenE=0.004288 MaxLexEGivenF=9.58038 +1 WordPenalty=1.30288 EGivenFCoherent=-0.76839 LanguageModel=3.10234 SampleCountF=2.47857 CountEF=3.10577 MaxLexFGivenE=-0.004288 MaxLexEGivenF=-9.58038 +1 WordPenalty=1.30288 EGivenFCoherent=0.193029 LanguageModel=2.98946 SampleCountF=2.47857 CountEF=2.12149 MaxLexFGivenE=-0.117194 MaxLexEGivenF=-11.1205 IsSingletonFE=-1 +0 WordPenalty=-1.30288 EGivenFCoherent=-0.193029 LanguageModel=-2.98946 SampleCountF=-2.47857 CountEF=-2.12149 MaxLexFGivenE=0.117194 MaxLexEGivenF=11.1205 IsSingletonFE=1 +0 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=-0.051151 LanguageModel=-5.45321 SampleCountF=-2.47857 CountEF=-2.26067 MaxLexEGivenF=11.1131 IsSingletonFE=1 +1 Glue=1 WordPenalty=1.73718 EGivenFCoherent=0.051151 LanguageModel=5.45321 SampleCountF=2.47857 CountEF=2.26067 MaxLexEGivenF=-11.1131 IsSingletonFE=-1 +1 Glue=4 WordPenalty=1.73718 EGivenFCoherent=-1.23888 LanguageModel=5.34413 SampleCountF=2.47857 CountEF=3.30989 MaxLexFGivenE=-0.021101 MaxLexEGivenF=-11.0992 IsSingletonFE=-2 +0 Glue=-4 WordPenalty=-1.73718 EGivenFCoherent=1.23888 LanguageModel=-5.34413 SampleCountF=-2.47857 CountEF=-3.30989 MaxLexFGivenE=0.021101 MaxLexEGivenF=11.0992 IsSingletonFE=2 +0 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=0.425969 LanguageModel=-5.32057 SampleCountF=-2.47857 CountEF=-2.62865 MaxLexEGivenF=11.1131 IsSingletonFE=1 +1 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-0.425969 LanguageModel=5.32057 SampleCountF=2.47857 CountEF=2.62865 MaxLexEGivenF=-11.1131 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=-0.051151 LanguageModel=-5.9801 SampleCountF=-2.47857 CountEF=-2.26067 MaxLexFGivenE=0.193304 MaxLexEGivenF=11.8441 IsSingletonFE=1 +1 Glue=1 WordPenalty=1.73718 EGivenFCoherent=0.051151 LanguageModel=5.9801 SampleCountF=2.47857 CountEF=2.26067 MaxLexFGivenE=-0.193304 MaxLexEGivenF=-11.8441 IsSingletonFE=-1 +0 Glue=1 WordPenalty=-1.73718 EGivenFCoherent=-0.989003 LanguageModel=-3.87871 SampleCountF=-2.47857 CountEF=-1.5762 MaxLexFGivenE=-0.021107 MaxLexEGivenF=11.127 IsSingletonFE=-1 +1 Glue=-1 WordPenalty=1.73718 EGivenFCoherent=0.989003 LanguageModel=3.87871 SampleCountF=2.47857 CountEF=1.5762 MaxLexFGivenE=0.021107 MaxLexEGivenF=-11.127 IsSingletonFE=1 +1 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-0.681239 LanguageModel=5.67594 SampleCountF=2.47857 CountEF=2.8505 MaxLexFGivenE=-0.632885 MaxLexEGivenF=-11.4229 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=0.681239 LanguageModel=-5.67594 SampleCountF=-2.47857 CountEF=-2.8505 
MaxLexFGivenE=0.632885 MaxLexEGivenF=11.4229 IsSingletonFE=1 +1 WordPenalty=1.73718 EGivenFCoherent=0.051151 LanguageModel=5.75438 SampleCountF=2.47857 CountEF=2.26067 MaxLexFGivenE=-0.021101 MaxLexEGivenF=-11.0992 IsSingletonFE=-1 +0 WordPenalty=-1.73718 EGivenFCoherent=-0.051151 LanguageModel=-5.75438 SampleCountF=-2.47857 CountEF=-2.26067 MaxLexFGivenE=0.021101 MaxLexEGivenF=11.0992 IsSingletonFE=1 +1 Glue=-1 WordPenalty=1.73718 EGivenFCoherent=-0.132118 LanguageModel=4.78739 SampleCountF=2.47857 CountEF=2.70088 MaxLexFGivenE=0.021107 MaxLexEGivenF=-11.127 IsSingletonFE=1 +0 Glue=1 WordPenalty=-1.73718 EGivenFCoherent=0.132118 LanguageModel=-4.78739 SampleCountF=-2.47857 CountEF=-2.70088 MaxLexFGivenE=-0.021107 MaxLexEGivenF=11.127 IsSingletonFE=-1 +0 Glue=-2 WordPenalty=-1.73718 EGivenFCoherent=0.345447 LanguageModel=-3.76964 SampleCountF=-2.47857 CountEF=-2.51821 MaxLexEGivenF=11.1131 IsSingletonFE=1 +1 Glue=2 WordPenalty=1.73718 EGivenFCoherent=-0.345447 LanguageModel=3.76964 SampleCountF=2.47857 CountEF=2.51821 MaxLexEGivenF=-11.1131 IsSingletonFE=-1 +0 Glue=3 WordPenalty=-1.73718 EGivenFCoherent=-1.70916 LanguageModel=-5.75438 SampleCountF=-2.47857 CountEF=-0.770243 MaxLexFGivenE=0.021101 MaxLexEGivenF=11.0992 IsSingletonFE=1 +1 Glue=-3 WordPenalty=1.73718 EGivenFCoherent=1.70916 LanguageModel=5.75438 SampleCountF=2.47857 CountEF=0.770243 MaxLexFGivenE=-0.021101 MaxLexEGivenF=-11.0992 IsSingletonFE=-1 +0 Glue=-1 WordPenalty=-1.73718 EGivenFCoherent=0.318855 LanguageModel=-5.75438 SampleCountF=-2.47857 CountEF=-2.62664 MaxLexFGivenE=0.021101 MaxLexEGivenF=11.0992 IsSingletonFE=1 +1 Glue=1 WordPenalty=1.73718 EGivenFCoherent=-0.318855 LanguageModel=5.75438 SampleCountF=2.47857 CountEF=2.62664 MaxLexFGivenE=-0.021101 MaxLexEGivenF=-11.0992 IsSingletonFE=-1 +1 WordPenalty=2.17147 EGivenFCoherent=-0.028027 LanguageModel=5.08886 SampleCountF=2.47857 CountEF=2.30966 MaxLexFGivenE=-0.01681 MaxLexEGivenF=-11.0208 IsSingletonFE=-1 +0 WordPenalty=-2.17147 EGivenFCoherent=0.028027 LanguageModel=-5.08886 SampleCountF=-2.47857 CountEF=-2.30966 MaxLexFGivenE=0.01681 MaxLexEGivenF=11.0208 IsSingletonFE=1 +1 Glue=-3 WordPenalty=2.17147 EGivenFCoherent=0.494059 LanguageModel=6.5761 SampleCountF=2.47857 CountEF=1.9454 MaxLexFGivenE=-0.611781 MaxLexEGivenF=-11.6979 +0 Glue=3 WordPenalty=-2.17147 EGivenFCoherent=-0.494059 LanguageModel=-6.5761 SampleCountF=-2.47857 CountEF=-1.9454 MaxLexFGivenE=0.611781 MaxLexEGivenF=11.6979 +0 Glue=-4 WordPenalty=-1.73718 EGivenFCoherent=1.23888 LanguageModel=-5.76013 SampleCountF=-2.47857 CountEF=-3.30989 MaxLexFGivenE=0.01681 MaxLexEGivenF=11.7235 IsSingletonFE=2 +1 Glue=4 WordPenalty=1.73718 EGivenFCoherent=-1.23888 LanguageModel=5.76013 SampleCountF=2.47857 CountEF=3.30989 MaxLexFGivenE=-0.01681 MaxLexEGivenF=-11.7235 IsSingletonFE=-2 +0 Glue=-2 WordPenalty=-2.17147 EGivenFCoherent=0.743387 LanguageModel=-6.94955 SampleCountF=-2.47857 CountEF=-2.75229 MaxLexEGivenF=13.8684 IsSingletonFE=2 +1 Glue=2 WordPenalty=2.17147 EGivenFCoherent=-0.743387 LanguageModel=6.94955 SampleCountF=2.47857 CountEF=2.75229 MaxLexEGivenF=-13.8684 IsSingletonFE=-2
diff --git a/tests/issues/5/test-error.sh b/tests/issues/5/test-error.sh
new file mode 100755
index 00000000..49fae40c
--- /dev/null
+++ b/tests/issues/5/test-error.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+scriptDir=$(cd $(dirname $0); pwd)
+
+set -u
+autoreconf -ifv
+./configure --with-boost=$HOME/prefix --disable-gtest
+make clean
+make -j32
+
+set +eo pipefail
+make -j32
+
+echo >&2 "============================="
+echo >&2 "TESTING: $(git log | head -n1 | cut -f2 -d' ')"
+echo >&2 "============================="
+zcat $scriptDir/mapoutput.abj.gz \
+ | $scriptDir/cdec/pro-train/mr_pro_reduce --weights $scriptDir/weights.0 -C 500 -y 5000 --interpolate_with_weights 1
+echo >&2 "============================="
+
+sleep 5
diff --git a/tests/issues/5/weights.0 b/tests/issues/5/weights.0
new file mode 100644
index 00000000..7475a6bb
--- /dev/null
+++ b/tests/issues/5/weights.0
@@ -0,0 +1,6 @@
+Glue -0.1
+WordPenalty -1.0
+EGivenFCoherent -0.25
+PassThrough -0.35
+LanguageModel 0.30
+LanguageModel_OOV -1.25
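Each record in the block of feature vectors above is one binary classification instance in the same format that test-error.sh pipes into mr_pro_reduce: a 0/1 label followed by space-separated feat=value pairs, with instances arriving in mirrored pairs whose feature values are exact negations of each other, one per direction of a pairwise candidate comparison. A minimal standalone sketch of how such a line can be scored against the name/value pairs in weights.0 (illustrative only, not code from this commit; mr_pro_reduce itself fits a logistic-regression model rather than just taking a dot product):

    // Illustrative sketch: score one "label feat=val feat=val ..."
    // instance against a weights.0-style file.
    #include <cstdlib>
    #include <fstream>
    #include <iostream>
    #include <map>
    #include <sstream>
    #include <string>

    int main() {
      std::map<std::string, double> w;
      std::ifstream wf("weights.0");  // assumed format: one "Name value" per line
      std::string name;
      double val;
      while (wf >> name >> val) w[name] = val;

      std::string line = "1 Glue=-1 WordPenalty=3.04005 LanguageModel=5.32088";
      std::istringstream in(line);
      int y;
      in >> y;  // class label; instances come in mirrored 1/0 pairs
      double dot = 0;
      std::string tok;
      while (in >> tok) {
        std::string::size_type eq = tok.find('=');  // each token is feat=value
        dot += w[tok.substr(0, eq)] * std::atof(tok.c_str() + eq + 1);
      }
      std::cout << "y=" << y << " score=" << dot << std::endl;
      return 0;
    }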
>&2 "TESTING: $(git log | head -n1 | cut -f2 -d' ')" +echo >&2 "=============================" +zcat $scriptDir/mapoutput.abj.gz \ + | $scriptDir/cdec/pro-train/mr_pro_reduce --weights $scriptDir/weights.0 -C 500 -y 5000 --interpolate_with_weights 1 +echo >&2 "=============================" + +sleep 5 diff --git a/tests/issues/5/weights.0 b/tests/issues/5/weights.0 new file mode 100644 index 00000000..7475a6bb --- /dev/null +++ b/tests/issues/5/weights.0 @@ -0,0 +1,6 @@ +Glue -0.1 +WordPenalty -1.0 +EGivenFCoherent -0.25 +PassThrough -0.35 +LanguageModel 0.30 +LanguageModel_OOV -1.25 diff --git a/training/Makefile.am b/training/Makefile.am index 991ac210..8124b107 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -23,11 +23,17 @@ noinst_PROGRAMS = \ TESTS = lbfgs_test optimize_test -mpi_online_optimize_SOURCES = mpi_online_optimize.cc online_optimizer.cc -mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +noinst_LIBRARIES = libtraining.a +libtraining_a_SOURCES = \ + candidate_set.cc \ + optimize.cc \ + online_optimizer.cc -mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc online_optimizer.cc optimize.cc -mpi_flex_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +mpi_online_optimize_SOURCES = mpi_online_optimize.cc +mpi_online_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz + +mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc +mpi_flex_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz @@ -35,8 +41,8 @@ mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mtev mpi_extract_features_SOURCES = mpi_extract_features.cc mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc optimize.cc -mpi_batch_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc +mpi_batch_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz @@ -50,14 +56,14 @@ test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteva model1_SOURCES = model1.cc ttables.cc model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz -lbl_model_SOURCES = lbl_model.cc optimize.cc -lbl_model_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz +lbl_model_SOURCES = lbl_model.cc 
diff --git a/training/candidate_set.cc b/training/candidate_set.cc
new file mode 100644
index 00000000..8c086ece
--- /dev/null
+++ b/training/candidate_set.cc
@@ -0,0 +1,168 @@
+#include "candidate_set.h"
+
+#include <tr1/unordered_set>
+
+#include <boost/functional/hash.hpp>
+
+#include "ns.h"
+#include "filelib.h"
+#include "wordid.h"
+#include "tdict.h"
+#include "hg.h"
+#include "kbest.h"
+#include "viterbi.h"
+
+using namespace std;
+
+namespace training {
+
+struct ApproxVectorHasher {
+  static const size_t MASK = 0xFFFFFFFFull;
+  union UType {
+    double f;  // leave as double
+    size_t i;
+  };
+  static inline double round(const double x) {
+    UType t;
+    t.f = x;
+    size_t r = t.i & MASK;
+    if ((r << 1) > MASK)
+      t.i += MASK - r + 1;
+    else
+      t.i &= (1ull - MASK);
+    return t.f;
+  }
+  size_t operator()(const SparseVector<double>& x) const {
+    size_t h = 0x573915839;
+    for (SparseVector<double>::const_iterator it = x.begin(); it != x.end(); ++it) {
+      UType t;
+      t.f = it->second;
+      if (t.f) {
+        size_t z = (t.i >> 32);
+        boost::hash_combine(h, it->first);
+        boost::hash_combine(h, z);
+      }
+    }
+    return h;
+  }
+};
+
+struct ApproxVectorEquals {
+  bool operator()(const SparseVector<double>& a, const SparseVector<double>& b) const {
+    SparseVector<double>::const_iterator bit = b.begin();
+    for (SparseVector<double>::const_iterator ait = a.begin(); ait != a.end(); ++ait) {
+      if (bit == b.end() ||
+          ait->first != bit->first ||
+          ApproxVectorHasher::round(ait->second) != ApproxVectorHasher::round(bit->second))
+        return false;
+      ++bit;
+    }
+    if (bit != b.end()) return false;
+    return true;
+  }
+};
+
+struct CandidateCompare {
+  bool operator()(const Candidate& a, const Candidate& b) const {
+    ApproxVectorEquals eq;
+    return (a.ewords == b.ewords && eq(a.fmap,b.fmap));
+  }
+};
+
+struct CandidateHasher {
+  size_t operator()(const Candidate& x) const {
+    boost::hash<vector<WordID> > hhasher;
+    ApproxVectorHasher vhasher;
+    size_t ha = hhasher(x.ewords);
+    boost::hash_combine(ha, vhasher(x.fmap));
+    return ha;
+  }
+};
+
+static void ParseSparseVector(string& line, size_t cur, SparseVector<double>* out) {
+  SparseVector<double>& x = *out;
+  size_t last_start = cur;
+  size_t last_comma = string::npos;
+  while(cur <= line.size()) {
+    if (line[cur] == ' ' || cur == line.size()) {
+      if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
+        cerr << "[ERROR] " << line << endl << " position = " << cur << endl;
+        exit(1);
+      }
+      const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
+      if (cur < line.size()) line[cur] = 0;
+      const double val = strtod(&line[last_comma + 1], NULL);
+      x.set_value(fid, val);
+
+      last_comma = string::npos;
+      last_start = cur+1;
+    } else {
+      if (line[cur] == '=')
+        last_comma = cur;
+    }
+    ++cur;
+  }
+}
+
+void CandidateSet::WriteToFile(const string& file) const {
+  WriteFile wf(file);
+  ostream& out = *wf.stream();
+  out.precision(10);
+  string ss;
+  for (unsigned i = 0; i < cs.size(); ++i) {
+    out << TD::GetString(cs[i].ewords) << endl;
+    out << cs[i].fmap << endl;
+    cs[i].eval_feats.Encode(&ss);
+    out << ss << endl;
+  }
+}
+
+void CandidateSet::ReadFromFile(const string& file) {
+  cerr << "Reading candidates from " << file << endl;
+  ReadFile rf(file);
+  istream& in = *rf.stream();
+  string cand;
+  string feats;
+  string ss;
+  while(getline(in, cand)) {
+    getline(in, feats);
+    getline(in, ss);
+    assert(in);
+    cs.push_back(Candidate());
+    TD::ConvertSentence(cand, &cs.back().ewords);
+    ParseSparseVector(feats, 0, &cs.back().fmap);
+    cs.back().eval_feats = SufficientStats(ss);
+  }
+  cerr << " read " << cs.size() << " candidates\n";
+}
+
+void CandidateSet::Dedup() {
+  cerr << "Dedup in=" << cs.size();
+  tr1::unordered_set<Candidate, CandidateHasher, CandidateCompare> u;
+  while(cs.size() > 0) {
+    u.insert(cs.back());
+    cs.pop_back();
+  }
+  tr1::unordered_set<Candidate, CandidateHasher, CandidateCompare>::iterator it = u.begin();
+  while (it != u.end()) {
+    cs.push_back(*it);
+    it = u.erase(it);
+  }
+  cerr << " out=" << cs.size() << endl;
+}
+
+void CandidateSet::AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer) {
+  KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, kbest_size);
+
+  for (unsigned i = 0; i < kbest_size; ++i) {
+    const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
+      kbest.LazyKthBest(hg.nodes_.size() - 1, i);
+    if (!d) break;
+    cs.push_back(Candidate(d->yield, d->feature_values));
+    if (scorer)
+      scorer->Evaluate(d->yield, &cs.back().eval_feats);
+  }
+  Dedup();
+}
+
+}
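ApproxVectorHasher above rounds away the low 32 bits of each double's IEEE-754 bit pattern so that feature vectors differing only in noise-level digits hash and compare as duplicates, which is what Dedup() relies on through the tr1::unordered_set. One subtlety worth noting: in 64-bit arithmetic (1ull - MASK) wraps to 0xFFFFFFFF00000002, which keeps bit 0 as well as the high word, whereas the round-down branch presumably wants ~MASK (0xFFFFFFFF00000000). A self-contained sketch of the same rounding idea, assuming a 64-bit platform and IEEE-754 doubles, using memcpy instead of the union and the arguably intended ~MASK:

    // Illustrative sketch of the "round off the low 32 bits" trick;
    // not the commit's code.
    #include <cstdio>
    #include <cstring>

    static double round32(double x) {
      unsigned long long i;
      std::memcpy(&i, &x, sizeof i);  // reinterpret the bit pattern
      const unsigned long long MASK = 0xFFFFFFFFull;
      unsigned long long r = i & MASK;
      if ((r << 1) > MASK)
        i += MASK - r + 1;            // low word >= 2^31: round the high word up
      else
        i &= ~MASK;                   // otherwise clear the low word (round down)
      std::memcpy(&x, &i, sizeof x);
      return x;
    }

    int main() {
      // values differing only below the retained precision collapse together
      std::printf("%d\n", round32(1.0) == round32(1.0 + 1e-12));  // prints 1
      return 0;
    }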
diff --git a/training/candidate_set.h b/training/candidate_set.h
new file mode 100644
index 00000000..9d326ed0
--- /dev/null
+++ b/training/candidate_set.h
@@ -0,0 +1,60 @@
+#ifndef _CANDIDATE_SET_H_
+#define _CANDIDATE_SET_H_
+
+#include <vector>
+#include <algorithm>
+
+#include "ns.h"
+#include "wordid.h"
+#include "sparse_vector.h"
+
+class Hypergraph;
+
+namespace training {
+
+struct Candidate {
+  Candidate() {}
+  Candidate(const std::vector<WordID>& e, const SparseVector<double>& fm) :
+      ewords(e),
+      fmap(fm) {}
+  Candidate(const std::vector<WordID>& e,
+            const SparseVector<double>& fm,
+            const SegmentEvaluator& se) :
+      ewords(e),
+      fmap(fm) {
+    se.Evaluate(ewords, &eval_feats);
+  }
+
+  void swap(Candidate& other) {
+    eval_feats.swap(other.eval_feats);
+    ewords.swap(other.ewords);
+    fmap.swap(other.fmap);
+  }
+
+  std::vector<WordID> ewords;
+  SparseVector<double> fmap;
+  SufficientStats eval_feats;
+};
+
+// represents some kind of collection of translation candidates, e.g.
+// aggregated k-best lists, sample lists, etc.
+class CandidateSet {
+ public:
+  CandidateSet() {}
+  inline size_t size() const { return cs.size(); }
+  const Candidate& operator[](size_t i) const { return cs[i]; }
+
+  void ReadFromFile(const std::string& file);
+  void WriteToFile(const std::string& file) const;
+  void AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer = NULL);
+  // TODO add code to do unique k-best
+  // TODO add code to draw k samples
+
+ private:
+  void Dedup();
+  std::vector<Candidate> cs;
+};
+
+}
+
+#endif
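The interface above gives a trainer a persistent candidate pool: read the cached set, merge in a fresh k-best list from the decoder's hypergraph, and write the deduplicated union back out. A hedged usage sketch (the helper function, file name, and k-best size are illustrative; only the CandidateSet calls come from the header above):

    #include <string>

    #include "candidate_set.h"
    #include "hg.h"
    #include "ns.h"

    // Illustrative helper, not part of the commit: grow a persistent
    // candidate pool by one decoding iteration.
    void GrowCandidatePool(const Hypergraph& hg,
                           const SegmentEvaluator* scorer,
                           const std::string& pool_file) {
      training::CandidateSet cs;
      cs.ReadFromFile(pool_file);              // pool from earlier iterations (assumed to exist)
      cs.AddKBestCandidates(hg, 500, scorer);  // extract, score, and Dedup()
      cs.WriteToFile(pool_file);               // persist the merged, unique set
    }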
+class CandidateSet { + public: + CandidateSet() {} + inline size_t size() const { return cs.size(); } + const Candidate& operator[](size_t i) const { return cs[i]; } + + void ReadFromFile(const std::string& file); + void WriteToFile(const std::string& file) const; + void AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer = NULL); + // TODO add code to do unique k-best + // TODO add code to draw k samples + + private: + void Dedup(); + std::vector<Candidate> cs; +}; + +} + +#endif diff --git a/training/mpi_flex_optimize.cc b/training/mpi_flex_optimize.cc index a9197208..a9ead018 100644 --- a/training/mpi_flex_optimize.cc +++ b/training/mpi_flex_optimize.cc @@ -179,18 +179,16 @@ double ApplyRegularizationTerms(const double C, const double T, const vector<double>& weights, const vector<double>& prev_weights, - vector<double>* g) { - assert(weights.size() == g->size()); + double* g) { double reg = 0; for (size_t i = 0; i < weights.size(); ++i) { const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); const double& w_i = weights[i]; - double& g_i = (*g)[i]; reg += C * w_i * w_i; - g_i += 2 * C * w_i; + g[i] += 2 * C * w_i; reg += T * (w_i - prev_w_i) * (w_i - prev_w_i); - g_i += 2 * T * (w_i - prev_w_i); + g[i] += 2 * T * (w_i - prev_w_i); } return reg; } @@ -365,7 +363,7 @@ int main(int argc, char** argv) { time_series_strength, // * (iter == 0 ? 0.0 : 1.0), cur_weights, prev_weights, - &gg); + &gg[0]); obj += r; if (mi == 0 || mi == (minibatch_iterations - 1)) { if (!mi) cerr << iter << ' '; else cerr << ' '; diff --git a/utils/Jamfile b/utils/Jamfile index 53a51277..4444b25f 100644 --- a/utils/Jamfile +++ b/utils/Jamfile @@ -24,11 +24,9 @@ lib utils : ..//z : <include>.. <include>. : : <include>.. <include>. 
; -exe ts : ts.cc utils ; exe atools : atools.cc utils ..//boost_program_options ; -exe phmt : phmt.cc utils ; exe reconstruct_weights : reconstruct_weights.cc utils ..//boost_program_options ; alias programs : reconstruct_weights atools ; -all_tests : utils ; +all_tests [ glob *_test.cc phmt.cc ts.cc ] : utils : <testing.arg>$(TOP)/utils/test_data ; diff --git a/utils/Makefile.am b/utils/Makefile.am index 46650c75..386344dd 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,10 +1,9 @@ - bin_PROGRAMS = reconstruct_weights atools -noinst_PROGRAMS = ts phmt mfcr_test -TESTS = ts phmt mfcr_test - -noinst_PROGRAMS += \ +noinst_PROGRAMS = \ + ts \ + phmt \ + mfcr_test \ crp_test \ dict_test \ m_test \ @@ -12,11 +11,7 @@ noinst_PROGRAMS += \ logval_test \ small_vector_test -TESTS += crp_test small_vector_test logval_test weights_test dict_test m_test - -reconstruct_weights_SOURCES = reconstruct_weights.cc - -atools_SOURCES = atools.cc +TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a @@ -39,26 +34,31 @@ if HAVE_CMPH libutils_a_SOURCES += perfect_hash.cc endif +reconstruct_weights_SOURCES = reconstruct_weights.cc +reconstruct_weights_LDADD = libutils.a -lz +atools_SOURCES = atools.cc +atools_LDADD = libutils.a -lz + phmt_SOURCES = phmt.cc +phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ts_SOURCES = ts.cc +ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz m_test_SOURCES = m_test.cc -m_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz dict_test_SOURCES = dict_test.cc -dict_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz weights_test_SOURCES = weights_test.cc -weights_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz crp_test_SOURCES = crp_test.cc -crp_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz logval_test_SOURCES = logval_test.cc -logval_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz small_vector_test_SOURCES = small_vector_test.cc -small_vector_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) - -AM_LDFLAGS = libutils.a -lz +small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. 
################################################################ diff --git a/utils/alignment_io.cc b/utils/alignment_io.cc index 1d923f7f..460fbd3f 100644 --- a/utils/alignment_io.cc +++ b/utils/alignment_io.cc @@ -7,7 +7,7 @@ static bool is_digit(char x) { return x >= '0' && x <= '9'; } boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) { int max_x = 0; int max_y = 0; - int i = 0; + unsigned i = 0; size_t pos = al.rfind(" ||| "); if (pos != string::npos) { i = pos + 5; } while (i < al.size()) { @@ -65,8 +65,8 @@ boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const st void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) if (alignment(i,j)) { if (need_space) out << ' '; else need_space = true; out << i << '-' << j; @@ -77,8 +77,8 @@ void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) { + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) { const AlignmentType& aij = alignment(i,j); if (aij != kNONE) { if (need_space) out << ' '; else need_space = true; diff --git a/utils/alignment_io.h b/utils/alignment_io.h index 36bcecd7..63fb916b 100644 --- a/utils/alignment_io.h +++ b/utils/alignment_io.h @@ -16,12 +16,12 @@ struct AlignmentIO { inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) { + for (unsigned j=0; j<m.height(); ++j) { switch (m(i,j)) { case AlignmentIO::kNONE: os << '.'; break; case AlignmentIO::kTRANSLATION: os << '*'; break; @@ -32,7 +32,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::Ali os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; return os; diff --git a/utils/array2d.h b/utils/array2d.h index ee2600d2..1a8e4157 100644 --- a/utils/array2d.h +++ b/utils/array2d.h @@ -15,12 +15,12 @@ class Array2D { typedef typename std::vector<T>::iterator iterator; typedef typename std::vector<T>::const_iterator const_iterator; Array2D() : width_(0), height_(0) {} - Array2D(int w, int h, const T& d = T()) : + Array2D(unsigned w, unsigned h, const T& d = T()) : width_(w), height_(h), data_(w*h, d) {} Array2D(const Array2D& rhs) : width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {} bool empty() const { return data_.empty(); } - void resize(int w, int h, const T& d = T()) { + void resize(unsigned w, unsigned h, const T& d = T()) { data_.resize(w * h, d); width_ = w; height_ = h; @@ -32,25 +32,25 @@ class Array2D { return *this; } void fill(const T& v) { data_.assign(data_.size(), v); } - int width() const { return width_; } - int height() const { return height_; } - reference operator()(int i, int j) { + unsigned width() const { return width_; } + 
unsigned height() const { return height_; } + reference operator()(unsigned i, unsigned j) { return data_[offset(i, j)]; } void clear() { data_.clear(); width_=0; height_=0; } - const_reference operator()(int i, int j) const { + const_reference operator()(unsigned i, unsigned j) const { return data_[offset(i, j)]; } - iterator begin_col(int j) { + iterator begin_col(unsigned j) { return data_.begin() + offset(0,j); } - const_iterator begin_col(int j) const { + const_iterator begin_col(unsigned j) const { return data_.begin() + offset(0,j); } - iterator end_col(int j) { + iterator end_col(unsigned j) { return data_.begin() + offset(0,j) + width_; } - const_iterator end_col(int j) const { + const_iterator end_col(unsigned j) const { return data_.begin() + offset(0,j) + width_; } iterator end() { return data_.end(); } @@ -71,14 +71,14 @@ class Array2D { } private: - inline int offset(int i, int j) const { + inline unsigned offset(unsigned i, unsigned j) const { assert(i<width_); assert(j<height_); return i + j * width_; } - int width_; - int height_; + unsigned width_; + unsigned height_; std::vector<T> data_; }; @@ -120,8 +120,8 @@ Array2D<T> operator-(const Array2D<T>& l, const Array2D<T>& r) { template <typename T> inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) { - for (int i=0; i<m.width(); ++i) { - for (int j=0; j<m.height(); ++j) + for (unsigned i=0; i<m.width(); ++i) { + for (unsigned j=0; j<m.height(); ++j) os << '\t' << m(i,j); os << '\n'; } @@ -130,17 +130,17 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (m(i,j) ? '*' : '.'); os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; return os; @@ -148,12 +148,12 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool> >& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10) << "\t"; os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) { + for (unsigned j=0; j<m.height(); ++j) { const std::vector<bool>& ar = m(i,j); for (unsigned k=0; k<ar.size(); ++k) os << (ar[k] ? 
'*' : '.'); @@ -162,7 +162,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10) << "\t"; os << "\n"; return os; diff --git a/utils/atools.cc b/utils/atools.cc index bce7822e..24406b71 100644 --- a/utils/atools.cc +++ b/utils/atools.cc @@ -27,7 +27,7 @@ struct Command { x->resize(max(a.width(), b.width()), max(a.height(), b.height())); } static bool Safe(const Array2D<bool>& a, int i, int j) { - if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + if (i >= 0 && j >= 0 && i < static_cast<int>(a.width()) && j < static_cast<int>(a.height())) return a(i,j); else return false; @@ -43,18 +43,18 @@ struct FMeasureCommand : public Command { bool RequiresTwoOperands() const { return true; } void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) { (void) x; // AER just computes statistics, not an alignment - int i_len = ref.width(); - int j_len = ref.height(); - for (int i = 0; i < i_len; ++i) { - for (int j = 0; j < j_len; ++j) { + unsigned i_len = ref.width(); + unsigned j_len = ref.height(); + for (unsigned i = 0; i < i_len; ++i) { + for (unsigned j = 0; j < j_len; ++j) { if (ref(i,j)) { ++num_in_ref; if (Safe(hyp, i, j)) ++matches; } } } - for (int i = 0; i < hyp.width(); ++i) - for (int j = 0; j < hyp.height(); ++j) + for (unsigned i = 0; i < hyp.width(); ++i) + for (unsigned j = 0; j < hyp.height(); ++j) if (hyp(i,j)) ++num_predicted; } void Summary() { @@ -97,8 +97,8 @@ struct InvertCommand : public Command { void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) { Array2D<bool>& res = *x; res.resize(in.height(), in.width()); - for (int i = 0; i < in.height(); ++i) - for (int j = 0; j < in.width(); ++j) + for (unsigned i = 0; i < in.height(); ++i) + for (unsigned j = 0; j < in.width(); ++j) res(i, j) = in(j, i); } }; @@ -109,8 +109,8 @@ struct IntersectCommand : public Command { void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { EnsureSize(a, b, x); Array2D<bool>& res = *x; - for (int i = 0; i < a.width(); ++i) - for (int j = 0; j < a.height(); ++j) + for (unsigned i = 0; i < a.width(); ++i) + for (unsigned j = 0; j < a.height(); ++j) res(i, j) = Safe(a, i, j) && Safe(b, i, j); } }; @@ -121,8 +121,8 @@ struct UnionCommand : public Command { void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { EnsureSize(a, b, x); Array2D<bool>& res = *x; - for (int i = 0; i < res.width(); ++i) - for (int j = 0; j < res.height(); ++j) + for (unsigned i = 0; i < res.width(); ++i) + for (unsigned j = 0; j < res.height(); ++j) res(i, j) = Safe(a, i, j) || Safe(b, i, j); } }; @@ -136,14 +136,14 @@ struct RefineCommand : public Command { } bool RequiresTwoOperands() const { return true; } - void Align(int i, int j) { + void Align(unsigned i, unsigned j) { res_(i, j) = true; is_i_aligned_[i] = true; is_j_aligned_[j] = true; } bool IsNeighborAligned(int i, int j) const { - for (int k = 0; k < neighbors_.size(); ++k) { + for (unsigned k = 0; k < neighbors_.size(); ++k) { const int di = neighbors_[k].first; const int dj = neighbors_[k].second; if (Safe(res_, i + di, j + dj)) @@ -177,8 +177,8 @@ struct RefineCommand : public Command { EnsureSize(a, b, &un_); is_i_aligned_.resize(res_.width(), false); is_j_aligned_.resize(res_.height(), false); - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) { + for (unsigned i = 0; i < 
in_.width(); ++i) + for (unsigned j = 0; j < in_.height(); ++j) { un_(i, j) = Safe(a, i, j) || Safe(b, i, j); in_(i, j) = Safe(a, i, j) && Safe(b, i, j); if (in_(i, j)) Align(i, j); @@ -188,16 +188,16 @@ struct RefineCommand : public Command { // if they match the constraints determined by pred void Grow(Predicate pred, bool idempotent, const Array2D<bool>& adds) { if (idempotent) { - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) { + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) { if (adds(i, j) && !res_(i, j) && (this->*pred)(i, j)) Align(i, j); } return; } set<pair<int, int> > p; - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) if (adds(i, j) && !res_(i, j)) p.insert(make_pair(i, j)); bool keep_going = !p.empty(); @@ -263,7 +263,7 @@ struct GDFACommand : public DiagCommand { map<string, boost::shared_ptr<Command> > commands; -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +void InitCommandLine(unsigned argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); ostringstream os; os << "Operation to perform:"; diff --git a/utils/ccrp.h b/utils/ccrp.h index 8635b422..1d41a3ef 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -232,7 +232,7 @@ class CCRP { if (num_customers() == 0) return; DiscountResampler dr(*this); StrengthResampler sr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { if (has_strength_prior()) { strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits<double>::min(), std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h index 6efbfc78..724b11bd 100644 --- a/utils/ccrp_nt.h +++ b/utils/ccrp_nt.h @@ -111,7 +111,7 @@ class CCRP_NoTable { void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { assert(has_alpha_prior()); ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); } diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 3cc48f8e..e86cbdc1 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -30,7 +30,7 @@ // to just set it #define L2_CACHE_LINE 128 -// this should just be a typedef to pair<int,T> on the new c++ +// this should just be a typedef to pair<unsigned,T> on the new c++ // I have to avoid this since I want to use unions and c++-98 // does not let unions have types with constructors in them // this type bypasses default constructors. use with caution! 
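The comment above is terse, so here is a standalone, hedged sketch (illustrative names, not cdec's code) of the trick PairIntT plays: a POD shell the size of std::pair<unsigned, float> that stores raw bytes, so its special members stay trivial and it can live inside a union while skipping T's constructor. The reinterpret_cast access is formally undefined under strict aliasing, which is exactly why the original warns "use with caution":

#include <cassert>
#include <cstring>
#include <iostream>
#include <utility>

// POD-style stand-in for std::pair<unsigned, float>: no constructors
// run, and all special members stay trivial, so it is legal as a
// union member even under C++98 rules.
struct RawPair {
  RawPair& operator=(const std::pair<unsigned, float>& v) {
    std::memcpy(this, &v, sizeof(RawPair));  // bitwise copy, as PairIntT does
    return *this;
  }
  unsigned first() const {
    return reinterpret_cast<const std::pair<unsigned, float>*>(this)->first;
  }
  float second() const {
    return reinterpret_cast<const std::pair<unsigned, float>*>(this)->second;
  }
 private:
  char data_[sizeof(std::pair<unsigned, float>)];  // raw storage only
};

int main() {
  // Mirrors the BOOST_STATIC_ASSERT guarding the layout assumption.
  assert(sizeof(RawPair) == sizeof(std::pair<unsigned, float>));
  RawPair p;
  p = std::make_pair(7u, 2.5f);
  std::cout << p.first() << " -> " << p.second() << std::endl;  // 7 -> 2.5
  return 0;
}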
@@ -38,32 +38,32 @@ // does anything template <typename T> struct PairIntT { - const PairIntT& operator=(const std::pair<const int, T>& v) { + const PairIntT& operator=(const std::pair<const unsigned, T>& v) { std::memcpy(this, &v, sizeof(PairIntT)); return *this; } - operator const std::pair<const int, T>&() const { - return *reinterpret_cast<const std::pair<const int, T>*>(this); + operator const std::pair<const unsigned, T>&() const { + return *reinterpret_cast<const std::pair<const unsigned, T>*>(this); } - int& first() { - return reinterpret_cast<std::pair<int, T>*>(this)->first; + unsigned& first() { + return reinterpret_cast<std::pair<unsigned, T>*>(this)->first; } T& second() { - return reinterpret_cast<std::pair<int, T>*>(this)->second; + return reinterpret_cast<std::pair<unsigned, T>*>(this)->second; } - const int& first() const { - return reinterpret_cast<const std::pair<int, T>*>(this)->first; + const unsigned& first() const { + return reinterpret_cast<const std::pair<unsigned, T>*>(this)->first; } const T& second() const { - return reinterpret_cast<const std::pair<int, T>*>(this)->second; + return reinterpret_cast<const std::pair<unsigned, T>*>(this)->second; } private: // very bad way of bypassing the default constructor on T - char data_[sizeof(std::pair<int, T>)]; + char data_[sizeof(std::pair<unsigned, T>)]; }; -BOOST_STATIC_ASSERT(sizeof(PairIntT<float>) == sizeof(std::pair<int,float>)); +BOOST_STATIC_ASSERT(sizeof(PairIntT<float>) == sizeof(std::pair<unsigned,float>)); -template <typename T, int LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15 : 7)> +template <typename T, unsigned LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15u : 7u)> class FastSparseVector { public: struct const_iterator { @@ -79,17 +79,17 @@ class FastSparseVector { } const bool local_; const PairIntT<T>* local_it_; - typename std::map<int, T>::const_iterator remote_it_; - const std::pair<const int, T>& operator*() const { + typename std::map<unsigned, T>::const_iterator remote_it_; + const std::pair<const unsigned, T>& operator*() const { if (local_) - return *reinterpret_cast<const std::pair<const int, float>*>(local_it_); + return *reinterpret_cast<const std::pair<const unsigned, float>*>(local_it_); else return *remote_it_; } - const std::pair<const int, T>* operator->() const { + const std::pair<const unsigned, T>* operator->() const { if (local_) - return reinterpret_cast<const std::pair<const int, T>*>(local_it_); + return reinterpret_cast<const std::pair<const unsigned, T>*>(local_it_); else return &*remote_it_; } @@ -118,17 +118,17 @@ class FastSparseVector { } FastSparseVector(const FastSparseVector& other) { std::memcpy(this, &other, sizeof(FastSparseVector)); - if (is_remote_) data_.rbmap = new std::map<int, T>(*data_.rbmap); + if (is_remote_) data_.rbmap = new std::map<unsigned, T>(*data_.rbmap); } - FastSparseVector(std::pair<int, T>* first, std::pair<int, T>* last) { + FastSparseVector(std::pair<unsigned, T>* first, std::pair<unsigned, T>* last) { const ptrdiff_t n = last - first; if (n <= LOCAL_MAX) { is_remote_ = false; local_size_ = n; - std::memcpy(data_.local, first, sizeof(std::pair<int, T>) * n); + std::memcpy(data_.local, first, sizeof(std::pair<unsigned, T>) * n); } else { is_remote_ = true; - data_.rbmap = new std::map<int, T>(first, last); + data_.rbmap = new std::map<unsigned, T>(first, last); } } void erase(int k) { @@ -150,31 +150,31 @@ class FastSparseVector { clear(); std::memcpy(this, &other, sizeof(FastSparseVector)); if (is_remote_) - data_.rbmap = new std::map<int, 
T>(*data_.rbmap); + data_.rbmap = new std::map<unsigned, T>(*data_.rbmap); return *this; } T const& get_singleton() const { assert(size()==1); return begin()->second; } - bool nonzero(int k) const { + bool nonzero(unsigned k) const { return static_cast<bool>(value(k)); } - inline void set_value(int k, const T& v) { + inline void set_value(unsigned k, const T& v) { get_or_create_bin(k) = v; } - inline T& add_value(int k, const T& v) { + inline T& add_value(unsigned k, const T& v) { return get_or_create_bin(k) += v; } - inline T get(int k) const { + inline T get(unsigned k) const { return value(k); } - inline T value(int k) const { + inline T value(unsigned k) const { if (is_remote_) { - typename std::map<int, T>::const_iterator it = data_.rbmap->find(k); + typename std::map<unsigned, T>::const_iterator it = data_.rbmap->find(k); if (it != data_.rbmap->end()) return it->second; } else { - for (int i = 0; i < local_size_; ++i) { + for (unsigned i = 0; i < local_size_; ++i) { const PairIntT<T>& p = data_.local[i]; if (p.first() == k) return p.second(); } @@ -256,8 +256,8 @@ class FastSparseVector { } inline FastSparseVector& operator*=(const T& scalar) { if (is_remote_) { - const typename std::map<int, T>::iterator end = data_.rbmap->end(); - for (typename std::map<int, T>::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map<unsigned, T>::iterator end = data_.rbmap->end(); + for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it) it->second *= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -267,8 +267,8 @@ class FastSparseVector { } inline FastSparseVector& operator/=(const T& scalar) { if (is_remote_) { - const typename std::map<int, T>::iterator end = data_.rbmap->end(); - for (typename std::map<int, T>::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map<unsigned, T>::iterator end = data_.rbmap->end(); + for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it) it->second /= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -300,7 +300,7 @@ class FastSparseVector { T dot(const std::vector<T>& v) const { T res = T(); for (const_iterator it = begin(), e = end(); it != e; ++it) - if (it->first < v.size()) res += it->second * v[it->first]; + if (static_cast<unsigned>(it->first) < v.size()) res += it->second * v[it->first]; return res; } T dot(const FastSparseVector<T>& other) const { @@ -330,11 +330,11 @@ class FastSparseVector { v.resize(i+1); return v[i]; } - inline T& get_or_create_bin(int k) { + inline T& get_or_create_bin(unsigned k) { if (is_remote_) { return (*data_.rbmap)[k]; } else { - for (int i = 0; i < local_size_; ++i) + for (unsigned i = 0; i < local_size_; ++i) if (data_.local[i].first() == k) return data_.local[i].second(); } assert(!is_remote_); @@ -353,17 +353,17 @@ class FastSparseVector { void swap_local_rbmap() { if (is_remote_) { // data is in rbmap, move to local assert(data_.rbmap->size() < LOCAL_MAX); - const std::map<int, T>* m = data_.rbmap; + const std::map<unsigned, T>* m = data_.rbmap; local_size_ = m->size(); int i = 0; - for (typename std::map<int, T>::const_iterator it = m->begin(); + for (typename std::map<unsigned, T>::const_iterator it = m->begin(); it != m->end(); ++it) { data_.local[i] = *it; ++i; } is_remote_ = false; } else { // data is local, move to rbmap - std::map<int, T>* m = new std::map<int, T>(&data_.local[0], &data_.local[local_size_]); + std::map<unsigned, T>* m = new std::map<unsigned, T>(&data_.local[0], 
&data_.local[local_size_]); data_.rbmap = m; is_remote_ = true; } @@ -371,7 +371,7 @@ class FastSparseVector { union { PairIntT<T> local[LOCAL_MAX]; - std::map<int, T>* rbmap; + std::map<unsigned, T>* rbmap; } data_; unsigned char local_size_; bool is_remote_; @@ -399,8 +399,8 @@ class FastSparseVector { void load(Archive & ar, const unsigned int version) { (void) version; this->clear(); - int sz; ar & sz; - for (int i = 0; i < sz; ++i) { + unsigned sz; ar & sz; + for (unsigned i = 0; i < sz; ++i) { std::pair<std::string, T> wire_pair; ar & wire_pair; this->set_value(FD::Convert(wire_pair.first), wire_pair.second); diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc index cc886335..29a1a2ce 100644 --- a/utils/mfcr_test.cc +++ b/utils/mfcr_test.cc @@ -4,11 +4,17 @@ #include <cassert> #include <cmath> +#define BOOST_TEST_MODULE MFCRTest +#include <boost/test/unit_test.hpp> +#include <boost/test/floating_point_comparison.hpp> + #include "sampler.h" using namespace std; -void test_exch(MT19937* rng) { +BOOST_AUTO_TEST_CASE(Exchangability) { + MT19937 r; + MT19937* rng = &r; MFCR<2, int> crp(0.5, 3.0); vector<double> lambdas(2); vector<double> p0s(2); @@ -64,9 +70,3 @@ void test_exch(MT19937* rng) { assert(error2 < 0.05); }; -int main(int argc, char** argv) { - MT19937 rng; - test_exch(&rng); - return 0; -} - diff --git a/utils/null_traits.h b/utils/null_traits.h index fac857d9..7b2d32d0 100644 --- a/utils/null_traits.h +++ b/utils/null_traits.h @@ -3,23 +3,23 @@ template <class V> struct null_traits { - static V null; //TODO: maybe take out default null and make ppl explicitly define? they may be surprised that they need to when they include a header lib that uses null_traits + static V xnull; //TODO: maybe take out default null and make ppl explicitly define? they may be surprised that they need to when they include a header lib that uses null_traits }; // global bool is_null(V const& v) // definitely override this, and possibly set_null and is_null. that's the point. template <class V> -V null_traits<V>::null; +V null_traits<V>::xnull; //TODO: are we getting single init of the static null object? 
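The mfcr_test.cc hunk above shows the pattern this commit uses to fold ad-hoc test binaries into `make check`: drop the hand-written main(), wrap the body in BOOST_AUTO_TEST_CASE, and let the Boost.Test framework drive it. A minimal, hedged sketch of that shape (the module and case names here are illustrative, not cdec's actual test):

#define BOOST_TEST_DYN_LINK  // matches -DBOOST_TEST_DYN_LINK in AM_CPPFLAGS
#define BOOST_TEST_MODULE ExampleSketch
#include <boost/test/unit_test.hpp>

// Stand-in for the sampler statistic the real test estimates.
static double EstimateMean() { return 0.501; }

BOOST_AUTO_TEST_CASE(ExchangabilitySketch) {
  const double error = EstimateMean() - 0.5;
  // Unlike assert(), BOOST_CHECK records the failure and keeps going,
  // and it still fires in NDEBUG builds.
  BOOST_CHECK(error < 0.05);
}
// No main(): the framework supplies one, which is why the old main()
// in mfcr_test.cc is deleted in the hunk above.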
template <class V> void set_null(V &v) { - v=null_traits<V>::null; + v=null_traits<V>::xnull; } template <class V> void is_null(V const& v) { - return v==null_traits<V>::null; + return v==null_traits<V>::xnull; } diff --git a/utils/sampler.h b/utils/sampler.h index 22c873d4..b237c716 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -49,9 +49,10 @@ struct RandomNumberGenerator { size_t SelectSample(const F& a, const F& b, double T = 1.0) { if (T == 1.0) { if (F(this->next()) > (a / (a + b))) return 1; else return 0; - } else { - assert(!"not implemented"); } + std::cerr << "SelectSample with annealing not implemented\n"; + abort(); + return 0; } // T is the annealing temperature, if desired diff --git a/utils/small_vector.h b/utils/small_vector.h index d04d1352..894b1b32 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -316,6 +316,7 @@ inline void swap(SmallVector<T,M> &a,SmallVector<T,M> &b) { } typedef SmallVector<int,2> SmallVectorInt; +typedef SmallVector<unsigned,2> SmallVectorUnsigned; template <class T,int M> void memcpy(void *out,SmallVector<T,M> const& v) { diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc index 27bb88dd..00e7bd60 100644 --- a/utils/sparse_vector.cc +++ b/utils/sparse_vector.cc @@ -32,7 +32,7 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { *reinterpret_cast<double*>(&data[off_objective]) = objective; *reinterpret_cast<int*>(&data[off_num_feats]) = num_feats; char* cur = &data[off_data]; - assert(cur - data == off_data); + assert(static_cast<size_t>(cur - data) == off_data); for (const_iterator it = v.begin(); it != v.end(); ++it) { const string& fname = FD::Convert(it->first); *cur++ = static_cast<char>(fname.size()); // name len @@ -41,10 +41,10 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { *reinterpret_cast<double*>(cur) = it->second; cur += sizeof(double); } - assert(cur - data == off_magic); + assert(static_cast<size_t>(cur - data) == off_magic); *reinterpret_cast<unsigned int*>(cur) = 0xBAABABBAu; cur += sizeof(unsigned int); - assert(cur - data == tot_size); + assert(static_cast<size_t>(cur - data) == tot_size); b64encode(data, tot_size, out); delete[] data; } diff --git a/utils/stringlib.h b/utils/stringlib.h index f457e1e4..75772c4d 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -97,9 +97,9 @@ inline std::string Trim(const std::string& str, const std::string& dropChars = " inline void Tokenize(const std::string& str, char delimiter, std::vector<std::string>* res) { std::string s = str; - int last = 0; + unsigned last = 0; res->clear(); - for (int i=0; i < s.size(); ++i) + for (unsigned i=0; i < s.size(); ++i) if (s[i] == delimiter) { s[i]=0; if (last != i) { @@ -120,14 +120,14 @@ inline unsigned NTokens(const std::string& str, char delimiter) inline std::string LowercaseString(const std::string& in) { std::string res(in.size(),' '); - for (int i = 0; i < in.size(); ++i) + for (unsigned i = 0; i < in.size(); ++i) res[i] = tolower(in[i]); return res; } inline std::string UppercaseString(const std::string& in) { std::string res(in.size(),' '); - for (int i = 0; i < in.size(); ++i) + for (unsigned i = 0; i < in.size(); ++i) res[i] = toupper(in[i]); return res; } @@ -146,8 +146,8 @@ inline int CountSubstrings(const std::string& str, const std::string& sub) { inline int SplitOnWhitespace(const std::string& in, std::vector<std::string>* out) { out->clear(); - int i = 0; - int start = 0; + unsigned i = 0; + unsigned start = 0; std::string cur; while(i < 
in.size()) { if (in[i] == ' ' || in[i] == '\t') { @@ -231,7 +231,7 @@ template <class F> void VisitTokens(std::string const& s,F f) { if (0) { std::vector<std::string> ss=SplitOnWhitespace(s); - for (int i=0;i<ss.size();++i) + for (unsigned i=0;i<ss.size();++i) f(ss[i]); return; } @@ -249,7 +249,7 @@ inline void SplitCommandAndParam(const std::string& in, std::string* cmd, std::s SplitOnWhitespace(in, &x); if (x.size() == 0) return; *cmd = x[0]; - for (int i = 1; i < x.size(); ++i) { + for (unsigned i = 1; i < x.size(); ++i) { if (i > 1) { *param += " "; } *param += x[i]; } diff --git a/utils/tdict.cc b/utils/tdict.cc index de234323..f33bd576 100644 --- a/utils/tdict.cc +++ b/utils/tdict.cc @@ -37,7 +37,7 @@ void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID> std::string TD::GetString(const std::vector<WordID>& str) { ostringstream o; - for (int i=0;i<str.size();++i) { + for (unsigned i=0;i<str.size();++i) { if (i) o << ' '; o << TD::Convert(str[i]); } diff --git a/utils/weights.cc b/utils/weights.cc index 39c18474..f56e2a20 100644 --- a/utils/weights.cc +++ b/utils/weights.cc @@ -45,11 +45,11 @@ void Weights::InitFromFile(const string& filename, } for (int i = buf.size() - 1; i > 0; --i) if (buf[i] == '=' || buf[i] == '\t') { buf[i] = ' '; break; } - int start = 0; + unsigned start = 0; while(start < buf.size() && buf[start] == ' ') ++start; - int end = 0; + unsigned end = 0; while(end < buf.size() && buf[end] != ' ') ++end; - const int fid = FD::Convert(buf.substr(start, end - start)); + const unsigned fid = FD::Convert(buf.substr(start, end - start)); if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); } while(end < buf.size() && buf[end] == ' ') ++end; val = strtod(&buf.c_str()[end], NULL); @@ -73,7 +73,7 @@ void Weights::InitFromFile(const string& filename, } else { // !read_text char buf[6]; in.read(buf, 5); - size_t num_keys; + int num_keys; in.read(reinterpret_cast<char*>(&num_keys), sizeof(size_t)); if (num_keys != FD::NumFeats()) { cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys << endl; @@ -102,8 +102,8 @@ void Weights::WriteToFile(const string& fname, if (write_text) { if (extra) { o << "# " << *extra << endl; } o.precision(17); - const int num_feats = FD::NumFeats(); - for (int i = 1; i < num_feats; ++i) { + const unsigned num_feats = FD::NumFeats(); + for (unsigned i = 1; i < num_feats; ++i) { const weight_t val = (i < weights.size() ? weights[i] : 0.0); if (hide_zero_value_features && val == 0.0) continue; o << FD::Convert(i) << ' ' << val << endl; @@ -126,7 +126,7 @@ void Weights::InitSparseVector(const vector<weight_t>& dv, } void Weights::SanityCheck(const vector<weight_t>& w) { - for (int i = 0; i < w.size(); ++i) { + for (unsigned i = 0; i < w.size(); ++i) { assert(!isnan(w[i])); assert(!isinf(w[i])); } @@ -142,7 +142,7 @@ struct FComp { void Weights::ShowLargestFeatures(const vector<weight_t>& w) { vector<int> fnums(w.size()); - for (int i = 0; i < w.size(); ++i) + for (unsigned i = 0; i < w.size(); ++i) fnums[i] = i; int nf = FD::NumFeats(); if (nf > 10) nf = 10; |
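One detail worth flagging in the weights.cc hunk above: num_keys becomes an int, but the subsequent in.read(...) still copies sizeof(size_t) bytes, so on an LP64 platform 8 bytes land in a 4-byte variable. A hedged sketch of the usual defensive pattern (hypothetical file name; this is not cdec's code or its eventual fix): pin the on-disk width with a fixed-size integer and read exactly that many bytes:

#include <stdint.h>  // uint64_t, available pre-C++11
#include <fstream>
#include <iostream>

int main() {
  std::ifstream in("weights.bin", std::ios::binary);  // hypothetical file
  if (!in) return 1;
  uint64_t num_keys = 0;  // width fixed by the format, not the platform
  in.read(reinterpret_cast<char*>(&num_keys), sizeof(num_keys));
  if (!in) { std::cerr << "truncated weights file" << std::endl; return 1; }
  std::cout << "keys: " << num_keys << std::endl;
  return 0;
}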