summaryrefslogtreecommitdiff
path: root/utils
AgeCommit message (Collapse)Author
2012-04-16switch to log domain for matrix operationsChris Dyer
2012-04-12unigram pyp lm addedChris Dyer
2012-04-07Merge remote-tracking branch 'upstream/master'Patrick Simianer
2012-03-24rename aligner, add support for distinguishing translation / transliterationChris Dyer
2012-03-23pf testChris Dyer
2012-03-20make c++11 compatibleChris Dyer
2012-03-15bayes bayes bayesChris Dyer
2012-03-13merge with upstreamPatrick Simianer
2012-03-09logging after alignmentChris Dyer
2012-03-09moarChris Dyer
2012-03-06a few statistical helpers i'm using to figure some algorithms outChris Dyer
2012-03-05tie hyperparameters for translation distributions; support theta < 0 for PYPLMChris Dyer
2012-03-05use template parameter inference to figure out what type to use for ↵Chris Dyer
probability computations, templatatize number of floors in MFCR rather than compile-time set
2012-03-05support strength=0 PYPs, final notation clean-upChris Dyer
2012-03-04support full range of hyperparameter values for PYP (including strength <= 0)Chris Dyer
2012-03-04clean up crpChris Dyer
2012-03-04move crp stuff aroundChris Dyer
2012-03-03fix includeChris Dyer
2012-03-03pyp lm, fixed hyperparameters inferenceChris Dyer
2012-03-01Merge branch 'master' of github.com:redpony/cdecChris Dyer
2012-03-01compile fix on old versions of gcc with MPI enabledChris Dyer
2012-02-29mpi fixesChris Dyer
2012-02-29corpus toolsChris Dyer
2012-02-29cleanup, mpi-ify lblmodelChris Dyer
2012-02-25really slow hiero lmChris Dyer
2012-02-17boost version checking, check for Eigen, get rid of old digamma stuffChris Dyer
2012-02-10move atools to utils directoryChris Dyer
2012-02-08move widely duplicated math functions into m.h headerChris Dyer
2012-01-27Silly windows users, code isn't executableKenneth Heafield
2012-01-27migration to new metric api for vest, clean up of unsupported/not functional ↵Chris Dyer
code
2012-01-03multi-floor chinese restaurant described by wood&teh (2009)Chris Dyer
2011-12-20new scorer interface is implemented, but not usedChris Dyer
2011-12-14random incomplete metric stuff, including string subsequence kernel implChris Dyer
2011-11-11more reporting, size_nonz() for fast sparse vectorPatrick Simianer
2011-10-19debugPatrick Simianer
2011-10-19merged, compiles but not workingPatrick Simianer
2011-10-19merge upstream/masterPatrick Simianer
2011-10-13svm impl, fasterPatrick Simianer
2011-10-12model lenght properly, clean upChris Dyer
2011-10-11remove implicit conversion-to-double operator from LogVal<T> that caused ↵Guest_account Guest_account prguest11
overflow errors, clean up some pf code
2011-10-11missing numwords implChris Dyer
2011-10-11check in some experimental particle filtering code, some gitignore fixesChris Dyer
2011-09-28fix broken compile on weights testChris Dyer
2011-09-23latest version from mtm6Patrick Simianer
2011-09-23a lot of stuff, fast_sparse_vector, perceptron, removed sofia, sample [...]Patrick Simianer
2011-09-23first cut for sofia-ml, little change in utils/dict.h, coarse refactoringPatrick Simianer
2011-09-23hacking in weights setting, gettingPatrick Simianer
2011-09-17add depGuest_account Guest_account prguest11
2011-09-17Merge branch 'master' of github.com:redpony/cdecChris Dyer
2011-09-17enable ramdisk scratch for per-sentence-grammarsChris Dyer
nclude "tdict.h" #include "filelib.h" #include "dict.h" #include "sampler.h" #include "ccrp_nt.h" #include "cfg_wfst_composer.h" using namespace std; using namespace tr1; namespace po = boost::program_options; static unsigned kMAX_SRC_PHRASE; static unsigned kMAX_TRG_PHRASE; struct FSTState; double log_poisson(unsigned x, const double& lambda) { assert(lambda > 0.0); return log(lambda) * x - lgamma(x + 1) - lambda; } struct ConditionalBase { explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : kM1MIXTURE(m1mixture), kUNIFORM_MIXTURE(1.0 - m1mixture), kUNIFORM_TARGET(1.0 / vocab_e_size), kNULL(TD::Convert("<eps>")) { assert(m1mixture >= 0.0 && m1mixture <= 1.0); assert(vocab_e_size > 0); LoadModel1(model1fname); } void LoadModel1(const string& fname) { cerr << "Loading Model 1 parameters from " << fname << " ..." << endl; ReadFile rf(fname); istream& in = *rf.stream(); string line; unsigned lc = 0; while(getline(in, line)) { ++lc; int cur = 0; int start = 0; while(cur < line.size() && line[cur] != ' ') { ++cur; } assert(cur != line.size()); line[cur] = 0; const WordID src = TD::Convert(&line[0]); ++cur; start = cur; while(cur < line.size() && line[cur] != ' ') { ++cur; } assert(cur != line.size()); line[cur] = 0; WordID trg = TD::Convert(&line[start]); const double logprob = strtod(&line[cur + 1], NULL); if (src >= ttable.size()) ttable.resize(src + 1); ttable[src][trg].logeq(logprob); } cerr << " read " << lc << " parameters.\n"; } // return logp0 of rule.e_ | rule.f_ prob_t operator()(const TRule& rule) const { const int flen = rule.f_.size(); const int elen = rule.e_.size(); prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); prob_t p; p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) for (int i = 0; i < elen; ++i) { // for each position i in e-RHS const WordID trg = rule.e_[i]; prob_t tp = prob_t::Zero(); for (int j = -1; j < flen; ++j) { const WordID src = j < 0 ? kNULL : rule.f_[j]; const map<WordID, prob_t>::const_iterator it = ttable[src].find(trg); if (it != ttable[src].end()) { tp += kM1MIXTURE * it->second; } tp += kUNIFORM_MIXTURE * kUNIFORM