From b85986c762bc8a2a74bfe0e2eb1d88fba991d554 Mon Sep 17 00:00:00 2001 From: chris dyer Date: Tue, 28 Dec 2010 20:56:42 -0500 Subject: incorporate dwarf features --- decoder/dwarf.cc | 3209 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3209 insertions(+) create mode 100644 decoder/dwarf.cc (limited to 'decoder/dwarf.cc') diff --git a/decoder/dwarf.cc b/decoder/dwarf.cc new file mode 100644 index 00000000..7968fee2 --- /dev/null +++ b/decoder/dwarf.cc @@ -0,0 +1,3209 @@ +#include "dwarf.h" +#include "tdict.h" +#include "wordid.h" +#include "lattice.h" +#include "ff_dwarf.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace std::tr1; +using namespace boost::tuples; +using namespace boost; + +Alignment::Alignment() { + //unordered_map,int> XX; + _I=0; + _J=0; + kSOS = TD::Convert(""); + kEOS = TD::Convert(""); + kUNK = TD::Convert("**UNKNOWN**"); + SourceFWAntsIdxs = new int*[MAX_ARITY]; + SourceFWAntsAbsIdxs = new int*[MAX_ARITY]; + TargetFWAntsIdxs = new int*[MAX_ARITY]; + SourceAntsIdxs = new int*[MAX_ARITY]; + TargetAntsIdxs = new int*[MAX_ARITY]; + AntsAl = new int*[MAX_ARITY]; + for (int idx=0; idx=0); + if (start==-1) start = _tSpan[j][0]; + if (_tSpan[j][0]==MINIMUM_INIT) return -1; + for (int idx=start; idx<=_tSpan[j][1]; idx++) { + if (_matrix[j][idx]) return idx; + } + return -1; +} + +int Alignment::sourceOf(int i, int start) { + assert(i>=0); + if (start==-1) start = _sSpan[i][0]; + if (_sSpan[i][0]==MINIMUM_INIT) return -1; + for (int idx=start; idx<=_sSpan[i][1]; idx++) { + if (_matrix[idx][i]) return idx; + } + return -1; +} + +void Alignment::clearAls(int prevJ, int prevI) { + for (int j=0; j<=prevJ; j++) { + for (int i=0; i neither, 1 -> leftFirst, 2 -> rightFirst, 3 -> dontCare + if (DEBUG) cerr << "DominanceSource(" << fw1 << "," << fw2 << ")" << endl; + //cerr << TD::Convert(_f[fw1]) << "," << TD::Convert(_f[fw2]) << endl; + //cerr << AsString() << endl; + int dom = 0; + curr_al.push_back(fw1); curr_al.push_back(fw2); + if (doms_hash.find(curr_al)==doms_hash.end()) { + int* block = blockSource(fw1,fw2); + //cerr << "block = " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; + if (block[0]==fw1) { + int tfw10 = _tSpan[fw1][0]; + int tfw11 = _tSpan[fw1][1]; + //cerr << "tfw = " << tfw10 << "," << tfw11 << endl; + if (tfw11<0) { + dom+=1; + } else { + if ((block[2]==tfw10 || block[3]==tfw11)) dom+=1; + } + } + if (block[1]==fw2) { + int tfw20 = _tSpan[fw2][0]; + int tfw21 = _tSpan[fw2][1]; + //cerr << "tfw = " << tfw20 << "," << tfw21 << endl; + if (tfw21<0) { + dom+=2; + } else { + if ((block[2]==tfw20 || block[3]==tfw21)) dom+=2; + } + } + delete block; + doms_hash.insert(pair,int>(curr_al,dom)); + } else { + dom = doms_hash[curr_al]; + } + if (DEBUG) cerr << " dom = " << dom << endl; + curr_al.pop_back(); curr_al.pop_back(); + return dom; +} + +vector Alignment::DominanceSource4Sampler(int fw1, int fw2) { + if (DEBUG) cerr << "DominanceSource4Sampler(" << fw1 << "," << fw2 << ")" << endl; + int dom = 0; + int* block = blockSource(fw1,fw2); + //cerr << "block = " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; + if (block[0]==fw1) { + int tfw10 = _tSpan[fw1][0]; + int tfw11 = _tSpan[fw1][1]; + //cerr << "tfw = " << tfw10 << "," << tfw11 << endl; + if (tfw11<0) { + dom+=1; + } else { + if ((block[2]==tfw10 || block[3]==tfw11)) dom+=1; + } + } + if (block[1]==fw2) { + int tfw20 = _tSpan[fw2][0]; + int tfw21 = _tSpan[fw2][1]; + //cerr << "tfw = " << tfw20 << "," << tfw21 << endl; + if (tfw21<0) { + dom+=2; + } else { + if ((block[2]==tfw20 || block[3]==tfw21)) dom+=2; + } + } + if (DEBUG) cerr << "doms = " << dom << endl; + vector ret; + ret.push_back(dom); ret.push_back(block[0]); ret.push_back(block[1]); + ret.push_back(block[2]); ret.push_back(block[3]); + delete block; + return ret; +} + +int Alignment::DominanceTarget(int fw1, int fw2) { + int dom = 0; + curr_al.push_back(fw1); curr_al.push_back(fw2); + if (domt_hash.find(curr_al)==domt_hash.end()) { + int* block = blockTarget(fw1,fw2); + if (block[2]==fw1) { + int sfw10 = _sSpan[fw1][0]; + int sfw11 = _sSpan[fw1][1]; + if (sfw11<0) { + dom+=1; + } else { + if (block[0]==sfw10 || block[1]==sfw11) dom+=1; + } + } + if (block[3]==fw2) { + int sfw20 = _sSpan[fw2][0]; + int sfw21 = _sSpan[fw2][0]; + if (sfw21<0) { + dom+=2; + } else { + if (block[0]==sfw20 || block[1]==sfw21) dom+=2; + } + } + delete block; + domt_hash.insert(pair,int>(curr_al,dom)); + } else { + dom = domt_hash[curr_al]; + } + curr_al.pop_back(); curr_al.pop_back(); + return dom; +} + +vector Alignment::DominanceTarget4Sampler(int fw1, int fw2) { + int dom = 0; + int* block = blockTarget(fw1,fw2); + if (block[2]==fw1) { + int sfw10 = _sSpan[fw1][0]; + int sfw11 = _sSpan[fw1][1]; + if (sfw11<0) { + dom+=1; + } else { + if (block[0]==sfw10 || block[1]==sfw11) dom+=1; + } + } + if (block[3]==fw2) { + int sfw20 = _sSpan[fw2][0]; + int sfw21 = _sSpan[fw2][0]; + if (sfw21<0) { + dom+=2; + } else { + if (block[0]==sfw20 || block[1]==sfw21) dom+=2; + } + } + vector ret; + ret.push_back(dom); ret.push_back(block[0]); ret.push_back(block[1]); + ret.push_back(block[2]); ret.push_back(block[3]); + delete block; + return ret; +} + +void Alignment::OrientationSource(int fw, int* oril, int* orir, bool Lcompute, bool Rcompute) { + OrientationSource(fw,fw,oril,orir,Lcompute,Rcompute); +} + +vector Alignment::OrientationSourceLeft4Sampler(int fw) { + return OrientationSourceLeft4Sampler(fw,fw); +} + +vector Alignment::OrientationSourceLeft4Sampler(int fw0, int fw1) { + if (DEBUG) cerr << "OrientationSourceLeft4Sampler(" << fw0 << "," << fw1 << ")" << endl; + int oril = 0; + int N0=fw0-1; + while (N0>=0) { + if (minTSpan(N0)!=MINIMUM_INIT) break; + N0--; + } + int N1=fw1+1; + while (N1<_J) { + if (minTSpan(N1)!=MINIMUM_INIT) break; + N1++; + } + if (minTSpan(fw0)==MINIMUM_INIT && minTSpan(fw1)==MINIMUM_INIT) { + fw0 = N1; fw1 = N0; + } + if (DEBUG) cerr << "fw0=" << fw0 << ", fw1=" << fw1 << ", N0=" << N0 << ", N1=" << N1 << endl; + if (maxTSpan(N0)=0) { + if (minTSpan(N0)!=MINIMUM_INIT) break; + N0--; + } + int N1=fw1+1; + while (N1<_J) { + if (minTSpan(N1)!=MINIMUM_INIT) break; + N1++; + } + if (minTSpan(fw0)==MINIMUM_INIT && minTSpan(fw1)==MINIMUM_INIT) { + fw0 = N1; fw1 = N0; + } + if (DEBUG) cerr << "fw0=" << fw0 << ", fw1=" << fw1 << ", N0=" << N0 << ", N1=" << N1 << endl; + if (maxTSpan(N1)=0) { + if (minTSpan(N0)!=MINIMUM_INIT) break; + N0--; + } + int N1=fw1+1; + while (N1<_J) { + if (minTSpan(N1)!=MINIMUM_INIT) break; + N1++; + } + if (minTSpan(fw0)==MINIMUM_INIT && minTSpan(fw1)==MINIMUM_INIT) { + fw0 = N1; fw1 = N0; + //cerr << "minTSpan(fw)==MINIMUM_INIT, thus fw0=" << fw0 << ", fw1=" << fw1 << endl; + } + if (DEBUG) cerr << "fw0=" << fw0 << ", fw1=" << fw1 << ", N0=" << N0 << ", N1=" << N1 << endl; + if (maxTSpan(N0) Alignment::OrientationTargetLeft4Sampler(int fw) { + return OrientationTargetLeft4Sampler(fw,fw); +} + +vector Alignment::OrientationTargetLeft4Sampler(int fw0, int fw1) { + if (DEBUG) cerr << "OrientationTargetLeft4Sampler " << fw0 << "," << fw1 << endl; + int oril=0; + int N0=fw0-1; + while (N0>=0) { + if (minSSpan(N0)!=MINIMUM_INIT) break; + N0--; + } + int N1=fw1+1; + while (N1<_I) { + if (minSSpan(N1)!=MINIMUM_INIT) break; + N1++; + } + if (minSSpan(fw0)==MINIMUM_INIT && minSSpan(fw1)==MINIMUM_INIT) { + fw0=N1; fw1=N0; + } + if (maxSSpan(N0)=0) { + if (minSSpan(N0)!=MINIMUM_INIT) break; + N0--; + } + int N1=fw1+1; + while (N1<_I) { + if (minSSpan(N1)!=MINIMUM_INIT) break; + N1++; + } + if (minSSpan(fw0)==MINIMUM_INIT && minSSpan(fw1)==MINIMUM_INIT) { + fw0=N1; fw1=N0; + } + if (maxSSpan(N1)=0) { + if (minSSpan(N0)!=MINIMUM_INIT) break; + N0--; + } + int N1=fw1+1; + while (N1<_I) { + if (minSSpan(N1)!=MINIMUM_INIT) break; + N1++; + } + if (minSSpan(fw0)==MINIMUM_INIT && minSSpan(fw1)==MINIMUM_INIT) { + fw0=N1; fw1=N0; + } + if (DEBUG) { + cerr << "fw0:" << fw0 << ", fw1:" << fw1 << ", N0:" << N0 << ", N1:" << N1 << endl ; + cerr << "minSSpan(N0)=" << minSSpan(N0) << " maxSSpan(N0)=" << maxSSpan(N0); + cerr << " minSSpan(fw0)="<< minSSpan(fw0) << " maxSSpan(fw0)=" << maxSSpan(fw0) << endl; + cerr << "minSSpan(fw1)=" << minSSpan(fw1) << " maxSSpan(fw1)=" << maxSSpan(fw1); + cerr << " minSSpan(N1)="<< minSSpan(N1) << " maxSSpan(N1)=" << maxSSpan(N1) << endl; + } + if (maxSSpan(N0)=0; j--) + if (_tSpan[j][0]!=MINIMUM_INIT) return j; + return -1; +} + +int Alignment::firstTargetAligned(int start) { + for (int i=start; i<_I; i++) + if (_sSpan[i][0]!=MINIMUM_INIT) return i; + return -1; +} + +int Alignment::lastTargetAligned(int end) { + for (int i=end; i>=0; i--) + if (_sSpan[i][0]!=MINIMUM_INIT) return i; + return -1; +} + +void Alignment::BorderingSFWsOnly() { +// removes the record of all function word alignments, except those at the borders +// the number of alignments kept may be more than two +// i.e. where the leftmost / the rightmost alignments are unaligned. +// In such cases, this function continues keeping function word alignments until the +// first (or last) alignment words. + if (SourceFWIdxs[0]>2) { + int firstCut = 1; + for (int j=2; j<=SourceFWIdxs[0]; j++) { + if (SourceFWIdxs[3*j-2]>fas) break; + firstCut=j; + } + int lastCut = SourceFWIdxs[0]; + for (int j=SourceFWIdxs[0]-1; j>=0; j--) { + if (SourceFWIdxs[3*j-2]=lastCut) return; + int delta = 0; + for (int j=lastCut; j<=SourceFWIdxs[0]; j++) { + delta++; + SourceFWIdxs[3*(firstCut+delta)-2]=SourceFWIdxs[3*j-2]; + SourceFWIdxs[3*(firstCut+delta)-1]=SourceFWIdxs[3*j-1]; + SourceFWIdxs[3*(firstCut+delta)] =SourceFWIdxs[3*j]; + } + SourceFWIdxs[0]=firstCut+delta; + } +} + +void Alignment::BorderingTFWsOnly() { +// similar to BorderingSFWsOnly() except this looks at the source side. + if (TargetFWIdxs[0]>2) { + int firstCut = 1; + for (int j=2; j<=TargetFWIdxs[0]; j++) { + if (TargetFWIdxs[3*j-2]>fat) break; + firstCut=j; + } + int lastCut = TargetFWIdxs[0]; + for (int j=TargetFWIdxs[0]-1; j>=0; j--) { + if (TargetFWIdxs[3*j-2]=lastCut) return; + int delta = 0; + for (int j=lastCut; j<=TargetFWIdxs[0]; j++) { + delta++; + TargetFWIdxs[3*(firstCut+delta)-2]=TargetFWIdxs[3*j-2]; + TargetFWIdxs[3*(firstCut+delta)-1]=TargetFWIdxs[3*j-1]; + TargetFWIdxs[3*(firstCut+delta)] =TargetFWIdxs[3*j]; + } + TargetFWIdxs[0]=firstCut+delta; + } +} + +void Alignment::FillFWIdxsState(int* state, int fas, int las, int fat, int lat) { + if (DEBUG) cerr << "FillFWIdxsState ("<< fas <<","<< las<<"," << fat <<"," << lat << ")" << endl; + if (fas==las) las+=1; + if (fat==lat) lat+=1; + for (int idx=0; idx<12; idx++) state[idx]=-1; + if (SourceFWIdxs[0]<=2) { + if (SourceFWIdxs[0]>=1) {state[0]=SourceFWIdxs[1]; state[1]=SourceFWIdxs[2]; state[2]=SourceFWIdxs[3];} + if (SourceFWIdxs[0]==2) {state[3]=SourceFWIdxs[4]; state[4]=SourceFWIdxs[5]; state[5]=SourceFWIdxs[6];} + } else { + if (SourceFWIdxs[1]>fas) { + state[0]=SourceFWIdxs[1]; state[1]=SourceFWIdxs[2]; state[2]=SourceFWIdxs[3]; + } else { + ostringstream issf; ostringstream isse; + for (int idx=1; idx<=SourceFWIdxs[0]; idx++) { + if (SourceFWIdxs[3*idx-2]>las) break; + if (idx>1) { issf << " "; isse << " ";}; + issf << TD::Convert(SourceFWIdxs[3*idx-1]); + isse << TD::Convert(SourceFWIdxs[3*idx]); + state[0]=SourceFWIdxs[3*idx-2]; + if (state[0]>=fas) break; + } + if (state[0]>=0) { + state[1]=TD::Convert(issf.str())*-1; state[2]=TD::Convert(isse.str()); //multiplying source with -1 as marker + } + } + if (SourceFWIdxs[SourceFWIdxs[0]*3-2]==las) { + state[3]=SourceFWIdxs[SourceFWIdxs[0]*3-2]; + state[4]=SourceFWIdxs[SourceFWIdxs[0]*3-1]; + state[5]=SourceFWIdxs[SourceFWIdxs[0]*3]; + } else { + int lastCut = SourceFWIdxs[0]; + for (int j=lastCut-1; j>=state[0]+1; j--) { + if (SourceFWIdxs[3*j-2]==state[0]) break; + if (SourceFWIdxs[3*j-2]lastCut) { issf << " "; isse << " ";}; + issf << TD::Convert(SourceFWIdxs[3*idx-1]); + isse << TD::Convert(SourceFWIdxs[3*idx]); + } + if (state[3]>=0) { + //multiplying source with -1 as compound marker + state[4]=TD::Convert(issf.str())*-1; state[5]=TD::Convert(isse.str()); + } + } + } + if (TargetFWIdxs[0]<=2) { + if (TargetFWIdxs[0]>=1) {state[6]=TargetFWIdxs[1]; state[7]=TargetFWIdxs[2]; state[8]=TargetFWIdxs[3];} + if (TargetFWIdxs[0]==2) {state[9]=TargetFWIdxs[4]; state[10]=TargetFWIdxs[5]; state[11]=TargetFWIdxs[6];} + } else { + if (TargetFWIdxs[1]>fat) { //shouldn't come here if SetTargetBorderingFW is invoked + state[6]=TargetFWIdxs[1]; state[7]=TargetFWIdxs[2]; state[8]=TargetFWIdxs[3]; + } else { + ostringstream issf; ostringstream isse; + for (int idx=1; idx<=TargetFWIdxs[0]; idx++) { + if (TargetFWIdxs[3*idx-2]>fat) break; + if (idx>1) { issf << " "; isse << " ";}; + issf << TD::Convert(TargetFWIdxs[3*idx-1]); + isse << TD::Convert(TargetFWIdxs[3*idx]); + state[6]=TargetFWIdxs[3*idx-2]; + } + state[7]=TD::Convert(issf.str()); state[8]=TD::Convert(isse.str())*-1; + //multiplying target with -1 as compound marker + } + if (TargetFWIdxs[TargetFWIdxs[0]*3-2]==lat) { + state[9]=TargetFWIdxs[TargetFWIdxs[0]*3-2]; + state[10]=TargetFWIdxs[TargetFWIdxs[0]*3-1]; + state[11]=TargetFWIdxs[TargetFWIdxs[0]*3]; + } else { + int lastCut = TargetFWIdxs[0]; + for (int j=lastCut-1; j>=1; j--) { + if (TargetFWIdxs[3*j-2]<=state[9]) break; + if (TargetFWIdxs[3*j-2]lastCut) issf << " "; isse << " ";; + issf << TD::Convert(TargetFWIdxs[3*idx-1]); + isse << TD::Convert(TargetFWIdxs[3*idx]); + } + state[10]=TD::Convert(issf.str()); state[11]=TD::Convert(isse.str())*-1; + } + } +} + +void Alignment::simplifyBackward(vector*blocks, int* block, const vector& danglings) { +// given a *block*, see whether its target span contains any index inside *danglings*. +// if yes, break it; otherwise, keep it. put the result(s) to *blocks* + if (DEBUG) cerr << "simplifyBackward[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << "]" << endl; + if (DEBUG) for (int i=0; idanglings[i_dangling]) { + if (i_dangling+1 >= danglings.size()) break; + i_dangling++; + } + while (danglings[i_dangling]==currIdx) { + i_dangling++; + currIdx++; + } + /*if (i_dangling>=danglings.size() && currIdx) { + blocks->push_back(block); + if (DEBUG) cerr << "pushing(1) " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; + return; + } + if (block[3]push_back(block); + if (DEBUG) cerr << "pushing(2) " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; + return; + }*/ + if (DEBUG) cerr << "i_dangling = " << i_dangling << endl; + int anchorIdx = danglings[i_dangling]; + if (i_dangling+1>=danglings.size() || anchorIdx>block[3]+1) anchorIdx=block[3]+1; + if (DEBUG) cerr << "anchorIdx = " << anchorIdx << ", currIdx = " << currIdx << endl; + do { + while(currIdx treat this as one alignment each + // record all function word alignments first, important because it may be unaligned + // return true if it's truly simple (no function word alignment involves); false, otherwise + if (DEBUG) cerr << "begin simplify" << endl; + reset(0,0); reset(_J-1,_I-1); // remove the phrase boundary alignments, NEED TO CHECK AGAIN !!! + if (SourceFWIdxs[0]+TargetFWIdxs[0]==0) { // return singleton + if (DEBUG) cerr << "no function words" << endl; + for (int idx=0; idx<12; idx++) ret[idx]=-1; + ret[12]=1; ret[13]=0; ret[14]=0; // 0-0 + FillFWIdxsState(ret,0,0,0,0); + return; + } + curr_al.insert(curr_al.begin(),curr_al.size()); + curr_al.push_back(SourceFWIdxs[0]); + for (int i=1; i<=SourceFWIdxs[0]; i++) curr_al.push_back(SourceFWIdxs[3*i-2]); + curr_al.push_back(TargetFWIdxs[0]); + for (int i=1; i<=TargetFWIdxs[0]; i++) curr_al.push_back(TargetFWIdxs[3*i-2]); + vector el; + if (simplify_hash.find(curr_al)==simplify_hash.end()) { + if (DEBUG) { + cerr << "SourceFWIdxs:" << SourceFWIdxs[0] << endl; + for (int i=1; i<=SourceFWIdxs[0]; i++) + cerr << SourceFWIdxs[3*i-2] << "," << SourceFWIdxs[3*i-1] << "," << SourceFWIdxs[3*i] << endl; + cerr << "TargetFWIdxs:" << TargetFWIdxs[0] << endl; + for (int i=1; i<=TargetFWIdxs[0]; i++) { + cerr << TargetFWIdxs[3*i-2] << "," << TargetFWIdxs[3*i-1] << "," << TargetFWIdxs[3*i] << endl; + } + } + + vector< int* > blocks; // each element contains s1,s2,t1,t2 + int currIdx = 1; // start from 1 to avoid considering phrase start + std::set FWIdxs; + std::vector DanglingTargetFWIdxs; + for (int i=1; i<= SourceFWIdxs[0]; i++) FWIdxs.insert(SourceFWIdxs[3*i-2]); + for (int i=1; i<= TargetFWIdxs[0]; i++) { + int source = sourceOf(TargetFWIdxs[3*i-2]); + if (source>=0) { + do { + FWIdxs.insert(source); + source = sourceOf(TargetFWIdxs[3*i-2],source+1); + } while(source >=0); + } else { + int *block = new int[4]; + block[0]=-1; block[1]=-1; block[2]=TargetFWIdxs[3*i-2]; block[3]=TargetFWIdxs[3*i-2]; + blocks.push_back(block); + if (DEBUG) cerr << "pushing[1] " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; + DanglingTargetFWIdxs.push_back(TargetFWIdxs[3*i-2]); + } + } + if (DEBUG) + for (std::set::const_iterator iter=FWIdxs.begin(); iter!=FWIdxs.end(); iter++) { + cerr << "FWIdxs=" << *iter << endl; + } + std::set::const_iterator currFWIdx = FWIdxs.begin(); + if (currFWIdx == FWIdxs.end()) { + int* block = new int[4]; + block[0]=1; block[1]=_J-2; block[2]=1; block[3]=_I-2; // no need to consider phrase boundaries + simplifyBackward(&blocks,block,DanglingTargetFWIdxs); + } else { + int anchorIdx = *currFWIdx; // also used to denote _J+1 + do { + // add alignments whose source from currIdx to currFWIdx-1 + while (currIdx=0) target = targetOf(anchorIdx,target+1); + } while (target>=0); + // advance indexes + currIdx = anchorIdx+1; + anchorIdx = getJ()-1; // was minus 2 + if (++currFWIdx!=FWIdxs.end()) anchorIdx = *currFWIdx; + } while (currIdx<=getJ()-2); + } + + + vector source_block_mapper(getJ(),-1); + vector target_block_mapper(getI(),-1); + for (int i = 0; i=0) source_block_mapper[blocks[i][0]]=1; + if (blocks[i][2]>=0) target_block_mapper[blocks[i][2]]=1; + } + int curr = 1; + int prev = -1; + for (int idx=0; idx0) { + source_block_mapper[idx]=curr++; + prev = curr; + } else { + source_block_mapper[idx]=prev; + } + } + curr = 1; + for (int idx=0; idx0) { + target_block_mapper[idx]=curr++; + prev = curr; + } else { + target_block_mapper[idx]=prev; + } + } + + //assert(blocks.size()<=50); + if (DEBUG) cerr << "resulting alignment:" << endl; + for (int i = 0; i, vector > (curr_al,el)); + if (DEBUG) cerr << "inserted" << endl; + } else { + el = simplify_hash[curr_al]; + } + if (DEBUG) { + cerr << "pull key:el = "; + for (int ii=0; ii1) quickSort(num,1,num[0]); +} + +void Alignment::quickSort(int arr[], int left, int right) { + int i = left, j = right; + int tmp1,tmp2,tmp3; + int mid = (left + right) / 2; + int pivot = arr[3*mid-2]; + + /* partition */ + while (i <= j) { + while (arr[3*i-2] < pivot) i++; + while (arr[3*j-2] > pivot) j--; + if (i <= j) { + tmp1 = arr[3*i-2]; tmp2 = arr[3*i-1]; tmp3 = arr[3*i]; + arr[3*i-2] = arr[3*j-2]; arr[3*i-1] = arr[3*j-1]; arr[3*i] = arr[3*j]; + arr[3*j-2] = tmp1; arr[3*j-1] = tmp2; arr[3*j] = tmp3; + i++; + j--; + } + }; + + /* recursion */ + if (left < j) quickSort(arr, left, j); + if (i < right) quickSort(arr, i, right); +} + +double Alignment::ScoreOrientation(const CountTable& table, int offset, int ori, WordID cond1, WordID cond2) { + string source = TD::Convert(cond1); + string sourceidx; + if (table.mode == 1) { + sourceidx = source; + int slashidx = sourceidx.find_last_of("/"); + source = sourceidx.substr(0,slashidx); + string idx = sourceidx.substr(slashidx+1); + if (DEBUG) cerr << " sourceidx = " << sourceidx << ", idx = " << idx << endl; + if (idx == "X") { + if (DEBUG) cerr << " idx == X, returning 0" << endl; + return 0; + } + } + string target = TD::Convert(cond2); + if (DEBUG) cerr << "sourceidx='" << sourceidx << "', source='" << source << "', target='" << target << "'" << endl; + double count = table.ultimate[offset+ori-1]; + double total = table.ultimate[offset+5]; + double alpha = 0.1; + double prob = count/total; + if (DEBUG) cerr << "level0 " << count << "/" << total << "=" << prob << endl; + + WordID key_id = (table.mode!=1) ? cond1 : TD::Convert(source); + map::const_iterator it = table.model.find(key_id); + bool stop = (it==table.model.end()); + if (!stop) { + stop=true; + if (it->second[offset+5]>=0) { + count = it->second[offset+ori-1] + alpha * prob; + total = it->second[offset+5] + alpha; + prob = count/total; + stop = false; + if (DEBUG) cerr << "level1 " << count << "/" << total << "=" << prob << endl; + } + } + if (stop) return prob; + + string key = source + " " + target; + it = table.model.find(TD::Convert(key)); + stop = (it==table.model.end()); + if (!stop) { + stop = true; + if (it->second[offset+5]>=0) { + count = it->second[offset+ori-1] + alpha * prob; + total = it->second[offset+5] + alpha; + prob = count/total; + stop = false; + if (DEBUG) cerr << "level2 " << count << "/" << total << "=" << prob << endl; + } + } + + if (stop || table.mode!=1) return prob; + + key = sourceidx + " " + target; + it = table.model.find(TD::Convert(key)); + if (it!=table.model.end()) { + if (it->second[offset+5]>=0) { + count = it->second[offset+ori-1] + alpha * prob; + total = it->second[offset+5] + alpha; + prob = count/total; + if (DEBUG) cerr << "level3 " << count << "/" << total << "=" << prob << endl; + } + } + + return prob; +} + +void Alignment::ScoreOrientation(const CountTable& table, int offset, int ori, WordID cond1, WordID cond2, + bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, + double alpha1, double beta1) { + if (DEBUG) cerr << "ScoreOrientation:" << TD::Convert(cond1) << "," << TD::Convert(cond2) << ", alpha1 = " << alpha1 << ", beta1 = " << beta1 << endl; + double ret = ScoreOrientation(table,offset,ori,cond1,cond2); + if (isBonus) { + if (table.mode == 0) *bonus += log(ret); else *bonus += ret; + } else { + if (table.mode == 0) *cost += log(ret); else *cost += ret; + } +} + +double Alignment::ScoreOrientationLeft(const CountTable& table, int ori, WordID cond1, WordID cond2) { + double ret = ScoreOrientation(table,0,ori,cond1,cond2); + if (table.mode == 0) return log(ret); + return ret; +} + +double Alignment::ScoreOrientationLeftBackward(const CountTable& table, int ori, WordID cond1, WordID cond2) { + double ret = ScoreOrientation(table,12,ori,cond1,cond2); + if (table.mode == 0) return log(ret); + return ret; +} + +double Alignment::ScoreOrientationRight(const CountTable& table, int ori, WordID cond1, WordID cond2) { + double ret = ScoreOrientation(table,6,ori,cond1,cond2); + if (table.mode == 0) return log(ret); + return ret; +} + +double Alignment::ScoreOrientationRightBackward(const CountTable& table, int ori, WordID cond1, WordID cond2) { + double ret = ScoreOrientation(table,18,ori,cond1,cond2); + if (table.mode == 0) return log(ret); + return ret; +} + +void Alignment::ScoreOrientationLeft(const CountTable& table, int ori, WordID cond1, WordID cond2, + bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { + if (DEBUG) cerr << "ScoreOrientationLeft(" << isBonus << ")" << endl; + ScoreOrientation(table,0,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); +} + +void Alignment::ScoreOrientationLeftBackward(const CountTable& table, int ori, WordID cond1, WordID cond2, + bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { + if (DEBUG) cerr << "ScoreOrientationLeftBackward" << endl; + ScoreOrientation(table,12,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); +} + +void Alignment::ScoreOrientationRight(const CountTable& table, int ori, WordID cond1, WordID cond2, + bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { + if (DEBUG) cerr << "ScoreOrientationRight(" << isBonus << ")" << endl; + ScoreOrientation(table,6,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); +} + +void Alignment::ScoreOrientationRightBackward(const CountTable& table, int ori, WordID cond1, WordID cond2, + bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { + if (DEBUG) cerr << "ScoreOrientationRightBackward" << endl; + ScoreOrientation(table,18,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); +} + +void Alignment::computeOrientationSourceBackwardPos(const CountTable& table, double *cost, double *bonus, + double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2) { + if (DEBUG) cerr << "computeOrientationSourceBackward" << endl; + int oril, orir; + for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { + if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; + if (!(SourceFWRuleAbsIdxs[idx]<=maxdepth1 || maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2)) continue; + int* fwblock = blockSource(SourceFWRuleIdxs[3*idx-2],SourceFWRuleIdxs[3*idx-2]); + bool aligned = (fwblock[2]!=MINIMUM_INIT); + if (aligned) { + OrientationTarget(fwblock[2],fwblock[3],&oril,&orir); + } else { + OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); + } + if (DEBUG) cerr << "oril = " << oril << ", orir = " << orir << endl; + bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word + if ((aligned && fwblock[2]<=fat)|| + (!aligned && SourceFWRuleIdxs[3*idx-2]<=fas)) isBonus=true; + if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) { + ostringstream nusource; + nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << SourceFWRuleAbsIdxs[idx]; + ScoreOrientationLeftBackward(table,oril,TD::Convert(nusource.str()),SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) { + ostringstream nusource; + nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); + ScoreOrientationLeftBackward(table,oril,TD::Convert(nusource.str()),SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + isBonus = false; + if ((aligned && lat<=fwblock[3])|| + (!aligned && las<=SourceFWRuleIdxs[3*idx-2])) isBonus=true; + if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) { + ostringstream nusource; + nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << SourceFWRuleAbsIdxs[idx]; + ScoreOrientationRightBackward(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) { + ostringstream nusource; + nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); + ScoreOrientationRightBackward(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + delete fwblock; + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + // antfas -> first aligned source word antecedent-wise + // antlas -> last aligned source word antecedent-wise + int antfat = firstTargetAligned(TargetAntsIdxs[i_ant][1]); + int antlat = lastTargetAligned(TargetAntsIdxs[i_ant][TargetAntsIdxs[i_ant][0]]); + int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); + int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); + assert(antfat <= antlat); + assert(antfas <= antlas); + for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { + if (DEBUG) + cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; + if (!(SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1 || maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2)) continue; + int* fwblock = blockSource(SourceFWAntsIdxs[i_ant][3*idx-2],SourceFWAntsIdxs[i_ant][3*idx-2]); + //bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); + bool aligned = (fwblock[2]!=MINIMUM_INIT); + bool Lcompute = true; bool Rcompute = true; + if (DEBUG) { + cerr << " aligned = " << aligned << endl; + cerr << " fwblock = " << fwblock[0] << "," << fwblock[1] << "," << fwblock[2] << "," << fwblock[3] << endl; + cerr << " antfas=" << antfas << ", antlas=" << antlas << ", antfat=" << antfat << ", antlat=" << antlat << endl; + } + if (aligned) { + if (DEBUG) cerr << "laligned" << endl; + if (antfat first aligned source word, las -> last aligned source word + if (SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; + if (!isBonus) // this is unnecessary because fas <= las assertion + if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; + if (maxdepth1>0) { + oss << source << "/"; + if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) + oss << SourceFWRuleAbsIdxs[idx]; + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationLeft(table,oril,sourceID,SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + if (maxdepth2>0) { + oss << source << "/"; + if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationLeft(table,oril,sourceID,SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + isBonus = false; + if (las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; + if (!isBonus) // this is unnecessary becuase fas <= las assertion + if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; + if (maxdepth1>0) { + oss << source << "/"; + if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) + oss << SourceFWRuleAbsIdxs[idx]; + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationRight(table,orir,sourceID,SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + if (maxdepth2>0) { + oss << source << "/"; + if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationRight(table,orir,sourceID,SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { + if (DEBUG) + cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; + //if (!((SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1)||(maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2))) continue; + // antfas -> first aligned source word antecedent-wise + // antlas -> last aligned source word antecedent-wise + int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); + int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); + if (DEBUG) cerr << " SourceFWAntsAbsIdxs[i_ant][3*idx-1]=" << SourceFWAntsAbsIdxs[i_ant][3*idx-1] << endl; + string source = TD::Convert(SourceFWAntsIdxs[i_ant][3*idx-1]); + assert(antfas <= antlas); + bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); + bool Lcompute = true;bool Rcompute = true; + if ((aligned && antfas0) { + oss << source << "/"; + if (SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1) + oss << SourceFWAntsAbsIdxs[i_ant][idx]; + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationLeft(table,oril,sourceID,SourceFWAntsIdxs[i_ant][3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + if (maxdepth2>0) { + oss << source << "/"; + if (maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1)*-1); + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationLeft(table,oril,sourceID,SourceFWAntsIdxs[i_ant][3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + } + isBonus = false; + if (Rcompute) { + if (las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus = true; + //if (!isBonus) // this is unnecessary + // if (!aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus=true; + if (maxdepth1>0) { + oss << source << "/"; + if (SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1) + oss << SourceFWAntsAbsIdxs[i_ant][idx]; + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationRight(table,orir,sourceID,SourceFWAntsIdxs[i_ant][3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + if (maxdepth2>0) { + oss << source << "/"; + if (maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1)*-1); + else + oss << "X"; + sourceID = TD::Convert(oss.str()); + oss.str(""); + ScoreOrientationRight(table,orir,sourceID,SourceFWAntsIdxs[i_ant][3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + } + } + } +} + +void Alignment::computeOrientationSource(const CountTable& table, double *cost, double *bonus, + double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { +// a bit complex due to imperfect state (TO DO!!!) +// 1. there are cases where function word alignments come from antecedents, which orientation +// (either its left or its right) has been computed earlier. +// 2. some orientation will go as bonus + if (DEBUG) cerr << "computeOrientationSource" << endl; + int oril, orir; + for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { + if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; + OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); + bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word + if (SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; + if (!isBonus) // this is unnecessary because fas <= las assertion + if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; + ScoreOrientationLeft(table,oril,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + isBonus = false; + if (las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; + if (!isBonus) // this is unnecessary becuase fas <= las assertion + if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; + ScoreOrientationRight(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { + if (DEBUG) + cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; + // antfas -> first aligned source word antecedent-wise + // antlas -> last aligned source word antecedent-wise + int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); + int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); + assert(antfas <= antlas); + bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); + bool Lcompute = true;bool Rcompute = true; + if ((aligned && antfas& tags) { + if (DEBUG) cerr << "computeOrientationSourceGen" << endl; + int oril, orir; + for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { + if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; + OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); + bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word + if (SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; + if (!isBonus) // this is unnecessary because fas <= las assertion + if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; + ScoreOrientationLeft(table,oril,generalize(SourceFWRuleIdxs[3*idx-1],tags),SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + isBonus = false; + if (las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; + if (!isBonus) // this is unnecessary becuase fas <= las assertion + if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; + ScoreOrientationRight(table,orir,generalize(SourceFWRuleIdxs[3*idx-1],tags),SourceFWRuleIdxs[3*idx], + isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { + if (DEBUG) + cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; + // antfas -> first aligned source word antecedent-wise + // antlas -> last aligned source word antecedent-wise + int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); + int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); + assert(antfas <= antlas); + bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); + bool Lcompute = true;bool Rcompute = true; + if ((aligned && antfas0) { + if (lfw>=0) { + int dom = DominanceSource(0,SourceFWIdxs[1]); + if (DEBUG) cerr << " --> lfw = " << lfw << "-" << TD::Convert(lfw) << endl; + if (DEBUG) cerr << " --> rfw = " << rfw << "-" << TD::Convert(rfw) << endl; + ScoreDominance(table,dom,lfw,SourceFWIdxs[2],lfw,SourceFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); + } + if (rfw>=0) { + int dom = DominanceSource(SourceFWIdxs[3*SourceFWIdxs[0]-2],_J-1); + ScoreDominance(table,dom,SourceFWIdxs[3*SourceFWIdxs[0]-1],rfw,SourceFWIdxs[3*SourceFWIdxs[0]], + rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); + } + } +} + +void Alignment::computeDominanceSourcePos(const CountTable& table, WordID lfw, WordID rfw, + double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2) { + if (DEBUG) cerr << "computeDominanceSourcePos" << endl; + if (DEBUG) cerr << " initial cost=" << *cost << ", initial bonus=" << *bonus << endl; + ostringstream oss; + for (int idx=2; idx<=SourceFWIdxs[0]; idx++) { + if (DEBUG) { + cerr << "PrevSourceFWIdxs :" << SourceFWIdxs[3*(idx-1)-2] << "," << SourceFWIdxs[3*(idx-1)-1] + << "," << SourceFWIdxs[3*(idx-1)] << endl; + cerr << "CurrSourceFWIdxs :" << SourceFWIdxs[3*(idx)-2] << "," << SourceFWIdxs[3*(idx)-1] + << "," << SourceFWIdxs[3*(idx)] << endl; + } + //if (!((SourceFWAbsIdxs[3*(idx-1)-2]<=maxdepth1 && SourceFWAbsIdxs[3*idx-2]<=maxdepth1) || + // (maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1<=maxdepth2 && maxfwidx-SourceFWAbsIdxs[3*idx-2]+1<=maxdepth2))) continue; + bool compute = true; + for (int i_ant=0; i_ant<_Arity && compute; i_ant++) { + if (MemberOf(SourceFWAntsIdxs[i_ant],SourceFWIdxs[3*(idx-1)-2],SourceFWIdxs[3*(idx)-2])) { + //cerr << "Skipping, they have been calculated in the " << (i_ant+1) << "-th branch" << endl; + compute=false; + } + } + if (compute) { + int dom = DominanceSource(SourceFWIdxs[3*(idx-1)-2],SourceFWIdxs[3*idx-2]); + if (DEBUG) cerr << "dom = " << dom << endl; + if (maxdepth1+maxdepth2>0) { + string source1 = TD::Convert(SourceFWIdxs[3*(idx-1)-1]); + string source2 = TD::Convert(SourceFWIdxs[3*(idx)-1]); + if (maxdepth1>0) { + oss << source1 << "/"; + if (SourceFWAbsIdxs[3*(idx-1)-2]<=maxdepth1) + oss << SourceFWAbsIdxs[3*(idx-1)-2]; + else + oss << "X"; + WordID source1id = TD::Convert(oss.str()); + oss.str(""); + oss << source2 << "/"; + if (SourceFWAbsIdxs[3*idx-2]<=maxdepth1) + oss << SourceFWAbsIdxs[3*idx-2]; + else + oss << "X"; + WordID source2id = TD::Convert(oss.str()); + oss.str(""); + ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*(idx-1)],SourceFWIdxs[3*idx], + cost,bo1,bo2,false,alpha_doms,beta_doms); + } + if (maxdepth2>0) { + oss << source1 << "/"; + if (maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1)*-1); + else + oss << "X"; + WordID source1id = TD::Convert(oss.str()); + oss.str(""); + oss << source2 << "/"; + if (maxfwidx-SourceFWAbsIdxs[3*idx-2]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1)*-1); + else + oss << "X"; + WordID source2id = TD::Convert(oss.str()); + oss.str(""); + ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*(idx-1)],SourceFWIdxs[3*idx], + cost,bo1,bo2,false,alpha_doms,beta_doms); + } + } + } + } + if (SourceFWIdxs[0]>0) { + if (lfw>=0) { + int dom = DominanceSource(0,SourceFWIdxs[1]); + string source1 = TD::Convert(lfw); + string source2 = TD::Convert(SourceFWIdxs[2]); + if (maxdepth1>0) { + oss << source1 << "/"; + if (SourceFWAbsIdxs[1]-1<=maxdepth1) + oss << (SourceFWAbsIdxs[1]-1); + else + oss << "X"; + WordID source1id = TD::Convert(oss.str()); + oss.str(""); + oss << source2 << "/"; + if (SourceFWAbsIdxs[1]<=maxdepth1) + oss << SourceFWAbsIdxs[1]; + else + oss << "X"; + WordID source2id = TD::Convert(oss.str()); + oss.str(""); + ScoreDominance(table,dom,source1id,source2id,lfw,SourceFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); + } + if (maxdepth2>0) { + oss << source1 << "/"; + if (maxfwidx-(SourceFWAbsIdxs[1]-1)+1<=maxdepth2) + oss << ((maxfwidx-(SourceFWAbsIdxs[1]-1)+1)*-1); + else + oss << "X"; + WordID source1id = TD::Convert(oss.str()); + oss.str(""); + oss << source2 << "/"; + if (maxfwidx-SourceFWAbsIdxs[1]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWAbsIdxs[1]+1)*-1); + else + oss << "X"; + WordID source2id = TD::Convert(oss.str()); + oss.str(""); + ScoreDominance(table,dom,source1id,source2id,lfw,SourceFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); + } + } + if (rfw>=0) { + int dom = DominanceSource(SourceFWIdxs[3*SourceFWIdxs[0]-2],_J-1); + string source1 = TD::Convert(SourceFWIdxs[3*SourceFWIdxs[0]-1]); + string source2 = TD::Convert(rfw); + if (maxdepth1>0) { + oss << source1 << "/"; + if (SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]<=maxdepth1) + oss << SourceFWAbsIdxs[3*SourceFWIdxs[0]-2]; + else + oss << "X"; + WordID source1id = TD::Convert(oss.str()); + oss.str(""); + oss << source2 << "/"; + if (SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1<=maxdepth1) + oss << (SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1); + else + oss << "X"; + WordID source2id = TD::Convert(oss.str()); + ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*SourceFWIdxs[0]], + rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); + } + if (maxdepth2>0) { + oss << source1 << "/"; + if (maxfwidx-SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1<=maxdepth2) + oss << ((maxfwidx-SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1)*-1); + else + oss << "X"; + WordID source1id = TD::Convert(oss.str()); + oss.str(""); + oss << source2 << "/"; + if (maxfwidx-(SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1)+1<=maxdepth2) + oss << ((maxfwidx-(SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1)+1)*-1); + else + oss << "X"; + WordID source2id = TD::Convert(oss.str()); + oss.str(""); + ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*SourceFWIdxs[0]], + rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); + } + } + } +} + + +void Alignment::computeDominanceTarget(const CountTable& table, WordID lfw, WordID rfw, + double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { + if (DEBUG) cerr << "computeDominanceTarget" << endl; + for (int idx=2; idx<=TargetFWIdxs[0]; idx++) { + if (DEBUG) cerr << "PrevTargetFWIdxs :" << TargetFWIdxs[3*(idx-1)-2] << "," << TargetFWIdxs[3*(idx-1)-1] << "," <=0) { + int dom = DominanceTarget(0,TargetFWIdxs[1]); + if (DEBUG) cerr << "dom target (with left) = " << dom << endl; + ScoreDominance(table,dom,lfw,lfw,TargetFWIdxs[2],TargetFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_domt,beta_domt); + } + if (rfw>=0) { + int dom = DominanceTarget(TargetFWIdxs[3*TargetFWIdxs[0]-2],_I-1); + if (DEBUG) cerr << "dom target (with right) = " << dom << endl; + ScoreDominance(table,dom,TargetFWIdxs[3*TargetFWIdxs[0]-1],TargetFWIdxs[3*TargetFWIdxs[0]], + rfw,rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_domt,beta_domt); + } + } + + //cerr << "END of computeDominanceTarget" << endl; +} + +double Alignment::ScoreDominance(const CountTable& table, int dom, WordID source1, WordID source2, WordID target1, WordID target2) { + if (DEBUG) { + cerr << "ScoreDominance(source1=" << TD::Convert(source1) << ",source2=" << TD::Convert(source2) + << ",target1=" << TD::Convert(target1) << ",target2=" << TD::Convert(target2) << ", dom=" << dom << endl; + } + string _source1 = TD::Convert(source1); + string _source2 = TD::Convert(source2); + string _source1idx; string _source2idx; + if (table.mode==1) { + _source1idx = _source1; _source2idx = _source2; + _source1 = _source1idx.substr(0,_source1idx.find_last_of("/")); + _source2 = _source2idx.substr(0,_source2idx.find_last_of("/")); + } + string _target1 = TD::Convert(target1); + string _target2 = TD::Convert(target2); + + double count = table.ultimate[dom]; + double total = table.ultimate[4]; + double prob = count/total; + if (DEBUG) cerr << "level0 " << count << "/" << total << "=" << prob << endl; + double alpha = 0.1; + + string key = _source1 + " " + _source2; + WordID key_id = TD::Convert(key); + map::const_iterator it = table.model.find(key_id); + bool stop = (it==table.model.end()); + if (!stop) { + stop = true; + if (it->second[4]>=0) { + count = it->second[dom] + alpha*prob; + total = it->second[4] + alpha; + prob = count/total; + if (DEBUG) cerr << "level1 " << count << "/" << total << "=" << prob << endl; + stop = false; + } + } + if (stop) return prob; + + key = _source1 + " " + _source2 + " " + _target1 + " " + _target2; + key_id = TD::Convert(key); + it = table.model.find(key_id); + stop = (it==table.model.end()); + if (!stop) { + stop = true; + if (it->second[4]>=0) { + count = it->second[dom] + alpha*prob; + total = it->second[4] + alpha; + prob = count/total; + if (DEBUG) cerr << "level2 " << count << "/" << total << "=" << prob << endl; + stop = false; + } + } + + if (table.mode!=1 || stop) return prob; + key = _source1 + " " + _source2 + " " + _target1 + " " + _target2; + key_id = TD::Convert(key); + it = table.model.find(key_id); + if (it!=table.model.end()) { + if (it->second[4]>=0) { + count = it->second[dom] + alpha*prob; + total = it->second[4] + alpha; + if (DEBUG) cerr << "level3 " << count << "/" << total << "=" << prob << endl; + prob = count/total; + } + } + + return prob; +} + +void Alignment::ScoreDominance(const CountTable& table, int dom, WordID source1, WordID source2, WordID target1, WordID target2, double *cost, double *bo1, double *bo2, bool isBonus, double alpha2, double beta2) { + if (DEBUG) + cerr << "ScoreDominance(source1=" << TD::Convert(source1) << ",source2=" << TD::Convert(source2) + << ",target1=" << TD::Convert(target1) << ",target2=" << TD::Convert(target2) << ",isBonus=" << isBonus << ", alpha2 = " << alpha2 << ", beta2 = " << beta2 << endl; + if (DEBUG) cerr << " BEFORE=" << *cost << endl; + *cost += ScoreDominance(table,dom,source1,source2,target1,target2); + if (DEBUG) cerr << " AFTER=" << *cost << endl; +} + +WordID Alignment::F2EProjectionFromExternal(int idx, const vector& als, const string& delimiter) { + if (DEBUG) { + cerr << "F2EProjectionFromExternal=" << idx << endl; + for (int i=0; i< als.size(); i++) cerr << "als[" << i << "]=" << als[i] << " "; + cerr << endl; + } + vector alignedTo; + for (int i=0; i0) projection << delimiter; + projection << TD::Convert(_e[alignedTo[i]]); + } + if (DEBUG) { + cerr << "projection = " << projection.str() << endl; + cerr << "returns = " << TD::Convert(projection.str()) << endl; + } + return TD::Convert(projection.str()); + } +} + +WordID Alignment::E2FProjectionFromExternal(int idx, const vector& als, const string& delimiter) { + vector alignedTo; + for (int i=0; i0) projection << delimiter; + projection << TD::Convert(_f[alignedTo[i]]); + } + return TD::Convert(projection.str()); + } +} + + +WordID Alignment::F2EProjection(int idx, const string& delimiter) { + if (DEBUG) cerr << "F2EProjection(" << idx << ")" << endl; + int e = targetOf(idx); + if (e<0) { + if (DEBUG) cerr << "projection = NULL" << endl; + return TD::Convert("NULL"); + } else { + if (targetOf(idx,e+1)<0) { + if (DEBUG) cerr << "e-1=" << (e-1) << ", size=" << _e.size() << endl; + return getE(e-1); // if not aligned to many, why bother continuing + } + ostringstream projection; + bool firstTime = true; + do { + if (!firstTime) projection << delimiter; + projection << TD::Convert(_e[e-1]); // transform space + firstTime = false; + e = targetOf(idx,e+1); + //if (DEBUG) cerr << "projection = " << projection.str() << endl; + } while(e>=0); + return TD::Convert(projection.str()); + } +} + +WordID Alignment::E2FProjection(int idx, const string& delimiter) { + //cerr << "E2FProjection(" << idx << ")" << endl; + //cerr << "i" << endl; + int f = sourceOf(idx); + //cerr << "j, f=" << f << endl; + if (f<0) { + //cerr << "projection = NULL" << endl; + return TD::Convert("NULL"); + } else { + if (sourceOf(idx,f+1)<0) return getF(f-1); + bool firstTime = true; + ostringstream projection(ostringstream::out); + do { + if (!firstTime) projection << delimiter; + projection << TD::Convert(_f[f-1]); //transform space + firstTime = false; + f = sourceOf(idx,f+1); + //cerr << "projection = " << projection.str() << endl; + } while(f>=0); + return TD::Convert(projection.str()); + } +} +void Alignment::computeBorderDominanceSource(const CountTable& table, double *cost, double *bonus, double *state_mono, + double *state_nonmono, TRule &rule, const std::vector& ant_contexts, const map& sfw) { + // HACK: GOAL is assumed to always be "S" + if (DEBUG) cerr << "computeBorderDominanceSource" << endl; + std::vector f = rule.f(); + std::vector e = rule.e(); + int nt_index[f.size()]; + int nt_count=0; + for (int i=0; i als; + for (std::vector::const_iterator i = rule.als().begin(); i != rule.als().end(); ++i) { + int s = i->s_; int t = i->t_; + als.push_back(link(t,s)); + } + if (DEBUG) cerr << "rule.Arity=" << rule.Arity() << endl; + if (rule.Arity()>0) { + int ntc=0; + for (int s=0; s neither, 1 -> leftFirst, 2 -> rightFirst, 3 -> dontCare + // ScoreDominance(const CountTable& table, int dom, WordID source1, WordID source2, WordID target1, WordID target2) + int prevs = 0; + for (int i=0; i(ant_contexts[nt_index[s]-1]); + *cost += Dwarf::IntegerToDouble(ants[51]); // 50->mono, 51->non-mono + if (DEBUG) cerr << " adding "<< Dwarf::IntegerToDouble(ants[51]) << " into cost, resulting = " << *cost << endl; + } + flag[s] = true; + } + } + prevs = currs; + } + if (DEBUG) cerr << "bonus and state matter" << endl; + for (int s=0; s(ant_contexts[nt_index[s]-1]); + double indbonus = Dwarf::IntegerToDouble(ants[50]); + *bonus += indbonus; + *state_mono += indbonus; + *state_nonmono += Dwarf::IntegerToDouble(ants[51]); + if (DEBUG) cerr << " propagating state=" << *state_mono <<","<< *state_nonmono<< endl; + } + } + } + if (DEBUG) cerr << "LHS:" << rule.GetLHS() << ":" << TD::Convert(rule.GetLHS()*-1) <(ant_contexts[i]); + *cost += Dwarf::IntegerToDouble(ants[50]); + } + *bonus = 0; + } + if (DEBUG) cerr << "-->>>> cost="<<*cost<<", bonus="<<*bonus<<", state_mono="<<*state_mono<<", state_nonmono="<<*state_nonmono<& ant_contexts, const map& sfw, const map& tfw,const Lattice& sourcelattice, int spanstart, int spanend) { + if (DEBUG) cerr << "===Rule===" << rule.AsString() << endl; + _f = rule.f(); + _e = rule.e(); + _Arity = rule.Arity(); + if (DEBUG) { + cerr << "F: "; + for (int idx=0; idx<_f.size(); idx++) cerr << _f[idx] << " "; + cerr << endl; + cerr << "F': "; + for (int idx=0; idx<_f.size(); idx++) + if (_f[idx]>=0) { + cerr << TD::Convert(_f[idx]) << " "; + } else { + cerr << TD::Convert(_f[idx]*-1); + } + cerr << endl; + cerr << "E: "; + for (int idx=0; idx<_e.size(); idx++) + cerr << _e[idx] << " "; + cerr << endl; + cerr << "E': "; + for (int idx=0; idx<_e.size(); idx++) + if (_e[idx]>0) { + cerr << TD::Convert(_e[idx]) << " "; + } else { + cerr << "[NT]" << " "; + } + cerr << endl; + } + + SourceFWRuleIdxs[0]=0; + SourceFWRuleAbsIdxs[0]=0; + for (int idx=1; idx<=_f.size(); idx++) { // in transformed space + if (sfw.find(_f[idx-1])!=sfw.end()) { + SourceFWRuleIdxs[0]++; + SourceFWRuleAbsIdxs[++SourceFWRuleAbsIdxs[0]]=GetFWGlobalIdx(idx,sourcelattice,_f,spanstart,spanend,ant_contexts,sfw); + SourceFWRuleIdxs[3*SourceFWRuleIdxs[0]-2]=idx; + SourceFWRuleIdxs[3*SourceFWRuleIdxs[0]-1]=_f[idx-1]; + SourceFWRuleIdxs[3*SourceFWRuleIdxs[0]] =F2EProjectionFromExternal(idx-1,rule.a_,"_SEP_"); + } + } + TargetFWRuleIdxs[0]=0; + for (int idx=1; idx<=_e.size(); idx++) { // in transformed space + if (tfw.find(_e[idx-1])!=tfw.end()) { + TargetFWRuleIdxs[0]++; + TargetFWRuleIdxs[3*TargetFWRuleIdxs[0]-2]=idx; + TargetFWRuleIdxs[3*TargetFWRuleIdxs[0]-1]=E2FProjectionFromExternal(idx-1,rule.a_,"_SEP_"); + TargetFWRuleIdxs[3*TargetFWRuleIdxs[0]] =_e[idx-1]; + } + } + + if (DEBUG) { + cerr << "SourceFWRuleIdxs[" << SourceFWRuleIdxs[0] << "]:"; + for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { + cerr << " idx:" << SourceFWRuleIdxs[3*idx-2]; + cerr << " absidx:" << SourceFWRuleAbsIdxs[idx]; + cerr << " F:" << SourceFWRuleIdxs[3*idx-1]; + cerr << " E:" << SourceFWRuleIdxs[3*idx]; + cerr << "; "; + } + cerr << endl; + cerr << "TargetFWRuleIdxs[" << TargetFWRuleIdxs[0] << "]:"; + for (int idx=1; idx<=TargetFWRuleIdxs[0]; idx++) { + cerr << " idx:" << TargetFWRuleIdxs[3*idx-2]; + cerr << " F:" << TargetFWRuleIdxs[3*idx-1]; + cerr << " E:" << TargetFWRuleIdxs[3*idx]; + } + cerr << endl; + } + if (SourceFWRuleIdxs[0]+TargetFWRuleIdxs[0]==0) { + bool nofw = true; + for (int i_ant=0; i_ant<_Arity && nofw; i_ant++) { + const int* ants = reinterpret_cast(ant_contexts[i_ant]); + if (ants[0]>=0||ants[3]>=0||ants[6]>=0||ants[9]>=0) nofw=false; + } + if (nofw) return true; + } + //cerr << "clearing als first" << endl; + clearAls(_J,_I); + + if (DEBUG) cerr << "A["<< rule.a_.size() << "]: " ; + RuleAl[0]=0; + // add phrase start boundary + RuleAl[0]++; RuleAl[RuleAl[0]*2-1]=0; RuleAl[RuleAl[0]*2]=0; + if (DEBUG) cerr << RuleAl[RuleAl[0]*2-1] << "-" << RuleAl[RuleAl[0]*2] << " "; + for (int idx=0; idx(ant_contexts[i_ant]); + int span = ants[Dwarf::STATE_SIZE-1]; + if (DEBUG) { + cerr << "antcontexts[" << i_ant << "] "; + for (int idx=0; idx=0) { + // Given a span, give the index of the first function word + int firstfwidx = GetFirstFWIdx(source(span),target(span),sourcelattice,sfw); + if (DEBUG) cerr << " firstfwidx = " << firstfwidx << endl; + int fwcount = 0; + if (ants[1]>=0) { // one function word + SourceFWAntsIdxs[i_ant][0]++; SourceFWAntsIdxs[i_ant][1]=ants[0]; + SourceFWAntsIdxs[i_ant][2]=ants[1]; SourceFWAntsIdxs[i_ant][3]=ants[2]; + fwcount++; + } else { // if ants[1] < 0 then compound fws + //cerr << "ants[1]<0" << endl; + istringstream ossf(TD::Convert(ants[1]*-1)); string ffw; + istringstream osse(TD::Convert(ants[2])); string efw; //projection would be mostly NULL + int delta=ants[0]; + while (osse >> efw && ossf >> ffw) { + SourceFWAntsIdxs[i_ant][0]++; + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-2]=ants[0]-(delta--); + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3] =TD::Convert(efw); + fwcount++; + } + } + if (DEBUG) cerr << " fwcount=" << fwcount << endl; + SourceFWAntsAbsIdxs[i_ant][0]=fwcount; + for (int i=1; i<=fwcount; i++) SourceFWAntsAbsIdxs[i_ant][i]=firstfwidx++; + } + if (ants[3]>=0) { + int lastfwidx = GetLastFWIdx(source(span),target(span),sourcelattice,sfw); + if (DEBUG) cerr << " lastfwidx = " << lastfwidx << endl; + int fwcount=0; + if (ants[4]>=0) { + fwcount++; + SourceFWAntsIdxs[i_ant][0]++; + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-2]=ants[3]; + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-1]=ants[4]; + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3] =ants[5]; + } else { // if ants[4] < 0 then compound fws + //cerr << "ants[4]<0" << endl; + istringstream ossf(TD::Convert(ants[4]*-1)); string ffw; + istringstream osse(TD::Convert(ants[5])); string efw; + int delta=0; + while (osse >> efw && ossf >> ffw) { + fwcount++; + SourceFWAntsIdxs[i_ant][0]++; + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-2]=ants[3]+(delta++); + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); + SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3] =TD::Convert(efw); + } + } + if (DEBUG) cerr << " fwcount=" << fwcount << endl; + for (int i=1; i<=fwcount; i++) SourceFWAntsAbsIdxs[i_ant][SourceFWAntsAbsIdxs[i_ant][0]+i]=lastfwidx-fwcount+i; + SourceFWAntsAbsIdxs[i_ant][0]+=fwcount; + } + TargetFWAntsIdxs[i_ant][0]=0; + if (ants[6]>=0) { + if (ants[8]>=0) { // check the e part + TargetFWAntsIdxs[i_ant][0]++; + TargetFWAntsIdxs[i_ant][1]=ants[6]; + TargetFWAntsIdxs[i_ant][2]=ants[7]; + TargetFWAntsIdxs[i_ant][3]=ants[8]; + } else { // if ants[8] < 0 then compound fws + //cerr << "ants[8]<0" << endl; + //cerr << "ants[7]=" << TD::Convert(ants[7]) << endl; + //cerr << "ants[8]=" << TD::Convert(ants[8]*-1) << endl; + istringstream ossf(TD::Convert(ants[7])); string ffw; + istringstream osse(TD::Convert(ants[8]*-1)); string efw; + int delta=ants[6]; + while (osse >> efw && ossf >> ffw) { + //cerr << "efw="<< efw << ",ffw=" << ffw << endl; + TargetFWAntsIdxs[i_ant][0]++; + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-2]=ants[6]-(delta--); + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3] =TD::Convert(efw); + } + } + } + if (ants[9]>=0) { + if (ants[11]>=0) { + TargetFWAntsIdxs[i_ant][0]++; + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-2]=ants[9]; + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-1]=ants[10]; + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3] =ants[11]; + } else { + //cerr << "ants[11]<0" << endl; + //cerr << "ants[10]=" << TD::Convert(ants[10]) << endl; + //cerr << "ants[11]=" << TD::Convert(ants[11]*-1) << endl; + istringstream ossf(TD::Convert(ants[10])); string ffw; + istringstream osse(TD::Convert(ants[11]*-1)); string efw; + int delta = 0; + while (osse >> efw && ossf >> ffw) { + //cerr << "efw="<< efw << ",ffw=" << ffw << endl; + TargetFWAntsIdxs[i_ant][0]++; + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-2]=ants[9]+(delta++); + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); + TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3] =TD::Convert(efw); + } + } + } + AntsAl[i_ant][0]=ants[12];//number of alignments + for (int idx=1; idx<=AntsAl[i_ant][0]; idx++) { + AntsAl[i_ant][idx*2-1] = source(ants[12+idx]); + AntsAl[i_ant][idx*2] = target(ants[12+idx]); + } + } + + for (int i_ant=0; i_ant<_Arity; i_ant++) { + int length = AntsAl[i_ant][0]; + int maxs = -1000; + int maxt = -1000; + for (int idx=0; idx=0) { + TargetRuleIdxs[idx]=curr++; + } else { + int i_ant = TargetRuleIdxs[idx]*-1-1; + if (DEBUG) cerr << "TargetRuleIdxs[" << i_ant << "]" << endl; + for (int idx2=1; idx2<=TargetAntsIdxs[i_ant][0]; idx2++) { + TargetAntsIdxs[i_ant][idx2]=curr++; + if (DEBUG) cerr << TargetAntsIdxs[i_ant][idx2] << " "; + } + if (DEBUG) cerr << endl; + } + } + if (DEBUG) { + cerr << "TargetRuleIdxs" << endl; + for (int idx=1; idx<=TargetRuleIdxs[0]; idx++) cerr << TargetRuleIdxs[idx] << " "; + cerr << endl; + } + for (int idx=1; idx<=RuleAl[0]; idx++) { + if (DEBUG) { + cerr << RuleAl[idx*2-1] << " - " << RuleAl[idx*2] << " to "; + cerr << SourceRuleIdxs[RuleAl[idx*2-1]+1] << " - " << TargetRuleIdxs[RuleAl[idx*2]+1] << endl; + } + set(SourceRuleIdxs[RuleAl[idx*2-1]+1],TargetRuleIdxs[RuleAl[idx*2]+1]); + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + for (int idx=1; idx<=AntsAl[i_ant][0]; idx++) { + if (DEBUG) { + cerr << AntsAl[i_ant][2*idx-1] << " - " << AntsAl[i_ant][2*idx] << " to "; + cerr << SourceAntsIdxs[i_ant][AntsAl[i_ant][2*idx-1]+1] << " - "; + cerr << TargetAntsIdxs[i_ant][AntsAl[i_ant][2*idx]+1] << endl; + } + set(SourceAntsIdxs[i_ant][AntsAl[i_ant][2*idx-1]+1],TargetAntsIdxs[i_ant][AntsAl[i_ant][2*idx]+1]); + } + } + SourceFWIdxs[0]=0; + SourceFWAbsIdxs[0]=0; + if (DEBUG) cerr << "SourceFWRuleIdxs:" << endl; + for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { + if (DEBUG) cerr << SourceFWRuleIdxs[3*idx-2] << " to " << SourceRuleIdxs[SourceFWRuleIdxs[3*idx-2]+1] << endl; + SourceFWRuleIdxs[3*idx-2] = SourceRuleIdxs[SourceFWRuleIdxs[3*idx-2]+1]; + SourceFWAbsIdxs[0]++; + SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]=SourceFWRuleAbsIdxs[idx]; + SourceFWIdxs[0]++; + SourceFWIdxs[3*SourceFWIdxs[0]-2]=SourceFWRuleIdxs[3*idx-2]; + SourceFWIdxs[3*SourceFWIdxs[0]-1]=SourceFWRuleIdxs[3*idx-1]; + SourceFWIdxs[3*SourceFWIdxs[0]] =SourceFWRuleIdxs[3*idx]; + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + if (DEBUG) cerr << "SourceFWAntsIdxs[" << i_ant << "]" << endl; + for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { + if (DEBUG) + cerr << SourceFWAntsIdxs[i_ant][3*idx-2] << " to " << SourceAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][3*idx-2]+1] << endl; + SourceFWAntsIdxs[i_ant][3*idx-2] = SourceAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][3*idx-2]+1]; + SourceFWAbsIdxs[0]++; + SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]=SourceFWAntsAbsIdxs[i_ant][idx]; + SourceFWIdxs[0]++; + SourceFWIdxs[3*SourceFWIdxs[0]-2]=SourceFWAntsIdxs[i_ant][3*idx-2]; + SourceFWIdxs[3*SourceFWIdxs[0]-1]=SourceFWAntsIdxs[i_ant][3*idx-1]; + SourceFWIdxs[3*SourceFWIdxs[0]] =SourceFWAntsIdxs[i_ant][3*idx]; + } + } + sort(SourceFWIdxs); + sort(SourceFWAbsIdxs); + if (DEBUG) { + cerr << "SourceFWIdxs : "; + for (int idx=1; idx<=SourceFWIdxs[0]; idx++) { + cerr << "idx:" << SourceFWIdxs[3*idx-2] << ","; + cerr << "F:" << SourceFWIdxs[3*idx-1] << ","; + cerr << "E:" << SourceFWIdxs[3*idx] << " "; + } + cerr << endl; + } + TargetFWIdxs[0]=0; + if (DEBUG) cerr << "TargetFWRuleIdxs:" << endl; + for (int idx=1; idx<=TargetFWRuleIdxs[0]; idx++) { + if (DEBUG) cerr << TargetFWRuleIdxs[3*idx-2] << " to " << TargetRuleIdxs[TargetFWRuleIdxs[3*idx-2]+1] << endl; + TargetFWRuleIdxs[3*idx-2] = TargetRuleIdxs[TargetFWRuleIdxs[3*idx-2]+1]; + TargetFWIdxs[0]++; + TargetFWIdxs[3*TargetFWIdxs[0]-2]=TargetFWRuleIdxs[3*idx-2]; + TargetFWIdxs[3*TargetFWIdxs[0]-1]=TargetFWRuleIdxs[3*idx-1]; + TargetFWIdxs[3*TargetFWIdxs[0]] =TargetFWRuleIdxs[3*idx]; + } + for (int i_ant=0; i_ant<_Arity; i_ant++) { + if (DEBUG) cerr << "TargetFWAntsIdxs[" << i_ant << "]" << endl; + for (int idx=1; idx<=TargetFWAntsIdxs[i_ant][0]; idx++) { + if (DEBUG) cerr << TargetFWAntsIdxs[i_ant][3*idx-2] << " to " << TargetAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][3*idx-2]+1] << endl; + TargetFWAntsIdxs[i_ant][3*idx-2] = TargetAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][3*idx-2]+1]; + TargetFWIdxs[0]++; + TargetFWIdxs[3*TargetFWIdxs[0]-2]=TargetFWAntsIdxs[i_ant][3*idx-2]; + TargetFWIdxs[3*TargetFWIdxs[0]-1]=TargetFWAntsIdxs[i_ant][3*idx-1]; + TargetFWIdxs[3*TargetFWIdxs[0]] =TargetFWAntsIdxs[i_ant][3*idx]; + } + } + sort(TargetFWIdxs); + if (DEBUG) { + cerr << "TargetFWIdxs : "; + for (int idx=1; idx<=TargetFWIdxs[0]; idx++) { + cerr << "idx:" << TargetFWIdxs[3*idx-2]<< ","; + cerr << "E:" << TargetFWIdxs[3*idx-1]<< ","; + cerr << "F:" << TargetFWIdxs[3*idx]<< " "; + } + cerr << endl; + cerr << AsString() << endl; + } + fas = firstSourceAligned(1); las = lastSourceAligned(_J-2); + fat = firstTargetAligned(1); lat = lastTargetAligned(_I-2); + if (DEBUG) cerr << "fas=" << fas << ", las=" << las << ", fat=" << fat << ", lat=" << lat << endl; + assert(fas<=las); + assert(fat<=lat); + SetCurrAlVector(); + if (DEBUG) cerr << "end prepare" << endl; + return false; +} + +string Alignment::AsStringSimple() { + ostringstream stream; + for (int j=0; j=0) { + stream << " " << j << "-" << t; + t = targetOf(j,t+1); + } + } + return stream.str(); +}; + + +string Alignment::AsString() { + ostringstream stream; + stream << "J:" << getJ() << " I:" << getI(); + for (int j=0; j=0) { + stream << " " << j << "-" << t; + t = targetOf(j,t+1); + } + } + stream << " TargetSpan:"; + for (int j=0; j=0) { + curr_al.push_back(link(j,i)); + i = targetOf(j,i+1); + } + } +} + +const void CountTable::print() { + cerr << "+++ Model +++" << endl; + for (map::const_iterator iter=model.begin(); iter!=model.end(); iter++) { + cerr << TD::Convert(iter->first) << " "; + for (int i=0; isecond[i] << " "; + cerr << endl; + } + cerr << "+++ Ultimate +++" << endl; + for (int i=0; i* ret) { + ret->clear(); + for (int i=0; i<_J; i++) { + int t = targetOf(i); + while (t>=0) { + ret->push_back(link(i,t)); + t = targetOf(i,t+1); + } + } +} + +int Alignment::GetFWGlobalIdx(int idx, const Lattice& sourcelattice, vector& sources, int spanstart, int spanend, const std::vector& ant_contexts, const map& sfw) { + // get the index of the function word in the lattice + if (DEBUG) cerr << " GetFWGlobalIdx(" << idx << "," << spanstart << "," << spanend << ")" << endl; + int curr = spanstart; int i_ant = 0; + for (int i=1; i and + if (sources[i]<0) { + const int* ants = reinterpret_cast(ant_contexts[i_ant++]); + int antstate = ants[Dwarf::STATE_SIZE-1]; + if (DEBUG) cerr << " found NT[" << target(antstate) << "," << source(antstate) << "]" << endl; + curr += target(antstate)-source(antstate); + } else { + curr++; + } + } + if (DEBUG) cerr << " curr = " << curr << endl; + //compute the fw index + int ret = 1; + for (int i=0; i=spanstart) return curr; + } + } +// assert(0); + return curr; +} + +int Alignment::GetLastFWIdx(int spanstart,int spanend, const Lattice& sourcelattice, const map& sfw) { + if (DEBUG) cerr << " GetLastFWIdx(" << spanstart << "," << spanend << ")" << endl; + int curr=0; + for (int i=0; i& tags, bool pos) { + if (!pos) { + map::const_iterator it = tags.find(original); + if (it!=tags.end()) { + return it->second; + } + } else { + string key,idx; + Dwarf::stripIndex(TD::Convert(original),&key,&idx); + map::const_iterator it = tags.find(TD::Convert(key)); + if (it!=tags.end()) { + ostringstream oss; + oss << TD::Convert(it->second) << "/" << idx; + return TD::Convert(oss.str()); + } + } + return original; +} + +int* Alignment::SOS() { + int* neighbor = new int[4]; + neighbor[0]=0; neighbor[1]=0; + neighbor[2]=0; neighbor[3]=0; + return neighbor; +} + +int* Alignment::EOS() { + int* neighbor = new int[4]; + neighbor[0]=getJ()-1; neighbor[1]=neighbor[0]; + neighbor[2]=getI()-1; neighbor[3]=neighbor[2]; + return neighbor; +} + +int* Alignment::neighborLeft(int startidx, int endidx, bool* getit) { + if (DEBUG) cerr << " neighborLeft("<