summary | refs | log | tree | commit | diff
path: root/python/src/sa/rulefactory.pxi
diff options
context:
space:
mode:
author Paul Baltescu <pauldb89@gmail.com> 2013-06-19 15:06:34 +0100
committer Paul Baltescu <pauldb89@gmail.com> 2013-06-19 15:06:34 +0100
commit 459775095b46b4625ce26ea5a34001ec74ab3aa8 (patch)
tree 844d1a650a302114ae619d37b8778ab66207a834 /python/src/sa/rulefactory.pxi
parent 02099a01350a41a99ec400e9b29df08a01d88979 (diff)
parent 0dc7755f7fb1ef15db5a60c70866aa61b6367898 (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'python/src/sa/rulefactory.pxi')
-rw-r--r-- python/src/sa/rulefactory.pxi 71
1 files changed, 36 insertions, 35 deletions
diff --git a/python/src/sa/rulefactory.pxi b/python/src/sa/rulefactory.pxi
index 559e8396..10bb9737 100644
--- a/python/src/sa/rulefactory.pxi
+++ b/python/src/sa/rulefactory.pxi
@@ -879,15 +879,15 @@ cdef class HieroCachingRuleFactory:
def advance(self, frontier, res, fwords):
cdef unsigned na
nf = []
- for (toskip, (i, alt, pathlen)) in frontier:
+ for toskip, (i, alt, pathlen) in frontier:
spanlen = fwords[i][alt][2]
- if (toskip == 0):
+ if toskip == 0:
res.append((i, alt, pathlen))
ni = i + spanlen
- if (ni < len(fwords) and (pathlen + 1) < self.max_initial_size):
+ if ni < len(fwords) and pathlen + 1 < self.max_initial_size:
for na in range(len(fwords[ni])):
nf.append((toskip - 1, (ni, na, pathlen + 1)))
- if (len(nf) > 0):
+ if len(nf) > 0:
return self.advance(nf, res, fwords)
else:
return res
@@ -895,11 +895,11 @@ cdef class HieroCachingRuleFactory:
def get_all_nodes_isteps_away(self, skip, i, spanlen, pathlen, fwords, next_states, reachable_buffer):
cdef unsigned alt_it
frontier = []
- if (i+spanlen+skip >= len(next_states)):
+ if i+spanlen+skip >= len(next_states):
return frontier
key = tuple([i,spanlen])
reachable = []
- if (key in reachable_buffer):
+ if key in reachable_buffer:
reachable = reachable_buffer[key]
else:
reachable = self.reachable(fwords, i, spanlen)
@@ -911,7 +911,7 @@ cdef class HieroCachingRuleFactory:
continue
if pathlen+jump <= self.max_initial_size:
for alt_id in range(len(fwords[next_id])):
- if (fwords[next_id][alt_id][0] != EPSILON):
+ if fwords[next_id][alt_id][0] != EPSILON:
newel = (next_id,alt_id,pathlen+jump)
if newel not in frontier:
frontier.append((next_id,alt_id,pathlen+jump))
@@ -919,18 +919,18 @@ cdef class HieroCachingRuleFactory:
def reachable(self, fwords, ifrom, dist):
ret = []
- if (ifrom >= len(fwords)):
+ if ifrom >= len(fwords):
return ret
for alt_id in range(len(fwords[ifrom])):
- if (fwords[ifrom][alt_id][0] == EPSILON):
+ if fwords[ifrom][alt_id][0] == EPSILON:
ret.extend(self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist))
else:
- if (dist==0):
- if (ifrom not in ret):
+ if dist == 0:
+ if ifrom not in ret:
ret.append(ifrom)
else:
for ifromchild in self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist-1):
- if (ifromchild not in ret):
+ if ifromchild not in ret:
ret.append(ifromchild)
return ret
@@ -938,15 +938,15 @@ cdef class HieroCachingRuleFactory:
def shortest(self, fwords, ifrom, ito):
cdef unsigned alt_id
min = 1000
- if (ifrom > ito):
+ if ifrom > ito:
return min
- if (ifrom == ito):
+ if ifrom == ito:
return 0
for alt_id in range(len(fwords[ifrom])):
currmin = self.shortest(fwords,ifrom+fwords[ifrom][alt_id][2],ito)
- if (fwords[ifrom][alt_id][0] != EPSILON):
+ if fwords[ifrom][alt_id][0] != EPSILON:
currmin += 1
- if (currmin<min):
+ if currmin < min:
min = currmin
return min
@@ -964,7 +964,7 @@ cdef class HieroCachingRuleFactory:
for alt in curr_col:
next_id = curr[0]+alt[2]
jump = 1
- if (alt[0] == EPSILON):
+ if alt[0] == EPSILON:
jump = 0
if next_id not in result and min_dist <= curr[1]+jump <= self.max_initial_size+1:
candidate.append([next_id,curr[1]+jump])
@@ -1172,20 +1172,20 @@ cdef class HieroCachingRuleFactory:
# I put spanlen=1 below
key = tuple([self.min_gap_size, i, 1, pathlen])
frontier_nodes = []
- if (key in nodes_isteps_away_buffer):
+ if key in nodes_isteps_away_buffer:
frontier_nodes = nodes_isteps_away_buffer[key]
else:
frontier_nodes = self.get_all_nodes_isteps_away(self.min_gap_size, i, 1, pathlen, fwords, next_states, reachable_buffer)
nodes_isteps_away_buffer[key] = frontier_nodes
- for (i, alt, pathlen) in frontier_nodes:
+ for i, alt, pathlen in frontier_nodes:
new_frontier.append((k, i, input_match + (i,), alt, pathlen, xnode, phrase +(xcat,), is_shadow_path))
frontier = new_frontier
# Online rule extraction and scoring
if self.online:
f_syms = tuple(word[0][0] for word in fwords)
- for (f, lex_i, lex_j) in self.get_f_phrases(f_syms):
+ for f, lex_i, lex_j in self.get_f_phrases(f_syms):
spanlen = (lex_j - lex_i) + 1
if not sym_isvar(f[0]):
spanlen += 1
@@ -1440,10 +1440,11 @@ cdef class HieroCachingRuleFactory:
step = (num_gaps+1)*2
i = 0
+ cdef IntList indexes
while i < len1:
ephr_arr._clear()
num_chunks = 0
- indexes = []
+ indexes = IntList()
for j from 0 <= j < num_gaps+1:
if e_gaps1[i+2*j] < e_gaps1[i+(2*j)+1]:
num_chunks = num_chunks + 1
@@ -1461,18 +1462,18 @@ cdef class HieroCachingRuleFactory:
free(e_gap_order)
return result
- cdef IntList create_alignments(self, int* sent_links, int num_links, findexes, eindexes):
+ cdef IntList create_alignments(self, int* sent_links, int num_links,
+ IntList findexes, IntList eindexes):
cdef unsigned i
cdef IntList ret = IntList()
- for i in range(len(findexes)):
- s = findexes[i]
- if (s<0):
- continue
+ for i in range(findexes.len):
+ s = findexes.arr[i]
+ if s < 0: continue
idx = 0
- while (idx < num_links*2):
- if (sent_links[idx] == s):
+ while idx < num_links * 2:
+ if sent_links[idx] == s:
j = eindexes.index(sent_links[idx+1])
- ret.append(i*65536+j)
+ ret.append(i * ALIGNMENT_CODE + j)
idx += 2
return ret
@@ -1507,7 +1508,7 @@ cdef class HieroCachingRuleFactory:
for j in range(chunklen[i]):
self.findexes1.append(matching.arr[matching.start+i]+j-f_sent_start);
sofar += 1
- if (i+1<num_chunks):
+ if i+1 < num_chunks:
self.findexes1.append(phrase[sofar])
sofar += 1
@@ -1677,7 +1678,7 @@ cdef class HieroCachingRuleFactory:
else:
pair_count = 0
reason_for_failure = "Didn't extract anything from [%d, %d] -> [%d, %d]" % (f_back_low, f_back_high, e_low, e_high)
- for (phrase2,eindexes) in phrase_list:
+ for phrase2, eindexes in phrase_list:
als1 = self.create_alignments(sent_links,num_links,self.findexes,eindexes)
extracts.append((fphr, phrase2, pair_count, tuple(als1)))
if (num_gaps < self.max_nonterminals and
@@ -1732,7 +1733,7 @@ cdef class HieroCachingRuleFactory:
pair_count = 1.0 / len(phrase_list)
else:
pair_count = 0
- for phrase2,eindexes in phrase_list:
+ for phrase2, eindexes in phrase_list:
als2 = self.create_alignments(sent_links,num_links,self.findexes,eindexes)
extracts.append((fphr, phrase2, pair_count, tuple(als2)))
@@ -1899,7 +1900,7 @@ cdef class HieroCachingRuleFactory:
al = [[] for i in range(f_len)]
fe_span = [[e_len + 1, -1] for i in range(f_len)]
ef_span = [[f_len + 1, -1] for i in range(e_len)]
- for (f, e) in alignment:
+ for f, e in alignment:
al[f].append(e)
fe_span[f][0] = min(fe_span[f][0], e)
fe_span[f][1] = max(fe_span[f][1], e)
@@ -2030,7 +2031,7 @@ cdef class HieroCachingRuleFactory:
# Update possible phrases (samples)
# This could be more efficiently integrated with extraction
# at the cost of readability
- for (f, lex_i, lex_j) in self.get_f_phrases(f_words):
+ for f, lex_i, lex_j in self.get_f_phrases(f_words):
self.samples_f[f] += 1
# Update phrase counts
@@ -2112,7 +2113,7 @@ cdef class HieroCachingRuleFactory:
# Create rule (f_phrase, e_phrase, links, f_link_min, f_link_max)
f = Phrase(f_sym)
e = Phrase(e_sym)
- a = tuple(self.alignment.link(i, j) for (i, j) in links)
+ a = tuple(self.alignment.link(i, j) for i, j in links)
return (f, e, a, lex_f_i, lex_f_j)
# Rule string from rule