summaryrefslogtreecommitdiff
path: root/sa-extract/manager.py
blob: 767192c1e3a23be6138d9c896ca204307960a9ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import csuf
import cdat

class Sampler(object):
	'''Base class for sampling strategies: a Sampler holds
	the logic that chooses samples from a population range.
	This base version keeps no state of its own.'''

	def __init__(self):
		'''Nothing to initialize in the base sampler.'''

	def registerContext(self, context_manager):
		'''Remember the context manager this sampler is
		attached to, as self.context_manager.'''
		self.context_manager = context_manager

	def sample(self, phrase_location):
		'''Return the samples chosen for phrase_location.
		The base class ignores its argument and returns a
		cintlist.CIntList constructed with no arguments.'''
		# NOTE(review): cintlist is not among the imports visible
		# at the top of this file -- confirm it is in scope
		# before relying on this default implementation.
		chosen = cintlist.CIntList()
		return chosen



class Extractor(object):
	'''Base class for rule extractors.  Given a context
	(a sentence id/location found for the source fwords),
	an extractor produces whatever rule(s) match there.
	An extractor is allowed to return an empty list, and
	this base version always does.'''

	def __init__(self):
		'''Nothing to initialize in the base extractor.'''

	def registerContext(self, context_manager):
		'''Remember the context manager this extractor is
		attached to, as self.context_manager.'''
		self.context_manager = context_manager

	def extract(self, fwords, loc):
		'''Return the rules extracted for fwords at loc.
		The base class ignores both arguments and returns
		a new empty list.'''
		no_rules = []
		return no_rules
	


class RuleFactory(object):
	'''RuleFactory is a class that manages the
	generation of translation rules, using the Context
	and (optionally) any of its contained classes or
	data.  The RuleFactory is responsible for handling
	any caching (i.e. when presented with an input
	sentence, it may look up a rule from its cache
	rather than extracting a new rule)'''

	def __init__(self):
		'''Reset the bookkeeping fields.  The base class only
		initializes them; it never updates them itself.'''
		self.num_lookups = 0
		self.num_extractions = 0
		self.num_rules = 0
		self.time = 0.0


	def registerContext(self, context_manager):
		'''Remember the context manager this factory is
		attached to, as self.context_manager.'''
		self.context_manager = context_manager


	def input(self, fwords, meta=None):
		'''Manages the process of enumerating
		rules for a given input sentence, and
		looking them up with calls to Context,
		Sampler, and Extractor.

		fwords is the input sentence.  meta is optional
		extra data for the sentence; it is accepted because
		ContextManager.input calls factory.input(fwords, meta),
		and it defaults to None so one-argument callers keep
		working.  The base class ignores both arguments and
		returns a new empty list.'''
		return []


class ContextManager(object):
	'''Holds the data arrays built from ffile and efile
	together with the rule factory and sampler that work on
	them, and routes input sentences from the owning model
	to the rule factory.'''

	def __init__(self, ffile, efile, extractor=None, sampler=None, rulefactory=None, from_binary=False):
		'''Build the data arrays and attach the collaborators.

		ffile and from_binary are passed to csuf.SuffixArray,
		efile and from_binary to cdat.DataArray.  sampler and
		rulefactory are stored as self.sampler and self.factory
		and, when supplied, are given a reference back to this
		object through their registerContext method.  extractor
		is accepted but is not stored or used here.'''
		# NOTE: Extractor does not have a default value because
		# the only nontrivial extractor right now depends on an
		# alignment file

		self.fsarray = csuf.SuffixArray(ffile, from_binary)
		self.edarray = cdat.DataArray(efile, from_binary)

		# The signature allows None for both collaborators, so
		# only register with the ones that were actually given
		# instead of failing on None after the arrays are loaded.
		self.factory = rulefactory
		if self.factory is not None:
			self.factory.registerContext(self)

		self.sampler = sampler
		if self.sampler is not None:
			self.sampler.registerContext(self)

		self.models = []
		self.owner = None


	def add_model(self, model):
		'''Append model to self.models and return its index
		in that list.  The first model ever added also
		becomes self.owner.'''
		if self.owner is None:
			self.owner = model
		model_id = len(self.models)
		self.models.append(model)
		return model_id


	def input(self, model, fwords, meta):
		'''Forward an input sentence to the rule factory.

		Calls from any model other than self.owner are ignored,
		so the factory is invoked once per sentence even when
		several models are registered.  For the owner, fwords
		is stored as self.fwords and passed with meta to
		self.factory.input.  Always returns None; the
		factory's return value is not propagated.  A rule
		factory must have been supplied at construction.'''
		if model != self.owner:
			return
		self.fwords = fwords
		self.factory.input(self.fwords, meta)