summaryrefslogtreecommitdiff
path: root/sa-extract/manager.py
diff options
context:
space:
mode:
authorChris Dyer <prguest11@taipan.cs>2012-02-02 06:29:50 +0000
committerChris Dyer <prguest11@taipan.cs>2012-02-02 06:29:50 +0000
commit7d37102fde8d8b46ed5218245a11496e870fd09f (patch)
tree97bb3de03d634ffb754a6381c961b6d5cf8d3a7d /sa-extract/manager.py
parenta95fe83969d15b074892c8c06fc2a948b75910ff (diff)
lopez suffix array extractor with copyrighted david chiang code excised
Diffstat (limited to 'sa-extract/manager.py')
-rw-r--r--sa-extract/manager.py100
1 files changed, 100 insertions, 0 deletions
diff --git a/sa-extract/manager.py b/sa-extract/manager.py
new file mode 100644
index 00000000..767192c1
--- /dev/null
+++ b/sa-extract/manager.py
@@ -0,0 +1,100 @@
+import csuf
+import cdat
+
+class Sampler(object):
+ '''A Sampler implements a logic for choosing
+ samples from a population range'''
+
+ def __init__(self):
+ pass
+
+ def registerContext(self, context_manager):
+ self.context_manager = context_manager
+
+ def sample(self, phrase_location):
+ return cintlist.CIntList()
+
+
+
+class Extractor(object):
+ '''Extractor is responsible for extracting rules
+ from a given context; once a sentence id/location
+ is found for the source fwords, extractor is
+ responsible for producing any matching rule(s).
+ Optionally, extractor may return an empty list'''
+
+ def __init__(self):
+ pass
+
+ def registerContext(self, context_manager):
+ self.context_manager = context_manager
+
+ def extract(self, fwords, loc):
+ return []
+
+
+
+class RuleFactory(object):
+ '''RuleFactory is a class that manages the
+ generation of translation rules, using the Context
+ and (optionally) any of its contained classes or
+ data. The RuleFactory is responsible for handling
+ any caching (i.e. when presented with an input
+ sentence, it may lookup a rule from its cache
+ rather than extracting a new rule)'''
+
+ def __init__(self):
+ self.num_lookups = 0
+ self.num_extractions = 0
+ self.num_rules = 0
+ self.time = 0.0
+
+
+ def registerContext(self, context_manager):
+ self.context_manager = context_manager
+
+
+ def input(self, fwords):
+ '''Manages the process of enumerating
+ rules for a given input sentence, and
+ looking them with calls to Context,
+ Sampler, and Extractor'''
+ return []
+
+
+class ContextManager(object):
+
+ def __init__(self, ffile, efile, extractor=None, sampler=None, rulefactory=None, from_binary=False):
+ # NOTE: Extractor does not have a default value because
+ # the only nontrivial extractor right now depends on an
+ # alignment file
+
+ self.fsarray = csuf.SuffixArray(ffile, from_binary)
+ self.edarray = cdat.DataArray(efile, from_binary)
+
+ self.factory = rulefactory
+ self.factory.registerContext(self)
+
+ self.sampler = sampler
+ self.sampler.registerContext(self)
+
+ self.models = []
+ self.owner = None
+
+
+ def add_model(self, model):
+ if self.owner is None:
+ self.owner = model
+ model_id = len(self.models)
+ self.models.append(model)
+ return model_id
+
+
+ def input(self, model, fwords, meta):
+ if model != self.owner:
+ return
+ self.fwords = fwords
+ self.factory.input(self.fwords, meta)
+
+
+