author    Victor Chahuneau <vchahune@cs.cmu.edu>  2012-06-21 17:27:48 -0400
committer Victor Chahuneau <vchahune@cs.cmu.edu>  2012-06-21 17:27:48 -0400
commit    37bb4a4c537ea2f9bd916f3e031c759e61bf49f7 (patch)
tree      b53f8696f230b9bd54032c0d2f2fedaecdba9899 /sa-extract
parent    93f768b5c9a0ab54c462901e4edddacc65cb8ecf (diff)
Allow SA rule extraction to write to a python buffer
+ very small sa-extract cleanup
Diffstat (limited to 'sa-extract')
-rw-r--r--  sa-extract/cn.py           46
-rw-r--r--  sa-extract/manager.py       2
-rw-r--r--  sa-extract/model.py         5
-rw-r--r--  sa-extract/rulefactory.pyx  15
4 files changed, 32 insertions, 36 deletions
diff --git a/sa-extract/cn.py b/sa-extract/cn.py
index e534783f..6e45bcf9 100644
--- a/sa-extract/cn.py
+++ b/sa-extract/cn.py
@@ -4,11 +4,8 @@
# vim:tabstop=4:autoindent:expandtab
-import sys
-import math
import sym
import log
-import sgml
epsilon = sym.fromstring('*EPS*');
@@ -142,23 +139,26 @@ class ConfusionNet(object):
-
-#file = open(sys.argv[1], "rb")
-#sent = sgml.process_sgml_line(file.read())
-#print sent
-#cn = ConfusionNet(sent)
-#print cn
-#results = cn.listdown()
-#for result in results:
-# print sym.tostring(result)
-#print cn.next(0);
-#print cn.next(1);
-#print cn.next(2);
-#print cn.next(3);
-#print cn
-#cn = ConfusionNet()
-#k = 0
-#while (cn.read(file)):
-# print cn
-
-#print cn.stats
+"""
+import sys
+import sgml
+file = open(sys.argv[1], "rb")
+sent = sgml.process_sgml_line(file.read())
+print sent
+cn = ConfusionNet(sent)
+print cn
+results = cn.listdown()
+for result in results:
+ print sym.tostring(result)
+print cn.next(0);
+print cn.next(1);
+print cn.next(2);
+print cn.next(3);
+print cn
+cn = ConfusionNet()
+k = 0
+while (cn.read(file)):
+ print cn
+
+print cn.stats
+"""
diff --git a/sa-extract/manager.py b/sa-extract/manager.py
index 767192c1..3a079c2a 100644
--- a/sa-extract/manager.py
+++ b/sa-extract/manager.py
@@ -1,5 +1,6 @@
import csuf
import cdat
+import cintlist
class Sampler(object):
'''A Sampler implements a logic for choosing
@@ -15,7 +16,6 @@ class Sampler(object):
return cintlist.CIntList()
-
class Extractor(object):
'''Extractor is responsible for extracting rules
from a given context; once a sentence id/location
diff --git a/sa-extract/model.py b/sa-extract/model.py
index 66c51051..bcdf129a 100644
--- a/sa-extract/model.py
+++ b/sa-extract/model.py
@@ -1,4 +1,3 @@
-
class Model(object):
def __init__(self, name=None):
object.__init__(self)
@@ -6,7 +5,3 @@ class Model(object):
self.name = self.__class__.__name__
else:
self.name = name
-
- def input(self, fwords, meta):
- pass
-
diff --git a/sa-extract/rulefactory.pyx b/sa-extract/rulefactory.pyx
index 20ea80d2..792489c4 100644
--- a/sa-extract/rulefactory.pyx
+++ b/sa-extract/rulefactory.pyx
@@ -1321,7 +1321,7 @@ cdef class HieroCachingRuleFactory:
candidate.append([next_id,curr[1]+jump])
return sorted(result);
- def input(self, fwords, meta):
+ def input(self, fwords, meta=None, output=None):
'''When this function is called on the RuleFactory,
it looks up all of the rules that can be used to translate
the input sentence'''
@@ -1342,13 +1342,14 @@ cdef class HieroCachingRuleFactory:
nodes_isteps_away_buffer = {}
hit = 0
reachable_buffer = {}
- #print "id = ",meta
- #print "rule_file = ",self.rule_file
- dattrs = sgml.attrs_to_dict(meta)
- id = dattrs.get('id', 'NOID')
- if self.per_sentence_grammar:
+ if meta:
+ dattrs = sgml.attrs_to_dict(meta)
+ id = dattrs.get('id', 'NOID')
+ self.excluded_sent_id = int(dattrs.get('exclude', '-1'))
+ if output:
+ self.rule_filehandler = output
+ elif self.per_sentence_grammar:
self.rule_filehandler = open(self.rule_file+'.'+id, 'w')
- self.excluded_sent_id = int(dattrs.get('exclude', '-1'))
#print "max_initial_size = %i" % self.max_initial_size