summaryrefslogtreecommitdiff
path: root/nlp_tools
diff options
context:
space:
mode:
Diffstat (limited to 'nlp_tools')
-rw-r--r--nlp_tools/dict_utils.py101
-rw-r--r--nlp_tools/dict_utils.pycbin3803 -> 0 bytes
-rw-r--r--nlp_tools/feature.pycbin427 -> 0 bytes
-rw-r--r--nlp_tools/vocabulary.py49
-rw-r--r--nlp_tools/vocabulary.pycbin2461 -> 0 bytes
5 files changed, 0 insertions, 150 deletions
diff --git a/nlp_tools/dict_utils.py b/nlp_tools/dict_utils.py
deleted file mode 100644
index 8b9b94b..0000000
--- a/nlp_tools/dict_utils.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""
-Utilities for doing math on sparse vectors indexed by arbitrary objects.
-(These will usually be feature vectors.)
-"""
-
-import math_utils as mu
-import math
-
def d_elt_op_keep(op, zero, args):
    """
    Applies op to arguments elementwise, keeping entries that don't occur in
    every argument (i.e. behaves like a sum).

    op   -- callable that takes a two-element list [accumulated, new] and
            returns the combined value.
    zero -- entries whose final value equals this are dropped from the
            result, which keeps the vector sparse.
    args -- iterable of dicts.

    Returns a new dict.
    """
    combined = {}
    for d in args:
        for key in d:
            if key in combined:
                combined[key] = op([combined[key], d[key]])
            else:
                # first occurrence of this key: nothing to combine with yet
                combined[key] = d[key]
    # Filter into a fresh dict rather than deleting while iterating: on
    # Python 3, keys() is a live view and deleting during iteration raises
    # RuntimeError.
    return {key: value for key, value in combined.items()
            if not (value == zero)}
-
def d_elt_op_drop(op, args):
    """
    Applies op to arguments elementwise, discarding entries that don't occur in
    every argument (i.e. behaves like a product).

    op   -- callable that takes a two-element list [accumulated, new] and
            returns the combined value.
    args -- iterable of dicts.

    Returns a new dict containing only the keys present in every argument.
    An empty args yields an empty dict.
    """
    args = list(args)  # accept any iterable, not just lists
    if not args:
        return {}
    # avoid querying lots of nonexistent keys: seed the result from the
    # smallest argument (first one on ties)
    start = min(range(len(args)), key=lambda i: len(args[i]))
    result = dict(args[start])
    for i, d in enumerate(args):
        if i == start:
            continue
        # Iterate over a snapshot of the keys because entries are deleted
        # inside the loop; mutating a dict while iterating it directly raises
        # RuntimeError on Python 3.
        for key in list(result):
            if key in d:
                result[key] = op([result[key], d[key]])
            else:
                del result[key]
    return result
-
def d_sum(args):
    """
    Adds up a collection of sparse vectors key by key.

    Delegates to d_elt_op_keep with the builtin sum as the combiner and 0 as
    the value to drop from the result.
    """
    combiner, dropped_value = sum, 0
    return d_elt_op_keep(combiner, dropped_value, args)
-
def d_logspace_sum(args):
    """
    Adds up sparse vectors whose entries are stored in logspace.

    Delegates to d_elt_op_keep with mu.logspace_sum as the combiner and
    negative infinity as the value to drop from the result.
    """
    dropped_value = -float('inf')
    return d_elt_op_keep(mu.logspace_sum, dropped_value, args)
-
def d_elt_prod(args):
    """
    Computes an elementwise product of vectors.

    args -- collection of dicts; only keys present in every dict survive
            (see d_elt_op_drop).

    Returns a new dict mapping each surviving key to the product of its
    values.
    """
    # reduce is a builtin only on Python 2; functools.reduce is available on
    # both Python 2.6+ and Python 3, so import it locally.
    from functools import reduce
    import operator
    return d_elt_op_drop(lambda values: reduce(operator.mul, values), args)
-
def d_dot_prod(d1, d2):
    """
    Returns the dot product of two sparse vectors stored as dicts.

    Only keys present in both dicts contribute; the result is 0 when the
    dicts share no keys.
    """
    # walk the shorter dict so fewer absent keys get probed in the other one
    shorter, longer = (d2, d1) if len(d2) < len(d1) else (d1, d2)
    total = 0
    for key, value in shorter.items():
        if key in longer:
            total += value * longer[key]
    return total
-
def d_logspace_scalar_prod(c, d):
    """
    Scales the vector d by the scalar c, with both given in logspace.

    Multiplication in logspace is addition, so c is added to every value.
    Returns a new dict with the same keys as d.
    """
    return {key: c + value for key, value in d.items()}
-
def d_op(op, d):
    """
    Maps op over the values of d.

    Returns a new dict with the same keys as d, each bound to op(value).
    """
    return {key: op(value) for key, value in d.items()}
-
-# convenience methods
def d_log(d):
    """
    Shortcut for d_op with math.log: takes the log of every value in d.
    """
    take_log = math.log
    return d_op(take_log, d)
-
def d_exp(d):
    """
    Shortcut for d_op with math.exp: exponentiates every value in d.
    """
    exponentiate = math.exp
    return d_op(exponentiate, d)
diff --git a/nlp_tools/dict_utils.pyc b/nlp_tools/dict_utils.pyc
deleted file mode 100644
index ada4c58..0000000
--- a/nlp_tools/dict_utils.pyc
+++ /dev/null
Binary files differ
diff --git a/nlp_tools/feature.pyc b/nlp_tools/feature.pyc
deleted file mode 100644
index 9c96271..0000000
--- a/nlp_tools/feature.pyc
+++ /dev/null
Binary files differ
diff --git a/nlp_tools/vocabulary.py b/nlp_tools/vocabulary.py
deleted file mode 100644
index ed200f5..0000000
--- a/nlp_tools/vocabulary.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import cPickle
-
class Vocabulary:
    """
    Two-way mapping between strings and integer tokens.

    A newly added string receives the current size of the vocabulary as its
    token, so tokens are handed out as 0, 1, 2, ... in insertion order.
    """

    # token returned by gett() for strings that are not in the vocabulary
    OOV_VAL = -1

    def __init__(self):
        self.str_to_tok = {}
        self.tok_to_str = {}

    def put(self, string):
        """
        Adds a string that must not already be present.

        Returns the token assigned to the string.
        Raises ValueError if the string is already in the vocabulary.
        """
        if string in self.str_to_tok:
            raise ValueError("%s is already in this vocabulary (token %d)" % \
                    (string, self.str_to_tok[string]))
        return self.ensure(string)

    def ensure(self, string):
        """
        Adds the string if it is missing.

        Returns the string's token whether it was just added or was already
        present.
        """
        if string in self.str_to_tok:
            # hand back the existing token so callers always get a usable
            # value instead of None
            return self.str_to_tok[string]
        tok = len(self)
        self.str_to_tok[string] = tok
        self.tok_to_str[tok] = string
        return tok

    def gett(self, string):
        """
        Returns the token for a string, or OOV_VAL if the string is unknown.
        """
        return self.str_to_tok.get(string, self.OOV_VAL)

    def gets(self, tok):
        """
        Returns the string for a token. Raises KeyError for unknown tokens.
        """
        return self.tok_to_str[tok]

    def strs(self):
        """
        Returns a list of all strings in the vocabulary.
        """
        return list(self.str_to_tok)

    def toks(self):
        """
        Returns a list of all tokens in the vocabulary.
        """
        return list(self.tok_to_str)

    def __len__(self):
        return len(self.str_to_tok)

    def save(self, path):
        """
        Pickles this vocabulary to the file at path.
        """
        try:
            import cPickle as pickle  # Python 2
        except ImportError:
            import pickle  # Python 3
        # pickle data is bytes: binary mode is required on Python 3 and
        # avoids newline translation on platforms that perform it
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    @classmethod
    def load(cls, path):
        """
        Unpickles and returns the object stored at path.

        NOTE: unpickling can execute arbitrary code; only load files from a
        trusted source.
        """
        try:
            import cPickle as pickle  # Python 2
        except ImportError:
            import pickle  # Python 3
        with open(path, 'rb') as f:
            return pickle.load(f)
diff --git a/nlp_tools/vocabulary.pyc b/nlp_tools/vocabulary.pyc
deleted file mode 100644
index 952b7fd..0000000
--- a/nlp_tools/vocabulary.pyc
+++ /dev/null
Binary files differ