summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jacob <andqso@gmail.com> 2013-07-28 10:29:17 +0100
committer Jacob <andqso@gmail.com> 2013-07-28 10:29:17 +0100
commit 4f818256a13a61cbedee919a07637b8ed225783e (patch)
tree 21f35b7d44556969c541fdc98cc6cb73df8160cf
parent a0c270b926b7fb1f981281c0ad8ae085272364fb (diff)
remove nlp_tools
-rw-r--r-- .gitmodules | 3
-rw-r--r-- nlp_tools/dict_utils.py | 101
-rw-r--r-- nlp_tools/dict_utils.pyc | bin 3803 -> 0 bytes
-rw-r--r-- nlp_tools/feature.pyc | bin 427 -> 0 bytes
-rw-r--r-- nlp_tools/vocabulary.py | 49
-rw-r--r-- nlp_tools/vocabulary.pyc | bin 2461 -> 0 bytes
-rwxr-xr-x run2.py | 55
7 files changed, 0 insertions, 208 deletions
diff --git a/.gitmodules b/.gitmodules
index 404b718..e69de29 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "nlp_tools"]
- path = nlp_tools
- url = https://github.com/jacobandreas/nlp_tools.git
diff --git a/nlp_tools/dict_utils.py b/nlp_tools/dict_utils.py
deleted file mode 100644
index 8b9b94b..0000000
--- a/nlp_tools/dict_utils.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""
-Utilities for doing math on sparse vectors indexed by arbitrary objects.
-(These will usually be feature vectors.)
-"""
-
-import math_utils as mu
-import math
-
-def d_elt_op_keep(op, zero, args):
- """
- Applies op to arguments elementwise, keeping entries that don't occur in
- every argument (i.e. behaves like a sum).
- """
- ret = {}
- for d in args:
- for key in d:
- if key not in ret:
- ret[key] = d[key]
- else:
- ret[key] = op([ret[key], d[key]])
- for key in ret.keys():
- if ret[key] == zero:
- del ret[key]
- return ret
-
-def d_elt_op_drop(op, args):
- """
- Applies op to arguments elementwise, discarding entries that don't occur in
- every argument (i.e. behaves like a product).
- """
- # avoid querying lots of nonexistent keys
- smallest = min(args, key=len)
- sindex = args.index(smallest)
- ret = dict(smallest)
- for i in range(len(args)):
- if i == sindex:
- continue
- d = args[i]
- for key in ret.keys():
- if key in d:
- ret[key] = op([ret[key], d[key]])
- else:
- del ret[key]
- return ret
-
-def d_sum(args):
- """
- Computes a sum of vectors.
- """
- return d_elt_op_keep(sum, 0, args)
-
-def d_logspace_sum(args):
- """
- Computes a sum of vectors whose elements are represented in logspace.
- """
- return d_elt_op_keep(mu.logspace_sum, -float('inf'), args)
-
-def d_elt_prod(args):
- """
- Computes an elementwise product of vectors.
- """
- return d_elt_op_drop(lambda l: reduce(lambda a,b: a*b, l), args)
-
-def d_dot_prod(d1, d2):
- """
- Takes the dot product of the two arguments.
- """
- # avoid querying lots of nonexistent keys
- if len(d2) < len(d1):
- d1, d2 = d2, d1
- dot_prod = 0
- for key in d1:
- if key in d2:
- dot_prod += d1[key] * d2[key]
- return dot_prod
-
-def d_logspace_scalar_prod(c, d):
- """
- Multiplies every element of d by c, where c and d are both represented in
- logspace.
- """
- ret = {}
- for key in d:
- ret[key] = c + d[key]
- return ret
-
-def d_op(op, d):
- """
- Applies op to every element of the dictionary.
- """
- ret = {}
- for key in d:
- ret[key] = op(d[key])
- return ret
-
-# convenience methods
-def d_log(d):
- return d_op(math.log, d)
-
-def d_exp(d):
- return d_op(math.exp, d)
diff --git a/nlp_tools/dict_utils.pyc b/nlp_tools/dict_utils.pyc
deleted file mode 100644
index ada4c58..0000000
--- a/nlp_tools/dict_utils.pyc
+++ /dev/null
Binary files differ
diff --git a/nlp_tools/feature.pyc b/nlp_tools/feature.pyc
deleted file mode 100644
index 9c96271..0000000
--- a/nlp_tools/feature.pyc
+++ /dev/null
Binary files differ
diff --git a/nlp_tools/vocabulary.py b/nlp_tools/vocabulary.py
deleted file mode 100644
index ed200f5..0000000
--- a/nlp_tools/vocabulary.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import cPickle
-
-class Vocabulary:
-
- OOV_VAL = -1
-
- def __init__(self):
- self.str_to_tok = {}
- self.tok_to_str = {}
-
- def put(self, string):
- if string in self.str_to_tok:
- raise ValueError("%s is already in this vocabulary (token %d)" % \
- (string, self.str_to_tok[string]))
- return self.ensure(string)
-
- def ensure(self, string):
- if string in self.str_to_tok:
- return
- tok = len(self)
- self.str_to_tok[string] = tok
- self.tok_to_str[tok] = string
- return tok
-
- def gett(self, string):
- if string not in self.str_to_tok:
- return self.OOV_VAL
- return self.str_to_tok[string]
-
- def gets(self, tok):
- return self.tok_to_str[tok]
-
- def strs(self):
- return self.str_to_tok.keys()
-
- def toks(self):
- return self.tok_to_str.keys()
-
- def __len__(self):
- return len(self.str_to_tok)
-
- def save(self, path):
- with open(path, 'w') as f:
- cPickle.dump(self, f)
-
- @classmethod
- def load(cls, path):
- with open(path) as f:
- return cPickle.load(f)
diff --git a/nlp_tools/vocabulary.pyc b/nlp_tools/vocabulary.pyc
deleted file mode 100644
index 952b7fd..0000000
--- a/nlp_tools/vocabulary.pyc
+++ /dev/null
Binary files differ
diff --git a/run2.py b/run2.py
deleted file mode 100755
index a92c118..0000000
--- a/run2.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python2
-
-import os
-import datetime
-import logging
-from src.evaluator import Evaluator
-from src.smt_semparse_config import SMTSemparseConfig
-from src.smt_semparse_experiment import SMTSemparseExperiment
-
-LOGFILE_NAME = 'run.log'
-
-def run_one(config):
- # create work dir for this run
- # moses can't handle paths with colons
- timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H.%M.%S')
- run_work_dir = os.path.join(base_work_dir, timestamp)
- assert not os.path.exists(run_work_dir)
- os.makedirs(run_work_dir)
- config.put('work_dir', run_work_dir)
- if os.path.exists('latest'):
- os.remove('latest')
- os.symlink(run_work_dir, 'latest')
-
- # set up logging
- if config.run == 'debug':
- logging.basicConfig(level=logging.DEBUG)
- else:
- log_path = os.path.join(run_work_dir, LOGFILE_NAME)
- logging.basicConfig(filename=log_path, level=logging.INFO)
-
- experiment = SMTSemparseExperiment(config)
- if config.run == 'debug':
- experiment.run_fold(1)
- elif config.run == 'dev':
- for i in range(10):
- experiment.run_fold(i)
- elif config.run == 'test':
- experiment.run_split()
- else:
- assert False
-
- logging.info('evaluating')
- Evaluator(config).run()
-
-if __name__ == '__main__':
-
- # load config
- config = SMTSemparseConfig('settings.yaml', 'dependencies.yaml')
-
- # create base work dir if it doesn't exist
- base_work_dir = os.path.join(config.smt_semparse, config.workdir)
- if not os.path.exists(base_work_dir):
- os.makedirs(base_work_dir)
-
- run_one(config)