diff options
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | nlp_tools/dict_utils.py | 101 | ||||
-rw-r--r-- | nlp_tools/dict_utils.pyc | bin | 3803 -> 0 bytes | |||
-rw-r--r-- | nlp_tools/feature.pyc | bin | 427 -> 0 bytes | |||
-rw-r--r-- | nlp_tools/vocabulary.py | 49 | ||||
-rw-r--r-- | nlp_tools/vocabulary.pyc | bin | 2461 -> 0 bytes | |||
-rwxr-xr-x | run2.py | 55 |
7 files changed, 0 insertions, 208 deletions
diff --git a/.gitmodules b/.gitmodules index 404b718..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "nlp_tools"] - path = nlp_tools - url = https://github.com/jacobandreas/nlp_tools.git diff --git a/nlp_tools/dict_utils.py b/nlp_tools/dict_utils.py deleted file mode 100644 index 8b9b94b..0000000 --- a/nlp_tools/dict_utils.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Utilities for doing math on sparse vectors indexed by arbitrary objects. -(These will usually be feature vectors.) -""" - -import math_utils as mu -import math - -def d_elt_op_keep(op, zero, args): - """ - Applies op to arguments elementwise, keeping entries that don't occur in - every argument (i.e. behaves like a sum). - """ - ret = {} - for d in args: - for key in d: - if key not in ret: - ret[key] = d[key] - else: - ret[key] = op([ret[key], d[key]]) - for key in ret.keys(): - if ret[key] == zero: - del ret[key] - return ret - -def d_elt_op_drop(op, args): - """ - Applies op to arguments elementwise, discarding entries that don't occur in - every argument (i.e. behaves like a product). - """ - # avoid querying lots of nonexistent keys - smallest = min(args, key=len) - sindex = args.index(smallest) - ret = dict(smallest) - for i in range(len(args)): - if i == sindex: - continue - d = args[i] - for key in ret.keys(): - if key in d: - ret[key] = op([ret[key], d[key]]) - else: - del ret[key] - return ret - -def d_sum(args): - """ - Computes a sum of vectors. - """ - return d_elt_op_keep(sum, 0, args) - -def d_logspace_sum(args): - """ - Computes a sum of vectors whose elements are represented in logspace. - """ - return d_elt_op_keep(mu.logspace_sum, -float('inf'), args) - -def d_elt_prod(args): - """ - Computes an elementwise product of vectors. - """ - return d_elt_op_drop(lambda l: reduce(lambda a,b: a*b, l), args) - -def d_dot_prod(d1, d2): - """ - Takes the dot product of the two arguments. - """ - # avoid querying lots of nonexistent keys - if len(d2) < len(d1): - d1, d2 = d2, d1 - dot_prod = 0 - for key in d1: - if key in d2: - dot_prod += d1[key] * d2[key] - return dot_prod - -def d_logspace_scalar_prod(c, d): - """ - Multiplies every element of d by c, where c and d are both represented in - logspace. - """ - ret = {} - for key in d: - ret[key] = c + d[key] - return ret - -def d_op(op, d): - """ - Applies op to every element of the dictionary. - """ - ret = {} - for key in d: - ret[key] = op(d[key]) - return ret - -# convenience methods -def d_log(d): - return d_op(math.log, d) - -def d_exp(d): - return d_op(math.exp, d) diff --git a/nlp_tools/dict_utils.pyc b/nlp_tools/dict_utils.pyc Binary files differdeleted file mode 100644 index ada4c58..0000000 --- a/nlp_tools/dict_utils.pyc +++ /dev/null diff --git a/nlp_tools/feature.pyc b/nlp_tools/feature.pyc Binary files differdeleted file mode 100644 index 9c96271..0000000 --- a/nlp_tools/feature.pyc +++ /dev/null diff --git a/nlp_tools/vocabulary.py b/nlp_tools/vocabulary.py deleted file mode 100644 index ed200f5..0000000 --- a/nlp_tools/vocabulary.py +++ /dev/null @@ -1,49 +0,0 @@ -import cPickle - -class Vocabulary: - - OOV_VAL = -1 - - def __init__(self): - self.str_to_tok = {} - self.tok_to_str = {} - - def put(self, string): - if string in self.str_to_tok: - raise ValueError("%s is already in this vocabulary (token %d)" % \ - (string, self.str_to_tok[string])) - return self.ensure(string) - - def ensure(self, string): - if string in self.str_to_tok: - return - tok = len(self) - self.str_to_tok[string] = tok - self.tok_to_str[tok] = string - return tok - - def gett(self, string): - if string not in self.str_to_tok: - return self.OOV_VAL - return self.str_to_tok[string] - - def gets(self, tok): - return self.tok_to_str[tok] - - def strs(self): - return self.str_to_tok.keys() - - def toks(self): - return self.tok_to_str.keys() - - def __len__(self): - return len(self.str_to_tok) - - def save(self, path): - with open(path, 'w') as f: - cPickle.dump(self, f) - - @classmethod - def load(cls, path): - with open(path) as f: - return cPickle.load(f) diff --git a/nlp_tools/vocabulary.pyc b/nlp_tools/vocabulary.pyc Binary files differdeleted file mode 100644 index 952b7fd..0000000 --- a/nlp_tools/vocabulary.pyc +++ /dev/null diff --git a/run2.py b/run2.py deleted file mode 100755 index a92c118..0000000 --- a/run2.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python2 - -import os -import datetime -import logging -from src.evaluator import Evaluator -from src.smt_semparse_config import SMTSemparseConfig -from src.smt_semparse_experiment import SMTSemparseExperiment - -LOGFILE_NAME = 'run.log' - -def run_one(config): - # create work dir for this run - # moses can't handle paths with colons - timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H.%M.%S') - run_work_dir = os.path.join(base_work_dir, timestamp) - assert not os.path.exists(run_work_dir) - os.makedirs(run_work_dir) - config.put('work_dir', run_work_dir) - if os.path.exists('latest'): - os.remove('latest') - os.symlink(run_work_dir, 'latest') - - # set up logging - if config.run == 'debug': - logging.basicConfig(level=logging.DEBUG) - else: - log_path = os.path.join(run_work_dir, LOGFILE_NAME) - logging.basicConfig(filename=log_path, level=logging.INFO) - - experiment = SMTSemparseExperiment(config) - if config.run == 'debug': - experiment.run_fold(1) - elif config.run == 'dev': - for i in range(10): - experiment.run_fold(i) - elif config.run == 'test': - experiment.run_split() - else: - assert False - - logging.info('evaluating') - Evaluator(config).run() - -if __name__ == '__main__': - - # load config - config = SMTSemparseConfig('settings.yaml', 'dependencies.yaml') - - # create base work dir if it doesn't exist - base_work_dir = os.path.join(config.smt_semparse, config.workdir) - if not os.path.exists(base_work_dir): - os.makedirs(base_work_dir) - - run_one(config) |