diff options
| author | Jacob <andqso@gmail.com> | 2013-07-28 10:29:17 +0100 | 
|---|---|---|
| committer | Jacob <andqso@gmail.com> | 2013-07-28 10:29:17 +0100 | 
| commit | 4f818256a13a61cbedee919a07637b8ed225783e (patch) | |
| tree | 21f35b7d44556969c541fdc98cc6cb73df8160cf | |
| parent | a0c270b926b7fb1f981281c0ad8ae085272364fb (diff) | |
remove nlp_tools
| -rw-r--r-- | .gitmodules | 3 | ||||
| -rw-r--r-- | nlp_tools/dict_utils.py | 101 | ||||
| -rw-r--r-- | nlp_tools/dict_utils.pyc | bin | 3803 -> 0 bytes | |||
| -rw-r--r-- | nlp_tools/feature.pyc | bin | 427 -> 0 bytes | |||
| -rw-r--r-- | nlp_tools/vocabulary.py | 49 | ||||
| -rw-r--r-- | nlp_tools/vocabulary.pyc | bin | 2461 -> 0 bytes | |||
| -rwxr-xr-x | run2.py | 55 | 
7 files changed, 0 insertions, 208 deletions
diff --git a/.gitmodules b/.gitmodules index 404b718..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "nlp_tools"] -	path = nlp_tools -	url = https://github.com/jacobandreas/nlp_tools.git diff --git a/nlp_tools/dict_utils.py b/nlp_tools/dict_utils.py deleted file mode 100644 index 8b9b94b..0000000 --- a/nlp_tools/dict_utils.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Utilities for doing math on sparse vectors indexed by arbitrary objects. -(These will usually be feature vectors.) -""" - -import math_utils as mu -import math - -def d_elt_op_keep(op, zero, args): -  """ -  Applies op to arguments elementwise, keeping entries that don't occur in -  every argument (i.e. behaves like a sum). -  """ -  ret = {} -  for d in args: -    for key in d: -      if key not in ret: -        ret[key] = d[key] -      else: -        ret[key] = op([ret[key], d[key]]) -  for key in ret.keys(): -    if ret[key] == zero: -      del ret[key] -  return ret - -def d_elt_op_drop(op, args): -  """ -  Applies op to arguments elementwise, discarding entries that don't occur in -  every argument (i.e. behaves like a product). -  """ -  # avoid querying lots of nonexistent keys -  smallest = min(args, key=len) -  sindex = args.index(smallest) -  ret = dict(smallest) -  for i in range(len(args)): -    if i == sindex: -      continue -    d = args[i] -    for key in ret.keys(): -      if key in d: -        ret[key] = op([ret[key], d[key]]) -      else: -        del ret[key] -  return ret - -def d_sum(args): -  """ -  Computes a sum of vectors. -  """ -  return d_elt_op_keep(sum, 0, args) - -def d_logspace_sum(args): -  """ -  Computes a sum of vectors whose elements are represented in logspace. -  """ -  return d_elt_op_keep(mu.logspace_sum, -float('inf'), args) - -def d_elt_prod(args): -  """ -  Computes an elementwise product of vectors. -  """ -  return d_elt_op_drop(lambda l: reduce(lambda a,b: a*b, l), args) - -def d_dot_prod(d1, d2): -  """ -  Takes the dot product of the two arguments. -  """ -  # avoid querying lots of nonexistent keys -  if len(d2) < len(d1): -    d1, d2 = d2, d1 -  dot_prod = 0 -  for key in d1: -    if key in d2: -      dot_prod += d1[key] * d2[key] -  return dot_prod - -def d_logspace_scalar_prod(c, d): -  """ -  Multiplies every element of d by c, where c and d are both represented in -  logspace. -  """ -  ret = {} -  for key in d: -    ret[key] = c + d[key] -  return ret - -def d_op(op, d): -  """ -  Applies op to every element of the dictionary. -  """ -  ret = {} -  for key in d: -    ret[key] = op(d[key]) -  return ret - -# convenience methods -def d_log(d): -  return d_op(math.log, d) - -def d_exp(d): -  return d_op(math.exp, d) diff --git a/nlp_tools/dict_utils.pyc b/nlp_tools/dict_utils.pyc Binary files differdeleted file mode 100644 index ada4c58..0000000 --- a/nlp_tools/dict_utils.pyc +++ /dev/null diff --git a/nlp_tools/feature.pyc b/nlp_tools/feature.pyc Binary files differdeleted file mode 100644 index 9c96271..0000000 --- a/nlp_tools/feature.pyc +++ /dev/null diff --git a/nlp_tools/vocabulary.py b/nlp_tools/vocabulary.py deleted file mode 100644 index ed200f5..0000000 --- a/nlp_tools/vocabulary.py +++ /dev/null @@ -1,49 +0,0 @@ -import cPickle - -class Vocabulary: - -  OOV_VAL = -1 - -  def __init__(self): -    self.str_to_tok = {} -    self.tok_to_str = {} - -  def put(self, string): -    if string in self.str_to_tok: -      raise ValueError("%s is already in this vocabulary (token %d)" % \ -          (string, self.str_to_tok[string])) -    return self.ensure(string) - -  def ensure(self, string): -    if string in self.str_to_tok: -      return -    tok = len(self) -    self.str_to_tok[string] = tok -    self.tok_to_str[tok] = string -    return tok - -  def gett(self, string): -    if string not in self.str_to_tok: -      return self.OOV_VAL -    return self.str_to_tok[string] - -  def gets(self, tok): -    return self.tok_to_str[tok] - -  def strs(self): -    return self.str_to_tok.keys() - -  def toks(self): -    return self.tok_to_str.keys() - -  def __len__(self): -    return len(self.str_to_tok) - -  def save(self, path): -    with open(path, 'w') as f: -      cPickle.dump(self, f) - -  @classmethod -  def load(cls, path): -    with open(path) as f: -      return cPickle.load(f) diff --git a/nlp_tools/vocabulary.pyc b/nlp_tools/vocabulary.pyc Binary files differdeleted file mode 100644 index 952b7fd..0000000 --- a/nlp_tools/vocabulary.pyc +++ /dev/null diff --git a/run2.py b/run2.py deleted file mode 100755 index a92c118..0000000 --- a/run2.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python2 - -import os -import datetime -import logging -from src.evaluator import Evaluator -from src.smt_semparse_config import SMTSemparseConfig -from src.smt_semparse_experiment import SMTSemparseExperiment - -LOGFILE_NAME = 'run.log' - -def run_one(config): -  # create work dir for this run -  # moses can't handle paths with colons -  timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H.%M.%S') -  run_work_dir = os.path.join(base_work_dir, timestamp) -  assert not os.path.exists(run_work_dir) -  os.makedirs(run_work_dir) -  config.put('work_dir', run_work_dir) -  if os.path.exists('latest'): -    os.remove('latest') -  os.symlink(run_work_dir, 'latest') - -  # set up logging -  if config.run == 'debug': -    logging.basicConfig(level=logging.DEBUG) -  else: -    log_path = os.path.join(run_work_dir, LOGFILE_NAME) -    logging.basicConfig(filename=log_path, level=logging.INFO) - -  experiment = SMTSemparseExperiment(config) -  if config.run == 'debug': -    experiment.run_fold(1) -  elif config.run == 'dev': -    for i in range(10): -      experiment.run_fold(i) -  elif config.run == 'test': -    experiment.run_split() -  else: -    assert False - -  logging.info('evaluating') -  Evaluator(config).run() - -if __name__ == '__main__': - -  # load config -  config = SMTSemparseConfig('settings.yaml', 'dependencies.yaml') - -  # create base work dir if it doesn't exist -  base_work_dir = os.path.join(config.smt_semparse, config.workdir) -  if not os.path.exists(base_work_dir): -    os.makedirs(base_work_dir) - -  run_one(config)  | 
