summaryrefslogtreecommitdiff
path: root/klm/lm/neural/wordvecs.hh
diff options
context:
space:
mode:
authorarmatthews <armatthe@cmu.edu>2014-10-13 14:59:23 -0400
committerarmatthews <armatthe@cmu.edu>2014-10-13 14:59:23 -0400
commitb26cda84e05d4523eee069234a975a0153bf8608 (patch)
tree61c9da4f8dd6070f27c8e81812a76fc0a8cf2d8d /klm/lm/neural/wordvecs.hh
parentcd7bc67f475fdfd07fba003ac4cca40e83944740 (diff)
parentb1ed81ef3216b212295afa76c5d20a56fb647204 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/lm/neural/wordvecs.hh')
-rw-r--r--klm/lm/neural/wordvecs.hh38
1 files changed, 38 insertions, 0 deletions
diff --git a/klm/lm/neural/wordvecs.hh b/klm/lm/neural/wordvecs.hh
new file mode 100644
index 00000000..921a2b22
--- /dev/null
+++ b/klm/lm/neural/wordvecs.hh
@@ -0,0 +1,38 @@
+#ifndef LM_NEURAL_WORDVECS_H
+#define LM_NEURAL_WORDVECS_H
+
+#include "util/scoped.hh"
+#include "lm/vocab.hh"
+
+#include <Eigen/Dense>
+
+namespace util { class FilePiece; } // Forward declaration: this header only takes a FilePiece by reference.
+
+namespace lm {
+namespace neural {
+
+class WordVecs { // Pairs a word -> index vocabulary with a matrix of float word vectors.
+ public:
+ // Columns of the matrix are word vectors. The column index is the word.
+ // Storage is column-major with both dimensions chosen at run time.
+ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> Storage;
+
+ /* The file should begin with a line stating the number of word vectors and
+ * the length of the vectors. Then it's followed by lines containing a
+ * word followed by floating-point values.
+ *
+ * The constructor is only declared here; its definition lives outside this
+ * header. NOTE(review): presumably it fills both the vocabulary and the
+ * vector matrix from `in` -- confirm against the implementation.
+ */
+ explicit WordVecs(util::FilePiece &in);
+
+ const Storage &Vectors() const { return vecs_; } // Read-only view of the stored matrix; no copy is made.
+
+ // Forwards to the vocabulary's Index(); the result for a string that is not
+ // in the vocabulary is whatever ngram::ProbingVocabulary::Index returns.
+ WordIndex Index(StringPiece str) const { return vocab_.Index(str); }
+
+ private:
+ // Declared before vocab_, so it is constructed first and destroyed last.
+ // NOTE(review): presumably owns the memory that vocab_ operates on -- confirm.
+ util::scoped_malloc vocab_backing_;
+ ngram::ProbingVocabulary vocab_; // Maps a word string to its WordIndex (see Index()).
+
+ Storage vecs_; // The matrix returned by Vectors().
+};
+
+}} // namespaces
+
+#endif // LM_NEURAL_WORDVECS_H