From 129a22cfcc7651daa4b11ed52e7870249f6373a5 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 16 Sep 2014 10:23:14 +0100
Subject: spring cleaning
---
.gitignore | 13 +-
.gitmodules | 2 +-
Makefile | 58 ++++++-
README.md | 39 +++++
external/json-cpp | 1 +
fast/Makefile | 15 --
fast/README.md | 38 -----
fast/grammar.hh | 316 ----------------------------------
fast/hypergraph.cc | 363 ----------------------------------------
fast/hypergraph.hh | 103 ------------
fast/main.cc | 27 ---
fast/parse.hh | 108 ------------
fast/semiring.hh | 35 ----
fast/sparse_vector.hh | 186 --------------------
fast/test/Makefile | 19 ---
fast/test/test_grammar | Bin 60943 -> 0 bytes
fast/test/test_grammar.cc | 20 ---
fast/test/test_sparse_vector | Bin 44288 -> 0 bytes
fast/test/test_sparse_vector.cc | 37 ----
fast/util.hh | 47 ------
fast/weaver.hh | 10 --
grammar.rb | 139 ---------------
hg.rb | 222 ------------------------
main.rb | 72 --------
parse.rb | 210 -----------------------
prototype/grammar.rb | 137 +++++++++++++++
prototype/hypergraph.rb | 219 ++++++++++++++++++++++++
prototype/parse.rb | 207 +++++++++++++++++++++++
prototype/test_hg.rb | 32 ++++
prototype/test_parse.rb | 49 ++++++
prototype/weaver.rb | 70 ++++++++
src/fast_weaver.cc | 26 +++
src/grammar.hh | 334 ++++++++++++++++++++++++++++++++++++
src/hypergraph.cc | 362 +++++++++++++++++++++++++++++++++++++++
src/hypergraph.hh | 102 +++++++++++
src/make_pak.cc | 103 ++++++++++++
src/parse.hh | 301 +++++++++++++++++++++++++++++++++
src/read_pak.cc | 26 +++
src/semiring.hh | 35 ++++
src/sparse_vector.hh | 186 ++++++++++++++++++++
src/test_grammar.cc | 19 +++
src/test_parse.cc | 19 +++
src/test_sparse_vector.cc | 36 ++++
src/types.hh | 10 ++
src/util.hh | 47 ++++++
test/test_hg.rb | 32 ----
test/test_parse.rb | 49 ------
util/Makefile | 14 --
util/cdec2json.py | 96 -----------
util/json-cpp | 1 -
util/make_pak.cc | 104 ------------
util/read_pak.cc | 27 ---
52 files changed, 2419 insertions(+), 2304 deletions(-)
create mode 160000 external/json-cpp
delete mode 100644 fast/Makefile
delete mode 100644 fast/README.md
delete mode 100644 fast/grammar.hh
delete mode 100644 fast/hypergraph.cc
delete mode 100644 fast/hypergraph.hh
delete mode 100644 fast/main.cc
delete mode 100644 fast/parse.hh
delete mode 100644 fast/semiring.hh
delete mode 100644 fast/sparse_vector.hh
delete mode 100644 fast/test/Makefile
delete mode 100755 fast/test/test_grammar
delete mode 100644 fast/test/test_grammar.cc
delete mode 100755 fast/test/test_sparse_vector
delete mode 100644 fast/test/test_sparse_vector.cc
delete mode 100644 fast/util.hh
delete mode 100644 fast/weaver.hh
delete mode 100644 grammar.rb
delete mode 100644 hg.rb
delete mode 100755 main.rb
delete mode 100644 parse.rb
create mode 100644 prototype/grammar.rb
create mode 100644 prototype/hypergraph.rb
create mode 100644 prototype/parse.rb
create mode 100755 prototype/test_hg.rb
create mode 100755 prototype/test_parse.rb
create mode 100755 prototype/weaver.rb
create mode 100644 src/fast_weaver.cc
create mode 100644 src/grammar.hh
create mode 100644 src/hypergraph.cc
create mode 100644 src/hypergraph.hh
create mode 100644 src/make_pak.cc
create mode 100644 src/parse.hh
create mode 100644 src/read_pak.cc
create mode 100644 src/semiring.hh
create mode 100644 src/sparse_vector.hh
create mode 100644 src/test_grammar.cc
create mode 100644 src/test_parse.cc
create mode 100644 src/test_sparse_vector.cc
create mode 100644 src/types.hh
create mode 100644 src/util.hh
delete mode 100755 test/test_hg.rb
delete mode 100755 test/test_parse.rb
delete mode 100644 util/Makefile
delete mode 100755 util/cdec2json.py
delete mode 160000 util/json-cpp
delete mode 100644 util/make_pak.cc
delete mode 100644 util/read_pak.cc
diff --git a/.gitignore b/.gitignore
index d8a671e..8bb6628 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,7 @@
*.o
-fast/example/
-fast/fast_weaver
-fast/test_grammar
-fast/test_sparse_vector
-util/make_pak
-util/read_pak
-fast/gperftools-2.1/
+fast_weaver
+test_grammar
+test_sparse_vector
+make_pak
+read_pak
+external/gperftools-2.1/
diff --git a/.gitmodules b/.gitmodules
index 843caa2..7fe83f1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
[submodule "util/json-cpp"]
- path = util/json-cpp
+ path = external/json-cpp
url = https://github.com/ascheglov/json-cpp.git
diff --git a/Makefile b/Makefile
index f499591..56f89d4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,53 @@
-json_examples:
- ./main.rb -w example/3/weights.init < example/3/in.sgm > example/3/3.json 2>/dev/null
- ./main.rb -w example/3/weights.init -l < example/3/in.sgm > example/3/3-with-glue.json 2>/dev/null
- ./main.rb -w example/glue/weights -l < example/glue/in.sgm > example/glue/glue.json 2>/dev/null
- ./main.rb -w example/toy/weights < example/toy/in.sgm > example/toy/toy.json 2>/dev/null
- ./main.rb -w example/toy/weights < example/toy/in-test.sgm > example/toy/toy-test.json 2>/dev/null
+COMPILER=clang
+CFLAGS=-std=c++11 -O3 -Wall
+TCMALLOC:=$(CURDIR)/external/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread
+SRC=src
+
+all: $(SRC)/hypergraph.o $(SRC)/fast_weaver.cc  # links fast_weaver; libraries must follow the inputs
+	$(COMPILER) $(CFLAGS) \
+	$^ \
+	$(TCMALLOC) -lmsgpack -lstdc++ -lm -o fast_weaver
+
+$(SRC)/hypergraph.o: $(SRC)/hypergraph.cc \
+    $(SRC)/hypergraph.hh $(SRC)/semiring.hh \
+    $(SRC)/sparse_vector.hh $(SRC)/types.hh  # compile only; $< is hypergraph.cc
+	$(COMPILER) $(CFLAGS) \
+	-g -c $(TCMALLOC) \
+	$< -o $@
+
+util: make_pak read_pak
+
+make_pak: $(SRC)/make_pak.cc external/json-cpp/single_include/json-cpp.hpp \
+    $(SRC)/hypergraph.hh $(SRC)/types.hh  # $< is make_pak.cc; libraries follow it
+	$(COMPILER) $(CFLAGS) -I./external \
+	$< \
+	-lmsgpack -lstdc++ -lm -o $@
+
+read_pak: $(SRC)/read_pak.cc  # libraries are listed after the source that needs them
+	$(COMPILER) $(CFLAGS) \
+	$< \
+	-lmsgpack -lstdc++ -o $@
+
+test: test_grammar test_parse test_sparse_vector
+
+test_grammar: $(SRC)/test_grammar.cc $(SRC)/grammar.hh  # $< is the .cc; libraries follow it
+	$(COMPILER) $(CFLAGS) \
+	$< \
+	$(TCMALLOC) -lstdc++ -lm -o $@
+
+test_parse: $(SRC)/test_parse.cc $(SRC)/parse.hh \
+    $(SRC)/grammar.hh $(SRC)/util.hh  # $< is the .cc; libraries follow it
+	$(COMPILER) $(CFLAGS) \
+	$< \
+	$(TCMALLOC) -lstdc++ -lm -o $@
+
+test_sparse_vector: $(SRC)/test_sparse_vector.cc $(SRC)/sparse_vector.hh  # $< is the .cc; libraries follow it
+	$(COMPILER) $(CFLAGS) \
+	$< \
+	$(TCMALLOC) -lstdc++ -lm -o $@
+
+clean:  # the object file is written under $(SRC), so it is removed from there
+	rm -f fast_weaver $(SRC)/hypergraph.o
+	rm -f make_pak read_pak
+	rm -f test_grammar test_sparse_vector test_parse
diff --git a/README.md b/README.md
index 4cdcf31..e462d41 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,42 @@
+TODO
+ * sparse vector (unordered_map) -> where to store?
+ * parser
+ * Rule -> ChartItem -> Node ?
+ * k-best
+ * other semirings
+ * include language model
+ * compress/hash words/feature strings?
+ * cast? Rule -> Edge, ChartItem -> Node
+ * feature factory, observer
+
+Dependencies:
+ * MessagePack for object serialization [1]
+ * kenlm language model [2]
+
+This is Linux only.
+
+
+[1] http://msgpack.org
+[2] http://kheafield.com/code/kenlm/
+
+
+stuff to have a look at:
+http://math.nist.gov/spblas/
+http://lapackpp.sourceforge.net/
+http://www.cvmlib.com/
+http://sourceforge.net/projects/lpp/
+http://math-atlas.sourceforge.net/
+http://www.netlib.org/lapack/
+http://bytes.com/topic/c/answers/702569-blas-vs-cblas-c
+http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
+http://www.osl.iu.edu/research/mtl/download.php3
+http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library
+https://software.intel.com/en-us/tbb_4.2_doc
+http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+http://www.sgi.com/tech/stl/Rope.html
+http://www.cs.unc.edu/Research/compgeom/gzstream/
+https://github.com/facebook/folly/blob/6e46d468cf2876dd59c7a4dddcb4e37abf070b7a/folly/docs/Overview.md
+---
not much to see here, yet
(SCFG machine translation decoder in ruby, currently implements CKY+ parsing and hypergraph viterbi)
diff --git a/external/json-cpp b/external/json-cpp
new file mode 160000
index 0000000..4eb4b47
--- /dev/null
+++ b/external/json-cpp
@@ -0,0 +1 @@
+Subproject commit 4eb4b47cf4d622bc7bf34071d6b68fc5beb37051
diff --git a/fast/Makefile b/fast/Makefile
deleted file mode 100644
index 1a7f5b9..0000000
--- a/fast/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-COMPILER=g++
-CFLAGS=-std=c++11 -O3
-TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread
-
-
-all: hypergraph.o main.cc
- $(COMPILER) $(CFLAGS) -lstdc++ -lm -lmsgpack $(TCMALLOC) hypergraph.o main.cc -o fast_weaver
-
-hypergraph.o: hypergraph.cc hypergraph.hh semiring.hh sparse_vector.hh weaver.hh
- $(COMPILER) $(CFLAGS) -g -c $(TCMALLOC) hypergraph.cc
-
-clean:
- rm -f fast_weaver
- rm -f hypergraph.o parse.o
-
diff --git a/fast/README.md b/fast/README.md
deleted file mode 100644
index f92245b..0000000
--- a/fast/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-TODO
- * sparse vector (unordered_map) -> where to store?
- * parser
- * Rule -> ChartItem -> Node ?
- * k-best
- * other semirings
- * include language model
- * compress/hash words/feature strings?
- * cast? Rule -> Edge, ChartItem -> Node
- * feature factory, observer
-
-Dependencies:
- * MessagePack for object serialization [1]
- * kenlm language model [2]
-
-This is Linux only.
-
-
-[1] http://msgpack.org
-[2] http://kheafield.com/code/kenlm/
-
-
-stuff to have a look at:
-http://math.nist.gov/spblas/
-http://lapackpp.sourceforge.net/
-http://www.cvmlib.com/
-http://sourceforge.net/projects/lpp/
-http://math-atlas.sourceforge.net/
-http://www.netlib.org/lapack/
-http://bytes.com/topic/c/answers/702569-blas-vs-cblas-c
-http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
-http://www.osl.iu.edu/research/mtl/download.php3
-http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library
-https://software.intel.com/en-us/tbb_4.2_doc
-http://goog-perftools.sourceforge.net/doc/tcmalloc.html
-http://www.sgi.com/tech/stl/Rope.html
-http://www.cs.unc.edu/Research/compgeom/gzstream/
-
diff --git a/fast/grammar.hh b/fast/grammar.hh
deleted file mode 100644
index 4906c46..0000000
--- a/fast/grammar.hh
+++ /dev/null
@@ -1,316 +0,0 @@
-#pragma once
-
-#include
-#include
-#include