From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- configure.ac | 5 ----- 1 file changed, 5 deletions(-) (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index ea9e84fb..07ef9fe1 100644 --- a/configure.ac +++ b/configure.ac @@ -114,7 +114,6 @@ AC_CONFIG_FILES([Makefile]) AC_CONFIG_FILES([utils/Makefile]) AC_CONFIG_FILES([mteval/Makefile]) AC_CONFIG_FILES([decoder/Makefile]) -AC_CONFIG_FILES([phrasinator/Makefile]) AC_CONFIG_FILES([training/Makefile]) AC_CONFIG_FILES([training/liblbfgs/Makefile]) AC_CONFIG_FILES([dpmert/Makefile]) @@ -125,10 +124,6 @@ AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) -AC_CONFIG_FILES([gi/pyp-topics/src/Makefile]) -AC_CONFIG_FILES([gi/clda/src/Makefile]) -AC_CONFIG_FILES([gi/pf/Makefile]) -AC_CONFIG_FILES([gi/markov_al/Makefile]) AC_CONFIG_FILES([rst_parser/Makefile]) AC_CONFIG_FILES([python/setup.py]) -- cgit v1.2.3 From d9918e2a47edf5887a179a566243a37e7b9a1c03 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:27:21 -0400 Subject: fix build --- configure.ac | 1 - utils/Makefile.am | 6 ------ 2 files changed, 7 deletions(-) (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index 07ef9fe1..70e8e932 100644 --- a/configure.ac +++ b/configure.ac @@ -124,7 +124,6 @@ AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) -AC_CONFIG_FILES([rst_parser/Makefile]) AC_CONFIG_FILES([python/setup.py]) diff --git a/utils/Makefile.am b/utils/Makefile.am index 55d97354..3ad9d69e 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -45,18 +45,12 @@ m_test_SOURCES = m_test.cc m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz 
dict_test_SOURCES = dict_test.cc dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz weights_test_SOURCES = weights_test.cc weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -crp_test_SOURCES = crp_test.cc -crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz logval_test_SOURCES = logval_test.cc logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz small_vector_test_SOURCES = small_vector_test.cc small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz -unigram_pyp_lm_SOURCES = unigram_pyp_lm.cc -unigram_pyp_lm_LDADD = libutils.a -lz ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -- cgit v1.2.3 From 64f5a686dbfb128b7dfce1d6d842ba976da8c22f Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 11 Oct 2012 21:57:18 -0400 Subject: add support for dlopen'd feature functions --- configure.ac | 3 ++- decoder/Makefile.am | 1 + decoder/cdec_ff.cc | 2 ++ decoder/ff.h | 1 + decoder/ff_external.cc | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ decoder/ff_external.h | 26 +++++++++++++++++++++++ 6 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 decoder/ff_external.cc create mode 100644 decoder/ff_external.h (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index 70e8e932..967b657c 100644 --- a/configure.ac +++ b/configure.ac @@ -14,7 +14,8 @@ BOOST_REQUIRE([1.44]) BOOST_PROGRAM_OPTIONS BOOST_TEST AM_PATH_PYTHON -# TODO detect Cython, generate python/Makefile that calls "python setup.py build" +AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H)) 
+AC_CHECK_LIB(dl, dlopen) AC_ARG_ENABLE(mpi, [ --enable-mpi Build MPI binaries, assumes mpi.h is present ], diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 4a98a4f1..28863dbe 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -33,6 +33,7 @@ libcdec_a_SOURCES = \ cfg.cc \ dwarf.cc \ ff_dwarf.cc \ + ff_external.cc \ rule_lexer.cc \ fst_translator.cc \ csplit.cc \ diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index b516c386..54f6e12b 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -18,6 +18,7 @@ #include "ff_charset.h" #include "ff_wordset.h" #include "ff_dwarf.h" +#include "ff_external.h" #ifdef HAVE_GLC #include @@ -69,6 +70,7 @@ void register_feature_functions() { ff_registry.Register("WordPairFeatures", new FFFactory); ff_registry.Register("WordSet", new FFFactory); ff_registry.Register("Dwarf", new FFFactory); + ff_registry.Register("External", new FFFactory); #ifdef HAVE_GLC ff_registry.Register("ContextCRF", new FFFactory); #endif diff --git a/decoder/ff.h b/decoder/ff.h index 6c22d39f..227787ca 100644 --- a/decoder/ff.h +++ b/decoder/ff.h @@ -27,6 +27,7 @@ typedef std::vector Features; // set of features ids // depends on context, you may also need to implement // FinalTraversalFeatures(...) 
class FeatureFunction { + friend class ExternalFeature; public: std::string name_; // set by FF factory using usage() bool debug_; // also set by FF factory checking param for immediate initial "debug" diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc new file mode 100644 index 00000000..520e98b1 --- /dev/null +++ b/decoder/ff_external.cc @@ -0,0 +1,57 @@ +#include "ff_external.h" +#include "stringlib.h" + +#include + +using namespace std; + +ExternalFeature::ExternalFeature(const string& param) { + size_t pos = param.find(' '); + string nparam; + string file = param; + if (pos < param.size()) { + nparam = Trim(param.substr(pos + 1)); + file = param.substr(0, pos); + } + if (file.size() < 1) { + cerr << "External requires a path to a dynamic library!\n"; + abort(); + } + lib_handle = dlopen(file.c_str(), RTLD_LAZY); + if (!lib_handle) { + cerr << "dlopen reports: " << dlerror() << endl; + cerr << "Did you provide a full path to the dynamic library?\n"; + abort(); + } + FeatureFunction* (*fn)(const string&) = + (FeatureFunction* (*)(const string&))(dlsym(lib_handle, "create_ff")); + if (!fn) { + cerr << "dlsym reports: " << dlerror() << endl; + abort(); + } + ff_ext = (*fn)(nparam); +} + +ExternalFeature::~ExternalFeature() { + delete ff_ext; + dlclose(lib_handle); +} + +void ExternalFeature::PrepareForInput(const SentenceMetadata& smeta) { + ff_ext->PrepareForInput(smeta); +} + +void ExternalFeature::FinalTraversalFeatures(const void* context, + SparseVector* features) const { + ff_ext->FinalTraversalFeatures(context, features); +} + +void ExternalFeature::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + FeatureVector* features, + FeatureVector* estimated_features, + void* context) const { + ff_ext->TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, context); +} + diff --git a/decoder/ff_external.h b/decoder/ff_external.h new file mode 100644 index 
00000000..283e58e8 --- /dev/null +++ b/decoder/ff_external.h @@ -0,0 +1,26 @@ +#ifndef _FFEXTERNAL_H_ +#define _FFEXTERNAL_H_ + +#include "ff.h" + +// dynamically loaded feature function +class ExternalFeature : public FeatureFunction { + public: + ExternalFeature(const std::string& param); + ~ExternalFeature(); + virtual void PrepareForInput(const SentenceMetadata& smeta); + virtual void FinalTraversalFeatures(const void* context, + SparseVector* features) const; + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + FeatureVector* features, + FeatureVector* estimated_features, + void* context) const; + private: + void* lib_handle; + FeatureFunction* ff_ext; +}; + +#endif -- cgit v1.2.3 From f49ebcba9974a60ec3df2b6c97c7048186a5954d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 11 Oct 2012 23:13:12 -0400 Subject: example external feature function --- Makefile.am | 3 ++- configure.ac | 1 + example_extff/Makefile.am | 5 ++++ example_extff/README.md | 8 +++++++ example_extff/ff_example.cc | 56 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 example_extff/Makefile.am create mode 100644 example_extff/README.md create mode 100644 example_extff/ff_example.cc (limited to 'configure.ac') diff --git a/Makefile.am b/Makefile.am index c0826532..3e0103a8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,7 +14,8 @@ SUBDIRS = \ dpmert \ pro-train \ rampion \ - minrisk + minrisk \ + example_extff #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava diff --git a/configure.ac b/configure.ac index 967b657c..03a0ee87 100644 --- a/configure.ac +++ b/configure.ac @@ -125,6 +125,7 @@ AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) +AC_CONFIG_FILES([example_extff/Makefile]) AC_CONFIG_FILES([python/setup.py]) diff --git 
a/example_extff/Makefile.am b/example_extff/Makefile.am new file mode 100644 index 00000000..ac2694ca --- /dev/null +++ b/example_extff/Makefile.am @@ -0,0 +1,5 @@ +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -I../decoder + +lib_LTLIBRARIES = libff_example.la +libff_example_la_SOURCES = ff_example.cc +libff_example_la_LDFLAGS = -version-info 1:0:0 -module diff --git a/example_extff/README.md b/example_extff/README.md new file mode 100644 index 00000000..f2aba487 --- /dev/null +++ b/example_extff/README.md @@ -0,0 +1,8 @@ +This is an example of an _external_ feature function which is loaded as a dynamically linked library at run time to compute feature functions over derivations in a hypergraph. To load external feature functions, you can specify them in your `cdec.ini` configuration file as follows: + + feature_function=External /path/to/libmy_feature.so + +Any extra options are passed to the external library. + +*Note*: the build system uses [GNU Libtool](http://www.gnu.org/software/libtool/) to create the shared library. This may be placed in a hidden directory called `.libs`. + diff --git a/example_extff/ff_example.cc b/example_extff/ff_example.cc new file mode 100644 index 00000000..51ebf364 --- /dev/null +++ b/example_extff/ff_example.cc @@ -0,0 +1,56 @@ +#include "ff.h" +#include +#include + +using namespace std; + +// example of a "stateful" feature made available as an external library +// This feature looks at nodes and their daughters and fires an indicator based +// on the arities of the rules involved. 
+// (X (X a) b (X c)) - this is a 2 arity parent with children of 0 and 0 arity +// so you get MAF_2_0_0=1 +class ParentChildrenArityFeatures : public FeatureFunction { + public: + ParentChildrenArityFeatures(const string& param) : fids(16, vector(256, -1)) { + SetStateSize(1); // number of bytes extra state required by this Feature + } + virtual void FinalTraversalFeatures(const void* context, + SparseVector* features) const { + // Goal always is arity 1, so there's no discriminative value of + // computing a feature + } + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + FeatureVector* features, + FeatureVector* estimated_features, + void* context) const { + unsigned child_arity_code = 0; + for (unsigned j = 0; j < ant_contexts.size(); ++j) { + child_arity_code <<= 4; + child_arity_code |= *reinterpret_cast(ant_contexts[j]); + } + int& fid = fids[edge.Arity()][child_arity_code]; // reference! + if (fid < 0) { + ostringstream feature_string; + feature_string << "MAF_" << edge.Arity(); + for (unsigned j = 0; j < ant_contexts.size(); ++j) + feature_string << '_' << + static_cast(*reinterpret_cast(ant_contexts[j])); + fid = FD::Convert(feature_string.str()); + } + features->set_value(fid, 1.0); + *reinterpret_cast(context) = edge.Arity(); // save state + } + private: + mutable vector > fids; +}; + +// IMPORTANT: this function must be implemented by any external FF library +// if your library has multiple features, you can use str to configure things +extern "C" FeatureFunction* create_ff(const string& str) { + return new ParentChildrenArityFeatures(str); +} + + -- cgit v1.2.3