summary refs log tree commit diff
path: root/gi/pyp-topics/src
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pyp-topics/src')
-rw-r--r-- gi/pyp-topics/src/Makefile.mpi 3
-rw-r--r-- gi/pyp-topics/src/contexts_corpus.cc 4
-rw-r--r-- gi/pyp-topics/src/contexts_corpus.hh 2
-rw-r--r-- gi/pyp-topics/src/makefile.depend 228
-rw-r--r-- gi/pyp-topics/src/mpi-pyp-topics.cc 148
-rw-r--r-- gi/pyp-topics/src/mpi-pyp-topics.hh 17
-rw-r--r-- gi/pyp-topics/src/mpi-pyp.hh 273
-rw-r--r-- gi/pyp-topics/src/mpi-train-contexts.cc 2
-rw-r--r-- gi/pyp-topics/src/pyp.hh 9
9 files changed, 582 insertions, 104 deletions
diff --git a/gi/pyp-topics/src/Makefile.mpi b/gi/pyp-topics/src/Makefile.mpi
index 8c859881..b7b8a290 100644
--- a/gi/pyp-topics/src/Makefile.mpi
+++ b/gi/pyp-topics/src/Makefile.mpi
@@ -16,7 +16,8 @@ mpi-pyp-contexts-train: mpi-train-contexts.o $(local_objs)
.PHONY: depend echo
depend:
- $(CXX) -MM $(CXXFLAGS) *.cc *.c | sed 's/^\(.*\.o:\)/obj\/\1/' > makefile.depend
+#$(CXX) -MM $(CXXFLAGS) *.cc *.c | sed 's/^\(.*\.o:\)/obj\/\1/' > makefile.depend
+ $(CXX) -MM $(CXXFLAGS) *.cc *.c > makefile.depend
clean:
rm -f *.o
diff --git a/gi/pyp-topics/src/contexts_corpus.cc b/gi/pyp-topics/src/contexts_corpus.cc
index 26d5718a..1cf69429 100644
--- a/gi/pyp-topics/src/contexts_corpus.cc
+++ b/gi/pyp-topics/src/contexts_corpus.cc
@@ -28,7 +28,7 @@ void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void*
Document* doc(new Document());
//cout << "READ: " << new_contexts.phrase << "\t";
- for (int i=0; i < new_contexts.counts.size(); ++i) {
+ for (int i=0; i < (int)new_contexts.counts.size(); ++i) {
int cache_word_count = corpus_ptr->m_dict.max();
//string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[i]);
@@ -101,7 +101,7 @@ void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void
map<string,int>* context_counts = (static_cast<map<string,int>*>(extra));
- for (int i=0; i < new_contexts.counts.size(); ++i) {
+ for (int i=0; i < (int)new_contexts.counts.size(); ++i) {
int context_index = new_contexts.counts.at(i).first;
int count = new_contexts.counts.at(i).second;
//int count = new_contexts.counts[i];
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
index 66b71783..4d3d5669 100644
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ b/gi/pyp-topics/src/contexts_corpus.hh
@@ -63,6 +63,8 @@ public:
std::vector<std::string> context2string(const WordID& id) const {
std::vector<std::string> res;
+ assert (id >= 0);
+ std::cerr << m_dict.Convert(id) << std::endl;
m_dict.AsVector(id, &res);
return res;
}
diff --git a/gi/pyp-topics/src/makefile.depend b/gi/pyp-topics/src/makefile.depend
index 88bab79e..88bc73c1 100644
--- a/gi/pyp-topics/src/makefile.depend
+++ b/gi/pyp-topics/src/makefile.depend
@@ -1,4 +1,4 @@
-obj/contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \
+contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \
/Users/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
/Users/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
/Users/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
@@ -432,7 +432,7 @@ obj/contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \
/Users/pblunsom/packages/include/boost/type_traits/add_cv.hpp \
/Users/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \
/Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp
-obj/contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \
+contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \
../../../decoder/dict.h \
/Users/pblunsom/packages/include/boost/functional/hash.hpp \
/Users/pblunsom/packages/include/boost/functional/hash/hash.hpp \
@@ -463,7 +463,7 @@ obj/contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \
/Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \
../../../decoder/wordid.h ../../../decoder/filelib.h \
../../../decoder/gzstream.h
-obj/corpus.o: corpus.cc corpus.hh \
+corpus.o: corpus.cc corpus.hh \
/Users/pblunsom/packages/include/boost/shared_ptr.hpp \
/Users/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
/Users/pblunsom/packages/include/boost/config.hpp \
@@ -874,8 +874,8 @@ obj/corpus.o: corpus.cc corpus.hh \
/Users/pblunsom/packages/include/boost/detail/is_incrementable.hpp \
/Users/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \
gzstream.hh
-obj/gzstream.o: gzstream.cc gzstream.hh
-obj/mpi-pyp-topics.o: mpi-pyp-topics.cc \
+gzstream.o: gzstream.cc gzstream.hh
+mpi-pyp-topics.o: mpi-pyp-topics.cc \
/Users/pblunsom/packages/include/boost/mpi/communicator.hpp \
/Users/pblunsom/packages/include/boost/mpi/config.hpp \
/Users/pblunsom/packages/include/boost/config.hpp \
@@ -1448,8 +1448,108 @@ obj/mpi-pyp-topics.o: mpi-pyp-topics.cc \
/Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
/Users/pblunsom/packages/include/boost/random/detail/seed.hpp \
/Users/pblunsom/packages/include/boost/mpi/environment.hpp mpi-pyp.hh \
- log_add.h slice-sampler.h mt19937ar.h corpus.hh
-obj/mpi-train-contexts.o: mpi-train-contexts.cc \
+ /Users/pblunsom/packages/include/boost/tuple/tuple.hpp \
+ /Users/pblunsom/packages/include/boost/ref.hpp \
+ /Users/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/add_cv.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/map.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/utility.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \
+ /Users/pblunsom/packages/include/boost/aligned_storage.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
+ /Users/pblunsom/packages/include/boost/mpi.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/vector.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/collection_traits.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/operations.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \
+ /Users/pblunsom/packages/include/boost/graph/graph_traits.hpp \
+ /Users/pblunsom/packages/include/boost/pending/property.hpp \
+ /Users/pblunsom/packages/include/boost/pending/detail/property.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/same_traits.hpp \
+ /Users/pblunsom/packages/include/boost/graph/properties.hpp \
+ /Users/pblunsom/packages/include/boost/property_map/property_map.hpp \
+ /Users/pblunsom/packages/include/boost/pending/cstddef.hpp \
+ /Users/pblunsom/packages/include/boost/concept_check.hpp \
+ /Users/pblunsom/packages/include/boost/concept/assert.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/general.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \
+ /Users/pblunsom/packages/include/boost/concept/usage.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \
+ /Users/pblunsom/packages/include/boost/concept_archetype.hpp \
+ /Users/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \
+ /Users/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \
+ /Users/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \
+ /Users/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \
+ /Users/pblunsom/packages/include/boost/detail/numeric_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_signed.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_compound.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_object.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/rank.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/extent.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/promote.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_signed.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/make_signed.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/decay.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_complex.hpp \
+ /Users/pblunsom/packages/include/boost/detail/select_type.hpp \
+ /Users/pblunsom/packages/include/boost/graph/iteration_macros.hpp \
+ /Users/pblunsom/packages/include/boost/shared_array.hpp \
+ /Users/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/group.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/nonblocking.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \
+ slice-sampler.h log_add.h mt19937ar.h corpus.hh
+mpi-train-contexts.o: mpi-train-contexts.cc \
/Users/pblunsom/packages/include/boost/program_options/parsers.hpp \
/Users/pblunsom/packages/include/boost/program_options/config.hpp \
/Users/pblunsom/packages/include/boost/config.hpp \
@@ -2064,8 +2164,100 @@ obj/mpi-train-contexts.o: mpi-train-contexts.cc \
/Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \
/Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
/Users/pblunsom/packages/include/boost/random/detail/seed.hpp \
- mpi-pyp.hh log_add.h slice-sampler.h mt19937ar.h corpus.hh \
- contexts_corpus.hh contexts_lexer.h ../../../decoder/dict.h \
+ mpi-pyp.hh /Users/pblunsom/packages/include/boost/tuple/tuple.hpp \
+ /Users/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/add_cv.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/map.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/utility.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \
+ /Users/pblunsom/packages/include/boost/aligned_storage.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \
+ /Users/pblunsom/packages/include/boost/mpi.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/vector.hpp \
+ /Users/pblunsom/packages/include/boost/serialization/collection_traits.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/operations.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \
+ /Users/pblunsom/packages/include/boost/graph/graph_traits.hpp \
+ /Users/pblunsom/packages/include/boost/pending/property.hpp \
+ /Users/pblunsom/packages/include/boost/pending/detail/property.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/same_traits.hpp \
+ /Users/pblunsom/packages/include/boost/graph/properties.hpp \
+ /Users/pblunsom/packages/include/boost/property_map/property_map.hpp \
+ /Users/pblunsom/packages/include/boost/pending/cstddef.hpp \
+ /Users/pblunsom/packages/include/boost/concept_check.hpp \
+ /Users/pblunsom/packages/include/boost/concept/assert.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/general.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \
+ /Users/pblunsom/packages/include/boost/concept/usage.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \
+ /Users/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \
+ /Users/pblunsom/packages/include/boost/concept_archetype.hpp \
+ /Users/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \
+ /Users/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \
+ /Users/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \
+ /Users/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \
+ /Users/pblunsom/packages/include/boost/detail/numeric_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_compound.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_object.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/rank.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/extent.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/promote.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/make_signed.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/decay.hpp \
+ /Users/pblunsom/packages/include/boost/type_traits/is_complex.hpp \
+ /Users/pblunsom/packages/include/boost/detail/select_type.hpp \
+ /Users/pblunsom/packages/include/boost/graph/iteration_macros.hpp \
+ /Users/pblunsom/packages/include/boost/shared_array.hpp \
+ /Users/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/group.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/nonblocking.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \
+ /Users/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \
+ slice-sampler.h log_add.h mt19937ar.h corpus.hh contexts_corpus.hh \
+ contexts_lexer.h ../../../decoder/dict.h \
/Users/pblunsom/packages/include/boost/functional/hash.hpp \
/Users/pblunsom/packages/include/boost/functional/hash/hash.hpp \
/Users/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \
@@ -2078,7 +2270,7 @@ obj/mpi-train-contexts.o: mpi-train-contexts.cc \
/Users/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
/Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \
../../../decoder/wordid.h gzstream.hh
-obj/pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \
+pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \
/Users/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \
/Users/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \
/Users/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \
@@ -2484,7 +2676,7 @@ obj/pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \
/Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \
/Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
/Users/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \
- log_add.h slice-sampler.h mt19937ar.h corpus.hh \
+ slice-sampler.h log_add.h mt19937ar.h corpus.hh \
/Users/pblunsom/packages/include/boost/shared_ptr.hpp \
/Users/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \
/Users/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \
@@ -2658,7 +2850,7 @@ obj/pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \
/Users/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \
/Users/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \
/Users/pblunsom/packages/include/boost/thread/condition.hpp
-obj/train-contexts.o: train-contexts.cc \
+train-contexts.o: train-contexts.cc \
/Users/pblunsom/packages/include/boost/program_options/parsers.hpp \
/Users/pblunsom/packages/include/boost/program_options/config.hpp \
/Users/pblunsom/packages/include/boost/config.hpp \
@@ -3140,7 +3332,7 @@ obj/train-contexts.o: train-contexts.cc \
/Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \
/Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
/Users/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \
- log_add.h slice-sampler.h mt19937ar.h corpus.hh workers.hh \
+ slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \
/Users/pblunsom/packages/include/boost/bind.hpp \
/Users/pblunsom/packages/include/boost/bind/bind.hpp \
/Users/pblunsom/packages/include/boost/is_placeholder.hpp \
@@ -3275,7 +3467,7 @@ obj/train-contexts.o: train-contexts.cc \
/Users/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
/Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \
../../../decoder/wordid.h gzstream.hh
-obj/train.o: train.cc \
+train.o: train.cc \
/Users/pblunsom/packages/include/boost/program_options/parsers.hpp \
/Users/pblunsom/packages/include/boost/program_options/config.hpp \
/Users/pblunsom/packages/include/boost/config.hpp \
@@ -3757,7 +3949,7 @@ obj/train.o: train.cc \
/Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \
/Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \
/Users/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \
- log_add.h slice-sampler.h mt19937ar.h corpus.hh workers.hh \
+ slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \
/Users/pblunsom/packages/include/boost/bind.hpp \
/Users/pblunsom/packages/include/boost/bind/bind.hpp \
/Users/pblunsom/packages/include/boost/is_placeholder.hpp \
@@ -3892,6 +4084,6 @@ obj/train.o: train.cc \
/Users/pblunsom/packages/include/boost/functional/hash/extensions.hpp \
/Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \
../../../decoder/wordid.h gzstream.hh
-obj/clock_gettime_stub.o: clock_gettime_stub.c
-obj/gammadist.o: gammadist.c gammadist.h mt19937ar.h
-obj/mt19937ar.o: mt19937ar.c mt19937ar.h
+clock_gettime_stub.o: clock_gettime_stub.c
+gammadist.o: gammadist.c gammadist.h mt19937ar.h
+mt19937ar.o: mt19937ar.c mt19937ar.h
diff --git a/gi/pyp-topics/src/mpi-pyp-topics.cc b/gi/pyp-topics/src/mpi-pyp-topics.cc
index 2ad28278..4525302e 100644
--- a/gi/pyp-topics/src/mpi-pyp-topics.cc
+++ b/gi/pyp-topics/src/mpi-pyp-topics.cc
@@ -4,7 +4,7 @@
#include "mpi-pyp-topics.hh"
//#include <boost/date_time/posix_time/posix_time_types.hpp>
-void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
+void MPIPYPTopics::sample_corpus(const Corpus& corpus, int samples,
int freq_cutoff_start, int freq_cutoff_end,
int freq_cutoff_interval,
int max_contexts_per_document) {
@@ -23,33 +23,33 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
}
int local_documents = m_mpi_end - m_mpi_start;
-
if (!m_backoff.get()) {
m_word_pyps.clear();
- m_word_pyps.push_back(PYPs());
+ m_word_pyps.push_back(MPIPYPs());
}
if (m_am_root) std::cerr << "\n Training with " << m_word_pyps.size()-1 << " backoff level"
- << (m_word_pyps.size()==2 ? ":" : "s:") << std::endl;
+ << (m_word_pyps.size()>1 ? ":" : "s:") << std::endl;
- for (int i=0; i<(int)m_word_pyps.size(); ++i)
- {
+ for (int i=0; i<(int)m_word_pyps.size(); ++i) {
m_word_pyps.at(i).reserve(m_num_topics);
for (int j=0; j<m_num_topics; ++j)
- m_word_pyps.at(i).push_back(new PYP<int>(0.5, 1.0));
+ m_word_pyps.at(i).push_back(new MPIPYP<int>(0.5, 1.0));
}
if (m_am_root) std::cerr << std::endl;
- m_document_pyps.reserve(corpus.num_documents());
- for (int j=0; j<corpus.num_documents(); ++j)
+ m_document_pyps.reserve(local_documents);
+ //m_document_pyps.reserve(corpus.num_documents());
+ //for (int j=0; j<corpus.num_documents(); ++j)
+ for (int j=0; j<local_documents; ++j)
m_document_pyps.push_back(new PYP<int>(0.5, 1.0));
m_topic_p0 = 1.0/m_num_topics;
m_term_p0 = 1.0/corpus.num_types();
m_backoff_p0 = 1.0/corpus.num_documents();
- if (m_am_root) std::cerr << " Documents: " << corpus.num_documents() << " Terms: "
- << corpus.num_types() << std::endl;
+ if (m_am_root) std::cerr << " Documents: " << corpus.num_documents() << "("
+ << local_documents << ")" << " Terms: " << corpus.num_types() << std::endl;
int frequency_cutoff = freq_cutoff_start;
if (m_am_root) std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl;
@@ -57,13 +57,16 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
timer.Reset();
// Initialisation pass
int document_id=0, topic_counter=0;
- for (Corpus::const_iterator corpusIt=corpus.begin();
- corpusIt != corpus.end(); ++corpusIt, ++document_id) {
- m_corpus_topics.push_back(DocumentTopics(corpusIt->size(), 0));
+ for (int i=0; i<local_documents; ++i) {
+ document_id = i+m_mpi_start;
+
+ //for (Corpus::const_iterator corpusIt=corpus.begin();
+ // corpusIt != corpus.end(); ++corpusIt, ++document_id) {
+ m_corpus_topics.push_back(DocumentTopics(corpus.at(document_id).size(), 0));
int term_index=0;
- for (Document::const_iterator docIt=corpusIt->begin();
- docIt != corpusIt->end(); ++docIt, ++term_index) {
+ for (Document::const_iterator docIt=corpus.at(document_id).begin();
+ docIt != corpus.at(document_id).end(); ++docIt, ++term_index) {
topic_counter++;
Term term = *docIt;
@@ -80,21 +83,41 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
if (m_use_topic_pyp) {
F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
- int table_delta = m_document_pyps[document_id].increment(new_topic, p0);
+ int table_delta = m_document_pyps.at(i).increment(new_topic, p0);
if (table_delta)
m_topic_pyp.increment(new_topic, m_topic_p0);
}
- else m_document_pyps[document_id].increment(new_topic, m_topic_p0);
+ else m_document_pyps.at(i).increment(new_topic, m_topic_p0);
}
- m_corpus_topics[document_id][term_index] = new_topic;
+ m_corpus_topics.at(i).at(term_index) = new_topic;
+ }
+ }
+
+ // Synchronise the topic->word counts across the processes.
+ for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin();
+ levelIt != m_word_pyps.end(); ++levelIt) {
+ for (MPIPYPs::iterator pypIt=levelIt->begin();
+ pypIt != levelIt->end(); ++pypIt) {
+ if (!m_am_root) boost::mpi::communicator().barrier();
+ std::cerr << "Before Sync Process " << m_rank << ":";
+ pypIt->debug_info(std::cerr); std::cerr << std::endl;
+ if (m_am_root) boost::mpi::communicator().barrier();
+
+ pypIt->synchronise();
+
+ if (!m_am_root) boost::mpi::communicator().barrier();
+ std::cerr << "After Sync Process " << m_rank << ":";
+ pypIt->debug_info(std::cerr); std::cerr << std::endl;
+ if (m_am_root) boost::mpi::communicator().barrier();
}
}
+
if (m_am_root) std::cerr << " Initialized in " << timer.Elapsed() << " seconds\n";
int* randomDocIndices = new int[local_documents];
for (int i = 0; i < local_documents; ++i)
- randomDocIndices[i] = i+m_mpi_start;
+ randomDocIndices[i] = i;
// Sampling phase
for (int curr_sample=0; curr_sample < samples; ++curr_sample) {
@@ -110,8 +133,8 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
// Randomize the corpus indexing array
int tmp;
int processed_terms=0;
- for (int i = local_documents-1; i > 0; --i) {
- //i+1 since j \in [0,i] but rnd() \in [0,1)
+ for (int i = (local_documents-1); i > 0; --i) {
+ //i+1 since j \in [0,i] but rnd() \in [0,1)
int j = (int)(rnd() * (i+1));
assert(j >= 0 && j <= i);
tmp = randomDocIndices[i];
@@ -120,15 +143,17 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
}
// for each document in the corpus
- int document_id;
- for (int i=0; i<local_documents; ++i) {
- document_id = randomDocIndices[i];
+ for (int rand_doc=0; rand_doc<local_documents; ++rand_doc) {
+ int doc_index = randomDocIndices[rand_doc];
+ int document_id = doc_index + m_mpi_start;
+ const Document& doc = corpus.at(document_id);
// for each term in the document
int term_index=0;
- Document::const_iterator docEnd = corpus.at(document_id).end();
- for (Document::const_iterator docIt=corpus.at(document_id).begin();
+ Document::const_iterator docEnd = doc.end();
+ for (Document::const_iterator docIt=doc.begin();
docIt != docEnd; ++docIt, ++term_index) {
+
if (max_contexts_per_document && term_index > max_contexts_per_document)
break;
@@ -140,36 +165,49 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
processed_terms++;
// remove the prevous topic from the PYPs
- int current_topic = m_corpus_topics[document_id][term_index];
+ int current_topic = m_corpus_topics.at(doc_index).at(term_index);
// a negative label mean that term hasn't been sampled yet
if (current_topic >= 0) {
decrement(term, current_topic);
- int table_delta = m_document_pyps[document_id].decrement(current_topic);
+ int table_delta = m_document_pyps.at(doc_index).decrement(current_topic);
if (m_use_topic_pyp && table_delta < 0)
m_topic_pyp.decrement(current_topic);
}
// sample a new_topic
- int new_topic = sample(document_id, term);
+ int new_topic = sample(doc_index, term);
// add the new topic to the PYPs
- m_corpus_topics[document_id][term_index] = new_topic;
+ m_corpus_topics.at(doc_index).at(term_index) = new_topic;
increment(term, new_topic);
if (m_use_topic_pyp) {
F p0 = m_topic_pyp.prob(new_topic, m_topic_p0);
- int table_delta = m_document_pyps[document_id].increment(new_topic, p0);
+ int table_delta = m_document_pyps.at(doc_index).increment(new_topic, p0);
if (table_delta)
m_topic_pyp.increment(new_topic, m_topic_p0);
}
- else m_document_pyps[document_id].increment(new_topic, m_topic_p0);
+ else m_document_pyps.at(doc_index).increment(new_topic, m_topic_p0);
}
if (document_id && document_id % 10000 == 0) {
if (m_am_root) std::cerr << "."; std::cerr.flush();
}
}
m_world.barrier();
+ // Synchronise the topic->word counts across the processes.
+ for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin();
+ levelIt != m_word_pyps.end(); ++levelIt) {
+ for (MPIPYPs::iterator pypIt=levelIt->begin();
+ pypIt != levelIt->end(); ++pypIt) {
+ std::cerr << "Before Sync Process " << m_rank << ":";
+ pypIt->debug_info(std::cerr); std::cerr << std::endl;
+ pypIt->synchronise();
+ std::cerr << "After Sync Process " << m_rank << ":";
+ pypIt->debug_info(std::cerr); std::cerr << std::endl;
+ }
+ }
+
if (m_am_root) std::cerr << " ||| sampled " << processed_terms << " terms.";
if (curr_sample != 0 && curr_sample % 10 == 0) {
@@ -179,9 +217,9 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
// resample the hyperparamters
F log_p=0.0;
- for (std::vector<PYPs>::iterator levelIt=m_word_pyps.begin();
+ for (std::vector<MPIPYPs>::iterator levelIt=m_word_pyps.begin();
levelIt != m_word_pyps.end(); ++levelIt) {
- for (PYPs::iterator pypIt=levelIt->begin();
+ for (MPIPYPs::iterator pypIt=levelIt->begin();
pypIt != levelIt->end(); ++pypIt) {
pypIt->resample_prior();
log_p += pypIt->log_restaurant_prob();
@@ -206,7 +244,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
int k=0;
if (m_am_root) std::cerr << "Topics distribution: ";
std::cerr.precision(2);
- for (PYPs::iterator pypIt=m_word_pyps.front().begin();
+ for (MPIPYPs::iterator pypIt=m_word_pyps.front().begin();
pypIt != m_word_pyps.front().end(); ++pypIt, ++k) {
if (m_am_root && k % 5 == 0) std::cerr << std::endl << '\t';
if (m_am_root) std::cerr << "<" << k << ":" << pypIt->num_customers() << ","
@@ -220,8 +258,8 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,
}
-void PYPTopics::decrement(const Term& term, int topic, int level) {
- //std::cerr << "PYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl;
+void MPIPYPTopics::decrement(const Term& term, int topic, int level) {
+ //std::cerr << "MPIPYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl;
m_word_pyps.at(level).at(topic).decrement(term);
if (m_backoff.get()) {
Term backoff_term = (*m_backoff)[term];
@@ -230,8 +268,8 @@ void PYPTopics::decrement(const Term& term, int topic, int level) {
}
}
-void PYPTopics::increment(const Term& term, int topic, int level) {
- //std::cerr << "PYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl;
+void MPIPYPTopics::increment(const Term& term, int topic, int level) {
+ //std::cerr << "MPIPYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl;
m_word_pyps.at(level).at(topic).increment(term, word_pyps_p0(term, topic, level));
if (m_backoff.get()) {
@@ -241,7 +279,7 @@ void PYPTopics::increment(const Term& term, int topic, int level) {
}
}
-int PYPTopics::sample(const DocumentId& doc, const Term& term) {
+int MPIPYPTopics::sample(const DocumentId& doc, const Term& term) {
// First pass: collect probs
F sum=0.0;
std::vector<F> sums;
@@ -252,7 +290,7 @@ int PYPTopics::sample(const DocumentId& doc, const Term& term) {
if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0);
//F p_k_d = m_document_pyps[doc].prob(k, topic_prob);
- F p_k_d = m_document_pyps[doc].unnormalised_prob(k, topic_prob);
+ F p_k_d = m_document_pyps.at(doc).unnormalised_prob(k, topic_prob);
sum += (p_w_k*p_k_d);
sums.push_back(sum);
@@ -266,9 +304,9 @@ int PYPTopics::sample(const DocumentId& doc, const Term& term) {
assert(false);
}
-PYPTopics::F PYPTopics::word_pyps_p0(const Term& term, int topic, int level) const {
+MPIPYPTopics::F MPIPYPTopics::word_pyps_p0(const Term& term, int topic, int level) const {
//for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ")" << std::endl;
+ //std::cerr << "MPIPYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ")" << std::endl;
F p0 = m_term_p0;
if (m_backoff.get()) {
@@ -283,24 +321,24 @@ PYPTopics::F PYPTopics::word_pyps_p0(const Term& term, int topic, int level) con
p0 = m_term_p0;
}
//for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ") = " << p0 << std::endl;
+ //std::cerr << "MPIPYPTopics::word_pyps_p0(" << term << "," << topic << "," << level << ") = " << p0 << std::endl;
return p0;
}
-PYPTopics::F PYPTopics::prob(const Term& term, int topic, int level) const {
+MPIPYPTopics::F MPIPYPTopics::prob(const Term& term, int topic, int level) const {
//for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::prob(" << term << "," << topic << "," << level << " " << factor << ")" << std::endl;
+ //std::cerr << "MPIPYPTopics::prob(" << term << "," << topic << "," << level << " " << factor << ")" << std::endl;
F p0 = word_pyps_p0(term, topic, level);
F p_w_k = m_word_pyps.at(level).at(topic).prob(term, p0);
//for (int i=0; i<level+1; ++i) std::cerr << " ";
- //std::cerr << "PYPTopics::prob(" << term << "," << topic << "," << level << ") = " << p_w_k << std::endl;
+ //std::cerr << "MPIPYPTopics::prob(" << term << "," << topic << "," << level << ") = " << p_w_k << std::endl;
return p_w_k;
}
-int PYPTopics::max_topic() const {
+int MPIPYPTopics::max_topic() const {
if (!m_use_topic_pyp)
return -1;
@@ -317,8 +355,8 @@ int PYPTopics::max_topic() const {
return current_topic;
}
-int PYPTopics::max(const DocumentId& doc) const {
- //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
+int MPIPYPTopics::max(const DocumentId& doc) const {
+ //std::cerr << "MPIPYPTopics::max(" << doc << "," << term << ")" << std::endl;
// collect probs
F current_max=0.0;
int current_topic=-1;
@@ -342,8 +380,8 @@ int PYPTopics::max(const DocumentId& doc) const {
return current_topic;
}
-int PYPTopics::max(const DocumentId& doc, const Term& term) const {
- //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
+int MPIPYPTopics::max(const DocumentId& doc, const Term& term) const {
+ //std::cerr << "MPIPYPTopics::max(" << doc << "," << term << ")" << std::endl;
// collect probs
F current_max=0.0;
int current_topic=-1;
@@ -368,7 +406,7 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) const {
return current_topic;
}
-std::ostream& PYPTopics::print_document_topics(std::ostream& out) const {
+std::ostream& MPIPYPTopics::print_document_topics(std::ostream& out) const {
for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin();
corpusIt != m_corpus_topics.end(); ++corpusIt) {
int term_index=0;
@@ -382,7 +420,7 @@ std::ostream& PYPTopics::print_document_topics(std::ostream& out) const {
return out;
}
-std::ostream& PYPTopics::print_topic_terms(std::ostream& out) const {
+std::ostream& MPIPYPTopics::print_topic_terms(std::ostream& out) const {
for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin();
pypsIt != m_word_pyps.front().end(); ++pypsIt) {
int term_index=0;
diff --git a/gi/pyp-topics/src/mpi-pyp-topics.hh b/gi/pyp-topics/src/mpi-pyp-topics.hh
index 5da35d82..a85a776d 100644
--- a/gi/pyp-topics/src/mpi-pyp-topics.hh
+++ b/gi/pyp-topics/src/mpi-pyp-topics.hh
@@ -1,5 +1,5 @@
-#ifndef PYP_TOPICS_HH
-#define PYP_TOPICS_HH
+#ifndef MPI_PYP_TOPICS_HH
+#define MPI_PYP_TOPICS_HH
#include <vector>
#include <iostream>
@@ -14,14 +14,14 @@
#include "mpi-pyp.hh"
#include "corpus.hh"
-class PYPTopics {
+class MPIPYPTopics {
public:
typedef std::vector<int> DocumentTopics;
typedef std::vector<DocumentTopics> CorpusTopics;
typedef double F;
public:
- PYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0)
+ MPIPYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0)
: m_num_topics(num_topics), m_word_pyps(1),
m_topic_pyp(0.5,1.0), m_use_topic_pyp(use_topic_pyp),
m_seed(seed),
@@ -47,12 +47,12 @@ public:
m_backoff.reset(new TermBackoff);
m_backoff->read(filename);
m_word_pyps.clear();
- m_word_pyps.resize(m_backoff->order(), PYPs());
+ m_word_pyps.resize(m_backoff->order(), MPIPYPs());
}
void set_backoff(TermBackoffPtr backoff) {
m_backoff = backoff;
m_word_pyps.clear();
- m_word_pyps.resize(m_backoff->order(), PYPs());
+ m_word_pyps.resize(m_backoff->order(), MPIPYPs());
}
F prob(const Term& term, int topic, int level=0) const;
@@ -70,9 +70,10 @@ private:
CorpusTopics m_corpus_topics;
typedef boost::ptr_vector< PYP<int> > PYPs;
+ typedef boost::ptr_vector< MPIPYP<int> > MPIPYPs;
PYPs m_document_pyps;
- std::vector<PYPs> m_word_pyps;
- PYP<int> m_topic_pyp;
+ std::vector<MPIPYPs> m_word_pyps;
+ MPIPYP<int> m_topic_pyp;
bool m_use_topic_pyp;
unsigned long m_seed;
diff --git a/gi/pyp-topics/src/mpi-pyp.hh b/gi/pyp-topics/src/mpi-pyp.hh
index 58be7c5c..65358d20 100644
--- a/gi/pyp-topics/src/mpi-pyp.hh
+++ b/gi/pyp-topics/src/mpi-pyp.hh
@@ -1,5 +1,5 @@
-#ifndef _pyp_hh
-#define _pyp_hh
+#ifndef _mpipyp_hh
+#define _mpipyp_hh
#include <math.h>
#include <map>
@@ -9,11 +9,15 @@
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/random/mersenne_twister.hpp>
+#include <boost/tuple/tuple.hpp>
+#include <boost/serialization/map.hpp>
+#include <boost/mpi.hpp>
+#include <boost/mpi/environment.hpp>
+#include <boost/mpi/communicator.hpp>
+#include <boost/mpi/operations.hpp>
-#include "pyp.h"
-#include "log_add.h"
-#include "slice-sampler.h"
-#include "mt19937ar.h"
+
+#include "pyp.hh"
//
// Pitman-Yor process with customer and table tracking
@@ -28,25 +32,104 @@ public:
virtual int decrement(Dish d);
void clear();
+ void reset_deltas();
- void reset_deltas() { m_count_delta.clear(); }
+ void synchronise();
private:
typedef std::map<Dish, int> dish_delta_type;
- typedef std::map<Dish, TableCounter> table_delta_type;
+ typedef std::map<Dish, typename PYP<Dish,Hash>::TableCounter> table_delta_type;
dish_delta_type m_count_delta;
table_delta_type m_table_delta;
};
template <typename Dish, typename Hash>
-MPIPYP<Dish,Hash>::MPIPYP(double a, double b, Hash)
-: PYP(a, b, Hash) {}
+MPIPYP<Dish,Hash>::MPIPYP(double a, double b, Hash h) // forwards a, b and the hash object h to the PYP base
+: PYP<Dish,Hash>(a, b, 0, h) {} // NOTE(review): the literal 0 is presumably the base class's seed argument - confirm against PYP's constructor
template <typename Dish, typename Hash>
int
MPIPYP<Dish,Hash>::increment(Dish dish, double p0) {
- int delta = PYP<Dish,Hash>::increment(dish, p0);
+  // Seats one customer for `dish` (base probability `p0`) and records the
+  // change in the pending MPI deltas (m_count_delta / m_table_delta).
+  // Returns 1 when the customer opened a new table, 0 when an existing
+  // table was joined.
+  int delta = 0;
+  int table_joined=-1;
+  typename PYP<Dish,Hash>::TableCounter &tc = PYP<Dish,Hash>::_dish_tables[dish];
+
+  // seated on a new or existing table?
+  int c = PYP<Dish,Hash>::count(dish);
+  int t = PYP<Dish,Hash>::num_tables(dish);
+  int T = PYP<Dish,Hash>::num_tables();
+  double& a = PYP<Dish,Hash>::_a;
+  double& b = PYP<Dish,Hash>::_b;
+  double pshare = (c > 0) ? (c - a*t) : 0.0;
+  double pnew = (b + a*T) * p0;
+  assert (pshare >= 0.0);
+
+  if (mt_genrand_res53() < pnew / (pshare + pnew)) {
+    // assign to a new table
+    tc.tables += 1;
+    tc.table_histogram[1] += 1;
+    PYP<Dish,Hash>::_total_tables += 1;
+    delta = 1;
+  }
+  else {
+    // randomly assign to an existing table
+    // remove constant denominator from inner loop
+    double r = mt_genrand_res53() * (c - a*t);
+    for (std::map<int,int>::iterator
+         hit = tc.table_histogram.begin();
+         hit != tc.table_histogram.end(); ++hit) {
+      r -= ((hit->first - a) * hit->second);
+      if (r <= 0) {
+        // Read the new size of the joined table before the histogram is
+        // modified: erasing `hit` below invalidates the iterator.
+        table_joined = hit->first+1;
+        tc.table_histogram[table_joined] += 1;
+        hit->second -= 1;
+        if (hit->second == 0)
+          tc.table_histogram.erase(hit);
+        break;
+      }
+    }
+    if (r > 0) {
+      std::cerr << r << " " << c << " " << a << " " << t << std::endl;
+      assert(false);
+    }
+    delta = 0;
+  }
+
+  std::tr1::unordered_map<Dish,int,Hash>::operator[](dish) += 1;
+  //google::sparse_hash_map<Dish,int,Hash>::operator[](dish) += 1;
+  PYP<Dish,Hash>::_total_customers += 1;
+
+  // MPI Delta handling
+  // track the customer entering
+  typename dish_delta_type::iterator customer_it;
+  bool customer_insert_result;
+  boost::tie(customer_it, customer_insert_result)
+    = m_count_delta.insert(std::make_pair(dish,0));
+
+  customer_it->second += 1;
+  // a net-zero delta carries no information, so drop it
+  if (customer_it->second == 0)
+    m_count_delta.erase(customer_it);
+
+  // increment the histogram bar for the table joined
+  if (!delta) {
+    assert (table_joined >= 0);
+    std::map<int,int> &histogram = m_table_delta[dish].table_histogram;
+    typename std::map<int,int>::iterator table_it; bool table_insert_result;
+    boost::tie(table_it, table_insert_result) = histogram.insert(std::make_pair(table_joined,0));
+    table_it->second += 1;
+    if (table_it->second == 0) histogram.erase(table_it);
+
+    // decrement the histogram bar for the table left
+    boost::tie(table_it, table_insert_result) = histogram.insert(std::make_pair(table_joined-1,0));
+    table_it->second -= 1;
+    if (table_it->second == 0) histogram.erase(table_it);
+  }
+  else {
+    // a new table was opened: one more table holding a single customer
+    typename PYP<Dish,Hash>::TableCounter &delta_tc = m_table_delta[dish];
+    delta_tc.tables += 1;
+    delta_tc.table_histogram[1] += 1;
+  }
   return delta;
}
@@ -55,15 +138,177 @@ template <typename Dish, typename Hash>
int
MPIPYP<Dish,Hash>::decrement(Dish dish)
{
- int delta = PYP<Dish,Hash>::decrement(dish);
+  // Removes one customer of `dish` from a table chosen with probability
+  // proportional to its size, and records the change in the pending MPI
+  // deltas (m_count_delta / m_table_delta).
+  // Returns -1 when the customer's table became empty, 0 otherwise.
+  // Asserts if `dish` has no customers or no table record.
+  typename std::tr1::unordered_map<Dish, int>::iterator dcit = PYP<Dish,Hash>::find(dish);
+  //typename google::sparse_hash_map<Dish, int>::iterator dcit = find(dish);
+  if (dcit == PYP<Dish,Hash>::end()) {
+    std::cerr << dish << std::endl;
+    assert(false);
+  }
+
+  int delta = 0, table_left=-1;
+
+  typename std::tr1::unordered_map<Dish, typename PYP<Dish,Hash>::TableCounter>::iterator dtit
+    = PYP<Dish,Hash>::_dish_tables.find(dish);
+  //typename google::sparse_hash_map<Dish, TableCounter>::iterator dtit = _dish_tables.find(dish);
+  if (dtit == PYP<Dish,Hash>::_dish_tables.end()) {
+    std::cerr << dish << std::endl;
+    assert(false);
+  }
+  typename PYP<Dish,Hash>::TableCounter &tc = dtit->second;
+
+  double r = mt_genrand_res53() * PYP<Dish,Hash>::count(dish);
+  for (std::map<int,int>::iterator hit = tc.table_histogram.begin();
+       hit != tc.table_histogram.end(); ++hit) {
+    r -= (hit->first * hit->second);
+    if (r <= 0) {
+      table_left = hit->first;
+      if (hit->first > 1) {
+        tc.table_histogram[hit->first-1] += 1;
+      }
+      else {
+        // the last customer left this table, so the table disappears
+        delta = -1;
+        tc.tables -= 1;
+        PYP<Dish,Hash>::_total_tables -= 1;
+      }
+
+      hit->second -= 1;
+      if (hit->second == 0) tc.table_histogram.erase(hit);
+      break;
+    }
+  }
+  if (r > 0) {
+    std::cerr << r << " " << PYP<Dish,Hash>::count(dish) << " " << PYP<Dish,Hash>::_a << " "
+      << PYP<Dish,Hash>::num_tables(dish) << std::endl;
+    assert(false);
+  }
+
+  // remove the customer
+  dcit->second -= 1;
+  PYP<Dish,Hash>::_total_customers -= 1;
+  assert(dcit->second >= 0);
+  if (dcit->second == 0) {
+    PYP<Dish,Hash>::erase(dcit);
+    PYP<Dish,Hash>::_dish_tables.erase(dtit);
+  }
+
+  // MPI Delta handling
+  // track the customer leaving
+  typename dish_delta_type::iterator it;
+  bool insert_result;
+  boost::tie(it, insert_result) = m_count_delta.insert(std::make_pair(dish,0));
+
+  it->second -= 1;
+
+  // a net-zero delta carries no information, so drop it
+  if (it->second == 0)
+    m_count_delta.erase(it);
+
+  assert (table_left >= 0);
+  typename PYP<Dish,Hash>::TableCounter& delta_tc = m_table_delta[dish];
+  if (table_left > 1)
+    delta_tc.table_histogram[table_left-1] += 1;
+  else delta_tc.tables -= 1;
+
+  // The bar for `table_left` may be missing from the delta histogram (the
+  // table can predate the last reset_deltas()), so insert it on demand
+  // instead of dereferencing the result of find().
+  std::map<int,int>::iterator tit
+    = delta_tc.table_histogram.insert(std::make_pair(table_left, 0)).first;
+  tit->second -= 1;
+  if (tit->second == 0) delta_tc.table_histogram.erase(tit);
+
   return delta;
}
template <typename Dish, typename Hash>
void
-MPIPYP<Dish,Hash>::clear()
-{
+MPIPYP<Dish,Hash>::clear() { // empties the restaurant and discards pending MPI deltas
PYP<Dish,Hash>::clear(); // base-class state first
+ reset_deltas(); // then the customer/table deltas kept by this class
+}
+
+template <typename Dish, typename Hash>
+void
+MPIPYP<Dish,Hash>::reset_deltas() {
+  // Discard every pending table delta and customer-count delta.
+  this->m_table_delta.clear();
+  this->m_count_delta.clear();
+}
+
+// Binary functor that merges two dish->count maps by summing per key.
+template <typename Dish>
+struct sum_maps {
+  typedef std::map<Dish,int> map_type;
+  // Adds every count of `r` into `l` (creating missing keys) and returns `l`.
+  map_type& operator() (map_type& l, map_type const & r) const {
+    typename map_type::const_iterator src = r.begin();
+    const typename map_type::const_iterator stop = r.end();
+    while (src != stop) {
+      l[src->first] += src->second;
+      ++src;
+    }
+    return l;
+  }
+};
+
+// Needed Boost definitions: a Boost.MPI trait for sum_maps<int> and a Boost.Serialization hook for PYP<int>::TableCounter
+namespace boost {
+ namespace mpi {
+ template <>
+ struct is_commutative< sum_maps<int>, std::map<int,int> > : mpl::true_ {}; // declares sum_maps<int> on std::map<int,int> commutative; covers Dish=int only
+ }
+
+ namespace serialization {
+ template<class Archive>
+ void serialize(Archive & ar, PYP<int>::TableCounter& t, const unsigned int version) { // NOTE(review): the pyp.hh hunk places TableCounter after `protected:` - confirm it is accessible here
+ ar & t.table_histogram; // `version` is not used by this body
+ ar & t.tables;
+ }
+
+ } // namespace serialization
+} // namespace boost
+
+
+template <typename Dish, typename Hash>
+void
+MPIPYP<Dish,Hash>::synchronise() { // merges all processes' customer-count deltas into this restaurant, then clears the local deltas
+ boost::mpi::communicator world;
+ int rank = world.rank(), size = world.size(); // only referenced by the commented-out code below
+
+ // communicate the customer count deltas: all_reduce sums every process's m_count_delta per dish
+ dish_delta_type global_dish_delta; // the “merged” map
+ boost::mpi::all_reduce(world, m_count_delta, global_dish_delta, sum_maps<Dish>());
+
+ // update this restaurant; the local delta is subtracted because increment()/decrement() already applied it to the counts
+ for (typename dish_delta_type::const_iterator it=global_dish_delta.begin();
+ it != global_dish_delta.end(); ++it) {
+ std::tr1::unordered_map<Dish,int,Hash>::operator[](it->first) += (it->second - m_count_delta[it->first]);
+ PYP<Dish,Hash>::_total_customers += (it->second - m_count_delta[it->first]);
+ //std::cerr << "Process " << rank << " adding " << (it->second - m_count_delta[it->first]) << " customers." << std::endl;
+ }
+
+ // communicate the table count deltas -- NOTE(review): this exchange is commented out, so m_table_delta is cleared below without being sent to other processes
+// for (int process = 0; process < size; ++process) {
+// if (rank == process) {
+// // broadcast deltas
+// std::cerr << " -- Rank " << rank << " broadcasting -- " << std::endl;
+//
+// boost::mpi::broadcast(world, m_table_delta, process);
+//
+// std::cerr << " -- Rank " << rank << " done broadcasting -- " << std::endl;
+// }
+// else {
+// std::cerr << " -- Rank " << rank << " receiving -- " << std::endl;
+// // receive deltas
+// table_delta_type recv_table_delta;
+//
+// boost::mpi::broadcast(world, recv_table_delta, process);
+//
+// std::cerr << " -- Rank " << rank << " done receiving -- " << std::endl;
+//
+// for (typename table_delta_type::const_iterator dish_it=recv_table_delta.begin();
+// dish_it != recv_table_delta.end(); ++dish_it) {
+// typename PYP<Dish,Hash>::TableCounter &tc = PYP<Dish,Hash>::_dish_tables[dish_it->first];
+//
+// for (std::map<int,int>::const_iterator it=dish_it->second.table_histogram.begin();
+// it != dish_it->second.table_histogram.end(); ++it) {
+// tc.table_histogram[it->first] += it->second;
+// }
+// tc.tables += dish_it->second.tables;
+// PYP<Dish,Hash>::_total_tables += dish_it->second.tables;
+// }
+// }
+// }
+// std::cerr << " -- Done Reducing -- " << std::endl;
+
+ reset_deltas(); // start the next sampling pass with empty deltas
}
#endif
diff --git a/gi/pyp-topics/src/mpi-train-contexts.cc b/gi/pyp-topics/src/mpi-train-contexts.cc
index 956ce123..0651ecac 100644
--- a/gi/pyp-topics/src/mpi-train-contexts.cc
+++ b/gi/pyp-topics/src/mpi-train-contexts.cc
@@ -86,7 +86,7 @@ int main(int argc, char **argv)
// seed the random number generator: 0 = automatic, specify value otherwise
unsigned long seed = 0;
- PYPTopics model(vm["topics"].as<int>(), vm.count("hierarchical-topics"), seed);
+ MPIPYPTopics model(vm["topics"].as<int>(), vm.count("hierarchical-topics"), seed);
// read the data
BackoffGenerator* backoff_gen=0;
diff --git a/gi/pyp-topics/src/pyp.hh b/gi/pyp-topics/src/pyp.hh
index 26f6ab2e..84decb0f 100644
--- a/gi/pyp-topics/src/pyp.hh
+++ b/gi/pyp-topics/src/pyp.hh
@@ -1,6 +1,7 @@
#ifndef _pyp_hh
#define _pyp_hh
+#include "slice-sampler.h"
#include <math.h>
#include <map>
#include <tr1/unordered_map>
@@ -11,7 +12,6 @@
#include <boost/random/mersenne_twister.hpp>
#include "log_add.h"
-#include "slice-sampler.h"
#include "mt19937ar.h"
//
@@ -63,7 +63,7 @@ public:
double b() const { return _b; }
void set_b(double b) { _b = b; }
- void clear();
+ virtual void clear();
std::ostream& debug_info(std::ostream& os) const;
double log_restaurant_prob() const;
@@ -75,13 +75,12 @@ public:
void resample_prior_a();
void resample_prior_b();
-private:
+protected:
double _a, _b; // parameters of the Pitman-Yor distribution
double _a_beta_a, _a_beta_b; // parameters of Beta prior on a
double _b_gamma_s, _b_gamma_c; // parameters of Gamma prior on b
- struct TableCounter
- {
+ struct TableCounter {
TableCounter() : tables(0) {};
int tables;
std::map<int, int> table_histogram; // num customers at table -> number tables