From d5d7820830c2a2beda775a674f39fc8506f8a8f6 Mon Sep 17 00:00:00 2001 From: "philblunsom@gmail.com" Date: Fri, 16 Jul 2010 04:50:35 +0000 Subject: working on mpi implementation. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@283 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pyp-topics/src/Makefile.mpi | 3 +- gi/pyp-topics/src/contexts_corpus.cc | 4 +- gi/pyp-topics/src/contexts_corpus.hh | 2 + gi/pyp-topics/src/makefile.depend | 228 +++++++++++++++++++++++--- gi/pyp-topics/src/mpi-pyp-topics.cc | 148 ++++++++++------- gi/pyp-topics/src/mpi-pyp-topics.hh | 17 +- gi/pyp-topics/src/mpi-pyp.hh | 273 ++++++++++++++++++++++++++++++-- gi/pyp-topics/src/mpi-train-contexts.cc | 2 +- gi/pyp-topics/src/pyp.hh | 9 +- 9 files changed, 582 insertions(+), 104 deletions(-) (limited to 'gi/pyp-topics/src') diff --git a/gi/pyp-topics/src/Makefile.mpi b/gi/pyp-topics/src/Makefile.mpi index 8c859881..b7b8a290 100644 --- a/gi/pyp-topics/src/Makefile.mpi +++ b/gi/pyp-topics/src/Makefile.mpi @@ -16,7 +16,8 @@ mpi-pyp-contexts-train: mpi-train-contexts.o $(local_objs) .PHONY: depend echo depend: - $(CXX) -MM $(CXXFLAGS) *.cc *.c | sed 's/^\(.*\.o:\)/obj\/\1/' > makefile.depend +#$(CXX) -MM $(CXXFLAGS) *.cc *.c | sed 's/^\(.*\.o:\)/obj\/\1/' > makefile.depend + $(CXX) -MM $(CXXFLAGS) *.cc *.c > makefile.depend clean: rm -f *.o diff --git a/gi/pyp-topics/src/contexts_corpus.cc b/gi/pyp-topics/src/contexts_corpus.cc index 26d5718a..1cf69429 100644 --- a/gi/pyp-topics/src/contexts_corpus.cc +++ b/gi/pyp-topics/src/contexts_corpus.cc @@ -28,7 +28,7 @@ void read_callback(const ContextsLexer::PhraseContextsType& new_contexts, void* Document* doc(new Document()); //cout << "READ: " << new_contexts.phrase << "\t"; - for (int i=0; i < new_contexts.counts.size(); ++i) { + for (int i=0; i < (int)new_contexts.counts.size(); ++i) { int cache_word_count = corpus_ptr->m_dict.max(); //string context_str = corpus_ptr->m_dict.toString(new_contexts.contexts[i]); @@ -101,7 +101,7 @@ void filter_callback(const ContextsLexer::PhraseContextsType& new_contexts, void map* context_counts = (static_cast*>(extra)); - for (int i=0; i < new_contexts.counts.size(); ++i) { + for (int i=0; i < (int)new_contexts.counts.size(); ++i) { int context_index = new_contexts.counts.at(i).first; int count = new_contexts.counts.at(i).second; //int count = new_contexts.counts[i]; diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh index 66b71783..4d3d5669 100644 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ b/gi/pyp-topics/src/contexts_corpus.hh @@ -63,6 +63,8 @@ public: std::vector context2string(const WordID& id) const { std::vector res; + assert (id >= 0); + std::cerr << m_dict.Convert(id) << std::endl; m_dict.AsVector(id, &res); return res; } diff --git a/gi/pyp-topics/src/makefile.depend b/gi/pyp-topics/src/makefile.depend index 88bab79e..88bc73c1 100644 --- a/gi/pyp-topics/src/makefile.depend +++ b/gi/pyp-topics/src/makefile.depend @@ -1,4 +1,4 @@ -obj/contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \ +contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \ /Users/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ /Users/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ /Users/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ @@ -432,7 +432,7 @@ obj/contexts_corpus.o: contexts_corpus.cc contexts_corpus.hh \ /Users/pblunsom/packages/include/boost/type_traits/add_cv.hpp \ /Users/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \ /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp -obj/contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \ +contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \ ../../../decoder/dict.h \ /Users/pblunsom/packages/include/boost/functional/hash.hpp \ /Users/pblunsom/packages/include/boost/functional/hash/hash.hpp \ @@ -463,7 +463,7 @@ obj/contexts_lexer.o: contexts_lexer.cc contexts_lexer.h \ /Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \ ../../../decoder/wordid.h ../../../decoder/filelib.h \ ../../../decoder/gzstream.h -obj/corpus.o: corpus.cc corpus.hh \ +corpus.o: corpus.cc corpus.hh \ /Users/pblunsom/packages/include/boost/shared_ptr.hpp \ /Users/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ /Users/pblunsom/packages/include/boost/config.hpp \ @@ -874,8 +874,8 @@ obj/corpus.o: corpus.cc corpus.hh \ /Users/pblunsom/packages/include/boost/detail/is_incrementable.hpp \ /Users/pblunsom/packages/include/boost/ptr_container/detail/void_ptr_iterator.hpp \ gzstream.hh -obj/gzstream.o: gzstream.cc gzstream.hh -obj/mpi-pyp-topics.o: mpi-pyp-topics.cc \ +gzstream.o: gzstream.cc gzstream.hh +mpi-pyp-topics.o: mpi-pyp-topics.cc \ /Users/pblunsom/packages/include/boost/mpi/communicator.hpp \ /Users/pblunsom/packages/include/boost/mpi/config.hpp \ /Users/pblunsom/packages/include/boost/config.hpp \ @@ -1448,8 +1448,108 @@ obj/mpi-pyp-topics.o: mpi-pyp-topics.cc \ /Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ /Users/pblunsom/packages/include/boost/random/detail/seed.hpp \ /Users/pblunsom/packages/include/boost/mpi/environment.hpp mpi-pyp.hh \ - log_add.h slice-sampler.h mt19937ar.h corpus.hh -obj/mpi-train-contexts.o: mpi-train-contexts.cc \ + /Users/pblunsom/packages/include/boost/tuple/tuple.hpp \ + /Users/pblunsom/packages/include/boost/ref.hpp \ + /Users/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/add_volatile.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/add_cv.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \ + /Users/pblunsom/packages/include/boost/serialization/map.hpp \ + /Users/pblunsom/packages/include/boost/serialization/utility.hpp \ + /Users/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \ + /Users/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \ + /Users/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \ + /Users/pblunsom/packages/include/boost/aligned_storage.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ + /Users/pblunsom/packages/include/boost/mpi.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \ + /Users/pblunsom/packages/include/boost/serialization/vector.hpp \ + /Users/pblunsom/packages/include/boost/serialization/collection_traits.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \ + /Users/pblunsom/packages/include/boost/mpi/operations.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \ + /Users/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \ + /Users/pblunsom/packages/include/boost/graph/graph_traits.hpp \ + /Users/pblunsom/packages/include/boost/pending/property.hpp \ + /Users/pblunsom/packages/include/boost/pending/detail/property.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/same_traits.hpp \ + /Users/pblunsom/packages/include/boost/graph/properties.hpp \ + /Users/pblunsom/packages/include/boost/property_map/property_map.hpp \ + /Users/pblunsom/packages/include/boost/pending/cstddef.hpp \ + /Users/pblunsom/packages/include/boost/concept_check.hpp \ + /Users/pblunsom/packages/include/boost/concept/assert.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/general.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \ + /Users/pblunsom/packages/include/boost/concept/usage.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \ + /Users/pblunsom/packages/include/boost/concept_archetype.hpp \ + /Users/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \ + /Users/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \ + /Users/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \ + /Users/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \ + /Users/pblunsom/packages/include/boost/detail/numeric_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_trivial_destructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_signed.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_compound.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_object.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/rank.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/extent.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/promote.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/make_unsigned.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_signed.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_unsigned.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/make_signed.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/decay.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_complex.hpp \ + /Users/pblunsom/packages/include/boost/detail/select_type.hpp \ + /Users/pblunsom/packages/include/boost/graph/iteration_macros.hpp \ + /Users/pblunsom/packages/include/boost/shared_array.hpp \ + /Users/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \ + /Users/pblunsom/packages/include/boost/mpi/group.hpp \ + /Users/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \ + /Users/pblunsom/packages/include/boost/mpi/nonblocking.hpp \ + /Users/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \ + /Users/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \ + slice-sampler.h log_add.h mt19937ar.h corpus.hh +mpi-train-contexts.o: mpi-train-contexts.cc \ /Users/pblunsom/packages/include/boost/program_options/parsers.hpp \ /Users/pblunsom/packages/include/boost/program_options/config.hpp \ /Users/pblunsom/packages/include/boost/config.hpp \ @@ -2064,8 +2164,100 @@ obj/mpi-train-contexts.o: mpi-train-contexts.cc \ /Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \ /Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ /Users/pblunsom/packages/include/boost/random/detail/seed.hpp \ - mpi-pyp.hh log_add.h slice-sampler.h mt19937ar.h corpus.hh \ - contexts_corpus.hh contexts_lexer.h ../../../decoder/dict.h \ + mpi-pyp.hh /Users/pblunsom/packages/include/boost/tuple/tuple.hpp \ + /Users/pblunsom/packages/include/boost/tuple/detail/tuple_basic.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/cv_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/add_cv.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/remove_volatile.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \ + /Users/pblunsom/packages/include/boost/serialization/map.hpp \ + /Users/pblunsom/packages/include/boost/serialization/utility.hpp \ + /Users/pblunsom/packages/include/boost/serialization/collections_save_imp.hpp \ + /Users/pblunsom/packages/include/boost/serialization/collections_load_imp.hpp \ + /Users/pblunsom/packages/include/boost/serialization/detail/stack_constructor.hpp \ + /Users/pblunsom/packages/include/boost/aligned_storage.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/type_with_alignment.hpp \ + /Users/pblunsom/packages/include/boost/mpi.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/all_gather.hpp \ + /Users/pblunsom/packages/include/boost/serialization/vector.hpp \ + /Users/pblunsom/packages/include/boost/serialization/collection_traits.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/broadcast.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives_fwd.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/gather.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/all_reduce.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/reduce.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/computation_tree.hpp \ + /Users/pblunsom/packages/include/boost/mpi/operations.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/all_to_all.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/scatter.hpp \ + /Users/pblunsom/packages/include/boost/mpi/collectives/scan.hpp \ + /Users/pblunsom/packages/include/boost/mpi/graph_communicator.hpp \ + /Users/pblunsom/packages/include/boost/graph/graph_traits.hpp \ + /Users/pblunsom/packages/include/boost/pending/property.hpp \ + /Users/pblunsom/packages/include/boost/pending/detail/property.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/same_traits.hpp \ + /Users/pblunsom/packages/include/boost/graph/properties.hpp \ + /Users/pblunsom/packages/include/boost/property_map/property_map.hpp \ + /Users/pblunsom/packages/include/boost/pending/cstddef.hpp \ + /Users/pblunsom/packages/include/boost/concept_check.hpp \ + /Users/pblunsom/packages/include/boost/concept/assert.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/general.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/has_constraints.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/conversion_traits.hpp \ + /Users/pblunsom/packages/include/boost/concept/usage.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/concept_def.hpp \ + /Users/pblunsom/packages/include/boost/concept/detail/concept_undef.hpp \ + /Users/pblunsom/packages/include/boost/concept_archetype.hpp \ + /Users/pblunsom/packages/include/boost/property_map/vector_property_map.hpp \ + /Users/pblunsom/packages/include/boost/graph/property_maps/constant_property_map.hpp \ + /Users/pblunsom/packages/include/boost/graph/property_maps/null_property_map.hpp \ + /Users/pblunsom/packages/include/boost/iterator/counting_iterator.hpp \ + /Users/pblunsom/packages/include/boost/detail/numeric_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_assign.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_trivial_assign.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_constructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_trivial_constructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_copy.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_nothrow_destructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/has_virtual_destructor.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_compound.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_floating_point.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_member_object_pointer.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_object.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_stateless.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/rank.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/extent.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/remove_all_extents.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/function_traits.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/aligned_storage.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/promote.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/integral_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/floating_point_promotion.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/make_signed.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/decay.hpp \ + /Users/pblunsom/packages/include/boost/type_traits/is_complex.hpp \ + /Users/pblunsom/packages/include/boost/detail/select_type.hpp \ + /Users/pblunsom/packages/include/boost/graph/iteration_macros.hpp \ + /Users/pblunsom/packages/include/boost/shared_array.hpp \ + /Users/pblunsom/packages/include/boost/smart_ptr/shared_array.hpp \ + /Users/pblunsom/packages/include/boost/mpi/group.hpp \ + /Users/pblunsom/packages/include/boost/mpi/intercommunicator.hpp \ + /Users/pblunsom/packages/include/boost/mpi/nonblocking.hpp \ + /Users/pblunsom/packages/include/boost/mpi/skeleton_and_content.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_iarchive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/forward_skeleton_oarchive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/ignore_iprimitive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/ignore_oprimitive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/content_oarchive.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/broadcast_sc.hpp \ + /Users/pblunsom/packages/include/boost/mpi/detail/communicator_sc.hpp \ + /Users/pblunsom/packages/include/boost/mpi/timer.hpp pyp.hh \ + slice-sampler.h log_add.h mt19937ar.h corpus.hh contexts_corpus.hh \ + contexts_lexer.h ../../../decoder/dict.h \ /Users/pblunsom/packages/include/boost/functional/hash.hpp \ /Users/pblunsom/packages/include/boost/functional/hash/hash.hpp \ /Users/pblunsom/packages/include/boost/functional/hash/hash_fwd.hpp \ @@ -2078,7 +2270,7 @@ obj/mpi-train-contexts.o: mpi-train-contexts.cc \ /Users/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ /Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \ ../../../decoder/wordid.h gzstream.hh -obj/pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \ +pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \ /Users/pblunsom/packages/include/boost/ptr_container/ptr_vector.hpp \ /Users/pblunsom/packages/include/boost/ptr_container/ptr_sequence_adapter.hpp \ /Users/pblunsom/packages/include/boost/ptr_container/detail/reversible_ptr_container.hpp \ @@ -2484,7 +2676,7 @@ obj/pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \ /Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \ /Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ /Users/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \ - log_add.h slice-sampler.h mt19937ar.h corpus.hh \ + slice-sampler.h log_add.h mt19937ar.h corpus.hh \ /Users/pblunsom/packages/include/boost/shared_ptr.hpp \ /Users/pblunsom/packages/include/boost/smart_ptr/shared_ptr.hpp \ /Users/pblunsom/packages/include/boost/config/no_tr1/memory.hpp \ @@ -2658,7 +2850,7 @@ obj/pyp-topics.o: pyp-topics.cc timing.h clock_gettime_stub.c pyp-topics.hh \ /Users/pblunsom/packages/include/boost/smart_ptr/scoped_ptr.hpp \ /Users/pblunsom/packages/include/boost/type_traits/is_fundamental.hpp \ /Users/pblunsom/packages/include/boost/thread/condition.hpp -obj/train-contexts.o: train-contexts.cc \ +train-contexts.o: train-contexts.cc \ /Users/pblunsom/packages/include/boost/program_options/parsers.hpp \ /Users/pblunsom/packages/include/boost/program_options/config.hpp \ /Users/pblunsom/packages/include/boost/config.hpp \ @@ -3140,7 +3332,7 @@ obj/train-contexts.o: train-contexts.cc \ /Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \ /Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ /Users/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \ - log_add.h slice-sampler.h mt19937ar.h corpus.hh workers.hh \ + slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \ /Users/pblunsom/packages/include/boost/bind.hpp \ /Users/pblunsom/packages/include/boost/bind/bind.hpp \ /Users/pblunsom/packages/include/boost/is_placeholder.hpp \ @@ -3275,7 +3467,7 @@ obj/train-contexts.o: train-contexts.cc \ /Users/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ /Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \ ../../../decoder/wordid.h gzstream.hh -obj/train.o: train.cc \ +train.o: train.cc \ /Users/pblunsom/packages/include/boost/program_options/parsers.hpp \ /Users/pblunsom/packages/include/boost/program_options/config.hpp \ /Users/pblunsom/packages/include/boost/config.hpp \ @@ -3757,7 +3949,7 @@ obj/train.o: train.cc \ /Users/pblunsom/packages/include/boost/random/linear_congruential.hpp \ /Users/pblunsom/packages/include/boost/random/detail/const_mod.hpp \ /Users/pblunsom/packages/include/boost/random/detail/seed.hpp pyp.hh \ - log_add.h slice-sampler.h mt19937ar.h corpus.hh workers.hh \ + slice-sampler.h log_add.h mt19937ar.h corpus.hh workers.hh \ /Users/pblunsom/packages/include/boost/bind.hpp \ /Users/pblunsom/packages/include/boost/bind/bind.hpp \ /Users/pblunsom/packages/include/boost/is_placeholder.hpp \ @@ -3892,6 +4084,6 @@ obj/train.o: train.cc \ /Users/pblunsom/packages/include/boost/functional/hash/extensions.hpp \ /Users/pblunsom/packages/include/boost/detail/container_fwd.hpp \ ../../../decoder/wordid.h gzstream.hh -obj/clock_gettime_stub.o: clock_gettime_stub.c -obj/gammadist.o: gammadist.c gammadist.h mt19937ar.h -obj/mt19937ar.o: mt19937ar.c mt19937ar.h +clock_gettime_stub.o: clock_gettime_stub.c +gammadist.o: gammadist.c gammadist.h mt19937ar.h +mt19937ar.o: mt19937ar.c mt19937ar.h diff --git a/gi/pyp-topics/src/mpi-pyp-topics.cc b/gi/pyp-topics/src/mpi-pyp-topics.cc index 2ad28278..4525302e 100644 --- a/gi/pyp-topics/src/mpi-pyp-topics.cc +++ b/gi/pyp-topics/src/mpi-pyp-topics.cc @@ -4,7 +4,7 @@ #include "mpi-pyp-topics.hh" //#include -void PYPTopics::sample_corpus(const Corpus& corpus, int samples, +void MPIPYPTopics::sample_corpus(const Corpus& corpus, int samples, int freq_cutoff_start, int freq_cutoff_end, int freq_cutoff_interval, int max_contexts_per_document) { @@ -23,33 +23,33 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, } int local_documents = m_mpi_end - m_mpi_start; - if (!m_backoff.get()) { m_word_pyps.clear(); - m_word_pyps.push_back(PYPs()); + m_word_pyps.push_back(MPIPYPs()); } if (m_am_root) std::cerr << "\n Training with " << m_word_pyps.size()-1 << " backoff level" - << (m_word_pyps.size()==2 ? ":" : "s:") << std::endl; + << (m_word_pyps.size()>1 ? ":" : "s:") << std::endl; - for (int i=0; i<(int)m_word_pyps.size(); ++i) - { + for (int i=0; i<(int)m_word_pyps.size(); ++i) { m_word_pyps.at(i).reserve(m_num_topics); for (int j=0; j(0.5, 1.0)); + m_word_pyps.at(i).push_back(new MPIPYP(0.5, 1.0)); } if (m_am_root) std::cerr << std::endl; - m_document_pyps.reserve(corpus.num_documents()); - for (int j=0; j(0.5, 1.0)); m_topic_p0 = 1.0/m_num_topics; m_term_p0 = 1.0/corpus.num_types(); m_backoff_p0 = 1.0/corpus.num_documents(); - if (m_am_root) std::cerr << " Documents: " << corpus.num_documents() << " Terms: " - << corpus.num_types() << std::endl; + if (m_am_root) std::cerr << " Documents: " << corpus.num_documents() << "(" + << local_documents << ")" << " Terms: " << corpus.num_types() << std::endl; int frequency_cutoff = freq_cutoff_start; if (m_am_root) std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl; @@ -57,13 +57,16 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, timer.Reset(); // Initialisation pass int document_id=0, topic_counter=0; - for (Corpus::const_iterator corpusIt=corpus.begin(); - corpusIt != corpus.end(); ++corpusIt, ++document_id) { - m_corpus_topics.push_back(DocumentTopics(corpusIt->size(), 0)); + for (int i=0; ibegin(); - docIt != corpusIt->end(); ++docIt, ++term_index) { + for (Document::const_iterator docIt=corpus.at(document_id).begin(); + docIt != corpus.at(document_id).end(); ++docIt, ++term_index) { topic_counter++; Term term = *docIt; @@ -80,21 +83,41 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, if (m_use_topic_pyp) { F p0 = m_topic_pyp.prob(new_topic, m_topic_p0); - int table_delta = m_document_pyps[document_id].increment(new_topic, p0); + int table_delta = m_document_pyps.at(i).increment(new_topic, p0); if (table_delta) m_topic_pyp.increment(new_topic, m_topic_p0); } - else m_document_pyps[document_id].increment(new_topic, m_topic_p0); + else m_document_pyps.at(i).increment(new_topic, m_topic_p0); } - m_corpus_topics[document_id][term_index] = new_topic; + m_corpus_topics.at(i).at(term_index) = new_topic; + } + } + + // Synchronise the topic->word counds across the processes. + for (std::vector::iterator levelIt=m_word_pyps.begin(); + levelIt != m_word_pyps.end(); ++levelIt) { + for (MPIPYPs::iterator pypIt=levelIt->begin(); + pypIt != levelIt->end(); ++pypIt) { + if (!m_am_root) boost::mpi::communicator().barrier(); + std::cerr << "Before Sync Process " << m_rank << ":"; + pypIt->debug_info(std::cerr); std::cerr << std::endl; + if (m_am_root) boost::mpi::communicator().barrier(); + + pypIt->synchronise(); + + if (!m_am_root) boost::mpi::communicator().barrier(); + std::cerr << "After Sync Process " << m_rank << ":"; + pypIt->debug_info(std::cerr); std::cerr << std::endl; + if (m_am_root) boost::mpi::communicator().barrier(); } } + if (m_am_root) std::cerr << " Initialized in " << timer.Elapsed() << " seconds\n"; int* randomDocIndices = new int[local_documents]; for (int i = 0; i < local_documents; ++i) - randomDocIndices[i] = i+m_mpi_start; + randomDocIndices[i] = i; // Sampling phase for (int curr_sample=0; curr_sample < samples; ++curr_sample) { @@ -110,8 +133,8 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, // Randomize the corpus indexing array int tmp; int processed_terms=0; - for (int i = local_documents-1; i > 0; --i) { - //i+1 since j \in [0,i] but rnd() \in [0,1) + for (int i = (local_documents-1); i > 0; --i) { + //i+1 since j \in [0,i] but rnd() \in [0,1) int j = (int)(rnd() * (i+1)); assert(j >= 0 && j <= i); tmp = randomDocIndices[i]; @@ -120,15 +143,17 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, } // for each document in the corpus - int document_id; - for (int i=0; i max_contexts_per_document) break; @@ -140,36 +165,49 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, processed_terms++; // remove the prevous topic from the PYPs - int current_topic = m_corpus_topics[document_id][term_index]; + int current_topic = m_corpus_topics.at(doc_index).at(term_index); // a negative label mean that term hasn't been sampled yet if (current_topic >= 0) { decrement(term, current_topic); - int table_delta = m_document_pyps[document_id].decrement(current_topic); + int table_delta = m_document_pyps.at(doc_index).decrement(current_topic); if (m_use_topic_pyp && table_delta < 0) m_topic_pyp.decrement(current_topic); } // sample a new_topic - int new_topic = sample(document_id, term); + int new_topic = sample(doc_index, term); // add the new topic to the PYPs - m_corpus_topics[document_id][term_index] = new_topic; + m_corpus_topics.at(doc_index).at(term_index) = new_topic; increment(term, new_topic); if (m_use_topic_pyp) { F p0 = m_topic_pyp.prob(new_topic, m_topic_p0); - int table_delta = m_document_pyps[document_id].increment(new_topic, p0); + int table_delta = m_document_pyps.at(doc_index).increment(new_topic, p0); if (table_delta) m_topic_pyp.increment(new_topic, m_topic_p0); } - else m_document_pyps[document_id].increment(new_topic, m_topic_p0); + else m_document_pyps.at(doc_index).increment(new_topic, m_topic_p0); } if (document_id && document_id % 10000 == 0) { if (m_am_root) std::cerr << "."; std::cerr.flush(); } } m_world.barrier(); + // Synchronise the topic->word counds across the processes. + for (std::vector::iterator levelIt=m_word_pyps.begin(); + levelIt != m_word_pyps.end(); ++levelIt) { + for (MPIPYPs::iterator pypIt=levelIt->begin(); + pypIt != levelIt->end(); ++pypIt) { + std::cerr << "Before Sync Process " << m_rank << ":"; + pypIt->debug_info(std::cerr); std::cerr << std::endl; + pypIt->synchronise(); + std::cerr << "After Sync Process " << m_rank << ":"; + pypIt->debug_info(std::cerr); std::cerr << std::endl; + } + } + if (m_am_root) std::cerr << " ||| sampled " << processed_terms << " terms."; if (curr_sample != 0 && curr_sample % 10 == 0) { @@ -179,9 +217,9 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, // resample the hyperparamters F log_p=0.0; - for (std::vector::iterator levelIt=m_word_pyps.begin(); + for (std::vector::iterator levelIt=m_word_pyps.begin(); levelIt != m_word_pyps.end(); ++levelIt) { - for (PYPs::iterator pypIt=levelIt->begin(); + for (MPIPYPs::iterator pypIt=levelIt->begin(); pypIt != levelIt->end(); ++pypIt) { pypIt->resample_prior(); log_p += pypIt->log_restaurant_prob(); @@ -206,7 +244,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, int k=0; if (m_am_root) std::cerr << "Topics distribution: "; std::cerr.precision(2); - for (PYPs::iterator pypIt=m_word_pyps.front().begin(); + for (MPIPYPs::iterator pypIt=m_word_pyps.front().begin(); pypIt != m_word_pyps.front().end(); ++pypIt, ++k) { if (m_am_root && k % 5 == 0) std::cerr << std::endl << '\t'; if (m_am_root) std::cerr << "<" << k << ":" << pypIt->num_customers() << "," @@ -220,8 +258,8 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, } -void PYPTopics::decrement(const Term& term, int topic, int level) { - //std::cerr << "PYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl; +void MPIPYPTopics::decrement(const Term& term, int topic, int level) { + //std::cerr << "MPIPYPTopics::decrement(" << term << "," << topic << "," << level << ")" << std::endl; m_word_pyps.at(level).at(topic).decrement(term); if (m_backoff.get()) { Term backoff_term = (*m_backoff)[term]; @@ -230,8 +268,8 @@ void PYPTopics::decrement(const Term& term, int topic, int level) { } } -void PYPTopics::increment(const Term& term, int topic, int level) { - //std::cerr << "PYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl; +void MPIPYPTopics::increment(const Term& term, int topic, int level) { + //std::cerr << "MPIPYPTopics::increment(" << term << "," << topic << "," << level << ")" << std::endl; m_word_pyps.at(level).at(topic).increment(term, word_pyps_p0(term, topic, level)); if (m_backoff.get()) { @@ -241,7 +279,7 @@ void PYPTopics::increment(const Term& term, int topic, int level) { } } -int PYPTopics::sample(const DocumentId& doc, const Term& term) { +int MPIPYPTopics::sample(const DocumentId& doc, const Term& term) { // First pass: collect probs F sum=0.0; std::vector sums; @@ -252,7 +290,7 @@ int PYPTopics::sample(const DocumentId& doc, const Term& term) { if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0); //F p_k_d = m_document_pyps[doc].prob(k, topic_prob); - F p_k_d = m_document_pyps[doc].unnormalised_prob(k, topic_prob); + F p_k_d = m_document_pyps.at(doc).unnormalised_prob(k, topic_prob); sum += (p_w_k*p_k_d); sums.push_back(sum); @@ -266,9 +304,9 @@ int PYPTopics::sample(const DocumentId& doc, const Term& term) { assert(false); } -PYPTopics::F PYPTopics::word_pyps_p0(const Term& term, int topic, int level) const { +MPIPYPTopics::F MPIPYPTopics::word_pyps_p0(const Term& term, int topic, int level) const { //for (int i=0; i #include @@ -14,14 +14,14 @@ #include "mpi-pyp.hh" #include "corpus.hh" -class PYPTopics { +class MPIPYPTopics { public: typedef std::vector DocumentTopics; typedef std::vector CorpusTopics; typedef double F; public: - PYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0) + MPIPYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0) : m_num_topics(num_topics), m_word_pyps(1), m_topic_pyp(0.5,1.0), m_use_topic_pyp(use_topic_pyp), m_seed(seed), @@ -47,12 +47,12 @@ public: m_backoff.reset(new TermBackoff); m_backoff->read(filename); m_word_pyps.clear(); - m_word_pyps.resize(m_backoff->order(), PYPs()); + m_word_pyps.resize(m_backoff->order(), MPIPYPs()); } void set_backoff(TermBackoffPtr backoff) { m_backoff = backoff; m_word_pyps.clear(); - m_word_pyps.resize(m_backoff->order(), PYPs()); + m_word_pyps.resize(m_backoff->order(), MPIPYPs()); } F prob(const Term& term, int topic, int level=0) const; @@ -70,9 +70,10 @@ private: CorpusTopics m_corpus_topics; typedef boost::ptr_vector< PYP > PYPs; + typedef boost::ptr_vector< MPIPYP > MPIPYPs; PYPs m_document_pyps; - std::vector m_word_pyps; - PYP m_topic_pyp; + std::vector m_word_pyps; + MPIPYP m_topic_pyp; bool m_use_topic_pyp; unsigned long m_seed; diff --git a/gi/pyp-topics/src/mpi-pyp.hh b/gi/pyp-topics/src/mpi-pyp.hh index 58be7c5c..65358d20 100644 --- a/gi/pyp-topics/src/mpi-pyp.hh +++ b/gi/pyp-topics/src/mpi-pyp.hh @@ -1,5 +1,5 @@ -#ifndef _pyp_hh -#define _pyp_hh +#ifndef _mpipyp_hh +#define _mpipyp_hh #include #include @@ -9,11 +9,15 @@ #include #include #include +#include +#include +#include +#include +#include +#include -#include "pyp.h" -#include "log_add.h" -#include "slice-sampler.h" -#include "mt19937ar.h" + +#include "pyp.hh" // // Pitman-Yor process with customer and table tracking @@ -28,25 +32,104 @@ public: virtual int decrement(Dish d); void clear(); + void reset_deltas(); - void reset_deltas() { m_count_delta.clear(); } + void synchronise(); private: typedef std::map dish_delta_type; - typedef std::map table_delta_type; + typedef std::map::TableCounter> table_delta_type; dish_delta_type m_count_delta; table_delta_type m_table_delta; }; template -MPIPYP::MPIPYP(double a, double b, Hash) -: PYP(a, b, Hash) {} +MPIPYP::MPIPYP(double a, double b, Hash h) +: PYP(a, b, 0, h) {} template int MPIPYP::increment(Dish dish, double p0) { - int delta = PYP::increment(dish, p0); + int delta = 0; + int table_joined=-1; + typename PYP::TableCounter &tc = PYP::_dish_tables[dish]; + + // seated on a new or existing table? + int c = PYP::count(dish); + int t = PYP::num_tables(dish); + int T = PYP::num_tables(); + double& a = PYP::_a; + double& b = PYP::_b; + double pshare = (c > 0) ? (c - a*t) : 0.0; + double pnew = (b + a*T) * p0; + assert (pshare >= 0.0); + + if (mt_genrand_res53() < pnew / (pshare + pnew)) { + // assign to a new table + tc.tables += 1; + tc.table_histogram[1] += 1; + PYP::_total_tables += 1; + delta = 1; + } + else { + // randomly assign to an existing table + // remove constant denominator from inner loop + double r = mt_genrand_res53() * (c - a*t); + for (std::map::iterator + hit = tc.table_histogram.begin(); + hit != tc.table_histogram.end(); ++hit) { + r -= ((hit->first - a) * hit->second); + if (r <= 0) { + tc.table_histogram[hit->first+1] += 1; + hit->second -= 1; + if (hit->second == 0) + tc.table_histogram.erase(hit); + table_joined = hit->first+1; + break; + } + } + if (r > 0) { + std::cerr << r << " " << c << " " << a << " " << t << std::endl; + assert(false); + } + delta = 0; + } + + std::tr1::unordered_map::operator[](dish) += 1; + //google::sparse_hash_map::operator[](dish) += 1; + PYP::_total_customers += 1; + + // MPI Delta handling + // track the customer entering + typename dish_delta_type::iterator customer_it; + bool customer_insert_result; + boost::tie(customer_it, customer_insert_result) + = m_count_delta.insert(std::make_pair(dish,0)); + + customer_it->second += 1; + if (customer_it->second == 0) + m_count_delta.erase(customer_it); + + // increment the histogram bar for the table joined + if (!delta) { + assert (table_joined >= 0); + std::map &histogram = m_table_delta[dish].table_histogram; + typename std::map::iterator table_it; bool table_insert_result; + boost::tie(table_it, table_insert_result) = histogram.insert(std::make_pair(table_joined,0)); + table_it->second += 1; + if (table_it->second == 0) histogram.erase(table_it); + + // decrement the histogram bar for the table left + boost::tie(table_it, table_insert_result) = histogram.insert(std::make_pair(table_joined-1,0)); + table_it->second -= 1; + if (table_it->second == 0) histogram.erase(table_it); + } + else { + typename PYP::TableCounter &delta_tc = m_table_delta[dish]; + delta_tc.tables += 1; + delta_tc.table_histogram[1] += 1; + } return delta; } @@ -55,15 +138,177 @@ template int MPIPYP::decrement(Dish dish) { - int delta = PYP::decrement(dish); + typename std::tr1::unordered_map::iterator dcit = find(dish); + //typename google::sparse_hash_map::iterator dcit = find(dish); + if (dcit == PYP::end()) { + std::cerr << dish << std::endl; + assert(false); + } + + int delta = 0, table_left=-1; + + typename std::tr1::unordered_map::TableCounter>::iterator dtit + = PYP::_dish_tables.find(dish); + //typename google::sparse_hash_map::iterator dtit = _dish_tables.find(dish); + if (dtit == PYP::_dish_tables.end()) { + std::cerr << dish << std::endl; + assert(false); + } + typename PYP::TableCounter &tc = dtit->second; + + double r = mt_genrand_res53() * PYP::count(dish); + for (std::map::iterator hit = tc.table_histogram.begin(); + hit != tc.table_histogram.end(); ++hit) { + r -= (hit->first * hit->second); + if (r <= 0) { + table_left = hit->first; + if (hit->first > 1) { + tc.table_histogram[hit->first-1] += 1; + } + else { + delta = -1; + tc.tables -= 1; + PYP::_total_tables -= 1; + } + + hit->second -= 1; + if (hit->second == 0) tc.table_histogram.erase(hit); + break; + } + } + if (r > 0) { + std::cerr << r << " " << PYP::count(dish) << " " << PYP::_a << " " + << PYP::num_tables(dish) << std::endl; + assert(false); + } + + // remove the customer + dcit->second -= 1; + PYP::_total_customers -= 1; + assert(dcit->second >= 0); + if (dcit->second == 0) { + PYP::erase(dcit); + PYP::_dish_tables.erase(dtit); + } + + typename dish_delta_type::iterator it; + bool insert_result; + boost::tie(it, insert_result) = m_count_delta.insert(std::make_pair(dish,0)); + + it->second -= 1; + + if (it->second == 0) + m_count_delta.erase(it); + + assert (table_left >= 0); + typename PYP::TableCounter& delta_tc = m_table_delta[dish]; + if (table_left > 1) + delta_tc.table_histogram[table_left-1] += 1; + else delta_tc.tables -= 1; + + std::map::iterator tit = delta_tc.table_histogram.find(table_left); + //assert (tit != delta_tc.table_histogram.end()); + tit->second -= 1; + if (tit->second == 0) delta_tc.table_histogram.erase(tit); + return delta; } template void -MPIPYP::clear() -{ +MPIPYP::clear() { PYP::clear(); + reset_deltas(); +} + +template +void +MPIPYP::reset_deltas() { + m_count_delta.clear(); + m_table_delta.clear(); +} + +template +struct sum_maps { + typedef std::map map_type; + map_type& operator() (map_type& l, map_type const & r) const { + for (typename map_type::const_iterator it=r.begin(); it != r.end(); it++) + l[it->first] += it->second; + return l; + } +}; + +// Needed Boost definitions +namespace boost { + namespace mpi { + template <> + struct is_commutative< sum_maps, std::map > : mpl::true_ {}; + } + + namespace serialization { + template + void serialize(Archive & ar, PYP::TableCounter& t, const unsigned int version) { + ar & t.table_histogram; + ar & t.tables; + } + + } // namespace serialization +} // namespace boost + + +template +void +MPIPYP::synchronise() { + boost::mpi::communicator world; + int rank = world.rank(), size = world.size(); + + // communicate the customer count deltas + dish_delta_type global_dish_delta; // the “merged” map + boost::mpi::all_reduce(world, m_count_delta, global_dish_delta, sum_maps()); + + // update this restaurant + for (typename dish_delta_type::const_iterator it=global_dish_delta.begin(); + it != global_dish_delta.end(); ++it) { + std::tr1::unordered_map::operator[](it->first) += (it->second - m_count_delta[it->first]); + PYP::_total_customers += (it->second - m_count_delta[it->first]); + //std::cerr << "Process " << rank << " adding " << (it->second - m_count_delta[it->first]) << " customers." << std::endl; + } + + // communicate the table count deltas +// for (int process = 0; process < size; ++process) { +// if (rank == process) { +// // broadcast deltas +// std::cerr << " -- Rank " << rank << " broadcasting -- " << std::endl; +// +// boost::mpi::broadcast(world, m_table_delta, process); +// +// std::cerr << " -- Rank " << rank << " done broadcasting -- " << std::endl; +// } +// else { +// std::cerr << " -- Rank " << rank << " receiving -- " << std::endl; +// // receive deltas +// table_delta_type recv_table_delta; +// +// boost::mpi::broadcast(world, recv_table_delta, process); +// +// std::cerr << " -- Rank " << rank << " done receiving -- " << std::endl; +// +// for (typename table_delta_type::const_iterator dish_it=recv_table_delta.begin(); +// dish_it != recv_table_delta.end(); ++dish_it) { +// typename PYP::TableCounter &tc = PYP::_dish_tables[dish_it->first]; +// +// for (std::map::const_iterator it=dish_it->second.table_histogram.begin(); +// it != dish_it->second.table_histogram.end(); ++it) { +// tc.table_histogram[it->first] += it->second; +// } +// tc.tables += dish_it->second.tables; +// PYP::_total_tables += dish_it->second.tables; +// } +// } +// } +// std::cerr << " -- Done Reducing -- " << std::endl; + + reset_deltas(); } #endif diff --git a/gi/pyp-topics/src/mpi-train-contexts.cc b/gi/pyp-topics/src/mpi-train-contexts.cc index 956ce123..0651ecac 100644 --- a/gi/pyp-topics/src/mpi-train-contexts.cc +++ b/gi/pyp-topics/src/mpi-train-contexts.cc @@ -86,7 +86,7 @@ int main(int argc, char **argv) // seed the random number generator: 0 = automatic, specify value otherwise unsigned long seed = 0; - PYPTopics model(vm["topics"].as(), vm.count("hierarchical-topics"), seed); + MPIPYPTopics model(vm["topics"].as(), vm.count("hierarchical-topics"), seed); // read the data BackoffGenerator* backoff_gen=0; diff --git a/gi/pyp-topics/src/pyp.hh b/gi/pyp-topics/src/pyp.hh index 26f6ab2e..84decb0f 100644 --- a/gi/pyp-topics/src/pyp.hh +++ b/gi/pyp-topics/src/pyp.hh @@ -1,6 +1,7 @@ #ifndef _pyp_hh #define _pyp_hh +#include "slice-sampler.h" #include #include #include @@ -11,7 +12,6 @@ #include #include "log_add.h" -#include "slice-sampler.h" #include "mt19937ar.h" // @@ -63,7 +63,7 @@ public: double b() const { return _b; } void set_b(double b) { _b = b; } - void clear(); + virtual void clear(); std::ostream& debug_info(std::ostream& os) const; double log_restaurant_prob() const; @@ -75,13 +75,12 @@ public: void resample_prior_a(); void resample_prior_b(); -private: +protected: double _a, _b; // parameters of the Pitman-Yor distribution double _a_beta_a, _a_beta_b; // parameters of Beta prior on a double _b_gamma_s, _b_gamma_c; // parameters of Gamma prior on b - struct TableCounter - { + struct TableCounter { TableCounter() : tables(0) {}; int tables; std::map table_histogram; // num customers at table -> number tables -- cgit v1.2.3