From f305e7b0e23b952fb4b7299b2607176ab7409ef9 Mon Sep 17 00:00:00 2001 From: "linh.kitty" Date: Tue, 13 Jul 2010 20:20:55 +0000 Subject: linh added git-svn-id: https://ws10smt.googlecode.com/svn/trunk@241 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/scfg/abc/Release/IConv.d | 3 + gi/scfg/abc/Release/Util.d | 8 + gi/scfg/abc/Release/agrammar.d | 193 +++++++++++++++++ gi/scfg/abc/Release/dict_test | Bin 0 -> 1485797 bytes gi/scfg/abc/Release/grammar | 13 ++ gi/scfg/abc/Release/grammar.pr | 13 ++ gi/scfg/abc/Release/makefile | 66 ++++++ gi/scfg/abc/Release/process_grammar.pl | 36 ++++ gi/scfg/abc/Release/scfg | Bin 0 -> 4277125 bytes gi/scfg/abc/Release/scfg.d | 209 ++++++++++++++++++ gi/scfg/abc/Release/sources.mk | 27 +++ gi/scfg/abc/Release/subdir.mk | 59 +++++ gi/scfg/abc/Release/tmp.grammar | 2 + gi/scfg/abc/Release/toy-grammar | 1 + gi/scfg/abc/a.out | Bin 0 -> 22639 bytes gi/scfg/abc/agrammar.cc | 378 ++++++++++++++++++++++++++++++++ gi/scfg/abc/agrammar.h | 68 ++++++ gi/scfg/abc/old_agrammar.cc | 383 +++++++++++++++++++++++++++++++++ gi/scfg/abc/old_agrammar.h | 45 ++++ gi/scfg/abc/scfg.cpp | 152 +++++++++++++ gi/scfg/abc/tmp.cpp | 36 ++++ 21 files changed, 1692 insertions(+) create mode 100644 gi/scfg/abc/Release/IConv.d create mode 100644 gi/scfg/abc/Release/Util.d create mode 100644 gi/scfg/abc/Release/agrammar.d create mode 100755 gi/scfg/abc/Release/dict_test create mode 100644 gi/scfg/abc/Release/grammar create mode 100644 gi/scfg/abc/Release/grammar.pr create mode 100644 gi/scfg/abc/Release/makefile create mode 100644 gi/scfg/abc/Release/process_grammar.pl create mode 100755 gi/scfg/abc/Release/scfg create mode 100644 gi/scfg/abc/Release/scfg.d create mode 100644 gi/scfg/abc/Release/sources.mk create mode 100644 gi/scfg/abc/Release/subdir.mk create mode 100644 gi/scfg/abc/Release/tmp.grammar create mode 120000 gi/scfg/abc/Release/toy-grammar create mode 100755 gi/scfg/abc/a.out create mode 100644 gi/scfg/abc/agrammar.cc create mode 100644 gi/scfg/abc/agrammar.h create mode 100644 gi/scfg/abc/old_agrammar.cc create mode 100644 gi/scfg/abc/old_agrammar.h create mode 100644 gi/scfg/abc/scfg.cpp create mode 100644 gi/scfg/abc/tmp.cpp (limited to 'gi') diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d new file mode 100644 index 00000000..082cb15b --- /dev/null +++ b/gi/scfg/abc/Release/IConv.d @@ -0,0 +1,3 @@ +IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp + +../../utils/IConv.hpp: diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d new file mode 100644 index 00000000..586d4d60 --- /dev/null +++ b/gi/scfg/abc/Release/Util.d @@ -0,0 +1,8 @@ +Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \ + ../../utils/UtfConverter.h ../../utils/ConvertUTF.h + +../../utils/Util.h: + +../../utils/UtfConverter.h: + +../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d new file mode 100644 index 00000000..6cf14f0d --- /dev/null +++ b/gi/scfg/abc/Release/agrammar.d @@ -0,0 +1,193 @@ +agrammar.d agrammar.o: ../agrammar.cc \ + /home/tnguyen/ws10smt/decoder/rule_lexer.h \ + /home/tnguyen/ws10smt/decoder/trule.h \ + /export/ws10smt/software/include/boost/shared_ptr.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ + /export/ws10smt/software/include/boost/config.hpp \ + /export/ws10smt/software/include/boost/config/user.hpp \ + /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ + /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ + /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ + /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ + /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ + /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ + /export/ws10smt/software/include/boost/config/platform/linux.hpp \ + /export/ws10smt/software/include/boost/config/posix_features.hpp \ + /export/ws10smt/software/include/boost/config/suffix.hpp \ + /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ + /export/ws10smt/software/include/boost/assert.hpp \ + /export/ws10smt/software/include/boost/checked_delete.hpp \ + /export/ws10smt/software/include/boost/throw_exception.hpp \ + /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ + /export/ws10smt/software/include/boost/detail/workaround.hpp \ + /export/ws10smt/software/include/boost/exception/exception.hpp \ + /export/ws10smt/software/include/boost/current_function.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ + /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ + /export/ws10smt/software/include/boost/memory_order.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ + /home/tnguyen/ws10smt/decoder/sparse_vector.h \ + /home/tnguyen/ws10smt/decoder/fdict.h \ + /home/tnguyen/ws10smt/decoder/dict.h \ + /export/ws10smt/software/include/boost/functional/hash.hpp \ + /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ + /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ + /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ + /export/ws10smt/software/include/boost/limits.hpp \ + /export/ws10smt/software/include/boost/integer/static_log2.hpp \ + /export/ws10smt/software/include/boost/integer_fwd.hpp \ + /export/ws10smt/software/include/boost/cstdint.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ + /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ + /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ + /home/tnguyen/ws10smt/decoder/wordid.h \ + /home/tnguyen/ws10smt/decoder/filelib.h \ + /home/tnguyen/ws10smt/decoder/gzstream.h \ + /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ + /home/tnguyen/ws10smt/decoder/grammar.h \ + /home/tnguyen/ws10smt/decoder/lattice.h \ + /home/tnguyen/ws10smt/decoder/array2d.h ../../utils/Util.h \ + ../../utils/UtfConverter.h ../../utils/ConvertUTF.h + +/home/tnguyen/ws10smt/decoder/rule_lexer.h: + +/home/tnguyen/ws10smt/decoder/trule.h: + +/export/ws10smt/software/include/boost/shared_ptr.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: + +/export/ws10smt/software/include/boost/config.hpp: + +/export/ws10smt/software/include/boost/config/user.hpp: + +/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: + +/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: + +/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: + +/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: + +/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: + +/export/ws10smt/software/include/boost/config/select_platform_config.hpp: + +/export/ws10smt/software/include/boost/config/platform/linux.hpp: + +/export/ws10smt/software/include/boost/config/posix_features.hpp: + +/export/ws10smt/software/include/boost/config/suffix.hpp: + +/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: + +/export/ws10smt/software/include/boost/assert.hpp: + +/export/ws10smt/software/include/boost/checked_delete.hpp: + +/export/ws10smt/software/include/boost/throw_exception.hpp: + +/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: + +/export/ws10smt/software/include/boost/detail/workaround.hpp: + +/export/ws10smt/software/include/boost/exception/exception.hpp: + +/export/ws10smt/software/include/boost/current_function.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: + +/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: + +/export/ws10smt/software/include/boost/memory_order.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: + +/home/tnguyen/ws10smt/decoder/sparse_vector.h: + +/home/tnguyen/ws10smt/decoder/fdict.h: + +/home/tnguyen/ws10smt/decoder/dict.h: + +/export/ws10smt/software/include/boost/functional/hash.hpp: + +/export/ws10smt/software/include/boost/functional/hash/hash.hpp: + +/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: + +/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: + +/export/ws10smt/software/include/boost/limits.hpp: + +/export/ws10smt/software/include/boost/integer/static_log2.hpp: + +/export/ws10smt/software/include/boost/integer_fwd.hpp: + +/export/ws10smt/software/include/boost/cstdint.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: + +/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: + +/export/ws10smt/software/include/boost/detail/container_fwd.hpp: + +/home/tnguyen/ws10smt/decoder/wordid.h: + +/home/tnguyen/ws10smt/decoder/filelib.h: + +/home/tnguyen/ws10smt/decoder/gzstream.h: + +/home/tnguyen/ws10smt/decoder/tdict.h: + +../agrammar.h: + +/home/tnguyen/ws10smt/decoder/grammar.h: + +/home/tnguyen/ws10smt/decoder/lattice.h: + +/home/tnguyen/ws10smt/decoder/array2d.h: + +../../utils/Util.h: + +../../utils/UtfConverter.h: + +../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_test new file mode 100755 index 00000000..1ba94218 Binary files /dev/null and b/gi/scfg/abc/Release/dict_test differ diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar new file mode 100644 index 00000000..75fac3a0 --- /dev/null +++ b/gi/scfg/abc/Release/grammar @@ -0,0 +1,13 @@ +[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 +[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 +[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 +[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 +[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 +[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr new file mode 100644 index 00000000..e4e327cf --- /dev/null +++ b/gi/scfg/abc/Release/grammar.pr @@ -0,0 +1,13 @@ +[X] ||| . ||| . ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] anciano . ||| [1] old man . ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154 +[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154 +[X] ||| el ||| the ||| MinusLogP=2.56494935746154 +[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154 +[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154 diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile new file mode 100644 index 00000000..25949e74 --- /dev/null +++ b/gi/scfg/abc/Release/makefile @@ -0,0 +1,66 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +#-include ../makefile.init + +RM := rm -rf + +# All of the sources participating in the build are defined here +-include sources.mk +-include subdir.mk +-include objects.mk + +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(strip $(C++_DEPS)),) +-include $(C++_DEPS) +endif +ifneq ($(strip $(CC_DEPS)),) +-include $(CC_DEPS) +endif +ifneq ($(strip $(C_DEPS)),) +-include $(C_DEPS) +endif +ifneq ($(strip $(CPP_DEPS)),) +-include $(CPP_DEPS) +endif +ifneq ($(strip $(CXX_DEPS)),) +-include $(CXX_DEPS) +endif +ifneq ($(strip $(C_UPPER_DEPS)),) +-include $(C_UPPER_DEPS) +endif +endif + +#-include ../makefile.defs + +# Add inputs and outputs from these tool invocations to the build variables + +# All Target +all: scfg + +# Tool invocations + +# scfg.o: ../scfg.cpp +# @echo 'Building file: $<' +# @echo 'Invoking: GCC C++ Compiler' +# g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" +# @echo 'Finished building: $<' +# @echo ' ' + +scfg: $(OBJS) $(USER_OBJS) + @echo 'Building target: $@' + @echo 'Invoking: GCC C++ Linker' + /bin/sh ../../../../libtool --tag=CXX --mode=link g++ -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc + @echo 'Finished building target: $@' + @echo ' ' +#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder -lpthread -ldl -lm $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg" +# Other Targets +clean: + -$(RM) $(OBJS)$(C++_DEPS)$(EXECUTABLES)$(CC_DEPS)$(C_DEPS)$(CPP_DEPS)$(CXX_DEPS)$(C_UPPER_DEPS) scfg + -@echo ' ' + +.PHONY: all clean dependents +.SECONDARY: + +-include ../makefile.targets diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl new file mode 100644 index 00000000..f82a8e5a --- /dev/null +++ b/gi/scfg/abc/Release/process_grammar.pl @@ -0,0 +1,36 @@ +#!perl + +use warnings; +use strict; + +my $grammar_file = $ARGV[0]; + +my %nt_count; #maps nt--> count rules whose lhs is nt + +open(G, "<$grammar_file") or die "Can't open file $grammar_file"; + +while (){ + + chomp(); + + s/\|\|\|.*//g; + s/\s//g; + + $nt_count{$_}++; +} + + +close (G); + +open(G, "<$grammar_file") or die "Can't open file $grammar_file"; + +while (){ + + chomp(); + + (my $nt = $_) =~ s/\|\|\|.*//g; + $nt =~ s/\s//g; + + s/(.+\|\|\|.+\|\|\|.+\|\|\|).+/$1/g; + print $_ . " MinusLogP=" .(log($nt_count{$nt})) ."\n"; +} diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfg new file mode 100755 index 00000000..4b6cfb19 Binary files /dev/null and b/gi/scfg/abc/Release/scfg differ diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d new file mode 100644 index 00000000..ae7a87bb --- /dev/null +++ b/gi/scfg/abc/Release/scfg.d @@ -0,0 +1,209 @@ +scfg.d scfg.o: ../scfg.cpp /home/tnguyen/ws10smt/decoder/lattice.h \ + /home/tnguyen/ws10smt/decoder/wordid.h \ + /home/tnguyen/ws10smt/decoder/array2d.h \ + /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ + /home/tnguyen/ws10smt/decoder/grammar.h \ + /export/ws10smt/software/include/boost/shared_ptr.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ + /export/ws10smt/software/include/boost/config.hpp \ + /export/ws10smt/software/include/boost/config/user.hpp \ + /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ + /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ + /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ + /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ + /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ + /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ + /export/ws10smt/software/include/boost/config/platform/linux.hpp \ + /export/ws10smt/software/include/boost/config/posix_features.hpp \ + /export/ws10smt/software/include/boost/config/suffix.hpp \ + /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ + /export/ws10smt/software/include/boost/assert.hpp \ + /export/ws10smt/software/include/boost/checked_delete.hpp \ + /export/ws10smt/software/include/boost/throw_exception.hpp \ + /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ + /export/ws10smt/software/include/boost/detail/workaround.hpp \ + /export/ws10smt/software/include/boost/exception/exception.hpp \ + /export/ws10smt/software/include/boost/current_function.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ + /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ + /export/ws10smt/software/include/boost/memory_order.hpp \ + /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ + /home/tnguyen/ws10smt/decoder/lattice.h \ + /home/tnguyen/ws10smt/decoder/trule.h \ + /home/tnguyen/ws10smt/decoder/sparse_vector.h \ + /home/tnguyen/ws10smt/decoder/fdict.h \ + /home/tnguyen/ws10smt/decoder/dict.h \ + /export/ws10smt/software/include/boost/functional/hash.hpp \ + /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ + /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ + /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ + /export/ws10smt/software/include/boost/limits.hpp \ + /export/ws10smt/software/include/boost/integer/static_log2.hpp \ + /export/ws10smt/software/include/boost/integer_fwd.hpp \ + /export/ws10smt/software/include/boost/cstdint.hpp \ + /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ + /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ + /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ + /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \ + /home/tnguyen/ws10smt/decoder/grammar.h \ + /home/tnguyen/ws10smt/decoder/hg.h \ + /home/tnguyen/ws10smt/decoder/small_vector.h \ + /home/tnguyen/ws10smt/decoder/prob.h \ + /home/tnguyen/ws10smt/decoder/logval.h \ + /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \ + ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h + +/home/tnguyen/ws10smt/decoder/lattice.h: + +/home/tnguyen/ws10smt/decoder/wordid.h: + +/home/tnguyen/ws10smt/decoder/array2d.h: + +/home/tnguyen/ws10smt/decoder/tdict.h: + +../agrammar.h: + +/home/tnguyen/ws10smt/decoder/grammar.h: + +/export/ws10smt/software/include/boost/shared_ptr.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: + +/export/ws10smt/software/include/boost/config.hpp: + +/export/ws10smt/software/include/boost/config/user.hpp: + +/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: + +/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: + +/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: + +/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: + +/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: + +/export/ws10smt/software/include/boost/config/select_platform_config.hpp: + +/export/ws10smt/software/include/boost/config/platform/linux.hpp: + +/export/ws10smt/software/include/boost/config/posix_features.hpp: + +/export/ws10smt/software/include/boost/config/suffix.hpp: + +/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: + +/export/ws10smt/software/include/boost/assert.hpp: + +/export/ws10smt/software/include/boost/checked_delete.hpp: + +/export/ws10smt/software/include/boost/throw_exception.hpp: + +/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: + +/export/ws10smt/software/include/boost/detail/workaround.hpp: + +/export/ws10smt/software/include/boost/exception/exception.hpp: + +/export/ws10smt/software/include/boost/current_function.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: + +/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: + +/export/ws10smt/software/include/boost/memory_order.hpp: + +/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: + +/home/tnguyen/ws10smt/decoder/lattice.h: + +/home/tnguyen/ws10smt/decoder/trule.h: + +/home/tnguyen/ws10smt/decoder/sparse_vector.h: + +/home/tnguyen/ws10smt/decoder/fdict.h: + +/home/tnguyen/ws10smt/decoder/dict.h: + +/export/ws10smt/software/include/boost/functional/hash.hpp: + +/export/ws10smt/software/include/boost/functional/hash/hash.hpp: + +/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: + +/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: + +/export/ws10smt/software/include/boost/limits.hpp: + +/export/ws10smt/software/include/boost/integer/static_log2.hpp: + +/export/ws10smt/software/include/boost/integer_fwd.hpp: + +/export/ws10smt/software/include/boost/cstdint.hpp: + +/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: + +/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: + +/export/ws10smt/software/include/boost/detail/container_fwd.hpp: + +/home/tnguyen/ws10smt/decoder/bottom_up_parser.h: + +/home/tnguyen/ws10smt/decoder/grammar.h: + +/home/tnguyen/ws10smt/decoder/hg.h: + +/home/tnguyen/ws10smt/decoder/small_vector.h: + +/home/tnguyen/ws10smt/decoder/prob.h: + +/home/tnguyen/ws10smt/decoder/logval.h: + +/home/tnguyen/ws10smt/decoder/hg_intersect.h: + +../../utils/ParamsArray.h: + +../../utils/Util.h: + +../../utils/UtfConverter.h: + +../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk new file mode 100644 index 00000000..6c7070aa --- /dev/null +++ b/gi/scfg/abc/Release/sources.mk @@ -0,0 +1,27 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +C_UPPER_SRCS := +C_SRCS := +CPP_SRCS := +O_SRCS := +ASM_SRCS := +S_SRCS := +C++_SRCS := +CXX_SRCS := +CC_SRCS := +OBJ_SRCS := +OBJS := +C++_DEPS := +EXECUTABLES := +CC_DEPS := +C_DEPS := +CPP_DEPS := +CXX_DEPS := +C_UPPER_DEPS := + +# Every subdirectory with source files must be described here +SUBDIRS := \ +. \ + diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk new file mode 100644 index 00000000..49080b36 --- /dev/null +++ b/gi/scfg/abc/Release/subdir.mk @@ -0,0 +1,59 @@ + +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +CPP_SRCS += \ +../../utils/Util.cc \ +../agrammar.cc \ +../scfg.cpp + + +OBJS += \ +./Util.o \ +./agrammar.o \ +./scfg.o + + +CPP_DEPS += \ +./Util.d \ +./agrammar.d \ +./scfg.d + +# Each subdirectory must supply rules for building sources it contributes +# %.o: ../%.cpp +# @echo 'Building file: $<' +# @echo 'Invoking: GCC C++ Compiler' +# g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" +# +# @echo ' ' + +%.o: ../../utils/%.cc + @echo 'Building file: $<' + @echo 'Invoking: GCC C++ Compiler' + g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + +%.o: ../../utils/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C++ Compiler' + g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + +%.o: ../%.cpp + @echo 'Building file: $<' + @echo 'Invoking: GCC C++ Compiler' + g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + +%.o: ../%.cc + @echo 'Building file: $<' + @echo 'Invoking: GCC C++ Compiler' + g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar new file mode 100644 index 00000000..9df1b77d --- /dev/null +++ b/gi/scfg/abc/Release/tmp.grammar @@ -0,0 +1,2 @@ +[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 +[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 \ No newline at end of file diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar new file mode 120000 index 00000000..50dea8df --- /dev/null +++ b/gi/scfg/abc/Release/toy-grammar @@ -0,0 +1 @@ +/export/ws10smt/toy-grammar/ \ No newline at end of file diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.out new file mode 100755 index 00000000..0467acf0 Binary files /dev/null and b/gi/scfg/abc/a.out differ diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc new file mode 100644 index 00000000..585255e3 --- /dev/null +++ b/gi/scfg/abc/agrammar.cc @@ -0,0 +1,378 @@ +#include +#include +#include + +#include "rule_lexer.h" +#include "filelib.h" +#include "tdict.h" +#include "agrammar.h" +#include "../utils/Util.h" + +bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ + if (rule1->lhs_ != rule2->lhs_) return false; + if (rule1->f_.size() != rule2->f_.size()) return false; + if (rule1->e_.size() != rule2->e_.size()) return false; + + for (int i=0; if_.size(); i++) + if (rule1->f_.at(i) != rule2->f_.at(i)) return false; + for (int i=0; ie_.size(); i++) + if (rule1->e_.at(i) != rule2->e_.at(i)) return false; + return true; +} + +//const vector Grammar::NO_RULES; + +void aRemoveRule(vector & v, const TRulePtr & rule){ // remove rule from v if found + for (int i=0; i< v.size(); i++) + if (equal(v[i], rule )){ + cout<<"erase rule from vector:"<AsString()<Arity(); + } + void Dump() const { + for (int i = 0; i < rules_.size(); ++i) + cerr << rules_[i]->AsString() << endl; + } + private: + vector rules_; +}; + +struct aTextGrammarNode : public GrammarIter { + aTextGrammarNode() : rb_(NULL) {} + ~aTextGrammarNode() { + delete rb_; + } + const GrammarIter* Extend(int symbol) const { + map::const_iterator i = tree_.find(symbol); + if (i == tree_.end()) return NULL; + return &i->second; + } + + const RuleBin* GetRules() const { + if (rb_) { + //rb_->Dump(); + } + return rb_; + } + + map tree_; + aTextRuleBin* rb_; +}; + +struct aTGImpl { + aTextGrammarNode root_; +}; + +aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} +aTextGrammar::aTextGrammar(const string& file) : + max_span_(10), + pimpl_(new aTGImpl) { + ReadFromFile(file); +} + +const GrammarIter* aTextGrammar::GetRoot() const { + return &pimpl_->root_; +} + +void aTextGrammar::SetGoalNT(const string & goal_str){ + goalID = TD::Convert(goal_str); + +} +void getNTRule( const TRulePtr & rule, map & ntrule_map){ + + NTRule lhs_ntrule(rule, rule->lhs_ * -1); + ntrule_map[rule->lhs_ * -1] = lhs_ntrule; + + for (int i=0; i< (rule->f_).size(); i++) + if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ + NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); + ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; + } + + +} +void aTextGrammar::AddRule(const TRulePtr& rule) { + if (rule->IsUnary()) { + rhs2unaries_[rule->f().front()].push_back(rule); + unaries_.push_back(rule); + } else { + aTextGrammarNode* cur = &pimpl_->root_; + for (int i = 0; i < rule->f_.size(); ++i) + cur = &cur->tree_[rule->f_[i]]; + if (cur->rb_ == NULL) + cur->rb_ = new aTextRuleBin; + cur->rb_->AddRule(rule); + } + + //add the rule to lhs_rules_ + lhs_rules_[rule->lhs_* -1].push_back(rule); + + //add the rule to nt_rules_ + map ntrule_map; + getNTRule (rule, ntrule_map); + for (map::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ + nt_rules_[it->first].push_back(it->second); + } +} + +void aTextGrammar::RemoveRule(const TRulePtr & rule){ + cout<<"Remove rule: "<AsString()<IsUnary()) { + aRemoveRule(rhs2unaries_[rule->f().front()], rule); + aRemoveRule(unaries_, rule); + } else { + aTextGrammarNode* cur = &pimpl_->root_; + for (int i = 0; i < rule->f_.size(); ++i) + cur = &cur->tree_[rule->f_[i]]; +// if (cur->rb_ == NULL) +// cur->rb_ = new aTextRuleBin; + cur->rb_->RemoveRule(rule); + } + + //remove rules from lhs_rules_ + + aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); + +} + +void aTextGrammar::RemoveNonterminal(WordID wordID){ + vector rules = nt_rules_[wordID]; +// // remove the nonterminal from ntrules_ + nt_rules_.erase(wordID); + for (int i =0; i & nts){ + + vector rules = nt_rules_[nt_old]; + + // cout<<"\n\n\n start add splitting rules"< ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old + //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' + //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, + //hence the rules' new probabilities have to divide to cnt also + // cout<<"splitting NT in rule "<AsString()< e_ = (old_rule.rule_)->e_; + newrule -> f_ = old_rule.rule_->f_; + newrule->lhs_ = old_rule.rule_->lhs_; + newrule -> arity_ = old_rule.rule_->arity_; + newrule -> scores_ = old_rule.rule_->scores_; + + // cout<<"end up update score\n"; + if (ntPos[0] == -1){ //update the lhs + newrule->lhs_ = nts[j_vector[0]] * -1; + + //score has to randomly add/minus a small epsilon to break the balance + if (nts.size() >1 && ntPos.size() >1){ + // cout<<"start to add/minus epsilon"<lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon + newrule-> scores_ -= epsilon; + else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) + newrule-> scores_ += epsilon; + else{ + double random = rand()/RAND_MAX; + if (random > .5){ + newrule-> scores_ += epsilon; + cnt_addepsilon[newrule->lhs_]++; + } + else{ + newrule-> scores_ -= epsilon; + cnt_minusepsilon[newrule->lhs_]++; + } + } + } + + + for (int k=1; klhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon + newrule-> scores_ -= epsilon; + else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 ) + newrule-> scores_ += epsilon; + else{ + double random = rand()/RAND_MAX; + if (random > .5){ + newrule-> scores_ += epsilon; + cnt_addepsilon[newrule->lhs_]++; + } + else{ + newrule-> scores_ -= epsilon; + cnt_minusepsilon[newrule->lhs_]++; + } + } + } + + + for (int k=0; k X1; X->X2,... if X is the goal NT + for (int i =0; ilhs_ = goalID * -1; + rule ->f_.push_back(v_splits[i] * -1); + rule->e_.push_back(0); + + rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); + AddRule(rule); + } + + } + + +} + + + +void aTextGrammar::PrintAllRules() const{ + map >::const_iterator it; + for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ + + vector v = it-> second; + for (int i =0; i< v.size(); i++){ + cout<AsString()<<"\t"< v; + map >::const_iterator mit= nt_rules_.find(nt); + if (mit == nt_rules_.end()) + return; + + v = mit->second; + + for (vector::const_iterator it = v.begin(); it != v.end(); it++) + cout<rule_->AsString()<(extra)->AddRule(new_rule); +} + +void aTextGrammar::ReadFromFile(const string& filename) { + ReadFile in(filename); + RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this); +} + +bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { + return (max_span_ >= distance); +} + diff --git a/gi/scfg/abc/agrammar.h b/gi/scfg/abc/agrammar.h new file mode 100644 index 00000000..8a7186bf --- /dev/null +++ b/gi/scfg/abc/agrammar.h @@ -0,0 +1,68 @@ +#ifndef AGRAMMAR_H_ +#define AGRAMMAR_H_ + +#include "grammar.h" + + +using namespace std; + +class aTGImpl; +struct NTRule{ + + NTRule(){}; + NTRule(const TRulePtr & rule, WordID nt){ + nt_ = nt; + rule_ = rule; + + if (rule->lhs_ * -1 == nt) + ntPos_.push_back(-1); + + for (int i=0; i< rule->f().size(); i++) + if (rule->f().at(i) * -1 == nt) + ntPos_.push_back(i); + } + + TRulePtr rule_; + WordID nt_; //the labelID of the nt (WordID>0); + + vector ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_() + //i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP + + +}; + +struct aTextGrammar : public Grammar { + aTextGrammar(); + aTextGrammar(const std::string& file); + void SetMaxSpan(int m) { max_span_ = m; } + + virtual const GrammarIter* GetRoot() const; + void AddRule(const TRulePtr& rule); + void ReadFromFile(const std::string& filename); + virtual bool HasRuleForSpan(int i, int j, int distance) const; + const std::vector& GetUnaryRules(const WordID& cat) const; + + void AddSplitNonTerminal(WordID nt_old, vector & nts); + void setMaxSplit(int max_split); + void splitNonterminal(WordID wordID); + + void PrintAllRules() const; + void PrintNonterminalRules(WordID nt) const; + void SetGoalNT(const string & goal_str); + private: + + void RemoveRule(const TRulePtr & rule); + void RemoveNonterminal(WordID wordID); + + int max_span_; + int max_split_; + boost::shared_ptr pimpl_; + map > lhs_rules_;// WordID >0 + map > nt_rules_; + + // map > grSplitNonterminals; + WordID goalID; +}; + + +#endif diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc new file mode 100644 index 00000000..33d70dfc --- /dev/null +++ b/gi/scfg/abc/old_agrammar.cc @@ -0,0 +1,383 @@ +#include "agrammar.h" +#include "Util.h" + +#include +#include +#include + +#include "rule_lexer.h" +#include "filelib.h" +#include "tdict.h" +#include +#include + +map > grSplitNonterminals; +//const vector Grammar::NO_RULES; + + +// vector substituteF(TRulePtr & rule, WordID wordID, vector & v){ +// vector vRules; //outputs + +// vector f = rule->f(); +// vector > newfvector; +// for (int i =0; i< f.size(); i++){ +// if (f[i] == wordID){ +// newfvector.push_back(v); +// } +// else +// newfvector.push_back(vector (1, f[i])); +// } + +// //now creates new rules; + + +// return vRules; +// } + + +struct aTextRuleBin : public RuleBin { + int GetNumRules() const { + return rules_.size(); + } + TRulePtr GetIthRule(int i) const { + return rules_[i]; + } + void AddRule(TRulePtr t) { + rules_.push_back(t); + } + int Arity() const { + return rules_.front()->Arity(); + } + void Dump() const { + for (int i = 0; i < rules_.size(); ++i) + cerr << rules_[i]->AsString() << endl; + } + + + vector getRules(){ return rules_;} + + + void substituteF(vector & f_path, map > & grSplitNonterminals){ + //this substituteF method is different with substituteF procedure found in cdec code; + // + //aTextRuleBin has a collection of rules with the same f() on the rhs, + //substituteF() replaces the f_ of all the rules with f_path vector, + //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals + + vector newrules; + for (vector::iterator it = rules_.begin() ; it != rules_.end(); it++){ + assert(f_path.size() == (*it)->f_.size()); + + if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){ + (*it)->f_ = f_path; + } + else{ // split the lhs NT, + vector new_lhs = grSplitNonterminals[ (*it)->lhs_ ]; + for (vector::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){ + TRulePtr newrule; + newrule -> e_ = (*it)->e_; + newrule -> f_ = (*it)->f_; + newrule->lhs_ = *vit; + newrule -> scores_ = (*it)->scores_; + newrule -> arity_ = (*it)->arity_; + newrules.push_back (newrule); + } + rules_.erase(it); + } + } + + //now add back newrules(output of splitting lhs_) to rules_ + rules_.insert(newrules.begin(),newrules.begin(), newrules.end()); + } + +private: + vector rules_; +}; + + + +struct aTextGrammarNode : public GrammarIter { + aTextGrammarNode() : rb_(NULL) {} + + aTextGrammarNode(const aTextGrammarNode & a){ + nonterminals_ = a.nonterminals_; + tree_ = a.tree_; + rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over + } + + ~aTextGrammarNode() { + delete rb_; + } + const GrammarIter* Extend(int symbol) const { + map::const_iterator i = tree_.find(symbol); + if (i == tree_.end()) return NULL; + return &i->second; + } + + const RuleBin* GetRules() const { + if (rb_) { + //rb_->Dump(); + } + return rb_; + } + + void DFS(); + + void visit (); //todo: make this as a function pointer + + vector path_; //vector of f_ nonterminals/terminals from the top to the current node; + set nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0 + map tree_; + aTextRuleBin* rb_; + + void print_path(){ //for debug only + cout<<"path="<::iterator it = tree_.begin(); it != tree_.end(); it++){ + (it->second).DFS(); + } +} + + +void aTextGrammarNode::visit( ){ + + cout<<"start visit()"< vsplits = grSplitNonterminals[*it]; //split *it into vsplits + + //iterate through next terminals/nonterminals in tree_ + vector tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_ + + for (map::iterator it = tree_.begin() ; it != tree_.end(); it++){ + cout<<"in visit(): inside for loop: wordID=="<first< >::const_iterator git = grSplitNonterminals.find(it->first * -1 ); + + if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split + cout<<"not split\n"; + tree_[it->first ].path_ = path_; + tree_[it->first ].path_.push_back(it->first); + cout<<"in visit() tree_[it->first ].path_= "; + tree_[it->first ].print_path(); + continue; + } + + + cout<<"tmp2"; + vector vsplits = grSplitNonterminals[it->first * -1]; + // vector vsplits = git->second; + cout<<"tmp3"; + // vector vsplits = agrammar_ ->splitNonterminals_[it->first * -1]; + cout <<"got vsplits"<first]); //cp the subtree to new nonterminal + tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees + tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1); + tree_[vsplits[i] * -1].print_path(); + } + + //remove the old node: + tobe_removedNTs.push_back(it->first); + + } + + for (int i =0; isubstituteF(path_, grSplitNonterminals); + + } + cout<<"visit() end"<root_; +} + + +void aTextGrammar::addNonterminal(WordID wordID){ + //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map) nonterminals_ of grammar + //if the input parameter wordID<0 then do nothing + + if (wordID <0){ //it is a nonterminal + + map::iterator it = nonterminals_.find(wordID * -1); + if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals) + nonterminals_[wordID * -1] = 1; + } +} + + + +void aTextGrammar::AddRule(const TRulePtr& rule) { + //add the LHS nonterminal to nonterminals_ map + + this->addNonterminal(rule->lhs_); + + if (rule->IsUnary()) { + rhs2unaries_[rule->f().front()].push_back(rule); + unaries_.push_back(rule); + if (rule->f().front() <0) + //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) + this->addNonterminal(rule->f().front()); + + + } else { + aTextGrammarNode* cur = &pimpl_->root_; + for (int i = 0; i < rule->f_.size(); ++i){ + if (rule->f_[i] <0){ + cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set + this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals + } + cur = &cur->tree_[rule->f_[i]]; + + } + if (cur->rb_ == NULL) + cur->rb_ = new aTextRuleBin; + cur->rb_->AddRule(rule); + + } +} + +static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) { + static_cast(extra)->AddRule(new_rule); +} + + +void aTextGrammar::ReadFromFile(const string& filename) { + ReadFile in(filename); + RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this); +} + +bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { + return (max_span_ >= distance); +} + + +////Linh added + +void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} + + +void aTextGrammar::printAllNonterminals() const{ + for (map::const_iterator it =nonterminals_.begin(); + it != nonterminals_.end(); it++){ + if (it->second >0){ + cout <first<<"\t"<first)< v_splits;//split nonterminal wordID into the list of nonterminals in v_splits + for (int i =0; i< this->max_split_; i++){ + string split_str = old_str + "+" + itos(i); + WordID splitID = TD::Convert(split_str); + v_splits.push_back(splitID); + nonterminals_[splitID] = 1; + } + + grSplitNonterminals[wordID] = v_splits; + //set wordID to be an inactive nonterminal + nonterminals_[wordID] = 0; + + //print split nonterminas of wordID + v_splits = grSplitNonterminals[wordID]; + cout<<"print split nonterminals\n"; + for (int i =0; i newrules; + //first unary rules: + //iterate through unary rules + for (int i =0; i < unaries_.size(); i++){ + TRulePtr rule = unaries_[i]; + WordID lhs = rule.lhs_; + if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal + && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too + ){ + vector rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals' + vector lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals' + for (int k =0; k e_ = rule->e_; + newrule -> f_ = rhs_nonterminals[k]->f_; + newrule->lhs_ = lhs_nonterminals[j]->lhs_; + newrule -> scores_ = rule->scores_; + newrule -> arity_ = (*it)->arity_; + newrules.push_back (newrule); + + //update + } + } + else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal + + + } + } + + // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){ + + // } + // if (rule->IsUnary()) { + // rhs2unaries_[rule->f().front()].push_back(rule); + // unaries_.push_back(rule); + // if (rule->f().front() <0) + // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) + // this->addNonterminal(rule->f().front()); + + + pimpl_->root_.DFS(); + +} + + +// void aTextGrammar::splitNonterminal0(WordID wordID){ + +// TextGrammarNode* cur = &pimpl_->root_; +// for (int i = 0; i < rule->f_.size(); ++i) +// cur = &cur->tree_[rule->f_[i]]; + +// } + +void aTextGrammar::splitAllNonterminals(){ + + +} + diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h new file mode 100644 index 00000000..d68c2548 --- /dev/null +++ b/gi/scfg/abc/old_agrammar.h @@ -0,0 +1,45 @@ +#ifndef _AGRAMMAR_H_ +#define _AGRAMMAR_H_ + +#include "grammar.h" + +using namespace std; + +class aTGImpl; + +struct aTextGrammar : public Grammar { + aTextGrammar(); + aTextGrammar(const std::string& file); + void SetMaxSpan(int m) { max_span_ = m; } + + virtual const GrammarIter* GetRoot() const; + void AddRule(const TRulePtr& rule); + void ReadFromFile(const std::string& filename); + virtual bool HasRuleForSpan(int i, int j, int distance) const; + const std::vector& GetUnaryRules(const WordID& cat) const; + + void setMaxSplit(int max_split); + + void printAllNonterminals() const; + void addNonterminal(WordID wordID); + + void splitAllNonterminals(); + void splitNonterminal(WordID wordID); + + // inline map > & getSplitNonterminals(){return splitNonterminals_;} + // map > splitNonterminals_; + private: + int max_span_; + boost::shared_ptr pimpl_; + int max_split_; + + map nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar + + + +}; + + + + +#endif diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp new file mode 100644 index 00000000..4d094488 --- /dev/null +++ b/gi/scfg/abc/scfg.cpp @@ -0,0 +1,152 @@ +#include "lattice.h" +#include "tdict.h" +#include "agrammar.h" +#include "bottom_up_parser.h" +#include "hg.h" +#include "hg_intersect.h" +#include "../utils/ParamsArray.h" + + +using namespace std; + +typedef aTextGrammar aGrammar; +aGrammar * load_grammar(string & grammar_filename){ + cerr<<"start_load_grammar "< vID; + TD::ConvertSentence(str , &vID); + Lattice lsentence; + lsentence.resize(vID.size()); + + + for (int i=0; i grammars(1, g); + + ExhaustiveBottomUpParser parser = ExhaustiveBottomUpParser(goal_sym, grammars); + + if (!parser.Parse(lsource, &hg)){ + + cerr<<"source sentence does not parse by the grammar!"<SetGoalNT(goal_sym); + cout<<"before split nonterminal"<PrintAllRules(); + /*split grammar*/ + cout<<"split NTs\n"; + cerr<<"first of all write all nonterminals"<printAllNonterminals(); + agrammar->setMaxSplit(2); + agrammar->splitNonterminal(4); + cout<<"after split nonterminal"<PrintAllRules(); + Hypergraph hg1; + if (! parseSentencePair(goal_sym, src, tgt, g, hg1) ){ + cerr<<"target sentence is not parsed by the grammar!\n"; + return 1; + + } + + hg1.PrintGraphviz(); + + + agrammar->splitNonterminal(15); + cout<<"after split nonterminal"<PrintAllRules(); + + + /*load training corpus*/ + + + /*for each sentence pair in training corpus*/ + + // forest.PrintGraphviz(); + /*calculate expected count*/ + +} diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp new file mode 100644 index 00000000..967a601d --- /dev/null +++ b/gi/scfg/abc/tmp.cpp @@ -0,0 +1,36 @@ +#include +#include +#include +using namespace std; + +int x = 5; + +class A{A(){x++;}}; +// { +// int a_; + +// }; + +class B: public A{ + + int b_; +}; + +int main(){ + + cout<<"Hello World"; + set s; + + s.insert(1); + s.insert(2); + + x++; + cout<<"x="< t; + t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4); + for(vector::iterator it = t.begin(); it != t.end(); it++){ + if (*it ==2) t.erase(it); + cout <<*it<