diff options
Diffstat (limited to 'gi')
-rw-r--r-- | gi/scfg/abc/Release/IConv.d | 3 | ||||
-rw-r--r-- | gi/scfg/abc/Release/Util.d | 8 | ||||
-rw-r--r-- | gi/scfg/abc/Release/agrammar.d | 205 | ||||
-rwxr-xr-x | gi/scfg/abc/Release/dict_test | bin | 1485797 -> 0 bytes | |||
-rw-r--r-- | gi/scfg/abc/Release/grammar | 13 | ||||
-rw-r--r-- | gi/scfg/abc/Release/grammar.pr | 13 | ||||
-rw-r--r-- | gi/scfg/abc/Release/makefile | 66 | ||||
-rw-r--r-- | gi/scfg/abc/Release/process_grammar.pl | 36 | ||||
-rwxr-xr-x | gi/scfg/abc/Release/scfg | bin | 4438644 -> 0 bytes | |||
-rw-r--r-- | gi/scfg/abc/Release/scfg.d | 213 | ||||
-rw-r--r-- | gi/scfg/abc/Release/sources.mk | 27 | ||||
-rw-r--r-- | gi/scfg/abc/Release/subdir.mk | 59 | ||||
-rw-r--r-- | gi/scfg/abc/Release/tmp.grammar | 2 | ||||
l--------- | gi/scfg/abc/Release/toy-grammar | 1 | ||||
-rwxr-xr-x | gi/scfg/abc/a.out | bin | 22639 -> 0 bytes | |||
-rw-r--r-- | gi/scfg/abc/agrammar.cc | 489 | ||||
-rw-r--r-- | gi/scfg/abc/agrammar.h | 116 | ||||
-rw-r--r-- | gi/scfg/abc/old_agrammar.cc | 383 | ||||
-rw-r--r-- | gi/scfg/abc/old_agrammar.h | 45 | ||||
-rw-r--r-- | gi/scfg/abc/scfg.cpp | 277 | ||||
-rw-r--r-- | gi/scfg/abc/tmp.cpp | 36 |
21 files changed, 0 insertions, 1992 deletions
diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d deleted file mode 100644 index 082cb15b..00000000 --- a/gi/scfg/abc/Release/IConv.d +++ /dev/null @@ -1,3 +0,0 @@ -IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp - -../../utils/IConv.hpp: diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d deleted file mode 100644 index 586d4d60..00000000 --- a/gi/scfg/abc/Release/Util.d +++ /dev/null @@ -1,8 +0,0 @@ -Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d deleted file mode 100644 index 553752ca..00000000 --- a/gi/scfg/abc/Release/agrammar.d +++ /dev/null @@ -1,205 +0,0 @@ -agrammar.d agrammar.o: ../agrammar.cc \ - /home/tnguyen/ws10smt/decoder/rule_lexer.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/filelib.h \ - /home/tnguyen/ws10smt/decoder/gzstream.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/home/tnguyen/ws10smt/decoder/rule_lexer.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/filelib.h: - -/home/tnguyen/ws10smt/decoder/gzstream.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_test Binary files differdeleted file mode 100755 index 1ba94218..00000000 --- a/gi/scfg/abc/Release/dict_test +++ /dev/null diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar deleted file mode 100644 index 75fac3a0..00000000 --- a/gi/scfg/abc/Release/grammar +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr deleted file mode 100644 index e4e327cf..00000000 --- a/gi/scfg/abc/Release/grammar.pr +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano . ||| [1] old man . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154 -[X] ||| el ||| the ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154 diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile deleted file mode 100644 index 25949e74..00000000 --- a/gi/scfg/abc/Release/makefile +++ /dev/null @@ -1,66 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -#-include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -endif - -#-include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables - -# All Target -all: scfg - -# Tool invocations - -# scfg.o: ../scfg.cpp -# @echo 'Building file: $<' -# @echo 'Invoking: GCC C++ Compiler' -# g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# @echo 'Finished building: $<' -# @echo ' ' - -scfg: $(OBJS) $(USER_OBJS) - @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - /bin/sh ../../../../libtool --tag=CXX --mode=link g++ -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc - @echo 'Finished building target: $@' - @echo ' ' -#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder -lpthread -ldl -lm $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg" -# Other Targets -clean: - -$(RM) $(OBJS)$(C++_DEPS)$(EXECUTABLES)$(CC_DEPS)$(C_DEPS)$(CPP_DEPS)$(CXX_DEPS)$(C_UPPER_DEPS) scfg - -@echo ' ' - -.PHONY: all clean dependents -.SECONDARY: - --include ../makefile.targets diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl deleted file mode 100644 index f82a8e5a..00000000 --- a/gi/scfg/abc/Release/process_grammar.pl +++ /dev/null @@ -1,36 +0,0 @@ -#!perl - -use warnings; -use strict; - -my $grammar_file = $ARGV[0]; - -my %nt_count; #maps nt--> count rules whose lhs is nt - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (<G>){ - - chomp(); - - s/\|\|\|.*//g; - s/\s//g; - - $nt_count{$_}++; -} - - -close (G); - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (<G>){ - - chomp(); - - (my $nt = $_) =~ s/\|\|\|.*//g; - $nt =~ s/\s//g; - - s/(.+\|\|\|.+\|\|\|.+\|\|\|).+/$1/g; - print $_ . " MinusLogP=" .(log($nt_count{$nt})) ."\n"; -} diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfg Binary files differdeleted file mode 100755 index 3faa52cc..00000000 --- a/gi/scfg/abc/Release/scfg +++ /dev/null diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d deleted file mode 100644 index b3cfbbb5..00000000 --- a/gi/scfg/abc/Release/scfg.d +++ /dev/null @@ -1,213 +0,0 @@ -scfg.d scfg.o: ../scfg.cpp \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /export/ws10smt/software/include/boost/pointer_cast.hpp \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h \ - /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \ - ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/export/ws10smt/software/include/boost/pointer_cast.hpp: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -/home/tnguyen/ws10smt/decoder/bottom_up_parser.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/hg_intersect.h: - -../../utils/ParamsArray.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk deleted file mode 100644 index 6c7070aa..00000000 --- a/gi/scfg/abc/Release/sources.mk +++ /dev/null @@ -1,27 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -C_UPPER_SRCS := -C_SRCS := -CPP_SRCS := -O_SRCS := -ASM_SRCS := -S_SRCS := -C++_SRCS := -CXX_SRCS := -CC_SRCS := -OBJ_SRCS := -OBJS := -C++_DEPS := -EXECUTABLES := -CC_DEPS := -C_DEPS := -CPP_DEPS := -CXX_DEPS := -C_UPPER_DEPS := - -# Every subdirectory with source files must be described here -SUBDIRS := \ -. \ - diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk deleted file mode 100644 index 49080b36..00000000 --- a/gi/scfg/abc/Release/subdir.mk +++ /dev/null @@ -1,59 +0,0 @@ - -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -# Add inputs and outputs from these tool invocations to the build variables -CPP_SRCS += \ -../../utils/Util.cc \ -../agrammar.cc \ -../scfg.cpp - - -OBJS += \ -./Util.o \ -./agrammar.o \ -./scfg.o - - -CPP_DEPS += \ -./Util.d \ -./agrammar.d \ -./scfg.d - -# Each subdirectory must supply rules for building sources it contributes -# %.o: ../%.cpp -# @echo 'Building file: $<' -# @echo 'Invoking: GCC C++ Compiler' -# g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# -# @echo ' ' - -%.o: ../../utils/%.cc - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - -%.o: ../../utils/%.c - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - -%.o: ../%.cpp - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - -%.o: ../%.cc - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar deleted file mode 100644 index 9df1b77d..00000000 --- a/gi/scfg/abc/Release/tmp.grammar +++ /dev/null @@ -1,2 +0,0 @@ -[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
\ No newline at end of file diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar deleted file mode 120000 index 50dea8df..00000000 --- a/gi/scfg/abc/Release/toy-grammar +++ /dev/null @@ -1 +0,0 @@ -/export/ws10smt/toy-grammar/
\ No newline at end of file diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.out Binary files differdeleted file mode 100755 index 0467acf0..00000000 --- a/gi/scfg/abc/a.out +++ /dev/null diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc deleted file mode 100644 index 016a0189..00000000 --- a/gi/scfg/abc/agrammar.cc +++ /dev/null @@ -1,489 +0,0 @@ -#include <algorithm> -#include <utility> -#include <map> - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include "agrammar.h" -#include "../utils/Util.h" - - - -aTRule::aTRule(TRulePtr rule){ - - this -> e_ = rule->e_; - this -> f_ = rule->f_; - this ->lhs_ = rule->lhs_; - this -> arity_ = rule->arity_; - this -> scores_ = rule->scores_; - ResetScore(0.00000001); -} - -bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ - if (rule1->lhs_ != rule2->lhs_) return false; - if (rule1->f_.size() != rule2->f_.size()) return false; - if (rule1->e_.size() != rule2->e_.size()) return false; - - for (int i=0; i<rule1->f_.size(); i++) - if (rule1->f_.at(i) != rule2->f_.at(i)) return false; - for (int i=0; i<rule1->e_.size(); i++) - if (rule1->e_.at(i) != rule2->e_.at(i)) return false; - return true; -} - - -//const vector<TRulePtr> Grammar::NO_RULES; - -void aRemoveRule(vector<TRulePtr> & v, const TRulePtr & rule){ // remove rule from v if found - for (int i=0; i< v.size(); i++) - if (equal(v[i], rule )){ - // cout<<"erase rule from vector:"<<rule->AsString()<<endl; - v.erase(v.begin()+i); - } -} - -void aRemoveRule(vector<NTRule> & v, const NTRule & ntrule){ // remove rule from v if found - for (int i=0; i< v.size(); i++) - if (equal(v[i].rule_, ntrule.rule_ )){ - // cout<<"erase rule from vector:"<<rule->AsString()<<endl; - v.erase(v.begin()+i); - } -} - -struct aTextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - - void RemoveRule(const TRulePtr & rule ){ - aRemoveRule(rules_, rule); - } - - - int Arity() const { - return rules_.front()->Arity(); - } - - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - private: - vector<TRulePtr> rules_; -}; - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - map<WordID, aTextGrammarNode> tree_; - aTextRuleBin* rb_; -}; - -struct aTGImpl { - aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const string& file) : - max_span_(10), - pimpl_(new aTGImpl) { - ReadFromFile(file); -} - -const GrammarIter* aTextGrammar::GetRoot() const { - return &pimpl_->root_; -} - -void aTextGrammar::SetGoalNT(const string & goal_str){ - goalID = TD::Convert(goal_str); - -} - -void getNTRule( const TRulePtr & rule, map<WordID, NTRule> & ntrule_map){ - - NTRule lhs_ntrule(rule, rule->lhs_ * -1); - ntrule_map[rule->lhs_ * -1] = lhs_ntrule; - - for (int i=0; i< (rule->f_).size(); i++) - if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ - NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); - ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; - } -} - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - } - - //add the rule to lhs_rules_ - lhs_rules_[rule->lhs_* -1].push_back(rule); - - //add the rule to nt_rules_ - map<WordID, NTRule> ntrule_map; - getNTRule (rule, ntrule_map); - for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ - nt_rules_[it->first].push_back(it->second); - } -} - -void aTextGrammar::RemoveRule(const TRulePtr & rule){ - // cout<<"Remove rule: "<<rule->AsString()<<endl; - if (rule->IsUnary()) { - aRemoveRule(rhs2unaries_[rule->f().front()], rule); - aRemoveRule(unaries_, rule); - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; -// if (cur->rb_ == NULL) -// cur->rb_ = new aTextRuleBin; - cur->rb_->RemoveRule(rule); - } - - //remove rules from lhs_rules_ - - aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); - - - //remove the rule from nt_rules_ - map<WordID, NTRule> ntrule_map; - getNTRule (rule, ntrule_map); - for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ - aRemoveRule(nt_rules_[it->first], it->second); - } - -} - -void aTextGrammar::RemoveNonterminal(WordID wordID){ - vector<NTRule> rules = nt_rules_[wordID]; -// // remove the nonterminal from ntrules_ - nt_rules_.erase(wordID); - for (int i =0; i<rules.size(); i++) - RemoveRule(rules[i].rule_); - sum_probs_.erase(wordID); - cnt_rules.erase(wordID); - -} - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - - - -void aTextGrammar::AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts){ - - vector<NTRule> rules = nt_rules_[nt_old]; - - // cout<<"\n\n\n start add splitting rules"<<endl; - - const double epsilon = 0.001; - for (int i=0; i<rules.size(); i++){ - NTRule old_rule = rules.at(i); - vector<int> ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old - //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' - //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, - //hence the rules' new probabilities have to divide to cnt also - // cout<<"splitting NT in rule "<<old_rule.rule_->AsString()<<endl; - -// cout<<"nt position in the rules"<<endl; -// for (int j=0; j<ntPos.size();j++) cout<<ntPos[j]<<" "; cout<<endl; - - int cnt_newrules = pow( nts.size(), ntPos.size() ); - // cout<<"cnt_newrules="<<cnt_newrules<<endl; - - double log_nts_size = log(nts.size()); - - - map<WordID, int> cnt_addepsilon; //cnt_addepsilon and cont_minusepsilon to track the number of rules epsilon is added or minus for each lhs nonterminal, ideally we want these two numbers are equal - map<WordID, int> cnt_minusepsilon; - cnt_addepsilon[old_rule.rule_->lhs_] = 0; - cnt_minusepsilon[old_rule.rule_->lhs_] = 0; - for (int j =0; j<nts.size(); j++) { cnt_addepsilon[nts[j] ] = 0; cnt_minusepsilon[nts[j] ] = 0;} - - - for (int j=0; j<cnt_newrules; j++){ //each j represents a new rule - //convert j to a vector of size ntPos.size(), each entry in the vector >=0 and <nts.size() - int mod = nts.size(); - vector <int> j_vector(ntPos.size(), 0); //initiate the vector to all 0 - int j_tmp =j; - for (int k=0; k<ntPos.size(); k++){ - j_vector[k] = j_tmp % mod; - j_tmp = (j_tmp - j_vector[k]) / mod; - } - // cout<<"print vector j_vector"<<endl; - // for (int k=0; k<ntPos.size();k++) cout<<j_vector[k]<<" "; cout<<endl; - //now use the vector to create a new rule - TRulePtr newrule(new aTRule()); - - newrule -> e_ = (old_rule.rule_)->e_; - newrule -> f_ = old_rule.rule_->f_; - newrule->lhs_ = old_rule.rule_->lhs_; - newrule -> arity_ = old_rule.rule_->arity_; - newrule -> scores_ = old_rule.rule_->scores_; - - // cout<<"end up update score\n"; - if (ntPos[0] == -1){ //update the lhs - newrule->lhs_ = nts[j_vector[0]] * -1; - - //score has to randomly add/minus a small epsilon to break the balance - if (nts.size() >1 && ntPos.size() >1){ - // cout<<"start to add/minus epsilon"<<endl; - if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon - newrule-> scores_ -= epsilon; - else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) - newrule-> scores_ += epsilon; - else{ - double random = rand()/RAND_MAX; - if (random > .5){ - newrule-> scores_ += epsilon; - cnt_addepsilon[newrule->lhs_]++; - } - else{ - newrule-> scores_ -= epsilon; - cnt_minusepsilon[newrule->lhs_]++; - } - } - } - - - for (int k=1; k<ntPos.size(); k++){//update f_ - // cout<<"ntPos[k]="<<ntPos[k]<<endl; - newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; //update the ntPos[k-1]-th nonterminal in f_ to the j_vector[k] NT in nts - } - newrule -> scores_ += (ntPos.size() -1) * log_nts_size; - - - } - else{ - //score has to randomly add/minus a small epsilon to break the balance - if ( ntPos.size() >0 && nts.size()>1){ - // cout<<"start to add/minus epsilon"<<endl; - if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon - newrule-> scores_ -= epsilon; - else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 ) - newrule-> scores_ += epsilon; - else{ - double random = rand()/RAND_MAX; - if (random > .5){ - newrule-> scores_ += epsilon; - cnt_addepsilon[newrule->lhs_]++; - } - else{ - newrule-> scores_ -= epsilon; - cnt_minusepsilon[newrule->lhs_]++; - } - } - } - - - for (int k=0; k<ntPos.size(); k++){ //update f_ - // cout<<"ntPos[k]="<<ntPos[k]<<endl; - newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; - } - newrule -> scores_ += ntPos.size() * log_nts_size; - } - this->AddRule (newrule); - }//add new rules for each grammar rules - - } //iterate through all grammar rules - -} - - -void aTextGrammar::splitNonterminal(WordID wordID){ - - //first added the splits nonterminal into the TD dictionary - - string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split - - vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits - for (int i =0; i< this->max_split_; i++){ - string split_str = old_str + "+" + itos(i); - WordID splitID = TD::Convert(split_str); - v_splits.push_back(splitID); - - } - - // grSplitNonterminals[wordID] = v_splits; - - //print split nonterminas of wordID - // v_splits = grSplitNonterminals[wordID]; - // cout<<"print split nonterminals\n"; - // for (int i =0; i<v_splits.size(); i++) - // cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - - AddSplitNonTerminal(wordID, v_splits); - RemoveNonterminal(wordID); - - // grSplitNonterminals.erase (grSplitNonterminals.find(WordID) ); - - if (wordID == goalID){ //add rule X-> X1; X->X2,... if X is the goal NT - for (int i =0; i<v_splits.size(); i++){ - TRulePtr rule (new aTRule()); - rule ->lhs_ = goalID * -1; - rule ->f_.push_back(v_splits[i] * -1); - rule->e_.push_back(0); - - rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); - AddRule(rule); - } - - } - -} - - -void aTextGrammar::splitAllNonterminals(){ - map<WordID, vector<TRulePtr> >::const_iterator it; - vector<WordID> v ; // WordID >0 - for (it = lhs_rules_.begin(); it != lhs_rules_.end(); it++) //iterate through all nts - if (it->first != goalID || lhs_rules_.size() ==1) - v.push_back(it->first); - - for (int i=0; i< v.size(); i++) - splitNonterminal(v[i]); -} - - -void aTextGrammar::PrintAllRules(const string & filename) const{ - - - cerr<<"print grammar to "<<filename<<endl; - - ofstream outfile(filename.c_str()); - if (!outfile.good()) { - cerr << "error opening output file " << filename << endl; - exit(1); - } - - map<WordID, vector<TRulePtr > >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - - vector<TRulePtr> v = it-> second; - for (int i =0; i< v.size(); i++){ - outfile<<v[i]->AsString()<<"\t"<<endl; - } - } -} - - -void aTextGrammar::ResetScore(){ - - map<WordID, vector<TRulePtr > >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - vector<TRulePtr> v = it-> second; - for (int i =0; i< v.size(); i++){ - // cerr<<"Reset score of Rule "<<v[i]->AsString()<<endl; - boost::static_pointer_cast<aTRule>(v[i])->ResetScore(alpha_ /v.size()); - } - lhs_rules_[it->first] = v; - sum_probs_[it->first] = alpha_; - } - -} - -void aTextGrammar::UpdateScore(){ - - map<WordID, vector<TRulePtr > >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - vector<TRulePtr> v = it-> second; - for (int i =0; i< v.size(); i++){ - boost::static_pointer_cast<aTRule>(v[i])->UpdateScore(sum_probs_[it->first] ); - } - - // cerr<<"sum_probs_[it->first] ="<<sum_probs_[it->first] <<endl; - sum_probs_[it->first] = alpha_; - } - -} - - -void aTextGrammar::UpdateHgProsteriorProb(Hypergraph & hg){ - std::vector<prob_t> posts ; - - prob_t goal_score = hg.ComputeEdgePosteriors(1, &posts); - for (int i =0; i<posts.size(); i++){ - - //cout<<posts[i]<<endl; - Hypergraph::Edge& e = hg.edges_[i]; - string goalstr("Goal"); - string str_lhs = TD::Convert(e.rule_->lhs_ * -1); - - if (str_lhs.find(goalstr) != string::npos) - continue; - - // cerr<<e.rule_->AsString()<<endl; - // cerr<<e.rule_->parent_rule_->AsString()<<endl; - - boost::static_pointer_cast<aTRule>(e.rule_->parent_rule_)->AddProb(posts[i] / goal_score); - // cerr<<"add count for rule\n"; -// cerr<<"posts[i]="<<posts[i]<<" goal_score="<<goal_score<<endl; -// cerr<<"posts[i] /goal_score="<<(posts[i] /goal_score)<<endl; - sum_probs_[e.rule_->parent_rule_->lhs_* -1 ] += posts[i] /goal_score; - - } - - -} - - -void aTextGrammar::PrintNonterminalRules(WordID nt) const{ - vector< NTRule > v; - map<WordID, vector<NTRule> >::const_iterator mit= nt_rules_.find(nt); - if (mit == nt_rules_.end()) - return; - - v = mit->second; - - for (vector<NTRule>::const_iterator it = v.begin(); it != v.end(); it++) - cout<<it->rule_->AsString()<<endl; -} - -static void AddRuleHelper(const TRulePtr& new_rule, void* extra) { - aTRule *p = new aTRule(new_rule); - - static_cast<aTextGrammar*>(extra)->AddRule(TRulePtr(p)); -} - -void aTextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - diff --git a/gi/scfg/abc/agrammar.h b/gi/scfg/abc/agrammar.h deleted file mode 100644 index 0910aae6..00000000 --- a/gi/scfg/abc/agrammar.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef AGRAMMAR_H_ -#define AGRAMMAR_H_ - -#include "grammar.h" -#include "hg.h" - - -using namespace std; - -class aTRule: public TRule{ - public: - aTRule() : TRule(){ResetScore(0.00000001); } - aTRule(TRulePtr rule_); - - void ResetScore(double initscore){//cerr<<"Reset Score "<<this->AsString()<<endl; - sum_scores_.set_value(FD::Convert("Prob"), initscore);} - void AddProb(double p ){ - // cerr<<"in AddProb p="<<p<<endl; - // cerr<<"prob sumscores ="<<sum_scores_[FD::Convert("Prob")]<<endl; - sum_scores_.add_value(FD::Convert("Prob"), p); - // cerr<<"after AddProb\n"; - } - - void UpdateScore(double sumprob){ - double minuslogp = 0 - log( sum_scores_.value(FD::Convert("Prob")) /sumprob); - if (sumprob< sum_scores_.value(FD::Convert("Prob"))){ - cerr<<"UpdateScore sumprob="<<sumprob<< " sum_scores_.value(FD::Convert(\"Prob\"))="<< sum_scores_.value(FD::Convert("Prob"))<< this->AsString()<<endl; - exit(1); - } - this->scores_.set_value(FD::Convert("MinusLogP"), minuslogp); - - } - private: - SparseVector<double> sum_scores_; -}; - - -class aTGImpl; -struct NTRule{ - - NTRule(){}; - NTRule(const TRulePtr & rule, WordID nt){ - nt_ = nt; - rule_ = rule; - - if (rule->lhs_ * -1 == nt) - ntPos_.push_back(-1); - - for (int i=0; i< rule->f().size(); i++) - if (rule->f().at(i) * -1 == nt) - ntPos_.push_back(i); - - - } - - TRulePtr rule_; - WordID nt_; //the labelID of the nt (nt_>0); - - vector<int> ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_() - //i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP - -}; - - -struct aTextGrammar : public Grammar { - aTextGrammar(); - aTextGrammar(const std::string& file); - void SetMaxSpan(int m) { max_span_ = m; } - - virtual const GrammarIter* GetRoot() const; - void AddRule(const TRulePtr& rule); - void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j, int distance) const; - const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; - - void AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts); - void setMaxSplit(int max_split); - void splitNonterminal(WordID wordID); - - - void splitAllNonterminals(); - - void PrintAllRules(const string & filename) const; - void PrintNonterminalRules(WordID nt) const; - void SetGoalNT(const string & goal_str); - - void ResetScore(); - - void UpdateScore(); - - void UpdateHgProsteriorProb(Hypergraph & hg); - - void set_alpha(double alpha){alpha_ = alpha;} - private: - - void RemoveRule(const TRulePtr & rule); - void RemoveNonterminal(WordID wordID); - - int max_span_; - int max_split_; - boost::shared_ptr<aTGImpl> pimpl_; - - map <WordID, vector<TRulePtr> > lhs_rules_;// WordID >0 - map <WordID, vector<NTRule> > nt_rules_; - - map <WordID, double> sum_probs_; - map <WordID, double> cnt_rules; - - double alpha_; - - // map<WordID, vector<WordID> > grSplitNonterminals; - WordID goalID; -}; - - -#endif diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc deleted file mode 100644 index 33d70dfc..00000000 --- a/gi/scfg/abc/old_agrammar.cc +++ /dev/null @@ -1,383 +0,0 @@ -#include "agrammar.h" -#include "Util.h" - -#include <algorithm> -#include <utility> -#include <map> - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include <iostream> -#include <fstream> - -map<WordID, vector<WordID> > grSplitNonterminals; -//const vector<TRulePtr> Grammar::NO_RULES; - - -// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){ -// vector<TRulePtr> vRules; //outputs - -// vector<WordID> f = rule->f(); -// vector<vector<WordID> > newfvector; -// for (int i =0; i< f.size(); i++){ -// if (f[i] == wordID){ -// newfvector.push_back(v); -// } -// else -// newfvector.push_back(vector<WordID> (1, f[i])); -// } - -// //now creates new rules; - - -// return vRules; -// } - - -struct aTextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - int Arity() const { - return rules_.front()->Arity(); - } - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - - - vector<TRulePtr> getRules(){ return rules_;} - - - void substituteF(vector<WordID> & f_path, map<WordID, vector<WordID> > & grSplitNonterminals){ - //this substituteF method is different with substituteF procedure found in cdec code; - // - //aTextRuleBin has a collection of rules with the same f() on the rhs, - //substituteF() replaces the f_ of all the rules with f_path vector, - //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals - - vector <TRulePtr> newrules; - for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){ - assert(f_path.size() == (*it)->f_.size()); - - if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){ - (*it)->f_ = f_path; - } - else{ // split the lhs NT, - vector<WordID> new_lhs = grSplitNonterminals[ (*it)->lhs_ ]; - for (vector<WordID>::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){ - TRulePtr newrule; - newrule -> e_ = (*it)->e_; - newrule -> f_ = (*it)->f_; - newrule->lhs_ = *vit; - newrule -> scores_ = (*it)->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - } - rules_.erase(it); - } - } - - //now add back newrules(output of splitting lhs_) to rules_ - rules_.insert(newrules.begin(),newrules.begin(), newrules.end()); - } - -private: - vector<TRulePtr> rules_; -}; - - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - - aTextGrammarNode(const aTextGrammarNode & a){ - nonterminals_ = a.nonterminals_; - tree_ = a.tree_; - rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over - } - - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - void DFS(); - - void visit (); //todo: make this as a function pointer - - vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node; - set<WordID> nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0 - map<WordID, aTextGrammarNode> tree_; - aTextRuleBin* rb_; - - void print_path(){ //for debug only - cout<<"path="<<endl; - for (int i =0; i< path_.size(); i++) - cout<<path_[i]<<" "; - cout<<endl; - } -}; - -void aTextGrammarNode::DFS(){ //because the grammar is a tree without circle, DFS does not require to color the nodes - - visit(); - - for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin(); it != tree_.end(); it++){ - (it->second).DFS(); - } -} - - -void aTextGrammarNode::visit( ){ - - cout<<"start visit()"<<endl; - - cout<<"got grSplitNonterminals"<<endl; -// if (grSplitNonterminals.find(*it) != grSplitNonterminals.end()){ //split this *it nonterminal -// vector<WordID> vsplits = grSplitNonterminals[*it]; //split *it into vsplits - - //iterate through next terminals/nonterminals in tree_ - vector<WordID> tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_ - - for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){ - cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl; - - map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 ); - - if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split - cout<<"not split\n"; - tree_[it->first ].path_ = path_; - tree_[it->first ].path_.push_back(it->first); - cout<<"in visit() tree_[it->first ].path_= "; - tree_[it->first ].print_path(); - continue; - } - - - cout<<"tmp2"; - vector<WordID> vsplits = grSplitNonterminals[it->first * -1]; - // vector<WordID> vsplits = git->second; - cout<<"tmp3"; - // vector<WordID> vsplits = agrammar_ ->splitNonterminals_[it->first * -1]; - cout <<"got vsplits"<<endl; - for (int i =0 ; i<vsplits.size(); i++){ - // nonterminals_.insert(vsplits[i]); //add vsplits[i] into nonterminals_ of the current TextGrammarNode - tree_[vsplits[i] * -1] = aTextGrammarNode(tree_[it->first]); //cp the subtree to new nonterminal - tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees - tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1); - tree_[vsplits[i] * -1].print_path(); - } - - //remove the old node: - tobe_removedNTs.push_back(it->first); - - } - - for (int i =0; i<tobe_removedNTs.size(); i++) - tree_.erase(tobe_removedNTs[i]); - - if (tree_.size() ==0){ //the last (terminal/nonterminal - cout<<"inside visit(): the last terminal/nonterminal"<<endl; - rb_->substituteF(path_, grSplitNonterminals); - - } - cout<<"visit() end"<<endl; -} - -struct aTGImpl { - aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const std::string& file) : - max_span_(10), - pimpl_(new aTGImpl) { - ReadFromFile(file); -} - - -const GrammarIter* aTextGrammar::GetRoot() const { - return &pimpl_->root_; -} - - -void aTextGrammar::addNonterminal(WordID wordID){ - //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map<WordID, int>) nonterminals_ of grammar - //if the input parameter wordID<0 then do nothing - - if (wordID <0){ //it is a nonterminal - - map<WordID, int>::iterator it = nonterminals_.find(wordID * -1); - if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals) - nonterminals_[wordID * -1] = 1; - } -} - - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - //add the LHS nonterminal to nonterminals_ map - - this->addNonterminal(rule->lhs_); - - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - if (rule->f().front() <0) - //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - this->addNonterminal(rule->f().front()); - - - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i){ - if (rule->f_[i] <0){ - cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set - this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals - } - cur = &cur->tree_[rule->f_[i]]; - - } - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - - } -} - -static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) { - static_cast<aTextGrammar*>(extra)->AddRule(new_rule); -} - - -void aTextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - - -////Linh added - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - -void aTextGrammar::printAllNonterminals() const{ - for (map<WordID, int>::const_iterator it =nonterminals_.begin(); - it != nonterminals_.end(); it++){ - if (it->second >0){ - cout <<it->first<<"\t"<<TD::Convert(it->first)<<endl; - } - } - -} - - -void aTextGrammar::splitNonterminal(WordID wordID){ - - //first added the splits nonterminal into the TD dictionary - - string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split - - vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits - for (int i =0; i< this->max_split_; i++){ - string split_str = old_str + "+" + itos(i); - WordID splitID = TD::Convert(split_str); - v_splits.push_back(splitID); - nonterminals_[splitID] = 1; - } - - grSplitNonterminals[wordID] = v_splits; - //set wordID to be an inactive nonterminal - nonterminals_[wordID] = 0; - - //print split nonterminas of wordID - v_splits = grSplitNonterminals[wordID]; - cout<<"print split nonterminals\n"; - for (int i =0; i<v_splits.size(); i++) - cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - - - //now update in grammar rules and gramar tree: - vector<TRulePtr> newrules; - //first unary rules: - //iterate through unary rules - for (int i =0; i < unaries_.size(); i++){ - TRulePtr rule = unaries_[i]; - WordID lhs = rule.lhs_; - if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal - && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too - ){ - vector<WordID> rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals' - vector<WordID> lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals' - for (int k =0; k <rhs_nonterminals.size(); k++) - for (int j =0; j <lhs_nonterminals.size(); j++){ - TRulePtr newrule; - newrule -> e_ = rule->e_; - newrule -> f_ = rhs_nonterminals[k]->f_; - newrule->lhs_ = lhs_nonterminals[j]->lhs_; - newrule -> scores_ = rule->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - - //update - } - } - else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal - - - } - } - - // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){ - - // } - // if (rule->IsUnary()) { - // rhs2unaries_[rule->f().front()].push_back(rule); - // unaries_.push_back(rule); - // if (rule->f().front() <0) - // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - // this->addNonterminal(rule->f().front()); - - - pimpl_->root_.DFS(); - -} - - -// void aTextGrammar::splitNonterminal0(WordID wordID){ - -// TextGrammarNode* cur = &pimpl_->root_; -// for (int i = 0; i < rule->f_.size(); ++i) -// cur = &cur->tree_[rule->f_[i]]; - -// } - -void aTextGrammar::splitAllNonterminals(){ - - -} - diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h deleted file mode 100644 index d68c2548..00000000 --- a/gi/scfg/abc/old_agrammar.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _AGRAMMAR_H_ -#define _AGRAMMAR_H_ - -#include "grammar.h" - -using namespace std; - -class aTGImpl; - -struct aTextGrammar : public Grammar { - aTextGrammar(); - aTextGrammar(const std::string& file); - void SetMaxSpan(int m) { max_span_ = m; } - - virtual const GrammarIter* GetRoot() const; - void AddRule(const TRulePtr& rule); - void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j, int distance) const; - const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; - - void setMaxSplit(int max_split); - - void printAllNonterminals() const; - void addNonterminal(WordID wordID); - - void splitAllNonterminals(); - void splitNonterminal(WordID wordID); - - // inline map<WordID, vector<WordID> > & getSplitNonterminals(){return splitNonterminals_;} - // map<WordID, vector<WordID> > splitNonterminals_; - private: - int max_span_; - boost::shared_ptr<aTGImpl> pimpl_; - int max_split_; - - map<WordID, int> nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar - - - -}; - - - - -#endif diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp deleted file mode 100644 index 1e59fb4a..00000000 --- a/gi/scfg/abc/scfg.cpp +++ /dev/null @@ -1,277 +0,0 @@ -#include <iostream> -#include <fstream> - -#include <boost/shared_ptr.hpp> -#include <boost/pointer_cast.hpp> -#include "lattice.h" -#include "tdict.h" -#include "agrammar.h" -#include "bottom_up_parser.h" -#include "hg.h" -#include "hg_intersect.h" -#include "../utils/ParamsArray.h" - - -using namespace std; - -vector<string> src_corpus; -vector<string> tgt_corpus; - -bool openParallelCorpora(string & input_filename){ - ifstream input_file; - - input_file.open(input_filename.c_str()); - if (!input_file) { - cerr << "Cannot open input file " << input_filename << ". Exiting..." << endl; - return false; - } - - int line =0; - while (!input_file.eof()) { - // get a line of source language data - // cerr<<"new line "<<ctr<<endl; - string str; - - getline(input_file, str); - line++; - if (str.length()==0){ - cerr<<" sentence number "<<line<<" is empty, skip the sentence\n"; - continue; - } - string delimiters("|||"); - - vector<string> v = tokenize(str, delimiters); - - if ( (v.size() != 2) and (v.size() != 3) ) { - cerr<<str<<endl; - cerr<<" source or target sentence is not found in sentence number "<<line<<" , skip the sentence\n"; - continue; - } - - src_corpus.push_back(v[0]); - tgt_corpus.push_back(v[1]); - } - return true; -} - - -typedef aTextGrammar aGrammar; -aGrammar * load_grammar(string & grammar_filename){ - cerr<<"start_load_grammar "<<grammar_filename<<endl; - - aGrammar * test = new aGrammar(grammar_filename); - - return test; -} - -Lattice convertSentenceToLattice(const string & str){ - - std::vector<WordID> vID; - TD::ConvertSentence(str , &vID); - Lattice lsentence; - lsentence.resize(vID.size()); - - for (int i=0; i<vID.size(); i++){ - - lsentence[i].push_back( LatticeArc(vID[i], 0.0, 1) ); - } - - // if(!lsentence.IsSentence()) - // cout<<"not a sentence"<<endl; - - return lsentence; - -} - -bool parseSentencePair(const string & goal_sym, const string & src, const string & tgt, GrammarPtr & g, Hypergraph &hg){ - - - // cout<<" Start parse the sentence pairs\n"<<endl; - Lattice lsource = convertSentenceToLattice(src); - - //parse the source sentence by the grammar - - vector<GrammarPtr> grammars(1, g); - - ExhaustiveBottomUpParser parser = ExhaustiveBottomUpParser(goal_sym, grammars); - - if (!parser.Parse(lsource, &hg)){ - - cerr<<"source sentence is not parsed by the grammar!"<<endl; - return false; - } - - //intersect the hg with the target sentence - Lattice ltarget = convertSentenceToLattice(tgt); - - //forest.PrintGraphviz(); - if (!HG::Intersect(ltarget, & hg)) return false; - - SparseVector<double> reweight; - - reweight.set_value(FD::Convert("MinusLogP"), -1 ); - hg.Reweight(reweight); - - return true; - -} - - - - -int main(int argc, char** argv){ - - ParamsArray params(argc, argv); - params.setDescription("scfg models"); - - params.addConstraint("grammar_file", "grammar file (default ./grammar.pr )", true); // optional - - params.addConstraint("input_file", "parallel input file (default ./parallel_corpora)", true); //optional - - params.addConstraint("output_file", "grammar output file (default ./grammar_output)", true); //optional - - params.addConstraint("goal_symbol", "top nonterminal symbol (default: X)", true); //optional - - params.addConstraint("split", "split one nonterminal into 'split' nonterminals (default: 2)", true); //optional - - params.addConstraint("prob_iters", "number of iterations (default: 10)", true); //optional - - params.addConstraint("split_iters", "number of splitting iterations (default: 3)", true); //optional - - params.addConstraint("alpha", "alpha (default: 0.1)", true); //optional - - if (!params.runConstraints("scfg")) { - return 0; - } - cerr<<"get parametters\n\n\n"; - - - string grammar_file = params.asString("grammar_file", "./grammar.pr"); - - string input_file = params.asString("input_file", "parallel_corpora"); - - string output_file = params.asString("output_file", "grammar_output"); - - string goal_sym = params.asString("goal_symbol", "X"); - - int max_split = atoi(params.asString("split", "2").c_str()); - - int prob_iters = atoi(params.asString("prob_iters", "2").c_str()); - int split_iters = atoi(params.asString("split_iters", "1").c_str()); - double alpha = atof(params.asString("alpha", ".001").c_str()); - - ///// - cerr<<"grammar_file ="<<grammar_file<<endl; - cerr<<"input_file ="<< input_file<<endl; - cerr<<"output_file ="<< output_file<<endl; - cerr<<"goal_sym ="<< goal_sym<<endl; - cerr<<"max_split ="<< max_split<<endl; - cerr<<"prob_iters ="<< prob_iters<<endl; - cerr<<"split_iters ="<< split_iters<<endl; - cerr<<"alpha ="<< alpha<<endl; - ////////////////////////// - - cerr<<"\n\nLoad parallel corpus...\n"; - if (! openParallelCorpora(input_file)) - exit(1); - - cerr<<"Load grammar file ...\n"; - aGrammar * agrammar = load_grammar(grammar_file); - agrammar->SetGoalNT(goal_sym); - agrammar->setMaxSplit(max_split); - agrammar->set_alpha(alpha); - - srand(123); - - GrammarPtr g( agrammar); - Hypergraph hg; - - int data_size = src_corpus.size(); - int cnt_unparsed =0; - for (int i =0; i <split_iters; i++){ - - cerr<<"Split Nonterminals, iteration "<<(i+1)<<endl; - agrammar->PrintAllRules(output_file+".s" + itos(i+1)); - agrammar->splitAllNonterminals(); - - //vector<string> src_corpus; - //vector<string> tgt_corpus; - - for (int j=0; j<prob_iters; j++){ - cerr<<"reset grammar score\n"; - agrammar->ResetScore(); - // cerr<<"done reset grammar score\n"; - for (int k=0; k <data_size; k++){ - string src = src_corpus[k]; - - string tgt = tgt_corpus[k]; - cerr <<"parse sentence pair: "<<src<<" ||| "<<tgt<<endl; - - if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ - cerr<<"target sentence is not parsed by the grammar!\n"; - //return 1; - cnt_unparsed++; - continue; - - } - - cerr<<"update edge posterior prob"<<endl; - boost::static_pointer_cast<aGrammar>(g)->UpdateHgProsteriorProb(hg); - hg.clear(); - if (k%1000 ==0 ) cerr<<"sentences "<<k<<endl; - } - cerr<<"cnt_unparased="<<cnt_unparsed<<endl; - boost::static_pointer_cast<aGrammar>(g)->UpdateScore(); - } - boost::static_pointer_cast<aGrammar>(g)->PrintAllRules(output_file+".e" + itos(i+1)); - } - - - - - - - - - - // // agrammar->ResetScore(); - // // agrammar->UpdateScore(); - // if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ - // cerr<<"target sentence is not parsed by the grammar!\n"; - // return 1; - - // } - // // hg.PrintGraphviz(); - // //hg.clear(); - - // agrammar->PrintAllRules(); - // /*split grammar*/ - // cout<<"split NTs\n"; - // cerr<<"first of all write all nonterminals"<<endl; - // // agrammar->printAllNonterminals(); - // cout<<"after split nonterminal"<<endl; - // agrammar->PrintAllRules(); - // Hypergraph hg1; - // if (! parseSentencePair(goal_sym, src, tgt, g, hg1) ){ - // cerr<<"target sentence is not parsed by the grammar!\n"; - // return 1; - - // } - - // hg1.PrintGraphviz(); - - - // agrammar->splitNonterminal(15); - // cout<<"after split nonterminal"<<TD::Convert(15)<<endl; - // agrammar->PrintAllRules(); - - - /*load training corpus*/ - - - /*for each sentence pair in training corpus*/ - - // forest.PrintGraphviz(); - /*calculate expected count*/ - -} diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp deleted file mode 100644 index 967a601d..00000000 --- a/gi/scfg/abc/tmp.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include <iostream> -#include <set> -#include <vector> -using namespace std; - -int x = 5; - -class A{A(){x++;}}; -// { -// int a_; - -// }; - -class B: public A{ - - int b_; -}; - -int main(){ - - cout<<"Hello World"; - set<int> s; - - s.insert(1); - s.insert(2); - - x++; - cout<<"x="<<x<<endl; - - vector<int> t; - t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4); - for(vector<int>::iterator it = t.begin(); it != t.end(); it++){ - if (*it ==2) t.erase(it); - cout <<*it<<endl; - } -} |