From ab38dc57a6a64aa7ef60a845a4176e18e1ac7f27 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 27 May 2012 00:43:48 -0400 Subject: remove dead code --- gi/scfg/abc/Release/IConv.d | 3 - gi/scfg/abc/Release/Util.d | 8 - gi/scfg/abc/Release/agrammar.d | 205 -------------- gi/scfg/abc/Release/dict_test | Bin 1485797 -> 0 bytes gi/scfg/abc/Release/grammar | 13 - gi/scfg/abc/Release/grammar.pr | 13 - gi/scfg/abc/Release/makefile | 66 ----- gi/scfg/abc/Release/process_grammar.pl | 36 --- gi/scfg/abc/Release/scfg | Bin 4438644 -> 0 bytes gi/scfg/abc/Release/scfg.d | 213 -------------- gi/scfg/abc/Release/sources.mk | 27 -- gi/scfg/abc/Release/subdir.mk | 59 ---- gi/scfg/abc/Release/tmp.grammar | 2 - gi/scfg/abc/Release/toy-grammar | 1 - gi/scfg/abc/a.out | Bin 22639 -> 0 bytes gi/scfg/abc/agrammar.cc | 489 --------------------------------- gi/scfg/abc/agrammar.h | 116 -------- gi/scfg/abc/old_agrammar.cc | 383 -------------------------- gi/scfg/abc/old_agrammar.h | 45 --- gi/scfg/abc/scfg.cpp | 277 ------------------- gi/scfg/abc/tmp.cpp | 36 --- 21 files changed, 1992 deletions(-) delete mode 100644 gi/scfg/abc/Release/IConv.d delete mode 100644 gi/scfg/abc/Release/Util.d delete mode 100644 gi/scfg/abc/Release/agrammar.d delete mode 100755 gi/scfg/abc/Release/dict_test delete mode 100644 gi/scfg/abc/Release/grammar delete mode 100644 gi/scfg/abc/Release/grammar.pr delete mode 100644 gi/scfg/abc/Release/makefile delete mode 100644 gi/scfg/abc/Release/process_grammar.pl delete mode 100755 gi/scfg/abc/Release/scfg delete mode 100644 gi/scfg/abc/Release/scfg.d delete mode 100644 gi/scfg/abc/Release/sources.mk delete mode 100644 gi/scfg/abc/Release/subdir.mk delete mode 100644 gi/scfg/abc/Release/tmp.grammar delete mode 120000 gi/scfg/abc/Release/toy-grammar delete mode 100755 gi/scfg/abc/a.out delete mode 100644 gi/scfg/abc/agrammar.cc delete mode 100644 gi/scfg/abc/agrammar.h delete mode 100644 gi/scfg/abc/old_agrammar.cc delete mode 100644 gi/scfg/abc/old_agrammar.h delete mode 100644 gi/scfg/abc/scfg.cpp delete mode 100644 gi/scfg/abc/tmp.cpp diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d deleted file mode 100644 index 082cb15b..00000000 --- a/gi/scfg/abc/Release/IConv.d +++ /dev/null @@ -1,3 +0,0 @@ -IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp - -../../utils/IConv.hpp: diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d deleted file mode 100644 index 586d4d60..00000000 --- a/gi/scfg/abc/Release/Util.d +++ /dev/null @@ -1,8 +0,0 @@ -Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d deleted file mode 100644 index 553752ca..00000000 --- a/gi/scfg/abc/Release/agrammar.d +++ /dev/null @@ -1,205 +0,0 @@ -agrammar.d agrammar.o: ../agrammar.cc \ - /home/tnguyen/ws10smt/decoder/rule_lexer.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/filelib.h \ - /home/tnguyen/ws10smt/decoder/gzstream.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/home/tnguyen/ws10smt/decoder/rule_lexer.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/filelib.h: - -/home/tnguyen/ws10smt/decoder/gzstream.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_test deleted file mode 100755 index 1ba94218..00000000 Binary files a/gi/scfg/abc/Release/dict_test and /dev/null differ diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar deleted file mode 100644 index 75fac3a0..00000000 --- a/gi/scfg/abc/Release/grammar +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr deleted file mode 100644 index e4e327cf..00000000 --- a/gi/scfg/abc/Release/grammar.pr +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano . ||| [1] old man . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154 -[X] ||| el ||| the ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154 diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile deleted file mode 100644 index 25949e74..00000000 --- a/gi/scfg/abc/Release/makefile +++ /dev/null @@ -1,66 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -#-include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -endif - -#-include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables - -# All Target -all: scfg - -# Tool invocations - -# scfg.o: ../scfg.cpp -# @echo 'Building file: $<' -# @echo 'Invoking: GCC C++ Compiler' -# g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# @echo 'Finished building: $<' -# @echo ' ' - -scfg: $(OBJS) $(USER_OBJS) - @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - /bin/sh ../../../../libtool --tag=CXX --mode=link g++ -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc - @echo 'Finished building target: $@' - @echo ' ' -#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder -lpthread -ldl -lm $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg" -# Other Targets -clean: - -$(RM) $(OBJS)$(C++_DEPS)$(EXECUTABLES)$(CC_DEPS)$(C_DEPS)$(CPP_DEPS)$(CXX_DEPS)$(C_UPPER_DEPS) scfg - -@echo ' ' - -.PHONY: all clean dependents -.SECONDARY: - --include ../makefile.targets diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl deleted file mode 100644 index f82a8e5a..00000000 --- a/gi/scfg/abc/Release/process_grammar.pl +++ /dev/null @@ -1,36 +0,0 @@ -#!perl - -use warnings; -use strict; - -my $grammar_file = $ARGV[0]; - -my %nt_count; #maps nt--> count rules whose lhs is nt - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (){ - - chomp(); - - s/\|\|\|.*//g; - s/\s//g; - - $nt_count{$_}++; -} - - -close (G); - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (){ - - chomp(); - - (my $nt = $_) =~ s/\|\|\|.*//g; - $nt =~ s/\s//g; - - s/(.+\|\|\|.+\|\|\|.+\|\|\|).+/$1/g; - print $_ . " MinusLogP=" .(log($nt_count{$nt})) ."\n"; -} diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfg deleted file mode 100755 index 3faa52cc..00000000 Binary files a/gi/scfg/abc/Release/scfg and /dev/null differ diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d deleted file mode 100644 index b3cfbbb5..00000000 --- a/gi/scfg/abc/Release/scfg.d +++ /dev/null @@ -1,213 +0,0 @@ -scfg.d scfg.o: ../scfg.cpp \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /export/ws10smt/software/include/boost/pointer_cast.hpp \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h \ - /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \ - ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/export/ws10smt/software/include/boost/pointer_cast.hpp: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -/home/tnguyen/ws10smt/decoder/bottom_up_parser.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/hg_intersect.h: - -../../utils/ParamsArray.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk deleted file mode 100644 index 6c7070aa..00000000 --- a/gi/scfg/abc/Release/sources.mk +++ /dev/null @@ -1,27 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -C_UPPER_SRCS := -C_SRCS := -CPP_SRCS := -O_SRCS := -ASM_SRCS := -S_SRCS := -C++_SRCS := -CXX_SRCS := -CC_SRCS := -OBJ_SRCS := -OBJS := -C++_DEPS := -EXECUTABLES := -CC_DEPS := -C_DEPS := -CPP_DEPS := -CXX_DEPS := -C_UPPER_DEPS := - -# Every subdirectory with source files must be described here -SUBDIRS := \ -. \ - diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk deleted file mode 100644 index 49080b36..00000000 --- a/gi/scfg/abc/Release/subdir.mk +++ /dev/null @@ -1,59 +0,0 @@ - -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -# Add inputs and outputs from these tool invocations to the build variables -CPP_SRCS += \ -../../utils/Util.cc \ -../agrammar.cc \ -../scfg.cpp - - -OBJS += \ -./Util.o \ -./agrammar.o \ -./scfg.o - - -CPP_DEPS += \ -./Util.d \ -./agrammar.d \ -./scfg.d - -# Each subdirectory must supply rules for building sources it contributes -# %.o: ../%.cpp -# @echo 'Building file: $<' -# @echo 'Invoking: GCC C++ Compiler' -# g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# -# @echo ' ' - -%.o: ../../utils/%.cc - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - -%.o: ../../utils/%.c - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - -%.o: ../%.cpp - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - -%.o: ../%.cc - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar deleted file mode 100644 index 9df1b77d..00000000 --- a/gi/scfg/abc/Release/tmp.grammar +++ /dev/null @@ -1,2 +0,0 @@ -[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 \ No newline at end of file diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar deleted file mode 120000 index 50dea8df..00000000 --- a/gi/scfg/abc/Release/toy-grammar +++ /dev/null @@ -1 +0,0 @@ -/export/ws10smt/toy-grammar/ \ No newline at end of file diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.out deleted file mode 100755 index 0467acf0..00000000 Binary files a/gi/scfg/abc/a.out and /dev/null differ diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc deleted file mode 100644 index 016a0189..00000000 --- a/gi/scfg/abc/agrammar.cc +++ /dev/null @@ -1,489 +0,0 @@ -#include -#include -#include - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include "agrammar.h" -#include "../utils/Util.h" - - - -aTRule::aTRule(TRulePtr rule){ - - this -> e_ = rule->e_; - this -> f_ = rule->f_; - this ->lhs_ = rule->lhs_; - this -> arity_ = rule->arity_; - this -> scores_ = rule->scores_; - ResetScore(0.00000001); -} - -bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ - if (rule1->lhs_ != rule2->lhs_) return false; - if (rule1->f_.size() != rule2->f_.size()) return false; - if (rule1->e_.size() != rule2->e_.size()) return false; - - for (int i=0; if_.size(); i++) - if (rule1->f_.at(i) != rule2->f_.at(i)) return false; - for (int i=0; ie_.size(); i++) - if (rule1->e_.at(i) != rule2->e_.at(i)) return false; - return true; -} - - -//const vector Grammar::NO_RULES; - -void aRemoveRule(vector & v, const TRulePtr & rule){ // remove rule from v if found - for (int i=0; i< v.size(); i++) - if (equal(v[i], rule )){ - // cout<<"erase rule from vector:"<AsString()< & v, const NTRule & ntrule){ // remove rule from v if found - for (int i=0; i< v.size(); i++) - if (equal(v[i].rule_, ntrule.rule_ )){ - // cout<<"erase rule from vector:"<AsString()<Arity(); - } - - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - private: - vector rules_; -}; - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - map tree_; - aTextRuleBin* rb_; -}; - -struct aTGImpl { - aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const string& file) : - max_span_(10), - pimpl_(new aTGImpl) { - ReadFromFile(file); -} - -const GrammarIter* aTextGrammar::GetRoot() const { - return &pimpl_->root_; -} - -void aTextGrammar::SetGoalNT(const string & goal_str){ - goalID = TD::Convert(goal_str); - -} - -void getNTRule( const TRulePtr & rule, map & ntrule_map){ - - NTRule lhs_ntrule(rule, rule->lhs_ * -1); - ntrule_map[rule->lhs_ * -1] = lhs_ntrule; - - for (int i=0; i< (rule->f_).size(); i++) - if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ - NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); - ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; - } -} - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - } - - //add the rule to lhs_rules_ - lhs_rules_[rule->lhs_* -1].push_back(rule); - - //add the rule to nt_rules_ - map ntrule_map; - getNTRule (rule, ntrule_map); - for (map::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ - nt_rules_[it->first].push_back(it->second); - } -} - -void aTextGrammar::RemoveRule(const TRulePtr & rule){ - // cout<<"Remove rule: "<AsString()<IsUnary()) { - aRemoveRule(rhs2unaries_[rule->f().front()], rule); - aRemoveRule(unaries_, rule); - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i) - cur = &cur->tree_[rule->f_[i]]; -// if (cur->rb_ == NULL) -// cur->rb_ = new aTextRuleBin; - cur->rb_->RemoveRule(rule); - } - - //remove rules from lhs_rules_ - - aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); - - - //remove the rule from nt_rules_ - map ntrule_map; - getNTRule (rule, ntrule_map); - for (map::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ - aRemoveRule(nt_rules_[it->first], it->second); - } - -} - -void aTextGrammar::RemoveNonterminal(WordID wordID){ - vector rules = nt_rules_[wordID]; -// // remove the nonterminal from ntrules_ - nt_rules_.erase(wordID); - for (int i =0; i & nts){ - - vector rules = nt_rules_[nt_old]; - - // cout<<"\n\n\n start add splitting rules"< ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old - //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' - //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, - //hence the rules' new probabilities have to divide to cnt also - // cout<<"splitting NT in rule "<AsString()< e_ = (old_rule.rule_)->e_; - newrule -> f_ = old_rule.rule_->f_; - newrule->lhs_ = old_rule.rule_->lhs_; - newrule -> arity_ = old_rule.rule_->arity_; - newrule -> scores_ = old_rule.rule_->scores_; - - // cout<<"end up update score\n"; - if (ntPos[0] == -1){ //update the lhs - newrule->lhs_ = nts[j_vector[0]] * -1; - - //score has to randomly add/minus a small epsilon to break the balance - if (nts.size() >1 && ntPos.size() >1){ - // cout<<"start to add/minus epsilon"<lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon - newrule-> scores_ -= epsilon; - else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) - newrule-> scores_ += epsilon; - else{ - double random = rand()/RAND_MAX; - if (random > .5){ - newrule-> scores_ += epsilon; - cnt_addepsilon[newrule->lhs_]++; - } - else{ - newrule-> scores_ -= epsilon; - cnt_minusepsilon[newrule->lhs_]++; - } - } - } - - - for (int k=1; klhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon - newrule-> scores_ -= epsilon; - else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 ) - newrule-> scores_ += epsilon; - else{ - double random = rand()/RAND_MAX; - if (random > .5){ - newrule-> scores_ += epsilon; - cnt_addepsilon[newrule->lhs_]++; - } - else{ - newrule-> scores_ -= epsilon; - cnt_minusepsilon[newrule->lhs_]++; - } - } - } - - - for (int k=0; k X1; X->X2,... if X is the goal NT - for (int i =0; ilhs_ = goalID * -1; - rule ->f_.push_back(v_splits[i] * -1); - rule->e_.push_back(0); - - rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); - AddRule(rule); - } - - } - -} - - -void aTextGrammar::splitAllNonterminals(){ - map >::const_iterator it; - vector v ; // WordID >0 - for (it = lhs_rules_.begin(); it != lhs_rules_.end(); it++) //iterate through all nts - if (it->first != goalID || lhs_rules_.size() ==1) - v.push_back(it->first); - - for (int i=0; i< v.size(); i++) - splitNonterminal(v[i]); -} - - -void aTextGrammar::PrintAllRules(const string & filename) const{ - - - cerr<<"print grammar to "< >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - - vector v = it-> second; - for (int i =0; i< v.size(); i++){ - outfile<AsString()<<"\t"< >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - vector v = it-> second; - for (int i =0; i< v.size(); i++){ - // cerr<<"Reset score of Rule "<AsString()<(v[i])->ResetScore(alpha_ /v.size()); - } - lhs_rules_[it->first] = v; - sum_probs_[it->first] = alpha_; - } - -} - -void aTextGrammar::UpdateScore(){ - - map >::const_iterator it; - for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - vector v = it-> second; - for (int i =0; i< v.size(); i++){ - boost::static_pointer_cast(v[i])->UpdateScore(sum_probs_[it->first] ); - } - - // cerr<<"sum_probs_[it->first] ="<first] <first] = alpha_; - } - -} - - -void aTextGrammar::UpdateHgProsteriorProb(Hypergraph & hg){ - std::vector posts ; - - prob_t goal_score = hg.ComputeEdgePosteriors(1, &posts); - for (int i =0; ilhs_ * -1); - - if (str_lhs.find(goalstr) != string::npos) - continue; - - // cerr<AsString()<parent_rule_->AsString()<(e.rule_->parent_rule_)->AddProb(posts[i] / goal_score); - // cerr<<"add count for rule\n"; -// cerr<<"posts[i]="<AsString()<AsString()<scores_.set_value(FD::Convert("MinusLogP"), minuslogp); - - } - private: - SparseVector sum_scores_; -}; - - -class aTGImpl; -struct NTRule{ - - NTRule(){}; - NTRule(const TRulePtr & rule, WordID nt){ - nt_ = nt; - rule_ = rule; - - if (rule->lhs_ * -1 == nt) - ntPos_.push_back(-1); - - for (int i=0; i< rule->f().size(); i++) - if (rule->f().at(i) * -1 == nt) - ntPos_.push_back(i); - - - } - - TRulePtr rule_; - WordID nt_; //the labelID of the nt (nt_>0); - - vector ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_() - //i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP - -}; - - -struct aTextGrammar : public Grammar { - aTextGrammar(); - aTextGrammar(const std::string& file); - void SetMaxSpan(int m) { max_span_ = m; } - - virtual const GrammarIter* GetRoot() const; - void AddRule(const TRulePtr& rule); - void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j, int distance) const; - const std::vector& GetUnaryRules(const WordID& cat) const; - - void AddSplitNonTerminal(WordID nt_old, vector & nts); - void setMaxSplit(int max_split); - void splitNonterminal(WordID wordID); - - - void splitAllNonterminals(); - - void PrintAllRules(const string & filename) const; - void PrintNonterminalRules(WordID nt) const; - void SetGoalNT(const string & goal_str); - - void ResetScore(); - - void UpdateScore(); - - void UpdateHgProsteriorProb(Hypergraph & hg); - - void set_alpha(double alpha){alpha_ = alpha;} - private: - - void RemoveRule(const TRulePtr & rule); - void RemoveNonterminal(WordID wordID); - - int max_span_; - int max_split_; - boost::shared_ptr pimpl_; - - map > lhs_rules_;// WordID >0 - map > nt_rules_; - - map sum_probs_; - map cnt_rules; - - double alpha_; - - // map > grSplitNonterminals; - WordID goalID; -}; - - -#endif diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc deleted file mode 100644 index 33d70dfc..00000000 --- a/gi/scfg/abc/old_agrammar.cc +++ /dev/null @@ -1,383 +0,0 @@ -#include "agrammar.h" -#include "Util.h" - -#include -#include -#include - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include -#include - -map > grSplitNonterminals; -//const vector Grammar::NO_RULES; - - -// vector substituteF(TRulePtr & rule, WordID wordID, vector & v){ -// vector vRules; //outputs - -// vector f = rule->f(); -// vector > newfvector; -// for (int i =0; i< f.size(); i++){ -// if (f[i] == wordID){ -// newfvector.push_back(v); -// } -// else -// newfvector.push_back(vector (1, f[i])); -// } - -// //now creates new rules; - - -// return vRules; -// } - - -struct aTextRuleBin : public RuleBin { - int GetNumRules() const { - return rules_.size(); - } - TRulePtr GetIthRule(int i) const { - return rules_[i]; - } - void AddRule(TRulePtr t) { - rules_.push_back(t); - } - int Arity() const { - return rules_.front()->Arity(); - } - void Dump() const { - for (int i = 0; i < rules_.size(); ++i) - cerr << rules_[i]->AsString() << endl; - } - - - vector getRules(){ return rules_;} - - - void substituteF(vector & f_path, map > & grSplitNonterminals){ - //this substituteF method is different with substituteF procedure found in cdec code; - // - //aTextRuleBin has a collection of rules with the same f() on the rhs, - //substituteF() replaces the f_ of all the rules with f_path vector, - //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals - - vector newrules; - for (vector::iterator it = rules_.begin() ; it != rules_.end(); it++){ - assert(f_path.size() == (*it)->f_.size()); - - if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){ - (*it)->f_ = f_path; - } - else{ // split the lhs NT, - vector new_lhs = grSplitNonterminals[ (*it)->lhs_ ]; - for (vector::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){ - TRulePtr newrule; - newrule -> e_ = (*it)->e_; - newrule -> f_ = (*it)->f_; - newrule->lhs_ = *vit; - newrule -> scores_ = (*it)->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - } - rules_.erase(it); - } - } - - //now add back newrules(output of splitting lhs_) to rules_ - rules_.insert(newrules.begin(),newrules.begin(), newrules.end()); - } - -private: - vector rules_; -}; - - - -struct aTextGrammarNode : public GrammarIter { - aTextGrammarNode() : rb_(NULL) {} - - aTextGrammarNode(const aTextGrammarNode & a){ - nonterminals_ = a.nonterminals_; - tree_ = a.tree_; - rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over - } - - ~aTextGrammarNode() { - delete rb_; - } - const GrammarIter* Extend(int symbol) const { - map::const_iterator i = tree_.find(symbol); - if (i == tree_.end()) return NULL; - return &i->second; - } - - const RuleBin* GetRules() const { - if (rb_) { - //rb_->Dump(); - } - return rb_; - } - - void DFS(); - - void visit (); //todo: make this as a function pointer - - vector path_; //vector of f_ nonterminals/terminals from the top to the current node; - set nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0 - map tree_; - aTextRuleBin* rb_; - - void print_path(){ //for debug only - cout<<"path="<::iterator it = tree_.begin(); it != tree_.end(); it++){ - (it->second).DFS(); - } -} - - -void aTextGrammarNode::visit( ){ - - cout<<"start visit()"< vsplits = grSplitNonterminals[*it]; //split *it into vsplits - - //iterate through next terminals/nonterminals in tree_ - vector tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_ - - for (map::iterator it = tree_.begin() ; it != tree_.end(); it++){ - cout<<"in visit(): inside for loop: wordID=="<first< >::const_iterator git = grSplitNonterminals.find(it->first * -1 ); - - if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split - cout<<"not split\n"; - tree_[it->first ].path_ = path_; - tree_[it->first ].path_.push_back(it->first); - cout<<"in visit() tree_[it->first ].path_= "; - tree_[it->first ].print_path(); - continue; - } - - - cout<<"tmp2"; - vector vsplits = grSplitNonterminals[it->first * -1]; - // vector vsplits = git->second; - cout<<"tmp3"; - // vector vsplits = agrammar_ ->splitNonterminals_[it->first * -1]; - cout <<"got vsplits"<first]); //cp the subtree to new nonterminal - tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees - tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1); - tree_[vsplits[i] * -1].print_path(); - } - - //remove the old node: - tobe_removedNTs.push_back(it->first); - - } - - for (int i =0; isubstituteF(path_, grSplitNonterminals); - - } - cout<<"visit() end"<root_; -} - - -void aTextGrammar::addNonterminal(WordID wordID){ - //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map) nonterminals_ of grammar - //if the input parameter wordID<0 then do nothing - - if (wordID <0){ //it is a nonterminal - - map::iterator it = nonterminals_.find(wordID * -1); - if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals) - nonterminals_[wordID * -1] = 1; - } -} - - - -void aTextGrammar::AddRule(const TRulePtr& rule) { - //add the LHS nonterminal to nonterminals_ map - - this->addNonterminal(rule->lhs_); - - if (rule->IsUnary()) { - rhs2unaries_[rule->f().front()].push_back(rule); - unaries_.push_back(rule); - if (rule->f().front() <0) - //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - this->addNonterminal(rule->f().front()); - - - } else { - aTextGrammarNode* cur = &pimpl_->root_; - for (int i = 0; i < rule->f_.size(); ++i){ - if (rule->f_[i] <0){ - cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set - this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals - } - cur = &cur->tree_[rule->f_[i]]; - - } - if (cur->rb_ == NULL) - cur->rb_ = new aTextRuleBin; - cur->rb_->AddRule(rule); - - } -} - -static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) { - static_cast(extra)->AddRule(new_rule); -} - - -void aTextGrammar::ReadFromFile(const string& filename) { - ReadFile in(filename); - RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { - return (max_span_ >= distance); -} - - -////Linh added - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - -void aTextGrammar::printAllNonterminals() const{ - for (map::const_iterator it =nonterminals_.begin(); - it != nonterminals_.end(); it++){ - if (it->second >0){ - cout <first<<"\t"<first)< v_splits;//split nonterminal wordID into the list of nonterminals in v_splits - for (int i =0; i< this->max_split_; i++){ - string split_str = old_str + "+" + itos(i); - WordID splitID = TD::Convert(split_str); - v_splits.push_back(splitID); - nonterminals_[splitID] = 1; - } - - grSplitNonterminals[wordID] = v_splits; - //set wordID to be an inactive nonterminal - nonterminals_[wordID] = 0; - - //print split nonterminas of wordID - v_splits = grSplitNonterminals[wordID]; - cout<<"print split nonterminals\n"; - for (int i =0; i newrules; - //first unary rules: - //iterate through unary rules - for (int i =0; i < unaries_.size(); i++){ - TRulePtr rule = unaries_[i]; - WordID lhs = rule.lhs_; - if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal - && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too - ){ - vector rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals' - vector lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals' - for (int k =0; k e_ = rule->e_; - newrule -> f_ = rhs_nonterminals[k]->f_; - newrule->lhs_ = lhs_nonterminals[j]->lhs_; - newrule -> scores_ = rule->scores_; - newrule -> arity_ = (*it)->arity_; - newrules.push_back (newrule); - - //update - } - } - else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal - - - } - } - - // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){ - - // } - // if (rule->IsUnary()) { - // rhs2unaries_[rule->f().front()].push_back(rule); - // unaries_.push_back(rule); - // if (rule->f().front() <0) - // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1) - // this->addNonterminal(rule->f().front()); - - - pimpl_->root_.DFS(); - -} - - -// void aTextGrammar::splitNonterminal0(WordID wordID){ - -// TextGrammarNode* cur = &pimpl_->root_; -// for (int i = 0; i < rule->f_.size(); ++i) -// cur = &cur->tree_[rule->f_[i]]; - -// } - -void aTextGrammar::splitAllNonterminals(){ - - -} - diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h deleted file mode 100644 index d68c2548..00000000 --- a/gi/scfg/abc/old_agrammar.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _AGRAMMAR_H_ -#define _AGRAMMAR_H_ - -#include "grammar.h" - -using namespace std; - -class aTGImpl; - -struct aTextGrammar : public Grammar { - aTextGrammar(); - aTextGrammar(const std::string& file); - void SetMaxSpan(int m) { max_span_ = m; } - - virtual const GrammarIter* GetRoot() const; - void AddRule(const TRulePtr& rule); - void ReadFromFile(const std::string& filename); - virtual bool HasRuleForSpan(int i, int j, int distance) const; - const std::vector& GetUnaryRules(const WordID& cat) const; - - void setMaxSplit(int max_split); - - void printAllNonterminals() const; - void addNonterminal(WordID wordID); - - void splitAllNonterminals(); - void splitNonterminal(WordID wordID); - - // inline map > & getSplitNonterminals(){return splitNonterminals_;} - // map > splitNonterminals_; - private: - int max_span_; - boost::shared_ptr pimpl_; - int max_split_; - - map nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar - - - -}; - - - - -#endif diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp deleted file mode 100644 index 1e59fb4a..00000000 --- a/gi/scfg/abc/scfg.cpp +++ /dev/null @@ -1,277 +0,0 @@ -#include -#include - -#include -#include -#include "lattice.h" -#include "tdict.h" -#include "agrammar.h" -#include "bottom_up_parser.h" -#include "hg.h" -#include "hg_intersect.h" -#include "../utils/ParamsArray.h" - - -using namespace std; - -vector src_corpus; -vector tgt_corpus; - -bool openParallelCorpora(string & input_filename){ - ifstream input_file; - - input_file.open(input_filename.c_str()); - if (!input_file) { - cerr << "Cannot open input file " << input_filename << ". Exiting..." << endl; - return false; - } - - int line =0; - while (!input_file.eof()) { - // get a line of source language data - // cerr<<"new line "< v = tokenize(str, delimiters); - - if ( (v.size() != 2) and (v.size() != 3) ) { - cerr< vID; - TD::ConvertSentence(str , &vID); - Lattice lsentence; - lsentence.resize(vID.size()); - - for (int i=0; i grammars(1, g); - - ExhaustiveBottomUpParser parser = ExhaustiveBottomUpParser(goal_sym, grammars); - - if (!parser.Parse(lsource, &hg)){ - - cerr<<"source sentence is not parsed by the grammar!"< reweight; - - reweight.set_value(FD::Convert("MinusLogP"), -1 ); - hg.Reweight(reweight); - - return true; - -} - - - - -int main(int argc, char** argv){ - - ParamsArray params(argc, argv); - params.setDescription("scfg models"); - - params.addConstraint("grammar_file", "grammar file (default ./grammar.pr )", true); // optional - - params.addConstraint("input_file", "parallel input file (default ./parallel_corpora)", true); //optional - - params.addConstraint("output_file", "grammar output file (default ./grammar_output)", true); //optional - - params.addConstraint("goal_symbol", "top nonterminal symbol (default: X)", true); //optional - - params.addConstraint("split", "split one nonterminal into 'split' nonterminals (default: 2)", true); //optional - - params.addConstraint("prob_iters", "number of iterations (default: 10)", true); //optional - - params.addConstraint("split_iters", "number of splitting iterations (default: 3)", true); //optional - - params.addConstraint("alpha", "alpha (default: 0.1)", true); //optional - - if (!params.runConstraints("scfg")) { - return 0; - } - cerr<<"get parametters\n\n\n"; - - - string grammar_file = params.asString("grammar_file", "./grammar.pr"); - - string input_file = params.asString("input_file", "parallel_corpora"); - - string output_file = params.asString("output_file", "grammar_output"); - - string goal_sym = params.asString("goal_symbol", "X"); - - int max_split = atoi(params.asString("split", "2").c_str()); - - int prob_iters = atoi(params.asString("prob_iters", "2").c_str()); - int split_iters = atoi(params.asString("split_iters", "1").c_str()); - double alpha = atof(params.asString("alpha", ".001").c_str()); - - ///// - cerr<<"grammar_file ="<SetGoalNT(goal_sym); - agrammar->setMaxSplit(max_split); - agrammar->set_alpha(alpha); - - srand(123); - - GrammarPtr g( agrammar); - Hypergraph hg; - - int data_size = src_corpus.size(); - int cnt_unparsed =0; - for (int i =0; i PrintAllRules(output_file+".s" + itos(i+1)); - agrammar->splitAllNonterminals(); - - //vector src_corpus; - //vector tgt_corpus; - - for (int j=0; jResetScore(); - // cerr<<"done reset grammar score\n"; - for (int k=0; k (g)->UpdateHgProsteriorProb(hg); - hg.clear(); - if (k%1000 ==0 ) cerr<<"sentences "<ResetScore(); - // // agrammar->UpdateScore(); - // if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ - // cerr<<"target sentence is not parsed by the grammar!\n"; - // return 1; - - // } - // // hg.PrintGraphviz(); - // //hg.clear(); - - // agrammar->PrintAllRules(); - // /*split grammar*/ - // cout<<"split NTs\n"; - // cerr<<"first of all write all nonterminals"<printAllNonterminals(); - // cout<<"after split nonterminal"<PrintAllRules(); - // Hypergraph hg1; - // if (! parseSentencePair(goal_sym, src, tgt, g, hg1) ){ - // cerr<<"target sentence is not parsed by the grammar!\n"; - // return 1; - - // } - - // hg1.PrintGraphviz(); - - - // agrammar->splitNonterminal(15); - // cout<<"after split nonterminal"<PrintAllRules(); - - - /*load training corpus*/ - - - /*for each sentence pair in training corpus*/ - - // forest.PrintGraphviz(); - /*calculate expected count*/ - -} diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp deleted file mode 100644 index 967a601d..00000000 --- a/gi/scfg/abc/tmp.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include -#include -using namespace std; - -int x = 5; - -class A{A(){x++;}}; -// { -// int a_; - -// }; - -class B: public A{ - - int b_; -}; - -int main(){ - - cout<<"Hello World"; - set s; - - s.insert(1); - s.insert(2); - - x++; - cout<<"x="< t; - t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4); - for(vector::iterator it = t.begin(); it != t.end(); it++){ - if (*it ==2) t.erase(it); - cout <<*it<