diff options
Diffstat (limited to 'gi')
| -rw-r--r-- | gi/scfg/abc/Release/IConv.d | 3 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/Util.d | 8 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/agrammar.d | 205 | ||||
| -rwxr-xr-x | gi/scfg/abc/Release/dict_test | bin | 1485797 -> 0 bytes | |||
| -rw-r--r-- | gi/scfg/abc/Release/grammar | 13 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/grammar.pr | 13 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/makefile | 66 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/process_grammar.pl | 36 | ||||
| -rwxr-xr-x | gi/scfg/abc/Release/scfg | bin | 4438644 -> 0 bytes | |||
| -rw-r--r-- | gi/scfg/abc/Release/scfg.d | 213 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/sources.mk | 27 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/subdir.mk | 59 | ||||
| -rw-r--r-- | gi/scfg/abc/Release/tmp.grammar | 2 | ||||
| l--------- | gi/scfg/abc/Release/toy-grammar | 1 | ||||
| -rwxr-xr-x | gi/scfg/abc/a.out | bin | 22639 -> 0 bytes | |||
| -rw-r--r-- | gi/scfg/abc/agrammar.cc | 489 | ||||
| -rw-r--r-- | gi/scfg/abc/agrammar.h | 116 | ||||
| -rw-r--r-- | gi/scfg/abc/old_agrammar.cc | 383 | ||||
| -rw-r--r-- | gi/scfg/abc/old_agrammar.h | 45 | ||||
| -rw-r--r-- | gi/scfg/abc/scfg.cpp | 277 | ||||
| -rw-r--r-- | gi/scfg/abc/tmp.cpp | 36 | 
21 files changed, 0 insertions, 1992 deletions
| diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d deleted file mode 100644 index 082cb15b..00000000 --- a/gi/scfg/abc/Release/IConv.d +++ /dev/null @@ -1,3 +0,0 @@ -IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp - -../../utils/IConv.hpp: diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d deleted file mode 100644 index 586d4d60..00000000 --- a/gi/scfg/abc/Release/Util.d +++ /dev/null @@ -1,8 +0,0 @@ -Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d deleted file mode 100644 index 553752ca..00000000 --- a/gi/scfg/abc/Release/agrammar.d +++ /dev/null @@ -1,205 +0,0 @@ -agrammar.d agrammar.o: ../agrammar.cc \ - /home/tnguyen/ws10smt/decoder/rule_lexer.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/filelib.h \ - /home/tnguyen/ws10smt/decoder/gzstream.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h ../../utils/Util.h \ - ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/home/tnguyen/ws10smt/decoder/rule_lexer.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/filelib.h: - -/home/tnguyen/ws10smt/decoder/gzstream.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_testBinary files differ deleted file mode 100755 index 1ba94218..00000000 --- a/gi/scfg/abc/Release/dict_test +++ /dev/null diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar deleted file mode 100644 index 75fac3a0..00000000 --- a/gi/scfg/abc/Release/grammar +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr deleted file mode 100644 index e4e327cf..00000000 --- a/gi/scfg/abc/Release/grammar.pr +++ /dev/null @@ -1,13 +0,0 @@ -[X] ||| . ||| . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano . ||| [1] old man . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154 -[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154 -[X] ||| el ||| the ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154 -[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154 diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile deleted file mode 100644 index 25949e74..00000000 --- a/gi/scfg/abc/Release/makefile +++ /dev/null @@ -1,66 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -#-include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -endif - -#-include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables  - -# All Target -all: scfg - -# Tool invocations - -# scfg.o: ../scfg.cpp -# 	@echo 'Building file: $<' -# 	@echo 'Invoking: GCC C++ Compiler' -# 	g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst  -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# 	@echo 'Finished building: $<' -# 	@echo ' ' - -scfg: $(OBJS) $(USER_OBJS) -	@echo 'Building target: $@' -	@echo 'Invoking: GCC C++ Linker' -	/bin/sh ../../../../libtool --tag=CXX   --mode=link g++  -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc	 -	@echo 'Finished building target: $@' -	@echo ' ' -#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder  -lpthread -ldl -lm  $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg" -# Other Targets -clean: -	-$(RM) $(OBJS)$(C++_DEPS)$(EXECUTABLES)$(CC_DEPS)$(C_DEPS)$(CPP_DEPS)$(CXX_DEPS)$(C_UPPER_DEPS) scfg -	-@echo ' ' - -.PHONY: all clean dependents -.SECONDARY: - --include ../makefile.targets diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl deleted file mode 100644 index f82a8e5a..00000000 --- a/gi/scfg/abc/Release/process_grammar.pl +++ /dev/null @@ -1,36 +0,0 @@ -#!perl - -use warnings; -use strict; - -my $grammar_file = $ARGV[0]; - -my %nt_count; #maps nt--> count rules whose lhs is nt  - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (<G>){ - -    chomp(); - -    s/\|\|\|.*//g; -    s/\s//g; - -    $nt_count{$_}++; -} - - -close (G); - -open(G, "<$grammar_file") or die "Can't open file $grammar_file"; - -while (<G>){ - -    chomp(); - -    (my $nt = $_) =~ s/\|\|\|.*//g; -    $nt =~ s/\s//g; - -    s/(.+\|\|\|.+\|\|\|.+\|\|\|).+/$1/g; -    print $_ . " MinusLogP=" .(log($nt_count{$nt})) ."\n"; -} diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfgBinary files differ deleted file mode 100755 index 3faa52cc..00000000 --- a/gi/scfg/abc/Release/scfg +++ /dev/null diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d deleted file mode 100644 index b3cfbbb5..00000000 --- a/gi/scfg/abc/Release/scfg.d +++ /dev/null @@ -1,213 +0,0 @@ -scfg.d scfg.o: ../scfg.cpp \ - /export/ws10smt/software/include/boost/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \ - /export/ws10smt/software/include/boost/config.hpp \ - /export/ws10smt/software/include/boost/config/user.hpp \ - /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \ - /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \ - /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \ - /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \ - /export/ws10smt/software/include/boost/config/select_platform_config.hpp \ - /export/ws10smt/software/include/boost/config/platform/linux.hpp \ - /export/ws10smt/software/include/boost/config/posix_features.hpp \ - /export/ws10smt/software/include/boost/config/suffix.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \ - /export/ws10smt/software/include/boost/assert.hpp \ - /export/ws10smt/software/include/boost/checked_delete.hpp \ - /export/ws10smt/software/include/boost/throw_exception.hpp \ - /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \ - /export/ws10smt/software/include/boost/detail/workaround.hpp \ - /export/ws10smt/software/include/boost/exception/exception.hpp \ - /export/ws10smt/software/include/boost/current_function.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \ - /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \ - /export/ws10smt/software/include/boost/memory_order.hpp \ - /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \ - /export/ws10smt/software/include/boost/pointer_cast.hpp \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/wordid.h \ - /home/tnguyen/ws10smt/decoder/array2d.h \ - /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/lattice.h \ - /home/tnguyen/ws10smt/decoder/trule.h \ - /home/tnguyen/ws10smt/decoder/sparse_vector.h \ - /home/tnguyen/ws10smt/decoder/fdict.h \ - /home/tnguyen/ws10smt/decoder/dict.h \ - /export/ws10smt/software/include/boost/functional/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash.hpp \ - /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \ - /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \ - /export/ws10smt/software/include/boost/limits.hpp \ - /export/ws10smt/software/include/boost/integer/static_log2.hpp \ - /export/ws10smt/software/include/boost/integer_fwd.hpp \ - /export/ws10smt/software/include/boost/cstdint.hpp \ - /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \ - /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \ - /export/ws10smt/software/include/boost/detail/container_fwd.hpp \ - /home/tnguyen/ws10smt/decoder/hg.h \ - /home/tnguyen/ws10smt/decoder/small_vector.h \ - /home/tnguyen/ws10smt/decoder/prob.h \ - /home/tnguyen/ws10smt/decoder/logval.h \ - /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \ - /home/tnguyen/ws10smt/decoder/grammar.h \ - /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \ - ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h - -/export/ws10smt/software/include/boost/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp: - -/export/ws10smt/software/include/boost/config.hpp: - -/export/ws10smt/software/include/boost/config/user.hpp: - -/export/ws10smt/software/include/boost/config/select_compiler_config.hpp: - -/export/ws10smt/software/include/boost/config/compiler/gcc.hpp: - -/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp: - -/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp: - -/export/ws10smt/software/include/boost/config/select_platform_config.hpp: - -/export/ws10smt/software/include/boost/config/platform/linux.hpp: - -/export/ws10smt/software/include/boost/config/posix_features.hpp: - -/export/ws10smt/software/include/boost/config/suffix.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp: - -/export/ws10smt/software/include/boost/assert.hpp: - -/export/ws10smt/software/include/boost/checked_delete.hpp: - -/export/ws10smt/software/include/boost/throw_exception.hpp: - -/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp: - -/export/ws10smt/software/include/boost/detail/workaround.hpp: - -/export/ws10smt/software/include/boost/exception/exception.hpp: - -/export/ws10smt/software/include/boost/current_function.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp: - -/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp: - -/export/ws10smt/software/include/boost/memory_order.hpp: - -/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp: - -/export/ws10smt/software/include/boost/pointer_cast.hpp: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/wordid.h: - -/home/tnguyen/ws10smt/decoder/array2d.h: - -/home/tnguyen/ws10smt/decoder/tdict.h: - -../agrammar.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/lattice.h: - -/home/tnguyen/ws10smt/decoder/trule.h: - -/home/tnguyen/ws10smt/decoder/sparse_vector.h: - -/home/tnguyen/ws10smt/decoder/fdict.h: - -/home/tnguyen/ws10smt/decoder/dict.h: - -/export/ws10smt/software/include/boost/functional/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash.hpp: - -/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp: - -/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp: - -/export/ws10smt/software/include/boost/limits.hpp: - -/export/ws10smt/software/include/boost/integer/static_log2.hpp: - -/export/ws10smt/software/include/boost/integer_fwd.hpp: - -/export/ws10smt/software/include/boost/cstdint.hpp: - -/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp: - -/export/ws10smt/software/include/boost/functional/hash/extensions.hpp: - -/export/ws10smt/software/include/boost/detail/container_fwd.hpp: - -/home/tnguyen/ws10smt/decoder/hg.h: - -/home/tnguyen/ws10smt/decoder/small_vector.h: - -/home/tnguyen/ws10smt/decoder/prob.h: - -/home/tnguyen/ws10smt/decoder/logval.h: - -/home/tnguyen/ws10smt/decoder/bottom_up_parser.h: - -/home/tnguyen/ws10smt/decoder/grammar.h: - -/home/tnguyen/ws10smt/decoder/hg_intersect.h: - -../../utils/ParamsArray.h: - -../../utils/Util.h: - -../../utils/UtfConverter.h: - -../../utils/ConvertUTF.h: diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk deleted file mode 100644 index 6c7070aa..00000000 --- a/gi/scfg/abc/Release/sources.mk +++ /dev/null @@ -1,27 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -C_UPPER_SRCS :=  -C_SRCS :=  -CPP_SRCS :=  -O_SRCS :=  -ASM_SRCS :=  -S_SRCS :=  -C++_SRCS :=  -CXX_SRCS :=  -CC_SRCS :=  -OBJ_SRCS :=  -OBJS :=  -C++_DEPS :=  -EXECUTABLES :=  -CC_DEPS :=  -C_DEPS :=  -CPP_DEPS :=  -CXX_DEPS :=  -C_UPPER_DEPS :=  - -# Every subdirectory with source files must be described here -SUBDIRS := \ -. \ - diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk deleted file mode 100644 index 49080b36..00000000 --- a/gi/scfg/abc/Release/subdir.mk +++ /dev/null @@ -1,59 +0,0 @@ - -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - -# Add inputs and outputs from these tool invocations to the build variables  -CPP_SRCS += \ -../../utils/Util.cc \ -../agrammar.cc \ -../scfg.cpp - - -OBJS += \ -./Util.o \ -./agrammar.o \ -./scfg.o  - - -CPP_DEPS += \ -./Util.d \ -./agrammar.d \ -./scfg.d - -# Each subdirectory must supply rules for building sources it contributes -# %.o: ../%.cpp -# 	@echo 'Building file: $<' -# 	@echo 'Invoking: GCC C++ Compiler' -# 	g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst  -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -# 	 -# 	@echo ' ' - -%.o: ../../utils/%.cc -	@echo 'Building file: $<' -	@echo 'Invoking: GCC C++ Compiler' -	g++ -g -p -g3 -Wall -c -fmessage-length=0  -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -	@echo 'Finished building: $<' -	@echo ' ' - -%.o: ../../utils/%.c -	@echo 'Building file: $<' -	@echo 'Invoking: GCC C++ Compiler' -	g++ -g -p -g3 -Wall -c -fmessage-length=0  -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -	@echo 'Finished building: $<' -	@echo ' ' - -%.o: ../%.cpp -	@echo 'Building file: $<' -	@echo 'Invoking: GCC C++ Compiler' -	g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -	@echo 'Finished building: $<' -	@echo ' ' - -%.o: ../%.cc -	@echo 'Building file: $<' -	@echo 'Invoking: GCC C++ Compiler' -	g++ -O3 -g3 -Wall -c -fmessage-length=0  -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" -	@echo 'Finished building: $<' -	@echo ' ' - diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar deleted file mode 100644 index 9df1b77d..00000000 --- a/gi/scfg/abc/Release/tmp.grammar +++ /dev/null @@ -1,2 +0,0 @@ -[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
\ No newline at end of file diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar deleted file mode 120000 index 50dea8df..00000000 --- a/gi/scfg/abc/Release/toy-grammar +++ /dev/null @@ -1 +0,0 @@ -/export/ws10smt/toy-grammar/
\ No newline at end of file diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.outBinary files differ deleted file mode 100755 index 0467acf0..00000000 --- a/gi/scfg/abc/a.out +++ /dev/null diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc deleted file mode 100644 index 016a0189..00000000 --- a/gi/scfg/abc/agrammar.cc +++ /dev/null @@ -1,489 +0,0 @@ -#include <algorithm> -#include <utility> -#include <map> - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include "agrammar.h" -#include "../utils/Util.h" - - - -aTRule::aTRule(TRulePtr rule){ - -  this -> e_ = rule->e_; -  this -> f_ = rule->f_; -  this ->lhs_ = rule->lhs_; -  this -> arity_ = rule->arity_; -  this -> scores_ = rule->scores_; -  ResetScore(0.00000001); -} - -bool equal(TRulePtr const & rule1, TRulePtr const & rule2){ -  if (rule1->lhs_ != rule2->lhs_) return false; -  if (rule1->f_.size() != rule2->f_.size()) return false; -  if (rule1->e_.size() != rule2->e_.size()) return false; - -  for (int i=0; i<rule1->f_.size(); i++) -    if (rule1->f_.at(i) != rule2->f_.at(i)) return false; -  for (int i=0; i<rule1->e_.size(); i++) -    if (rule1->e_.at(i) != rule2->e_.at(i)) return false; -  return true; -} - - -//const vector<TRulePtr> Grammar::NO_RULES; - -void aRemoveRule(vector<TRulePtr> & v, const TRulePtr  & rule){ // remove rule from v if found -  for (int i=0; i< v.size(); i++) -    if (equal(v[i], rule )){ -      //      cout<<"erase rule from vector:"<<rule->AsString()<<endl; -      v.erase(v.begin()+i); -    } -} - -void aRemoveRule(vector<NTRule> & v, const NTRule  & ntrule){ // remove rule from v if found -  for (int i=0; i< v.size(); i++) -    if (equal(v[i].rule_, ntrule.rule_ )){ -      //      cout<<"erase rule from vector:"<<rule->AsString()<<endl; -       v.erase(v.begin()+i); -    } -} - -struct aTextRuleBin : public RuleBin { -  int GetNumRules() const { -    return rules_.size(); -  } -  TRulePtr GetIthRule(int i) const { -    return rules_[i]; -  } -  void AddRule(TRulePtr t) { -    rules_.push_back(t); -  } - -  void RemoveRule(const TRulePtr & rule ){ -    aRemoveRule(rules_, rule); -  } -       - -  int Arity() const { -    return rules_.front()->Arity(); -  } - -  void Dump() const { -    for (int i = 0; i < rules_.size(); ++i) -      cerr << rules_[i]->AsString() << endl; -  } - private: -  vector<TRulePtr> rules_; -}; - - -struct aTextGrammarNode : public GrammarIter { -  aTextGrammarNode() : rb_(NULL) {} -  ~aTextGrammarNode() { -    delete rb_; -  } -  const GrammarIter* Extend(int symbol) const { -    map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); -    if (i == tree_.end()) return NULL; -    return &i->second; -  } - -  const RuleBin* GetRules() const { -    if (rb_) { -      //rb_->Dump(); -    } -    return rb_; -  } - -  map<WordID, aTextGrammarNode> tree_; -  aTextRuleBin* rb_; -}; - -struct aTGImpl { -  aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const string& file) :  -  max_span_(10), -  pimpl_(new aTGImpl) { -  ReadFromFile(file); -} - -const GrammarIter* aTextGrammar::GetRoot() const { -  return &pimpl_->root_; -} - -void aTextGrammar::SetGoalNT(const string & goal_str){ -  goalID = TD::Convert(goal_str); - -} - -void getNTRule( const TRulePtr & rule, map<WordID, NTRule> & ntrule_map){ -   -  NTRule lhs_ntrule(rule, rule->lhs_ * -1); -  ntrule_map[rule->lhs_ * -1] = lhs_ntrule; - -  for (int i=0; i< (rule->f_).size(); i++) -    if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){ -        NTRule rhs_ntrule(rule, rule->f_.at(i) * -1); -	ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule; -    } -} - - -void aTextGrammar::AddRule(const TRulePtr& rule) { -  if (rule->IsUnary()) { -    rhs2unaries_[rule->f().front()].push_back(rule); -    unaries_.push_back(rule); -  } else { -    aTextGrammarNode* cur = &pimpl_->root_; -    for (int i = 0; i < rule->f_.size(); ++i) -      cur = &cur->tree_[rule->f_[i]]; -    if (cur->rb_ == NULL) -      cur->rb_ = new aTextRuleBin; -    cur->rb_->AddRule(rule); -  } -   -  //add the rule to lhs_rules_ -  lhs_rules_[rule->lhs_* -1].push_back(rule); -   -  //add the rule to nt_rules_ -  map<WordID, NTRule> ntrule_map; -  getNTRule (rule, ntrule_map); -  for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ -    nt_rules_[it->first].push_back(it->second); -  } -} - -void aTextGrammar::RemoveRule(const TRulePtr & rule){ -  //  cout<<"Remove rule:  "<<rule->AsString()<<endl; -  if (rule->IsUnary()) { -    aRemoveRule(rhs2unaries_[rule->f().front()], rule); -    aRemoveRule(unaries_, rule); -  } else { -    aTextGrammarNode* cur = &pimpl_->root_; -    for (int i = 0; i < rule->f_.size(); ++i) -      cur = &cur->tree_[rule->f_[i]]; -//     if (cur->rb_ == NULL) -//       cur->rb_ = new aTextRuleBin; -    cur->rb_->RemoveRule(rule); -  } - -  //remove rules from lhs_rules_ -   -  aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule); - - -  //remove the rule from nt_rules_ -  map<WordID, NTRule> ntrule_map; -  getNTRule (rule, ntrule_map); -  for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){ -    aRemoveRule(nt_rules_[it->first], it->second); -  } - -} - -void aTextGrammar::RemoveNonterminal(WordID wordID){ -  vector<NTRule> rules = nt_rules_[wordID]; -//  //  remove the nonterminal from ntrules_ -  nt_rules_.erase(wordID); -  for (int i =0; i<rules.size(); i++) -    RemoveRule(rules[i].rule_); -  sum_probs_.erase(wordID); -  cnt_rules.erase(wordID); - -} - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - - -   -void aTextGrammar::AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts){ - -  vector<NTRule> rules = nt_rules_[nt_old]; - -  //  cout<<"\n\n\n start add splitting rules"<<endl; - -  const double epsilon = 0.001; -  for (int i=0; i<rules.size(); i++){ -    NTRule old_rule = rules.at(i); -    vector<int> ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old -    //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts' -    //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions, -    //hence the rules' new probabilities have to divide to cnt also -    //    cout<<"splitting NT in rule "<<old_rule.rule_->AsString()<<endl; - -//     cout<<"nt position in the rules"<<endl; -//     for (int j=0; j<ntPos.size();j++) cout<<ntPos[j]<<"  "; cout<<endl; - -    int cnt_newrules = pow( nts.size(), ntPos.size() ); -    //    cout<<"cnt_newrules="<<cnt_newrules<<endl; - -    double log_nts_size = log(nts.size()); - - -    map<WordID, int> cnt_addepsilon; //cnt_addepsilon and cont_minusepsilon to track the number of rules epsilon is added or minus for each lhs nonterminal, ideally we want these two numbers are equal -    map<WordID, int> cnt_minusepsilon;  -    cnt_addepsilon[old_rule.rule_->lhs_] = 0; -    cnt_minusepsilon[old_rule.rule_->lhs_] = 0; -    for (int j =0; j<nts.size(); j++) {   cnt_addepsilon[nts[j] ] = 0;   cnt_minusepsilon[nts[j] ] = 0;} - - -    for (int j=0; j<cnt_newrules; j++){ //each j represents a new rule -      //convert j to a vector of size ntPos.size(), each entry in the vector >=0 and <nts.size() -      int mod = nts.size(); -      vector <int> j_vector(ntPos.size(), 0); //initiate the vector to all 0 -      int j_tmp =j; -      for (int k=0; k<ntPos.size(); k++){ -	j_vector[k] = j_tmp % mod; -	j_tmp = (j_tmp - j_vector[k]) / mod; -      } -      //      cout<<"print vector j_vector"<<endl; -      //      for (int k=0; k<ntPos.size();k++) cout<<j_vector[k]<<"  "; cout<<endl; -      //now use the vector to create a new rule -      TRulePtr newrule(new aTRule()); - -      newrule -> e_   = (old_rule.rule_)->e_; -      newrule -> f_ = old_rule.rule_->f_; -      newrule->lhs_ = old_rule.rule_->lhs_; -      newrule -> arity_ = old_rule.rule_->arity_; -      newrule -> scores_ = old_rule.rule_->scores_; - -      //      cout<<"end up update score\n"; -      if (ntPos[0] == -1){ //update the lhs -	newrule->lhs_ = nts[j_vector[0]] * -1; - -	//score has to randomly add/minus a small epsilon to break the balance -	if (nts.size() >1 && ntPos.size() >1){ -	  //  cout<<"start to add/minus epsilon"<<endl; -	  if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon -	    newrule-> scores_ -= epsilon; -	  else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) )  -	    newrule-> scores_ += epsilon; -	  else{ -	    double  random = rand()/RAND_MAX;  -	    if (random > .5){ -	      newrule-> scores_ += epsilon; -	      cnt_addepsilon[newrule->lhs_]++; -	    } -	    else{ -	      newrule-> scores_ -= epsilon; -	      cnt_minusepsilon[newrule->lhs_]++; -	    } -	  } -	} - - -	for (int k=1; k<ntPos.size(); k++){//update f_ -	  //	  cout<<"ntPos[k]="<<ntPos[k]<<endl; -	  newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; //update the ntPos[k-1]-th nonterminal in f_ to the j_vector[k] NT in nts -	} -	newrule -> scores_ += (ntPos.size() -1) * log_nts_size; - - -      } -      else{ -	//score has to randomly add/minus a small epsilon to break the balance -	if ( ntPos.size() >0 && nts.size()>1){ -	  //	  cout<<"start to add/minus epsilon"<<endl; -	  if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon -	    newrule-> scores_ -= epsilon; -	  else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 )  -	    newrule-> scores_ += epsilon; -	  else{ -	    double  random = rand()/RAND_MAX;  -	    if (random > .5){ -	      newrule-> scores_ += epsilon; -	      cnt_addepsilon[newrule->lhs_]++; -	    } -	    else{ -	      newrule-> scores_ -= epsilon; -	      cnt_minusepsilon[newrule->lhs_]++; -	    } -	  } -	} - - -	for (int k=0; k<ntPos.size(); k++){ //update f_ -	  //	  cout<<"ntPos[k]="<<ntPos[k]<<endl; -	  newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; -	} -	newrule -> scores_ += ntPos.size() * log_nts_size; -      } -      this->AddRule (newrule);       -    }//add new rules for each grammar rules - -  } //iterate through all grammar rules - -} - - -void aTextGrammar::splitNonterminal(WordID wordID){ - -  //first added the splits nonterminal into the TD dictionary  -   -  string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split -   -  vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits -  for (int i =0; i< this->max_split_; i++){ -    string split_str = old_str + "+" + itos(i); -    WordID splitID = TD::Convert(split_str); -    v_splits.push_back(splitID); - -  } -   -  //  grSplitNonterminals[wordID] = v_splits; - -  //print split nonterminas of wordID -  //  v_splits = grSplitNonterminals[wordID]; -  // cout<<"print split nonterminals\n"; -  // for (int i =0; i<v_splits.size(); i++) -  //   cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - -  AddSplitNonTerminal(wordID, v_splits);   -  RemoveNonterminal(wordID); - -  //  grSplitNonterminals.erase (grSplitNonterminals.find(WordID) ); - -  if (wordID == goalID){ //add rule X-> X1; X->X2,... if X is the goal NT -    for (int i =0; i<v_splits.size(); i++){ -      TRulePtr rule (new aTRule()); -      rule ->lhs_ = goalID * -1; -      rule ->f_.push_back(v_splits[i] * -1); -      rule->e_.push_back(0); - -      rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) ); -      AddRule(rule); -    } - -  } - -} - - -void aTextGrammar::splitAllNonterminals(){ -  map<WordID, vector<TRulePtr> >::const_iterator it; -  vector<WordID> v ; // WordID >0 -  for (it = lhs_rules_.begin(); it != lhs_rules_.end(); it++) //iterate through all nts -    if (it->first != goalID || lhs_rules_.size() ==1) -      v.push_back(it->first); -   -  for (int i=0; i< v.size(); i++) -    splitNonterminal(v[i]); -} - - -void aTextGrammar::PrintAllRules(const string & filename) const{ - -   -  cerr<<"print grammar to "<<filename<<endl; - -  ofstream outfile(filename.c_str()); -  if (!outfile.good()) { -    cerr << "error opening output file " << filename << endl; -    exit(1); -  } - -  map<WordID, vector<TRulePtr > >::const_iterator it; -  for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ - -    vector<TRulePtr> v = it-> second; -    for (int i =0; i< v.size(); i++){ -      outfile<<v[i]->AsString()<<"\t"<<endl; -    } -  } -} - - -void aTextGrammar::ResetScore(){ - -  map<WordID, vector<TRulePtr > >::const_iterator it; -  for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ -    vector<TRulePtr> v = it-> second; -    for (int i =0; i< v.size(); i++){ -      //      cerr<<"Reset score of Rule "<<v[i]->AsString()<<endl; -      boost::static_pointer_cast<aTRule>(v[i])->ResetScore(alpha_ /v.size()); -    } -    lhs_rules_[it->first] = v; -    sum_probs_[it->first] = alpha_; -  } - -} - -void aTextGrammar::UpdateScore(){ - -  map<WordID, vector<TRulePtr > >::const_iterator it; -  for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){ -    vector<TRulePtr> v = it-> second; -    for (int i =0; i< v.size(); i++){ -      boost::static_pointer_cast<aTRule>(v[i])->UpdateScore(sum_probs_[it->first] ); -    } - -    //    cerr<<"sum_probs_[it->first]  ="<<sum_probs_[it->first] <<endl; -    sum_probs_[it->first] = alpha_; -  } - -} - - -void aTextGrammar::UpdateHgProsteriorProb(Hypergraph & hg){ -  std::vector<prob_t> posts ; -   -  prob_t goal_score = hg.ComputeEdgePosteriors(1, &posts); -  for (int i =0; i<posts.size(); i++){ - -    //cout<<posts[i]<<endl; -    Hypergraph::Edge& e = hg.edges_[i]; -    string goalstr("Goal"); -    string str_lhs = TD::Convert(e.rule_->lhs_ * -1); - -    if (str_lhs.find(goalstr) != string::npos) -      continue; - -    //    cerr<<e.rule_->AsString()<<endl; -    //    cerr<<e.rule_->parent_rule_->AsString()<<endl; - -    boost::static_pointer_cast<aTRule>(e.rule_->parent_rule_)->AddProb(posts[i] / goal_score); - //    cerr<<"add count for rule\n"; -//     cerr<<"posts[i]="<<posts[i]<<"  goal_score="<<goal_score<<endl; -//     cerr<<"posts[i] /goal_score="<<(posts[i] /goal_score)<<endl; -    sum_probs_[e.rule_->parent_rule_->lhs_* -1 ] += posts[i] /goal_score; - -  } -   - -} - - -void aTextGrammar::PrintNonterminalRules(WordID nt) const{ -  vector< NTRule > v;    -  map<WordID, vector<NTRule> >::const_iterator mit= nt_rules_.find(nt); -  if (mit == nt_rules_.end()) -    return; - -  v = mit->second; - -  for (vector<NTRule>::const_iterator it = v.begin(); it != v.end(); it++) -    cout<<it->rule_->AsString()<<endl; -} - -static void AddRuleHelper(const TRulePtr& new_rule, void* extra) { -  aTRule  *p = new aTRule(new_rule);  -   -  static_cast<aTextGrammar*>(extra)->AddRule(TRulePtr(p)); -} - -void aTextGrammar::ReadFromFile(const string& filename) { -  ReadFile in(filename); -  RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { -  return (max_span_ >= distance); -} - diff --git a/gi/scfg/abc/agrammar.h b/gi/scfg/abc/agrammar.h deleted file mode 100644 index 0910aae6..00000000 --- a/gi/scfg/abc/agrammar.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef AGRAMMAR_H_ -#define AGRAMMAR_H_ - -#include "grammar.h" -#include "hg.h" - - -using namespace std; - -class aTRule: public TRule{ - public: - aTRule() : TRule(){ResetScore(0.00000001); } -  aTRule(TRulePtr rule_); - -  void ResetScore(double initscore){//cerr<<"Reset Score "<<this->AsString()<<endl; -    sum_scores_.set_value(FD::Convert("Prob"), initscore);} -  void AddProb(double p ){ -    //    cerr<<"in AddProb p="<<p<<endl; -    //    cerr<<"prob sumscores ="<<sum_scores_[FD::Convert("Prob")]<<endl; -    sum_scores_.add_value(FD::Convert("Prob"), p); -    //    cerr<<"after AddProb\n"; -  } - -  void UpdateScore(double sumprob){ -    double minuslogp = 0 - log( sum_scores_.value(FD::Convert("Prob")) /sumprob); -    if (sumprob<  sum_scores_.value(FD::Convert("Prob"))){ -      cerr<<"UpdateScore sumprob="<<sumprob<< "  sum_scores_.value(FD::Convert(\"Prob\"))="<< sum_scores_.value(FD::Convert("Prob"))<< this->AsString()<<endl; -      exit(1); -    } -    this->scores_.set_value(FD::Convert("MinusLogP"), minuslogp); - -  } - private: -  SparseVector<double> sum_scores_; -}; - - -class aTGImpl; -struct NTRule{ - -  NTRule(){}; -  NTRule(const TRulePtr & rule, WordID nt){ -    nt_ = nt; -    rule_ = rule; -     -    if (rule->lhs_ * -1 == nt)  -      ntPos_.push_back(-1); -     -    for (int i=0; i< rule->f().size(); i++) -      if (rule->f().at(i) * -1 == nt) -	ntPos_.push_back(i); - - -  } -   -  TRulePtr rule_; -  WordID nt_; //the labelID of the nt (nt_>0); -   -  vector<int> ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_() -  //i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP - -}; - - -struct aTextGrammar : public Grammar { -  aTextGrammar(); -  aTextGrammar(const std::string& file); -  void SetMaxSpan(int m) { max_span_ = m; } -   -  virtual const GrammarIter* GetRoot() const; -  void AddRule(const TRulePtr& rule); -  void ReadFromFile(const std::string& filename); -  virtual bool HasRuleForSpan(int i, int j, int distance) const; -  const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; - -  void AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts); -  void setMaxSplit(int max_split); -  void splitNonterminal(WordID wordID); - - -  void splitAllNonterminals(); - -  void PrintAllRules(const string & filename) const; -  void PrintNonterminalRules(WordID nt) const; -  void SetGoalNT(const string & goal_str); - -  void ResetScore(); - -  void UpdateScore(); - -  void UpdateHgProsteriorProb(Hypergraph & hg); - -  void set_alpha(double alpha){alpha_ = alpha;} - private: - -  void RemoveRule(const TRulePtr & rule); -  void RemoveNonterminal(WordID wordID); - -  int max_span_; -  int max_split_; -  boost::shared_ptr<aTGImpl> pimpl_; - -  map <WordID, vector<TRulePtr> > lhs_rules_;// WordID >0 -  map <WordID, vector<NTRule> > nt_rules_;  - -  map <WordID, double> sum_probs_; -  map <WordID, double> cnt_rules; - -  double alpha_; - -  //  map<WordID, vector<WordID> > grSplitNonterminals; -  WordID goalID; -}; - - -#endif diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc deleted file mode 100644 index 33d70dfc..00000000 --- a/gi/scfg/abc/old_agrammar.cc +++ /dev/null @@ -1,383 +0,0 @@ -#include "agrammar.h" -#include "Util.h" - -#include <algorithm> -#include <utility> -#include <map> - -#include "rule_lexer.h" -#include "filelib.h" -#include "tdict.h" -#include <iostream> -#include <fstream> - -map<WordID, vector<WordID> > grSplitNonterminals; -//const vector<TRulePtr> Grammar::NO_RULES; - - -// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){ -//   vector<TRulePtr> vRules; //outputs - -//   vector<WordID> f = rule->f(); -//   vector<vector<WordID> > newfvector; -//   for (int i =0; i< f.size(); i++){ -//     if (f[i] == wordID){ -//       newfvector.push_back(v); -//     } -//     else -//       newfvector.push_back(vector<WordID> (1, f[i])); -//   } -   -//   //now creates new rules; - - -//   return vRules; -// } - - -struct aTextRuleBin : public RuleBin { -  int GetNumRules() const { -    return rules_.size(); -  } -  TRulePtr GetIthRule(int i) const { -    return rules_[i]; -  } -  void AddRule(TRulePtr t) { -    rules_.push_back(t); -  } -  int Arity() const { -    return rules_.front()->Arity(); -  } -  void Dump() const { -    for (int i = 0; i < rules_.size(); ++i) -      cerr << rules_[i]->AsString() << endl; -  } - - -  vector<TRulePtr> getRules(){ return rules_;} - - -  void substituteF(vector<WordID> & f_path,   map<WordID, vector<WordID> > &   grSplitNonterminals){ -    //this substituteF method is different with substituteF procedure found in cdec code; -  // -  //aTextRuleBin has a collection of rules with the same f() on the rhs,  -  //substituteF() replaces the f_ of all the rules with f_path vector,  -  //the grSplitNonterminals input to split the lhs_ nonterminals of the rules  incase the lhs_ nonterminal found in grSplitNonterminals - -    vector <TRulePtr> newrules; -    for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){ -      assert(f_path.size() == (*it)->f_.size()); -       -      if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){ -	(*it)->f_ = f_path; -      } -      else{ // split the lhs NT,  -	vector<WordID> new_lhs = grSplitNonterminals[ (*it)->lhs_ ]; -	for (vector<WordID>::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){ -	  TRulePtr newrule; -	  newrule -> e_ = (*it)->e_; -	  newrule -> f_ = (*it)->f_; -	  newrule->lhs_ = *vit; -	  newrule -> scores_ = (*it)->scores_; -	  newrule -> arity_ = (*it)->arity_; -	  newrules.push_back (newrule); -	} -	rules_.erase(it); -      } -    } - -    //now add back newrules(output of splitting lhs_) to rules_ -    rules_.insert(newrules.begin(),newrules.begin(), newrules.end()); -  } -   -private: -  vector<TRulePtr> rules_; -}; - - - -struct aTextGrammarNode : public GrammarIter { -  aTextGrammarNode() : rb_(NULL) {} - -  aTextGrammarNode(const aTextGrammarNode  & a){ -    nonterminals_ = a.nonterminals_; -    tree_ = a.tree_; -    rb_  = new  aTextRuleBin(); //cp constructor: don't cp the set of rules over  -  } - -  ~aTextGrammarNode() { -    delete rb_; -  } -  const GrammarIter* Extend(int symbol) const { -    map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol); -    if (i == tree_.end()) return NULL; -    return &i->second; -  } - -  const RuleBin* GetRules() const { -    if (rb_) { -      //rb_->Dump(); -    } -    return rb_; -  } -  -  void DFS(); - -  void visit (); //todo: make this as a function pointer - -  vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node; -  set<WordID> nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID  is the label in the dict; i.e WordID>0  -  map<WordID, aTextGrammarNode> tree_; -  aTextRuleBin* rb_; -   -  void print_path(){ //for debug only -    cout<<"path="<<endl; -    for (int i =0; i< path_.size(); i++) -      cout<<path_[i]<<"  "; -    cout<<endl; -  } -}; - -void aTextGrammarNode::DFS(){ //because the grammar is a tree without circle, DFS does not require to color the nodes - -  visit(); -   -  for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin(); it != tree_.end(); it++){ -    (it->second).DFS(); -  } -} - - -void aTextGrammarNode::visit( ){   - -  cout<<"start visit()"<<endl; -   -  cout<<"got grSplitNonterminals"<<endl; -//   if (grSplitNonterminals.find(*it) != grSplitNonterminals.end()){ //split this *it nonterminal -//     vector<WordID> vsplits = grSplitNonterminals[*it]; //split *it into vsplits - -  //iterate through next terminals/nonterminals in tree_ -  vector<WordID> tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_ - -  for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){ -    cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl; - -    map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 ); - -    if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split -      cout<<"not split\n"; -      tree_[it->first ].path_  = path_; -      tree_[it->first ].path_.push_back(it->first); -      cout<<"in visit() tree_[it->first ].path_= "; -      tree_[it->first ].print_path(); -      continue; -    } - - -    cout<<"tmp2"; -    vector<WordID> vsplits = grSplitNonterminals[it->first * -1]; -    //    vector<WordID> vsplits = git->second; -    cout<<"tmp3"; -    //    vector<WordID> vsplits = agrammar_ ->splitNonterminals_[it->first * -1]; -    cout <<"got vsplits"<<endl; -    for (int i =0 ; i<vsplits.size(); i++){ -      //  nonterminals_.insert(vsplits[i]); //add vsplits[i] into nonterminals_ of the current TextGrammarNode -      tree_[vsplits[i] * -1] = aTextGrammarNode(tree_[it->first]); //cp the subtree to new nonterminal -      tree_[vsplits[i] * -1].path_  = path_; //update the path if the subtrees -      tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1); -      tree_[vsplits[i] * -1].print_path(); -    } - -    //remove the old node: -    tobe_removedNTs.push_back(it->first);  -     -  } - -  for (int i =0; i<tobe_removedNTs.size(); i++) -    tree_.erase(tobe_removedNTs[i]); -   -  if (tree_.size() ==0){ //the last (terminal/nonterminal -    cout<<"inside visit(): the last terminal/nonterminal"<<endl; -    rb_->substituteF(path_, grSplitNonterminals); -     -  } -  cout<<"visit() end"<<endl; -} - -struct aTGImpl { -  aTextGrammarNode root_; -}; - -aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {} -aTextGrammar::aTextGrammar(const std::string&  file) :  -  max_span_(10), -  pimpl_(new aTGImpl) { -  ReadFromFile(file); -} - - -const GrammarIter* aTextGrammar::GetRoot() const { -  return &pimpl_->root_; -} - - -void aTextGrammar::addNonterminal(WordID wordID){  -  //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map<WordID, int>) nonterminals_ of grammar -  //if the input parameter wordID<0 then do nothing - -  if (wordID <0){ //it is a nonterminal - -    map<WordID, int>::iterator it = nonterminals_.find(wordID * -1); -    if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals) -        nonterminals_[wordID * -1] = 1; -  } -} - - - -void aTextGrammar::AddRule(const TRulePtr& rule) { -  //add the LHS nonterminal to nonterminals_ map - -  this->addNonterminal(rule->lhs_); - -  if (rule->IsUnary()) { -    rhs2unaries_[rule->f().front()].push_back(rule); -    unaries_.push_back(rule); -    if (rule->f().front() <0) -      //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a  nonterminal then multiply by -1) -      this->addNonterminal(rule->f().front()); -     -     -  } else { -    aTextGrammarNode* cur = &pimpl_->root_; -    for (int i = 0; i < rule->f_.size(); ++i){ -      if (rule->f_[i] <0){  -	cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set -	this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the  grammar's list of nonterminals -      } -      cur = &cur->tree_[rule->f_[i]]; - -    } -    if (cur->rb_ == NULL) -      cur->rb_ = new aTextRuleBin; -    cur->rb_->AddRule(rule); -     -  } -} - -static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) { -  static_cast<aTextGrammar*>(extra)->AddRule(new_rule); -} - - -void aTextGrammar::ReadFromFile(const string& filename) { -  ReadFile in(filename); -  RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this); -} - -bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const { -  return (max_span_ >= distance); -} - - -////Linh added - -void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;} - - -void aTextGrammar::printAllNonterminals() const{ -  for (map<WordID, int>::const_iterator it =nonterminals_.begin(); -       it != nonterminals_.end(); it++){ -    if (it->second >0){ -      cout <<it->first<<"\t"<<TD::Convert(it->first)<<endl; -    } -  } -   -} -  - -void aTextGrammar::splitNonterminal(WordID wordID){ - -  //first added the splits nonterminal into the TD dictionary  -   -  string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split -   -  vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits -  for (int i =0; i< this->max_split_; i++){ -    string split_str = old_str + "+" + itos(i); -    WordID splitID = TD::Convert(split_str); -    v_splits.push_back(splitID); -    nonterminals_[splitID] = 1; -  } -   -  grSplitNonterminals[wordID] = v_splits; -  //set wordID to be an inactive nonterminal -  nonterminals_[wordID] = 0; - -  //print split nonterminas of wordID -  v_splits = grSplitNonterminals[wordID]; -  cout<<"print split nonterminals\n"; -  for (int i =0; i<v_splits.size(); i++) -    cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl; - - -  //now update in grammar rules and gramar tree: -  vector<TRulePtr> newrules; -  //first unary rules: -  //iterate through unary rules -  for (int i =0; i < unaries_.size(); i++){ -    TRulePtr rule = unaries_[i]; -    WordID lhs = rule.lhs_; -    if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal -	&& grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too -	){  -      vector<WordID> rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the  rhs nonterminal into the list of nonterminals in 'rhs_nonterminals' -      vector<WordID> lhs_nonterminals = grSplitNonterminals[lhs]; //split the  rhs nonterminal into the list of nonterminals in 'lhs_nonterminals'       -      for (int k =0; k <rhs_nonterminals.size(); k++) -	for (int j =0; j <lhs_nonterminals.size(); j++){ -	  TRulePtr newrule; -	  newrule -> e_ = rule->e_; -	  newrule -> f_ = rhs_nonterminals[k]->f_; -	  newrule->lhs_ = lhs_nonterminals[j]->lhs_; -	  newrule -> scores_ = rule->scores_; -	  newrule -> arity_ = (*it)->arity_; -	  newrules.push_back (newrule); -	   -	  //update  -	} -    } -    else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal -       - -    } -  } -   -  // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){ - -  // }   -  // if (rule->IsUnary()) { -  //   rhs2unaries_[rule->f().front()].push_back(rule); -  //   unaries_.push_back(rule); -  //   if (rule->f().front() <0) -  //     //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a  nonterminal then multiply by -1) -  //     this->addNonterminal(rule->f().front()); -     - -  pimpl_->root_.DFS(); -   -} - - -// void aTextGrammar::splitNonterminal0(WordID wordID){ - -//   TextGrammarNode* cur = &pimpl_->root_; -//   for (int i = 0; i < rule->f_.size(); ++i) -//     cur = &cur->tree_[rule->f_[i]]; -  -// } - -void aTextGrammar::splitAllNonterminals(){ - - -} - diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h deleted file mode 100644 index d68c2548..00000000 --- a/gi/scfg/abc/old_agrammar.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _AGRAMMAR_H_ -#define _AGRAMMAR_H_ - -#include "grammar.h" - -using namespace std; - -class aTGImpl; - -struct  aTextGrammar : public Grammar { -  aTextGrammar(); -  aTextGrammar(const std::string& file); -  void SetMaxSpan(int m) { max_span_ = m; } -   -  virtual const GrammarIter* GetRoot() const; -  void AddRule(const TRulePtr& rule); -  void ReadFromFile(const std::string& filename); -  virtual bool HasRuleForSpan(int i, int j, int distance) const; -  const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; -   -  void setMaxSplit(int max_split); - -  void printAllNonterminals() const; -  void addNonterminal(WordID wordID); - -  void splitAllNonterminals(); -  void splitNonterminal(WordID wordID); - -  //  inline  map<WordID, vector<WordID> > & getSplitNonterminals(){return splitNonterminals_;} -  //  map<WordID, vector<WordID> > splitNonterminals_; -  private: -  int max_span_; -  boost::shared_ptr<aTGImpl> pimpl_; -  int max_split_; -   -  map<WordID, int> nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar - - - -}; - - - - -#endif diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp deleted file mode 100644 index 1e59fb4a..00000000 --- a/gi/scfg/abc/scfg.cpp +++ /dev/null @@ -1,277 +0,0 @@ -#include <iostream> -#include <fstream> - -#include <boost/shared_ptr.hpp> -#include <boost/pointer_cast.hpp> -#include "lattice.h" -#include "tdict.h" -#include "agrammar.h" -#include "bottom_up_parser.h" -#include "hg.h" -#include "hg_intersect.h" -#include "../utils/ParamsArray.h" - - -using namespace std; - -vector<string> src_corpus; -vector<string> tgt_corpus; - -bool openParallelCorpora(string & input_filename){ -  ifstream input_file; - -  input_file.open(input_filename.c_str()); -  if (!input_file) { -    cerr << "Cannot open input file " << input_filename << ". Exiting..." << endl; -    return false; -  }  - -  int line =0; -  while (!input_file.eof()) { -    // get a line of source language data                                                                                                                                           -    //    cerr<<"new line "<<ctr<<endl;                                                                                                                                            -    string str; - -    getline(input_file, str); -    line++; -    if (str.length()==0){ -      cerr<<" sentence number "<<line<<" is empty, skip the sentence\n"; -      continue; -    } -    string delimiters("|||"); - -    vector<string> v = tokenize(str, delimiters); - -    if ( (v.size() != 2)  and (v.size() != 3) )  { -      cerr<<str<<endl; -      cerr<<" source or target sentence is not found in sentence number "<<line<<" , skip the sentence\n"; -      continue; -    } - -    src_corpus.push_back(v[0]); -    tgt_corpus.push_back(v[1]); -  } -  return true; -} - - -typedef aTextGrammar aGrammar; -aGrammar * load_grammar(string & grammar_filename){ -  cerr<<"start_load_grammar "<<grammar_filename<<endl; - -  aGrammar * test = new aGrammar(grammar_filename); - -  return test; -} - -Lattice convertSentenceToLattice(const string & str){ - -  std::vector<WordID> vID; -  TD::ConvertSentence(str , &vID); -  Lattice lsentence; -  lsentence.resize(vID.size()); - -  for (int i=0; i<vID.size(); i++){ - -    lsentence[i].push_back( LatticeArc(vID[i], 0.0, 1) );   -  } - -  //  if(!lsentence.IsSentence()) -  //  cout<<"not a sentence"<<endl; - -  return lsentence; - -} - -bool parseSentencePair(const string & goal_sym, const string & src, const string & tgt,  GrammarPtr & g, Hypergraph &hg){ - - -  //  cout<<"  Start parse the sentence pairs\n"<<endl; -  Lattice lsource = convertSentenceToLattice(src); -   -  //parse the source sentence by the grammar - -  vector<GrammarPtr> grammars(1, g); - -  ExhaustiveBottomUpParser parser = ExhaustiveBottomUpParser(goal_sym, grammars); -   -  if (!parser.Parse(lsource, &hg)){ - -     cerr<<"source sentence is not parsed by the grammar!"<<endl; -     return false; -   } - -  //intersect the hg with the target sentence -  Lattice ltarget = convertSentenceToLattice(tgt); - -  //forest.PrintGraphviz(); -  if (!HG::Intersect(ltarget, & hg)) return false; - -  SparseVector<double> reweight; -   -  reweight.set_value(FD::Convert("MinusLogP"), -1 ); -  hg.Reweight(reweight); - -  return true; -   -} - - - - -int main(int argc, char** argv){ - -  ParamsArray params(argc, argv); -  params.setDescription("scfg models"); - -  params.addConstraint("grammar_file", "grammar file (default ./grammar.pr )", true); //  optional                                - -  params.addConstraint("input_file", "parallel input file (default ./parallel_corpora)", true); //optional                                          - -  params.addConstraint("output_file", "grammar output file (default ./grammar_output)", true); //optional                                          - -  params.addConstraint("goal_symbol", "top nonterminal symbol (default: X)", true); //optional                                          - -  params.addConstraint("split", "split one nonterminal into 'split' nonterminals (default: 2)", true); //optional                                          - -  params.addConstraint("prob_iters", "number of iterations (default: 10)", true); //optional                                          - -  params.addConstraint("split_iters", "number of splitting iterations (default: 3)", true); //optional                                          - -  params.addConstraint("alpha", "alpha (default: 0.1)", true); //optional                                          - -  if (!params.runConstraints("scfg")) { -    return 0; -  } -  cerr<<"get parametters\n\n\n"; - - -  string grammar_file = params.asString("grammar_file", "./grammar.pr"); - -  string input_file = params.asString("input_file", "parallel_corpora"); - -  string output_file = params.asString("output_file", "grammar_output"); - -  string goal_sym = params.asString("goal_symbol", "X"); - -  int max_split = atoi(params.asString("split", "2").c_str()); -   -  int prob_iters = atoi(params.asString("prob_iters", "2").c_str()); -  int split_iters = atoi(params.asString("split_iters", "1").c_str()); -  double alpha = atof(params.asString("alpha", ".001").c_str()); - -  ///// -  cerr<<"grammar_file ="<<grammar_file<<endl; -  cerr<<"input_file ="<< input_file<<endl; -  cerr<<"output_file ="<< output_file<<endl; -  cerr<<"goal_sym ="<< goal_sym<<endl; -  cerr<<"max_split ="<< max_split<<endl; -  cerr<<"prob_iters ="<< prob_iters<<endl; -  cerr<<"split_iters ="<< split_iters<<endl; -  cerr<<"alpha ="<< alpha<<endl; -  ////////////////////////// - -  cerr<<"\n\nLoad parallel corpus...\n"; -  if (! openParallelCorpora(input_file)) -    exit(1); - -  cerr<<"Load grammar file ...\n"; -  aGrammar * agrammar = load_grammar(grammar_file); -  agrammar->SetGoalNT(goal_sym); -  agrammar->setMaxSplit(max_split); -  agrammar->set_alpha(alpha); - -  srand(123); - -  GrammarPtr g( agrammar); -  Hypergraph hg; - -  int data_size = src_corpus.size(); -  int cnt_unparsed =0; -  for (int i =0; i <split_iters; i++){ -     -    cerr<<"Split Nonterminals, iteration "<<(i+1)<<endl; -    agrammar->PrintAllRules(output_file+".s" + itos(i+1)); -    agrammar->splitAllNonterminals(); - -    //vector<string> src_corpus; -    //vector<string> tgt_corpus; -     -    for (int j=0; j<prob_iters; j++){ -      cerr<<"reset grammar score\n"; -      agrammar->ResetScore(); -      //      cerr<<"done reset grammar score\n"; -      for (int k=0; k <data_size; k++){ -	string src = src_corpus[k]; -   -	string tgt = tgt_corpus[k]; -	cerr <<"parse sentence pair: "<<src<<"  |||  "<<tgt<<endl; - -	if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ -	  cerr<<"target sentence is not parsed by the grammar!\n"; -	  //return 1; -	  cnt_unparsed++; -	  continue; - -	}  - -	cerr<<"update edge posterior prob"<<endl; -	boost::static_pointer_cast<aGrammar>(g)->UpdateHgProsteriorProb(hg); -	hg.clear(); -	if (k%1000 ==0 ) cerr<<"sentences "<<k<<endl; -      } -      cerr<<"cnt_unparased="<<cnt_unparsed<<endl; -      boost::static_pointer_cast<aGrammar>(g)->UpdateScore(); -    } -    boost::static_pointer_cast<aGrammar>(g)->PrintAllRules(output_file+".e" + itos(i+1)); -  } - - - - -   - -  - - -  // // agrammar->ResetScore(); -  // // agrammar->UpdateScore(); -  // if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ -  //   cerr<<"target sentence is not parsed by the grammar!\n"; -  //   return 1; - -  //  } -  // //   hg.PrintGraphviz(); -  //  //hg.clear(); - -  // agrammar->PrintAllRules(); -  // /*split grammar*/ -  // cout<<"split NTs\n";  -  // cerr<<"first of all write all nonterminals"<<endl; -  // // agrammar->printAllNonterminals(); -  // cout<<"after split nonterminal"<<endl; -  // agrammar->PrintAllRules(); -  // Hypergraph hg1; -  // if (! parseSentencePair(goal_sym, src, tgt,  g, hg1) ){ -  //   cerr<<"target sentence is not parsed by the grammar!\n"; -  //   return 1; - -  // } - -  // hg1.PrintGraphviz(); -   - -  // agrammar->splitNonterminal(15); -  // cout<<"after split nonterminal"<<TD::Convert(15)<<endl; -  // agrammar->PrintAllRules(); - -   -  /*load training corpus*/ - - -  /*for each sentence pair in training corpus*/ -  -  //  forest.PrintGraphviz(); -  /*calculate expected count*/ -   -} diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp deleted file mode 100644 index 967a601d..00000000 --- a/gi/scfg/abc/tmp.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include <iostream> -#include <set> -#include <vector> -using namespace std; - -int x = 5; - -class A{A(){x++;}}; -//  { -//   int a_; - -// }; - -class B: public A{ - -  int b_; -}; - -int main(){ - -  cout<<"Hello World"; -  set<int> s; - -  s.insert(1); -  s.insert(2); - -  x++; -  cout<<"x="<<x<<endl; - -  vector<int> t; -  t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4); -  for(vector<int>::iterator it = t.begin(); it != t.end(); it++){ -    if (*it ==2) t.erase(it); -    cout <<*it<<endl; -  } -} | 
