summaryrefslogtreecommitdiff
path: root/gi
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-05-31 13:57:24 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-05-31 13:57:24 +0200
commit6f6601111710aa67eee5169e5b7d89102cc33bb8 (patch)
tree0872544abd6bc76162f3f80eb3920999afbf2c34 /gi
parent8cee8b565a9c56a7732365e9563f52ff3c4ff7fd (diff)
parent090a64e73f94a6a35e5364a9d416dcf75c0a2938 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'gi')
-rw-r--r--gi/scfg/abc/Release/IConv.d3
-rw-r--r--gi/scfg/abc/Release/Util.d8
-rw-r--r--gi/scfg/abc/Release/agrammar.d205
-rwxr-xr-xgi/scfg/abc/Release/dict_testbin1485797 -> 0 bytes
-rw-r--r--gi/scfg/abc/Release/grammar13
-rw-r--r--gi/scfg/abc/Release/grammar.pr13
-rw-r--r--gi/scfg/abc/Release/makefile66
-rw-r--r--gi/scfg/abc/Release/process_grammar.pl36
-rwxr-xr-xgi/scfg/abc/Release/scfgbin4438644 -> 0 bytes
-rw-r--r--gi/scfg/abc/Release/scfg.d213
-rw-r--r--gi/scfg/abc/Release/sources.mk27
-rw-r--r--gi/scfg/abc/Release/subdir.mk59
-rw-r--r--gi/scfg/abc/Release/tmp.grammar2
l---------gi/scfg/abc/Release/toy-grammar1
-rwxr-xr-xgi/scfg/abc/a.outbin22639 -> 0 bytes
-rw-r--r--gi/scfg/abc/agrammar.cc489
-rw-r--r--gi/scfg/abc/agrammar.h116
-rw-r--r--gi/scfg/abc/old_agrammar.cc383
-rw-r--r--gi/scfg/abc/old_agrammar.h45
-rw-r--r--gi/scfg/abc/scfg.cpp277
-rw-r--r--gi/scfg/abc/tmp.cpp36
21 files changed, 0 insertions, 1992 deletions
diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d
deleted file mode 100644
index 082cb15b..00000000
--- a/gi/scfg/abc/Release/IConv.d
+++ /dev/null
@@ -1,3 +0,0 @@
-IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp
-
-../../utils/IConv.hpp:
diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d
deleted file mode 100644
index 586d4d60..00000000
--- a/gi/scfg/abc/Release/Util.d
+++ /dev/null
@@ -1,8 +0,0 @@
-Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \
- ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
-
-../../utils/Util.h:
-
-../../utils/UtfConverter.h:
-
-../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d
deleted file mode 100644
index 553752ca..00000000
--- a/gi/scfg/abc/Release/agrammar.d
+++ /dev/null
@@ -1,205 +0,0 @@
-agrammar.d agrammar.o: ../agrammar.cc \
- /home/tnguyen/ws10smt/decoder/rule_lexer.h \
- /home/tnguyen/ws10smt/decoder/trule.h \
- /export/ws10smt/software/include/boost/shared_ptr.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \
- /export/ws10smt/software/include/boost/config.hpp \
- /export/ws10smt/software/include/boost/config/user.hpp \
- /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \
- /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \
- /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \
- /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \
- /export/ws10smt/software/include/boost/config/select_platform_config.hpp \
- /export/ws10smt/software/include/boost/config/platform/linux.hpp \
- /export/ws10smt/software/include/boost/config/posix_features.hpp \
- /export/ws10smt/software/include/boost/config/suffix.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \
- /export/ws10smt/software/include/boost/assert.hpp \
- /export/ws10smt/software/include/boost/checked_delete.hpp \
- /export/ws10smt/software/include/boost/throw_exception.hpp \
- /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \
- /export/ws10smt/software/include/boost/detail/workaround.hpp \
- /export/ws10smt/software/include/boost/exception/exception.hpp \
- /export/ws10smt/software/include/boost/current_function.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \
- /export/ws10smt/software/include/boost/memory_order.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \
- /home/tnguyen/ws10smt/decoder/sparse_vector.h \
- /home/tnguyen/ws10smt/decoder/fdict.h \
- /home/tnguyen/ws10smt/decoder/dict.h \
- /export/ws10smt/software/include/boost/functional/hash.hpp \
- /export/ws10smt/software/include/boost/functional/hash/hash.hpp \
- /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \
- /export/ws10smt/software/include/boost/limits.hpp \
- /export/ws10smt/software/include/boost/integer/static_log2.hpp \
- /export/ws10smt/software/include/boost/integer_fwd.hpp \
- /export/ws10smt/software/include/boost/cstdint.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \
- /export/ws10smt/software/include/boost/detail/container_fwd.hpp \
- /home/tnguyen/ws10smt/decoder/wordid.h \
- /home/tnguyen/ws10smt/decoder/filelib.h \
- /home/tnguyen/ws10smt/decoder/gzstream.h \
- /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \
- /home/tnguyen/ws10smt/decoder/grammar.h \
- /home/tnguyen/ws10smt/decoder/lattice.h \
- /home/tnguyen/ws10smt/decoder/array2d.h \
- /home/tnguyen/ws10smt/decoder/hg.h \
- /home/tnguyen/ws10smt/decoder/small_vector.h \
- /home/tnguyen/ws10smt/decoder/prob.h \
- /home/tnguyen/ws10smt/decoder/logval.h ../../utils/Util.h \
- ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
-
-/home/tnguyen/ws10smt/decoder/rule_lexer.h:
-
-/home/tnguyen/ws10smt/decoder/trule.h:
-
-/export/ws10smt/software/include/boost/shared_ptr.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp:
-
-/export/ws10smt/software/include/boost/config.hpp:
-
-/export/ws10smt/software/include/boost/config/user.hpp:
-
-/export/ws10smt/software/include/boost/config/select_compiler_config.hpp:
-
-/export/ws10smt/software/include/boost/config/compiler/gcc.hpp:
-
-/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp:
-
-/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp:
-
-/export/ws10smt/software/include/boost/config/select_platform_config.hpp:
-
-/export/ws10smt/software/include/boost/config/platform/linux.hpp:
-
-/export/ws10smt/software/include/boost/config/posix_features.hpp:
-
-/export/ws10smt/software/include/boost/config/suffix.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp:
-
-/export/ws10smt/software/include/boost/assert.hpp:
-
-/export/ws10smt/software/include/boost/checked_delete.hpp:
-
-/export/ws10smt/software/include/boost/throw_exception.hpp:
-
-/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp:
-
-/export/ws10smt/software/include/boost/detail/workaround.hpp:
-
-/export/ws10smt/software/include/boost/exception/exception.hpp:
-
-/export/ws10smt/software/include/boost/current_function.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
-
-/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp:
-
-/export/ws10smt/software/include/boost/memory_order.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp:
-
-/home/tnguyen/ws10smt/decoder/sparse_vector.h:
-
-/home/tnguyen/ws10smt/decoder/fdict.h:
-
-/home/tnguyen/ws10smt/decoder/dict.h:
-
-/export/ws10smt/software/include/boost/functional/hash.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/hash.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp:
-
-/export/ws10smt/software/include/boost/limits.hpp:
-
-/export/ws10smt/software/include/boost/integer/static_log2.hpp:
-
-/export/ws10smt/software/include/boost/integer_fwd.hpp:
-
-/export/ws10smt/software/include/boost/cstdint.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/extensions.hpp:
-
-/export/ws10smt/software/include/boost/detail/container_fwd.hpp:
-
-/home/tnguyen/ws10smt/decoder/wordid.h:
-
-/home/tnguyen/ws10smt/decoder/filelib.h:
-
-/home/tnguyen/ws10smt/decoder/gzstream.h:
-
-/home/tnguyen/ws10smt/decoder/tdict.h:
-
-../agrammar.h:
-
-/home/tnguyen/ws10smt/decoder/grammar.h:
-
-/home/tnguyen/ws10smt/decoder/lattice.h:
-
-/home/tnguyen/ws10smt/decoder/array2d.h:
-
-/home/tnguyen/ws10smt/decoder/hg.h:
-
-/home/tnguyen/ws10smt/decoder/small_vector.h:
-
-/home/tnguyen/ws10smt/decoder/prob.h:
-
-/home/tnguyen/ws10smt/decoder/logval.h:
-
-../../utils/Util.h:
-
-../../utils/UtfConverter.h:
-
-../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_test
deleted file mode 100755
index 1ba94218..00000000
--- a/gi/scfg/abc/Release/dict_test
+++ /dev/null
Binary files differ
diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar
deleted file mode 100644
index 75fac3a0..00000000
--- a/gi/scfg/abc/Release/grammar
+++ /dev/null
@@ -1,13 +0,0 @@
-[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
-[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
-[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr
deleted file mode 100644
index e4e327cf..00000000
--- a/gi/scfg/abc/Release/grammar.pr
+++ /dev/null
@@ -1,13 +0,0 @@
-[X] ||| . ||| . ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] anciano . ||| [1] old man . ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154
-[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154
-[X] ||| el ||| the ||| MinusLogP=2.56494935746154
-[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154
-[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154
diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile
deleted file mode 100644
index 25949e74..00000000
--- a/gi/scfg/abc/Release/makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-################################################################################
-# Automatically-generated file. Do not edit!
-################################################################################
-
-#-include ../makefile.init
-
-RM := rm -rf
-
-# All of the sources participating in the build are defined here
--include sources.mk
--include subdir.mk
--include objects.mk
-
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(strip $(C++_DEPS)),)
--include $(C++_DEPS)
-endif
-ifneq ($(strip $(CC_DEPS)),)
--include $(CC_DEPS)
-endif
-ifneq ($(strip $(C_DEPS)),)
--include $(C_DEPS)
-endif
-ifneq ($(strip $(CPP_DEPS)),)
--include $(CPP_DEPS)
-endif
-ifneq ($(strip $(CXX_DEPS)),)
--include $(CXX_DEPS)
-endif
-ifneq ($(strip $(C_UPPER_DEPS)),)
--include $(C_UPPER_DEPS)
-endif
-endif
-
-#-include ../makefile.defs
-
-# Add inputs and outputs from these tool invocations to the build variables
-
-# All Target
-all: scfg
-
-# Tool invocations
-
-# scfg.o: ../scfg.cpp
-# @echo 'Building file: $<'
-# @echo 'Invoking: GCC C++ Compiler'
-# g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-# @echo 'Finished building: $<'
-# @echo ' '
-
-scfg: $(OBJS) $(USER_OBJS)
- @echo 'Building target: $@'
- @echo 'Invoking: GCC C++ Linker'
- /bin/sh ../../../../libtool --tag=CXX --mode=link g++ -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc
- @echo 'Finished building target: $@'
- @echo ' '
-#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder -lpthread -ldl -lm $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg"
-# Other Targets
-clean:
- -$(RM) $(OBJS)$(C++_DEPS)$(EXECUTABLES)$(CC_DEPS)$(C_DEPS)$(CPP_DEPS)$(CXX_DEPS)$(C_UPPER_DEPS) scfg
- -@echo ' '
-
-.PHONY: all clean dependents
-.SECONDARY:
-
--include ../makefile.targets
diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl
deleted file mode 100644
index f82a8e5a..00000000
--- a/gi/scfg/abc/Release/process_grammar.pl
+++ /dev/null
@@ -1,36 +0,0 @@
-#!perl
-
-use warnings;
-use strict;
-
-my $grammar_file = $ARGV[0];
-
-my %nt_count; #maps nt--> count rules whose lhs is nt
-
-open(G, "<$grammar_file") or die "Can't open file $grammar_file";
-
-while (<G>){
-
- chomp();
-
- s/\|\|\|.*//g;
- s/\s//g;
-
- $nt_count{$_}++;
-}
-
-
-close (G);
-
-open(G, "<$grammar_file") or die "Can't open file $grammar_file";
-
-while (<G>){
-
- chomp();
-
- (my $nt = $_) =~ s/\|\|\|.*//g;
- $nt =~ s/\s//g;
-
- s/(.+\|\|\|.+\|\|\|.+\|\|\|).+/$1/g;
- print $_ . " MinusLogP=" .(log($nt_count{$nt})) ."\n";
-}
diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfg
deleted file mode 100755
index 3faa52cc..00000000
--- a/gi/scfg/abc/Release/scfg
+++ /dev/null
Binary files differ
diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d
deleted file mode 100644
index b3cfbbb5..00000000
--- a/gi/scfg/abc/Release/scfg.d
+++ /dev/null
@@ -1,213 +0,0 @@
-scfg.d scfg.o: ../scfg.cpp \
- /export/ws10smt/software/include/boost/shared_ptr.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \
- /export/ws10smt/software/include/boost/config.hpp \
- /export/ws10smt/software/include/boost/config/user.hpp \
- /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \
- /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \
- /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \
- /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \
- /export/ws10smt/software/include/boost/config/select_platform_config.hpp \
- /export/ws10smt/software/include/boost/config/platform/linux.hpp \
- /export/ws10smt/software/include/boost/config/posix_features.hpp \
- /export/ws10smt/software/include/boost/config/suffix.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \
- /export/ws10smt/software/include/boost/assert.hpp \
- /export/ws10smt/software/include/boost/checked_delete.hpp \
- /export/ws10smt/software/include/boost/throw_exception.hpp \
- /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \
- /export/ws10smt/software/include/boost/detail/workaround.hpp \
- /export/ws10smt/software/include/boost/exception/exception.hpp \
- /export/ws10smt/software/include/boost/current_function.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
- /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \
- /export/ws10smt/software/include/boost/memory_order.hpp \
- /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \
- /export/ws10smt/software/include/boost/pointer_cast.hpp \
- /home/tnguyen/ws10smt/decoder/lattice.h \
- /home/tnguyen/ws10smt/decoder/wordid.h \
- /home/tnguyen/ws10smt/decoder/array2d.h \
- /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \
- /home/tnguyen/ws10smt/decoder/grammar.h \
- /home/tnguyen/ws10smt/decoder/lattice.h \
- /home/tnguyen/ws10smt/decoder/trule.h \
- /home/tnguyen/ws10smt/decoder/sparse_vector.h \
- /home/tnguyen/ws10smt/decoder/fdict.h \
- /home/tnguyen/ws10smt/decoder/dict.h \
- /export/ws10smt/software/include/boost/functional/hash.hpp \
- /export/ws10smt/software/include/boost/functional/hash/hash.hpp \
- /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \
- /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \
- /export/ws10smt/software/include/boost/limits.hpp \
- /export/ws10smt/software/include/boost/integer/static_log2.hpp \
- /export/ws10smt/software/include/boost/integer_fwd.hpp \
- /export/ws10smt/software/include/boost/cstdint.hpp \
- /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \
- /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \
- /export/ws10smt/software/include/boost/detail/container_fwd.hpp \
- /home/tnguyen/ws10smt/decoder/hg.h \
- /home/tnguyen/ws10smt/decoder/small_vector.h \
- /home/tnguyen/ws10smt/decoder/prob.h \
- /home/tnguyen/ws10smt/decoder/logval.h \
- /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \
- /home/tnguyen/ws10smt/decoder/grammar.h \
- /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \
- ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
-
-/export/ws10smt/software/include/boost/shared_ptr.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp:
-
-/export/ws10smt/software/include/boost/config.hpp:
-
-/export/ws10smt/software/include/boost/config/user.hpp:
-
-/export/ws10smt/software/include/boost/config/select_compiler_config.hpp:
-
-/export/ws10smt/software/include/boost/config/compiler/gcc.hpp:
-
-/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp:
-
-/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp:
-
-/export/ws10smt/software/include/boost/config/select_platform_config.hpp:
-
-/export/ws10smt/software/include/boost/config/platform/linux.hpp:
-
-/export/ws10smt/software/include/boost/config/posix_features.hpp:
-
-/export/ws10smt/software/include/boost/config/suffix.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp:
-
-/export/ws10smt/software/include/boost/assert.hpp:
-
-/export/ws10smt/software/include/boost/checked_delete.hpp:
-
-/export/ws10smt/software/include/boost/throw_exception.hpp:
-
-/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp:
-
-/export/ws10smt/software/include/boost/detail/workaround.hpp:
-
-/export/ws10smt/software/include/boost/exception/exception.hpp:
-
-/export/ws10smt/software/include/boost/current_function.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
-
-/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp:
-
-/export/ws10smt/software/include/boost/memory_order.hpp:
-
-/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp:
-
-/export/ws10smt/software/include/boost/pointer_cast.hpp:
-
-/home/tnguyen/ws10smt/decoder/lattice.h:
-
-/home/tnguyen/ws10smt/decoder/wordid.h:
-
-/home/tnguyen/ws10smt/decoder/array2d.h:
-
-/home/tnguyen/ws10smt/decoder/tdict.h:
-
-../agrammar.h:
-
-/home/tnguyen/ws10smt/decoder/grammar.h:
-
-/home/tnguyen/ws10smt/decoder/lattice.h:
-
-/home/tnguyen/ws10smt/decoder/trule.h:
-
-/home/tnguyen/ws10smt/decoder/sparse_vector.h:
-
-/home/tnguyen/ws10smt/decoder/fdict.h:
-
-/home/tnguyen/ws10smt/decoder/dict.h:
-
-/export/ws10smt/software/include/boost/functional/hash.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/hash.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp:
-
-/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp:
-
-/export/ws10smt/software/include/boost/limits.hpp:
-
-/export/ws10smt/software/include/boost/integer/static_log2.hpp:
-
-/export/ws10smt/software/include/boost/integer_fwd.hpp:
-
-/export/ws10smt/software/include/boost/cstdint.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp:
-
-/export/ws10smt/software/include/boost/functional/hash/extensions.hpp:
-
-/export/ws10smt/software/include/boost/detail/container_fwd.hpp:
-
-/home/tnguyen/ws10smt/decoder/hg.h:
-
-/home/tnguyen/ws10smt/decoder/small_vector.h:
-
-/home/tnguyen/ws10smt/decoder/prob.h:
-
-/home/tnguyen/ws10smt/decoder/logval.h:
-
-/home/tnguyen/ws10smt/decoder/bottom_up_parser.h:
-
-/home/tnguyen/ws10smt/decoder/grammar.h:
-
-/home/tnguyen/ws10smt/decoder/hg_intersect.h:
-
-../../utils/ParamsArray.h:
-
-../../utils/Util.h:
-
-../../utils/UtfConverter.h:
-
-../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk
deleted file mode 100644
index 6c7070aa..00000000
--- a/gi/scfg/abc/Release/sources.mk
+++ /dev/null
@@ -1,27 +0,0 @@
-################################################################################
-# Automatically-generated file. Do not edit!
-################################################################################
-
-C_UPPER_SRCS :=
-C_SRCS :=
-CPP_SRCS :=
-O_SRCS :=
-ASM_SRCS :=
-S_SRCS :=
-C++_SRCS :=
-CXX_SRCS :=
-CC_SRCS :=
-OBJ_SRCS :=
-OBJS :=
-C++_DEPS :=
-EXECUTABLES :=
-CC_DEPS :=
-C_DEPS :=
-CPP_DEPS :=
-CXX_DEPS :=
-C_UPPER_DEPS :=
-
-# Every subdirectory with source files must be described here
-SUBDIRS := \
-. \
-
diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk
deleted file mode 100644
index 49080b36..00000000
--- a/gi/scfg/abc/Release/subdir.mk
+++ /dev/null
@@ -1,59 +0,0 @@
-
-################################################################################
-# Automatically-generated file. Do not edit!
-################################################################################
-
-# Add inputs and outputs from these tool invocations to the build variables
-CPP_SRCS += \
-../../utils/Util.cc \
-../agrammar.cc \
-../scfg.cpp
-
-
-OBJS += \
-./Util.o \
-./agrammar.o \
-./scfg.o
-
-
-CPP_DEPS += \
-./Util.d \
-./agrammar.d \
-./scfg.d
-
-# Each subdirectory must supply rules for building sources it contributes
-# %.o: ../%.cpp
-# @echo 'Building file: $<'
-# @echo 'Invoking: GCC C++ Compiler'
-# g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
-#
-# @echo ' '
-
-%.o: ../../utils/%.cc
- @echo 'Building file: $<'
- @echo 'Invoking: GCC C++ Compiler'
- g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
- @echo 'Finished building: $<'
- @echo ' '
-
-%.o: ../../utils/%.c
- @echo 'Building file: $<'
- @echo 'Invoking: GCC C++ Compiler'
- g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
- @echo 'Finished building: $<'
- @echo ' '
-
-%.o: ../%.cpp
- @echo 'Building file: $<'
- @echo 'Invoking: GCC C++ Compiler'
- g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
- @echo 'Finished building: $<'
- @echo ' '
-
-%.o: ../%.cc
- @echo 'Building file: $<'
- @echo 'Invoking: GCC C++ Compiler'
- g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
- @echo 'Finished building: $<'
- @echo ' '
-
diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar
deleted file mode 100644
index 9df1b77d..00000000
--- a/gi/scfg/abc/Release/tmp.grammar
+++ /dev/null
@@ -1,2 +0,0 @@
-[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
-[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 \ No newline at end of file
diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar
deleted file mode 120000
index 50dea8df..00000000
--- a/gi/scfg/abc/Release/toy-grammar
+++ /dev/null
@@ -1 +0,0 @@
-/export/ws10smt/toy-grammar/ \ No newline at end of file
diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.out
deleted file mode 100755
index 0467acf0..00000000
--- a/gi/scfg/abc/a.out
+++ /dev/null
Binary files differ
diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc
deleted file mode 100644
index 016a0189..00000000
--- a/gi/scfg/abc/agrammar.cc
+++ /dev/null
@@ -1,489 +0,0 @@
-#include <algorithm>
-#include <utility>
-#include <map>
-
-#include "rule_lexer.h"
-#include "filelib.h"
-#include "tdict.h"
-#include "agrammar.h"
-#include "../utils/Util.h"
-
-
-
-aTRule::aTRule(TRulePtr rule){
-
- this -> e_ = rule->e_;
- this -> f_ = rule->f_;
- this ->lhs_ = rule->lhs_;
- this -> arity_ = rule->arity_;
- this -> scores_ = rule->scores_;
- ResetScore(0.00000001);
-}
-
-bool equal(TRulePtr const & rule1, TRulePtr const & rule2){
- if (rule1->lhs_ != rule2->lhs_) return false;
- if (rule1->f_.size() != rule2->f_.size()) return false;
- if (rule1->e_.size() != rule2->e_.size()) return false;
-
- for (int i=0; i<rule1->f_.size(); i++)
- if (rule1->f_.at(i) != rule2->f_.at(i)) return false;
- for (int i=0; i<rule1->e_.size(); i++)
- if (rule1->e_.at(i) != rule2->e_.at(i)) return false;
- return true;
-}
-
-
-//const vector<TRulePtr> Grammar::NO_RULES;
-
-void aRemoveRule(vector<TRulePtr> & v, const TRulePtr & rule){ // remove rule from v if found
- for (int i=0; i< v.size(); i++)
- if (equal(v[i], rule )){
- // cout<<"erase rule from vector:"<<rule->AsString()<<endl;
- v.erase(v.begin()+i);
- }
-}
-
-void aRemoveRule(vector<NTRule> & v, const NTRule & ntrule){ // remove rule from v if found
- for (int i=0; i< v.size(); i++)
- if (equal(v[i].rule_, ntrule.rule_ )){
- // cout<<"erase rule from vector:"<<rule->AsString()<<endl;
- v.erase(v.begin()+i);
- }
-}
-
-struct aTextRuleBin : public RuleBin {
- int GetNumRules() const {
- return rules_.size();
- }
- TRulePtr GetIthRule(int i) const {
- return rules_[i];
- }
- void AddRule(TRulePtr t) {
- rules_.push_back(t);
- }
-
- void RemoveRule(const TRulePtr & rule ){
- aRemoveRule(rules_, rule);
- }
-
-
- int Arity() const {
- return rules_.front()->Arity();
- }
-
- void Dump() const {
- for (int i = 0; i < rules_.size(); ++i)
- cerr << rules_[i]->AsString() << endl;
- }
- private:
- vector<TRulePtr> rules_;
-};
-
-
-struct aTextGrammarNode : public GrammarIter {
- aTextGrammarNode() : rb_(NULL) {}
- ~aTextGrammarNode() {
- delete rb_;
- }
- const GrammarIter* Extend(int symbol) const {
- map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol);
- if (i == tree_.end()) return NULL;
- return &i->second;
- }
-
- const RuleBin* GetRules() const {
- if (rb_) {
- //rb_->Dump();
- }
- return rb_;
- }
-
- map<WordID, aTextGrammarNode> tree_;
- aTextRuleBin* rb_;
-};
-
-struct aTGImpl {
- aTextGrammarNode root_;
-};
-
-aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {}
-aTextGrammar::aTextGrammar(const string& file) :
- max_span_(10),
- pimpl_(new aTGImpl) {
- ReadFromFile(file);
-}
-
-const GrammarIter* aTextGrammar::GetRoot() const {
- return &pimpl_->root_;
-}
-
-void aTextGrammar::SetGoalNT(const string & goal_str){
- goalID = TD::Convert(goal_str);
-
-}
-
-void getNTRule( const TRulePtr & rule, map<WordID, NTRule> & ntrule_map){
-
- NTRule lhs_ntrule(rule, rule->lhs_ * -1);
- ntrule_map[rule->lhs_ * -1] = lhs_ntrule;
-
- for (int i=0; i< (rule->f_).size(); i++)
- if (ntrule_map.find((rule->f_).at(i) * -1) == ntrule_map.end() && (rule->f_).at(i) <0 ){
- NTRule rhs_ntrule(rule, rule->f_.at(i) * -1);
- ntrule_map[(rule->f_).at(i) *-1] = rhs_ntrule;
- }
-}
-
-
-void aTextGrammar::AddRule(const TRulePtr& rule) {
- if (rule->IsUnary()) {
- rhs2unaries_[rule->f().front()].push_back(rule);
- unaries_.push_back(rule);
- } else {
- aTextGrammarNode* cur = &pimpl_->root_;
- for (int i = 0; i < rule->f_.size(); ++i)
- cur = &cur->tree_[rule->f_[i]];
- if (cur->rb_ == NULL)
- cur->rb_ = new aTextRuleBin;
- cur->rb_->AddRule(rule);
- }
-
- //add the rule to lhs_rules_
- lhs_rules_[rule->lhs_* -1].push_back(rule);
-
- //add the rule to nt_rules_
- map<WordID, NTRule> ntrule_map;
- getNTRule (rule, ntrule_map);
- for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){
- nt_rules_[it->first].push_back(it->second);
- }
-}
-
-void aTextGrammar::RemoveRule(const TRulePtr & rule){
- // cout<<"Remove rule: "<<rule->AsString()<<endl;
- if (rule->IsUnary()) {
- aRemoveRule(rhs2unaries_[rule->f().front()], rule);
- aRemoveRule(unaries_, rule);
- } else {
- aTextGrammarNode* cur = &pimpl_->root_;
- for (int i = 0; i < rule->f_.size(); ++i)
- cur = &cur->tree_[rule->f_[i]];
-// if (cur->rb_ == NULL)
-// cur->rb_ = new aTextRuleBin;
- cur->rb_->RemoveRule(rule);
- }
-
- //remove rules from lhs_rules_
-
- aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule);
-
-
- //remove the rule from nt_rules_
- map<WordID, NTRule> ntrule_map;
- getNTRule (rule, ntrule_map);
- for (map<WordID,NTRule>::const_iterator it= ntrule_map.begin(); it != ntrule_map.end(); it++){
- aRemoveRule(nt_rules_[it->first], it->second);
- }
-
-}
-
-void aTextGrammar::RemoveNonterminal(WordID wordID){
- vector<NTRule> rules = nt_rules_[wordID];
-// // remove the nonterminal from ntrules_
- nt_rules_.erase(wordID);
- for (int i =0; i<rules.size(); i++)
- RemoveRule(rules[i].rule_);
- sum_probs_.erase(wordID);
- cnt_rules.erase(wordID);
-
-}
-
-void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;}
-
-
-
-
-void aTextGrammar::AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts){
-
- vector<NTRule> rules = nt_rules_[nt_old];
-
- // cout<<"\n\n\n start add splitting rules"<<endl;
-
- const double epsilon = 0.001;
- for (int i=0; i<rules.size(); i++){
- NTRule old_rule = rules.at(i);
- vector<int> ntPos = old_rule.ntPos_; //in rule old_rule, ntPos is the positions of nonterminal nt_old
- //we have to substitute each nt in these positions by the list of new nonterminals in the input vector 'nts'
- //there are cnt =size_of(nts)^ size_of(ntPos) possibilities for the substitutions,
- //hence the rules' new probabilities have to divide to cnt also
- // cout<<"splitting NT in rule "<<old_rule.rule_->AsString()<<endl;
-
-// cout<<"nt position in the rules"<<endl;
-// for (int j=0; j<ntPos.size();j++) cout<<ntPos[j]<<" "; cout<<endl;
-
- int cnt_newrules = pow( nts.size(), ntPos.size() );
- // cout<<"cnt_newrules="<<cnt_newrules<<endl;
-
- double log_nts_size = log(nts.size());
-
-
- map<WordID, int> cnt_addepsilon; //cnt_addepsilon and cont_minusepsilon to track the number of rules epsilon is added or minus for each lhs nonterminal, ideally we want these two numbers are equal
- map<WordID, int> cnt_minusepsilon;
- cnt_addepsilon[old_rule.rule_->lhs_] = 0;
- cnt_minusepsilon[old_rule.rule_->lhs_] = 0;
- for (int j =0; j<nts.size(); j++) { cnt_addepsilon[nts[j] ] = 0; cnt_minusepsilon[nts[j] ] = 0;}
-
-
- for (int j=0; j<cnt_newrules; j++){ //each j represents a new rule
- //convert j to a vector of size ntPos.size(), each entry in the vector >=0 and <nts.size()
- int mod = nts.size();
- vector <int> j_vector(ntPos.size(), 0); //initiate the vector to all 0
- int j_tmp =j;
- for (int k=0; k<ntPos.size(); k++){
- j_vector[k] = j_tmp % mod;
- j_tmp = (j_tmp - j_vector[k]) / mod;
- }
- // cout<<"print vector j_vector"<<endl;
- // for (int k=0; k<ntPos.size();k++) cout<<j_vector[k]<<" "; cout<<endl;
- //now use the vector to create a new rule
- TRulePtr newrule(new aTRule());
-
- newrule -> e_ = (old_rule.rule_)->e_;
- newrule -> f_ = old_rule.rule_->f_;
- newrule->lhs_ = old_rule.rule_->lhs_;
- newrule -> arity_ = old_rule.rule_->arity_;
- newrule -> scores_ = old_rule.rule_->scores_;
-
- // cout<<"end up update score\n";
- if (ntPos[0] == -1){ //update the lhs
- newrule->lhs_ = nts[j_vector[0]] * -1;
-
- //score has to randomly add/minus a small epsilon to break the balance
- if (nts.size() >1 && ntPos.size() >1){
- // cout<<"start to add/minus epsilon"<<endl;
- if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) ) //there are enough rules added epsilon, the new rules has to minus epsilon
- newrule-> scores_ -= epsilon;
- else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules / (2*ntPos.size()) )
- newrule-> scores_ += epsilon;
- else{
- double random = rand()/RAND_MAX;
- if (random > .5){
- newrule-> scores_ += epsilon;
- cnt_addepsilon[newrule->lhs_]++;
- }
- else{
- newrule-> scores_ -= epsilon;
- cnt_minusepsilon[newrule->lhs_]++;
- }
- }
- }
-
-
- for (int k=1; k<ntPos.size(); k++){//update f_
- // cout<<"ntPos[k]="<<ntPos[k]<<endl;
- newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1; //update the ntPos[k-1]-th nonterminal in f_ to the j_vector[k] NT in nts
- }
- newrule -> scores_ += (ntPos.size() -1) * log_nts_size;
-
-
- }
- else{
- //score has to randomly add/minus a small epsilon to break the balance
- if ( ntPos.size() >0 && nts.size()>1){
- // cout<<"start to add/minus epsilon"<<endl;
- if ( cnt_addepsilon[newrule->lhs_] >= cnt_newrules / 2 ) //there are enough rules added epsilon, the new rules has to minus epsilon
- newrule-> scores_ -= epsilon;
- else if ( cnt_minusepsilon[newrule->lhs_] >= cnt_newrules /2 )
- newrule-> scores_ += epsilon;
- else{
- double random = rand()/RAND_MAX;
- if (random > .5){
- newrule-> scores_ += epsilon;
- cnt_addepsilon[newrule->lhs_]++;
- }
- else{
- newrule-> scores_ -= epsilon;
- cnt_minusepsilon[newrule->lhs_]++;
- }
- }
- }
-
-
- for (int k=0; k<ntPos.size(); k++){ //update f_
- // cout<<"ntPos[k]="<<ntPos[k]<<endl;
- newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1;
- }
- newrule -> scores_ += ntPos.size() * log_nts_size;
- }
- this->AddRule (newrule);
- }//add new rules for each grammar rules
-
- } //iterate through all grammar rules
-
-}
-
-
-void aTextGrammar::splitNonterminal(WordID wordID){
-
- //first added the splits nonterminal into the TD dictionary
-
- string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split
-
- vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits
- for (int i =0; i< this->max_split_; i++){
- string split_str = old_str + "+" + itos(i);
- WordID splitID = TD::Convert(split_str);
- v_splits.push_back(splitID);
-
- }
-
- // grSplitNonterminals[wordID] = v_splits;
-
- //print split nonterminas of wordID
- // v_splits = grSplitNonterminals[wordID];
- // cout<<"print split nonterminals\n";
- // for (int i =0; i<v_splits.size(); i++)
- // cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl;
-
- AddSplitNonTerminal(wordID, v_splits);
- RemoveNonterminal(wordID);
-
- // grSplitNonterminals.erase (grSplitNonterminals.find(WordID) );
-
- if (wordID == goalID){ //add rule X-> X1; X->X2,... if X is the goal NT
- for (int i =0; i<v_splits.size(); i++){
- TRulePtr rule (new aTRule());
- rule ->lhs_ = goalID * -1;
- rule ->f_.push_back(v_splits[i] * -1);
- rule->e_.push_back(0);
-
- rule->scores_.set_value(FD::Convert("MinusLogP"), log(v_splits.size()) );
- AddRule(rule);
- }
-
- }
-
-}
-
-
-void aTextGrammar::splitAllNonterminals(){
- map<WordID, vector<TRulePtr> >::const_iterator it;
- vector<WordID> v ; // WordID >0
- for (it = lhs_rules_.begin(); it != lhs_rules_.end(); it++) //iterate through all nts
- if (it->first != goalID || lhs_rules_.size() ==1)
- v.push_back(it->first);
-
- for (int i=0; i< v.size(); i++)
- splitNonterminal(v[i]);
-}
-
-
-void aTextGrammar::PrintAllRules(const string & filename) const{
-
-
- cerr<<"print grammar to "<<filename<<endl;
-
- ofstream outfile(filename.c_str());
- if (!outfile.good()) {
- cerr << "error opening output file " << filename << endl;
- exit(1);
- }
-
- map<WordID, vector<TRulePtr > >::const_iterator it;
- for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){
-
- vector<TRulePtr> v = it-> second;
- for (int i =0; i< v.size(); i++){
- outfile<<v[i]->AsString()<<"\t"<<endl;
- }
- }
-}
-
-
-void aTextGrammar::ResetScore(){
-
- map<WordID, vector<TRulePtr > >::const_iterator it;
- for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){
- vector<TRulePtr> v = it-> second;
- for (int i =0; i< v.size(); i++){
- // cerr<<"Reset score of Rule "<<v[i]->AsString()<<endl;
- boost::static_pointer_cast<aTRule>(v[i])->ResetScore(alpha_ /v.size());
- }
- lhs_rules_[it->first] = v;
- sum_probs_[it->first] = alpha_;
- }
-
-}
-
-void aTextGrammar::UpdateScore(){
-
- map<WordID, vector<TRulePtr > >::const_iterator it;
- for (it= lhs_rules_.begin(); it != lhs_rules_.end(); it++){
- vector<TRulePtr> v = it-> second;
- for (int i =0; i< v.size(); i++){
- boost::static_pointer_cast<aTRule>(v[i])->UpdateScore(sum_probs_[it->first] );
- }
-
- // cerr<<"sum_probs_[it->first] ="<<sum_probs_[it->first] <<endl;
- sum_probs_[it->first] = alpha_;
- }
-
-}
-
-
-void aTextGrammar::UpdateHgProsteriorProb(Hypergraph & hg){
- std::vector<prob_t> posts ;
-
- prob_t goal_score = hg.ComputeEdgePosteriors(1, &posts);
- for (int i =0; i<posts.size(); i++){
-
- //cout<<posts[i]<<endl;
- Hypergraph::Edge& e = hg.edges_[i];
- string goalstr("Goal");
- string str_lhs = TD::Convert(e.rule_->lhs_ * -1);
-
- if (str_lhs.find(goalstr) != string::npos)
- continue;
-
- // cerr<<e.rule_->AsString()<<endl;
- // cerr<<e.rule_->parent_rule_->AsString()<<endl;
-
- boost::static_pointer_cast<aTRule>(e.rule_->parent_rule_)->AddProb(posts[i] / goal_score);
- // cerr<<"add count for rule\n";
-// cerr<<"posts[i]="<<posts[i]<<" goal_score="<<goal_score<<endl;
-// cerr<<"posts[i] /goal_score="<<(posts[i] /goal_score)<<endl;
- sum_probs_[e.rule_->parent_rule_->lhs_* -1 ] += posts[i] /goal_score;
-
- }
-
-
-}
-
-
-void aTextGrammar::PrintNonterminalRules(WordID nt) const{
- vector< NTRule > v;
- map<WordID, vector<NTRule> >::const_iterator mit= nt_rules_.find(nt);
- if (mit == nt_rules_.end())
- return;
-
- v = mit->second;
-
- for (vector<NTRule>::const_iterator it = v.begin(); it != v.end(); it++)
- cout<<it->rule_->AsString()<<endl;
-}
-
-static void AddRuleHelper(const TRulePtr& new_rule, void* extra) {
- aTRule *p = new aTRule(new_rule);
-
- static_cast<aTextGrammar*>(extra)->AddRule(TRulePtr(p));
-}
-
-void aTextGrammar::ReadFromFile(const string& filename) {
- ReadFile in(filename);
- RuleLexer::ReadRules(in.stream(), &AddRuleHelper, this);
-}
-
-bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const {
- return (max_span_ >= distance);
-}
-
diff --git a/gi/scfg/abc/agrammar.h b/gi/scfg/abc/agrammar.h
deleted file mode 100644
index 0910aae6..00000000
--- a/gi/scfg/abc/agrammar.h
+++ /dev/null
@@ -1,116 +0,0 @@
-#ifndef AGRAMMAR_H_
-#define AGRAMMAR_H_
-
-#include "grammar.h"
-#include "hg.h"
-
-
-using namespace std;
-
-class aTRule: public TRule{
- public:
- aTRule() : TRule(){ResetScore(0.00000001); }
- aTRule(TRulePtr rule_);
-
- void ResetScore(double initscore){//cerr<<"Reset Score "<<this->AsString()<<endl;
- sum_scores_.set_value(FD::Convert("Prob"), initscore);}
- void AddProb(double p ){
- // cerr<<"in AddProb p="<<p<<endl;
- // cerr<<"prob sumscores ="<<sum_scores_[FD::Convert("Prob")]<<endl;
- sum_scores_.add_value(FD::Convert("Prob"), p);
- // cerr<<"after AddProb\n";
- }
-
- void UpdateScore(double sumprob){
- double minuslogp = 0 - log( sum_scores_.value(FD::Convert("Prob")) /sumprob);
- if (sumprob< sum_scores_.value(FD::Convert("Prob"))){
- cerr<<"UpdateScore sumprob="<<sumprob<< " sum_scores_.value(FD::Convert(\"Prob\"))="<< sum_scores_.value(FD::Convert("Prob"))<< this->AsString()<<endl;
- exit(1);
- }
- this->scores_.set_value(FD::Convert("MinusLogP"), minuslogp);
-
- }
- private:
- SparseVector<double> sum_scores_;
-};
-
-
-class aTGImpl;
-struct NTRule{
-
- NTRule(){};
- NTRule(const TRulePtr & rule, WordID nt){
- nt_ = nt;
- rule_ = rule;
-
- if (rule->lhs_ * -1 == nt)
- ntPos_.push_back(-1);
-
- for (int i=0; i< rule->f().size(); i++)
- if (rule->f().at(i) * -1 == nt)
- ntPos_.push_back(i);
-
-
- }
-
- TRulePtr rule_;
- WordID nt_; //the labelID of the nt (nt_>0);
-
- vector<int> ntPos_; //position of nt_ -1: lhs, from 0...f_.size() for nt of f_()
- //i.e the rules is: NP-> DET NP; if nt_=5 is the labelID of NP then ntPos_ = (-1, 1): the indexes of nonterminal NP
-
-};
-
-
-struct aTextGrammar : public Grammar {
- aTextGrammar();
- aTextGrammar(const std::string& file);
- void SetMaxSpan(int m) { max_span_ = m; }
-
- virtual const GrammarIter* GetRoot() const;
- void AddRule(const TRulePtr& rule);
- void ReadFromFile(const std::string& filename);
- virtual bool HasRuleForSpan(int i, int j, int distance) const;
- const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;
-
- void AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts);
- void setMaxSplit(int max_split);
- void splitNonterminal(WordID wordID);
-
-
- void splitAllNonterminals();
-
- void PrintAllRules(const string & filename) const;
- void PrintNonterminalRules(WordID nt) const;
- void SetGoalNT(const string & goal_str);
-
- void ResetScore();
-
- void UpdateScore();
-
- void UpdateHgProsteriorProb(Hypergraph & hg);
-
- void set_alpha(double alpha){alpha_ = alpha;}
- private:
-
- void RemoveRule(const TRulePtr & rule);
- void RemoveNonterminal(WordID wordID);
-
- int max_span_;
- int max_split_;
- boost::shared_ptr<aTGImpl> pimpl_;
-
- map <WordID, vector<TRulePtr> > lhs_rules_;// WordID >0
- map <WordID, vector<NTRule> > nt_rules_;
-
- map <WordID, double> sum_probs_;
- map <WordID, double> cnt_rules;
-
- double alpha_;
-
- // map<WordID, vector<WordID> > grSplitNonterminals;
- WordID goalID;
-};
-
-
-#endif
diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc
deleted file mode 100644
index 33d70dfc..00000000
--- a/gi/scfg/abc/old_agrammar.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-#include "agrammar.h"
-#include "Util.h"
-
-#include <algorithm>
-#include <utility>
-#include <map>
-
-#include "rule_lexer.h"
-#include "filelib.h"
-#include "tdict.h"
-#include <iostream>
-#include <fstream>
-
-map<WordID, vector<WordID> > grSplitNonterminals;
-//const vector<TRulePtr> Grammar::NO_RULES;
-
-
-// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){
-// vector<TRulePtr> vRules; //outputs
-
-// vector<WordID> f = rule->f();
-// vector<vector<WordID> > newfvector;
-// for (int i =0; i< f.size(); i++){
-// if (f[i] == wordID){
-// newfvector.push_back(v);
-// }
-// else
-// newfvector.push_back(vector<WordID> (1, f[i]));
-// }
-
-// //now creates new rules;
-
-
-// return vRules;
-// }
-
-
-struct aTextRuleBin : public RuleBin {
- int GetNumRules() const {
- return rules_.size();
- }
- TRulePtr GetIthRule(int i) const {
- return rules_[i];
- }
- void AddRule(TRulePtr t) {
- rules_.push_back(t);
- }
- int Arity() const {
- return rules_.front()->Arity();
- }
- void Dump() const {
- for (int i = 0; i < rules_.size(); ++i)
- cerr << rules_[i]->AsString() << endl;
- }
-
-
- vector<TRulePtr> getRules(){ return rules_;}
-
-
- void substituteF(vector<WordID> & f_path, map<WordID, vector<WordID> > & grSplitNonterminals){
- //this substituteF method is different with substituteF procedure found in cdec code;
- //
- //aTextRuleBin has a collection of rules with the same f() on the rhs,
- //substituteF() replaces the f_ of all the rules with f_path vector,
- //the grSplitNonterminals input to split the lhs_ nonterminals of the rules incase the lhs_ nonterminal found in grSplitNonterminals
-
- vector <TRulePtr> newrules;
- for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){
- assert(f_path.size() == (*it)->f_.size());
-
- if (grSplitNonterminals.find( (*it)->lhs_) == grSplitNonterminals.end()){
- (*it)->f_ = f_path;
- }
- else{ // split the lhs NT,
- vector<WordID> new_lhs = grSplitNonterminals[ (*it)->lhs_ ];
- for (vector<WordID>::iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){
- TRulePtr newrule;
- newrule -> e_ = (*it)->e_;
- newrule -> f_ = (*it)->f_;
- newrule->lhs_ = *vit;
- newrule -> scores_ = (*it)->scores_;
- newrule -> arity_ = (*it)->arity_;
- newrules.push_back (newrule);
- }
- rules_.erase(it);
- }
- }
-
- //now add back newrules(output of splitting lhs_) to rules_
- rules_.insert(newrules.begin(),newrules.begin(), newrules.end());
- }
-
-private:
- vector<TRulePtr> rules_;
-};
-
-
-
-struct aTextGrammarNode : public GrammarIter {
- aTextGrammarNode() : rb_(NULL) {}
-
- aTextGrammarNode(const aTextGrammarNode & a){
- nonterminals_ = a.nonterminals_;
- tree_ = a.tree_;
- rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over
- }
-
- ~aTextGrammarNode() {
- delete rb_;
- }
- const GrammarIter* Extend(int symbol) const {
- map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol);
- if (i == tree_.end()) return NULL;
- return &i->second;
- }
-
- const RuleBin* GetRules() const {
- if (rb_) {
- //rb_->Dump();
- }
- return rb_;
- }
-
- void DFS();
-
- void visit (); //todo: make this as a function pointer
-
- vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node;
- set<WordID> nonterminals_; //Linh added: the set of nonterminals extend the current TextGrammarNode, WordID is the label in the dict; i.e WordID>0
- map<WordID, aTextGrammarNode> tree_;
- aTextRuleBin* rb_;
-
- void print_path(){ //for debug only
- cout<<"path="<<endl;
- for (int i =0; i< path_.size(); i++)
- cout<<path_[i]<<" ";
- cout<<endl;
- }
-};
-
-void aTextGrammarNode::DFS(){ //because the grammar is a tree without circle, DFS does not require to color the nodes
-
- visit();
-
- for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin(); it != tree_.end(); it++){
- (it->second).DFS();
- }
-}
-
-
-void aTextGrammarNode::visit( ){
-
- cout<<"start visit()"<<endl;
-
- cout<<"got grSplitNonterminals"<<endl;
-// if (grSplitNonterminals.find(*it) != grSplitNonterminals.end()){ //split this *it nonterminal
-// vector<WordID> vsplits = grSplitNonterminals[*it]; //split *it into vsplits
-
- //iterate through next terminals/nonterminals in tree_
- vector<WordID> tobe_removedNTs; //the list of nonterminal children in tree_ were splited hence will be removed from tree_
-
- for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){
- cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl;
-
- map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 );
-
- if (git == grSplitNonterminals.end() || it->first >0){ //the next symbols is not to be split
- cout<<"not split\n";
- tree_[it->first ].path_ = path_;
- tree_[it->first ].path_.push_back(it->first);
- cout<<"in visit() tree_[it->first ].path_= ";
- tree_[it->first ].print_path();
- continue;
- }
-
-
- cout<<"tmp2";
- vector<WordID> vsplits = grSplitNonterminals[it->first * -1];
- // vector<WordID> vsplits = git->second;
- cout<<"tmp3";
- // vector<WordID> vsplits = agrammar_ ->splitNonterminals_[it->first * -1];
- cout <<"got vsplits"<<endl;
- for (int i =0 ; i<vsplits.size(); i++){
- // nonterminals_.insert(vsplits[i]); //add vsplits[i] into nonterminals_ of the current TextGrammarNode
- tree_[vsplits[i] * -1] = aTextGrammarNode(tree_[it->first]); //cp the subtree to new nonterminal
- tree_[vsplits[i] * -1].path_ = path_; //update the path if the subtrees
- tree_[vsplits[i] * -1].path_.push_back(vsplits[i] * -1);
- tree_[vsplits[i] * -1].print_path();
- }
-
- //remove the old node:
- tobe_removedNTs.push_back(it->first);
-
- }
-
- for (int i =0; i<tobe_removedNTs.size(); i++)
- tree_.erase(tobe_removedNTs[i]);
-
- if (tree_.size() ==0){ //the last (terminal/nonterminal
- cout<<"inside visit(): the last terminal/nonterminal"<<endl;
- rb_->substituteF(path_, grSplitNonterminals);
-
- }
- cout<<"visit() end"<<endl;
-}
-
-struct aTGImpl {
- aTextGrammarNode root_;
-};
-
-aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl) {}
-aTextGrammar::aTextGrammar(const std::string& file) :
- max_span_(10),
- pimpl_(new aTGImpl) {
- ReadFromFile(file);
-}
-
-
-const GrammarIter* aTextGrammar::GetRoot() const {
- return &pimpl_->root_;
-}
-
-
-void aTextGrammar::addNonterminal(WordID wordID){
- //addNonterminal add the nonterminal wordID (wordID<0) to the list of nonterminals (map<WordID, int>) nonterminals_ of grammar
- //if the input parameter wordID<0 then do nothing
-
- if (wordID <0){ //it is a nonterminal
-
- map<WordID, int>::iterator it = nonterminals_.find(wordID * -1);
- if (it == nonterminals_.end()) //if not found in the list of nonterminals(a new nonterminals)
- nonterminals_[wordID * -1] = 1;
- }
-}
-
-
-
-void aTextGrammar::AddRule(const TRulePtr& rule) {
- //add the LHS nonterminal to nonterminals_ map
-
- this->addNonterminal(rule->lhs_);
-
- if (rule->IsUnary()) {
- rhs2unaries_[rule->f().front()].push_back(rule);
- unaries_.push_back(rule);
- if (rule->f().front() <0)
- //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1)
- this->addNonterminal(rule->f().front());
-
-
- } else {
- aTextGrammarNode* cur = &pimpl_->root_;
- for (int i = 0; i < rule->f_.size(); ++i){
- if (rule->f_[i] <0){
- cur->nonterminals_.insert(rule->f_[i] * -1); //add the next(extend) nonterminals to the current node's nonterminals_ set
- this->addNonterminal(rule->f_[i]); //add the rhs nonterminal to the grammar's list of nonterminals
- }
- cur = &cur->tree_[rule->f_[i]];
-
- }
- if (cur->rb_ == NULL)
- cur->rb_ = new aTextRuleBin;
- cur->rb_->AddRule(rule);
-
- }
-}
-
-static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) {
- static_cast<aTextGrammar*>(extra)->AddRule(new_rule);
-}
-
-
-void aTextGrammar::ReadFromFile(const string& filename) {
- ReadFile in(filename);
- RuleLexer::ReadRules(in.stream(), &aAddRuleHelper, this);
-}
-
-bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const {
- return (max_span_ >= distance);
-}
-
-
-////Linh added
-
-void aTextGrammar::setMaxSplit(int max_split){max_split_ = max_split;}
-
-
-void aTextGrammar::printAllNonterminals() const{
- for (map<WordID, int>::const_iterator it =nonterminals_.begin();
- it != nonterminals_.end(); it++){
- if (it->second >0){
- cout <<it->first<<"\t"<<TD::Convert(it->first)<<endl;
- }
- }
-
-}
-
-
-void aTextGrammar::splitNonterminal(WordID wordID){
-
- //first added the splits nonterminal into the TD dictionary
-
- string old_str = TD::Convert(wordID); //get the nonterminal label of wordID, the new nonterminals will be old_str+t where t=1..max_split
-
- vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits
- for (int i =0; i< this->max_split_; i++){
- string split_str = old_str + "+" + itos(i);
- WordID splitID = TD::Convert(split_str);
- v_splits.push_back(splitID);
- nonterminals_[splitID] = 1;
- }
-
- grSplitNonterminals[wordID] = v_splits;
- //set wordID to be an inactive nonterminal
- nonterminals_[wordID] = 0;
-
- //print split nonterminas of wordID
- v_splits = grSplitNonterminals[wordID];
- cout<<"print split nonterminals\n";
- for (int i =0; i<v_splits.size(); i++)
- cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl;
-
-
- //now update in grammar rules and gramar tree:
- vector<TRulePtr> newrules;
- //first unary rules:
- //iterate through unary rules
- for (int i =0; i < unaries_.size(); i++){
- TRulePtr rule = unaries_[i];
- WordID lhs = rule.lhs_;
- if (grSplitNonterminals.find(rule->f().front() ) != grSplitNonterminals.end()//if the rhs is in the list of splitting nonterminal
- && grSplitNonterminals.find(lhs ) != grSplitNonterminals.end() //and the lhs is in the list of splitting nonterminal too
- ){
- vector<WordID> rhs_nonterminals = grSplitNonterminals[rule->f().front()]; //split the rhs nonterminal into the list of nonterminals in 'rhs_nonterminals'
- vector<WordID> lhs_nonterminals = grSplitNonterminals[lhs]; //split the rhs nonterminal into the list of nonterminals in 'lhs_nonterminals'
- for (int k =0; k <rhs_nonterminals.size(); k++)
- for (int j =0; j <lhs_nonterminals.size(); j++){
- TRulePtr newrule;
- newrule -> e_ = rule->e_;
- newrule -> f_ = rhs_nonterminals[k]->f_;
- newrule->lhs_ = lhs_nonterminals[j]->lhs_;
- newrule -> scores_ = rule->scores_;
- newrule -> arity_ = (*it)->arity_;
- newrules.push_back (newrule);
-
- //update
- }
- }
- else{//the rhs terminal/nonterminal is not in the list of splitting nonterminal
-
-
- }
- }
-
- // for (Cat2Rule::const_iterator it = rhs2unaries_.begin(); it != rhs2unaries_.end(); it++){
-
- // }
- // if (rule->IsUnary()) {
- // rhs2unaries_[rule->f().front()].push_back(rule);
- // unaries_.push_back(rule);
- // if (rule->f().front() <0)
- // //add the RHS nonterminal to the list of nonterminals (the addNonterminal() function will check if it is the rhs symbol is a nonterminal then multiply by -1)
- // this->addNonterminal(rule->f().front());
-
-
- pimpl_->root_.DFS();
-
-}
-
-
-// void aTextGrammar::splitNonterminal0(WordID wordID){
-
-// TextGrammarNode* cur = &pimpl_->root_;
-// for (int i = 0; i < rule->f_.size(); ++i)
-// cur = &cur->tree_[rule->f_[i]];
-
-// }
-
-void aTextGrammar::splitAllNonterminals(){
-
-
-}
-
diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h
deleted file mode 100644
index d68c2548..00000000
--- a/gi/scfg/abc/old_agrammar.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef _AGRAMMAR_H_
-#define _AGRAMMAR_H_
-
-#include "grammar.h"
-
-using namespace std;
-
-class aTGImpl;
-
-struct aTextGrammar : public Grammar {
- aTextGrammar();
- aTextGrammar(const std::string& file);
- void SetMaxSpan(int m) { max_span_ = m; }
-
- virtual const GrammarIter* GetRoot() const;
- void AddRule(const TRulePtr& rule);
- void ReadFromFile(const std::string& filename);
- virtual bool HasRuleForSpan(int i, int j, int distance) const;
- const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;
-
- void setMaxSplit(int max_split);
-
- void printAllNonterminals() const;
- void addNonterminal(WordID wordID);
-
- void splitAllNonterminals();
- void splitNonterminal(WordID wordID);
-
- // inline map<WordID, vector<WordID> > & getSplitNonterminals(){return splitNonterminals_;}
- // map<WordID, vector<WordID> > splitNonterminals_;
- private:
- int max_span_;
- boost::shared_ptr<aTGImpl> pimpl_;
- int max_split_;
-
- map<WordID, int> nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar
-
-
-
-};
-
-
-
-
-#endif
diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp
deleted file mode 100644
index 1e59fb4a..00000000
--- a/gi/scfg/abc/scfg.cpp
+++ /dev/null
@@ -1,277 +0,0 @@
-#include <iostream>
-#include <fstream>
-
-#include <boost/shared_ptr.hpp>
-#include <boost/pointer_cast.hpp>
-#include "lattice.h"
-#include "tdict.h"
-#include "agrammar.h"
-#include "bottom_up_parser.h"
-#include "hg.h"
-#include "hg_intersect.h"
-#include "../utils/ParamsArray.h"
-
-
-using namespace std;
-
-vector<string> src_corpus;
-vector<string> tgt_corpus;
-
-bool openParallelCorpora(string & input_filename){
- ifstream input_file;
-
- input_file.open(input_filename.c_str());
- if (!input_file) {
- cerr << "Cannot open input file " << input_filename << ". Exiting..." << endl;
- return false;
- }
-
- int line =0;
- while (!input_file.eof()) {
- // get a line of source language data
- // cerr<<"new line "<<ctr<<endl;
- string str;
-
- getline(input_file, str);
- line++;
- if (str.length()==0){
- cerr<<" sentence number "<<line<<" is empty, skip the sentence\n";
- continue;
- }
- string delimiters("|||");
-
- vector<string> v = tokenize(str, delimiters);
-
- if ( (v.size() != 2) and (v.size() != 3) ) {
- cerr<<str<<endl;
- cerr<<" source or target sentence is not found in sentence number "<<line<<" , skip the sentence\n";
- continue;
- }
-
- src_corpus.push_back(v[0]);
- tgt_corpus.push_back(v[1]);
- }
- return true;
-}
-
-
-typedef aTextGrammar aGrammar;
-aGrammar * load_grammar(string & grammar_filename){
- cerr<<"start_load_grammar "<<grammar_filename<<endl;
-
- aGrammar * test = new aGrammar(grammar_filename);
-
- return test;
-}
-
-Lattice convertSentenceToLattice(const string & str){
-
- std::vector<WordID> vID;
- TD::ConvertSentence(str , &vID);
- Lattice lsentence;
- lsentence.resize(vID.size());
-
- for (int i=0; i<vID.size(); i++){
-
- lsentence[i].push_back( LatticeArc(vID[i], 0.0, 1) );
- }
-
- // if(!lsentence.IsSentence())
- // cout<<"not a sentence"<<endl;
-
- return lsentence;
-
-}
-
-bool parseSentencePair(const string & goal_sym, const string & src, const string & tgt, GrammarPtr & g, Hypergraph &hg){
-
-
- // cout<<" Start parse the sentence pairs\n"<<endl;
- Lattice lsource = convertSentenceToLattice(src);
-
- //parse the source sentence by the grammar
-
- vector<GrammarPtr> grammars(1, g);
-
- ExhaustiveBottomUpParser parser = ExhaustiveBottomUpParser(goal_sym, grammars);
-
- if (!parser.Parse(lsource, &hg)){
-
- cerr<<"source sentence is not parsed by the grammar!"<<endl;
- return false;
- }
-
- //intersect the hg with the target sentence
- Lattice ltarget = convertSentenceToLattice(tgt);
-
- //forest.PrintGraphviz();
- if (!HG::Intersect(ltarget, & hg)) return false;
-
- SparseVector<double> reweight;
-
- reweight.set_value(FD::Convert("MinusLogP"), -1 );
- hg.Reweight(reweight);
-
- return true;
-
-}
-
-
-
-
-int main(int argc, char** argv){
-
- ParamsArray params(argc, argv);
- params.setDescription("scfg models");
-
- params.addConstraint("grammar_file", "grammar file (default ./grammar.pr )", true); // optional
-
- params.addConstraint("input_file", "parallel input file (default ./parallel_corpora)", true); //optional
-
- params.addConstraint("output_file", "grammar output file (default ./grammar_output)", true); //optional
-
- params.addConstraint("goal_symbol", "top nonterminal symbol (default: X)", true); //optional
-
- params.addConstraint("split", "split one nonterminal into 'split' nonterminals (default: 2)", true); //optional
-
- params.addConstraint("prob_iters", "number of iterations (default: 10)", true); //optional
-
- params.addConstraint("split_iters", "number of splitting iterations (default: 3)", true); //optional
-
- params.addConstraint("alpha", "alpha (default: 0.1)", true); //optional
-
- if (!params.runConstraints("scfg")) {
- return 0;
- }
- cerr<<"get parametters\n\n\n";
-
-
- string grammar_file = params.asString("grammar_file", "./grammar.pr");
-
- string input_file = params.asString("input_file", "parallel_corpora");
-
- string output_file = params.asString("output_file", "grammar_output");
-
- string goal_sym = params.asString("goal_symbol", "X");
-
- int max_split = atoi(params.asString("split", "2").c_str());
-
- int prob_iters = atoi(params.asString("prob_iters", "2").c_str());
- int split_iters = atoi(params.asString("split_iters", "1").c_str());
- double alpha = atof(params.asString("alpha", ".001").c_str());
-
- /////
- cerr<<"grammar_file ="<<grammar_file<<endl;
- cerr<<"input_file ="<< input_file<<endl;
- cerr<<"output_file ="<< output_file<<endl;
- cerr<<"goal_sym ="<< goal_sym<<endl;
- cerr<<"max_split ="<< max_split<<endl;
- cerr<<"prob_iters ="<< prob_iters<<endl;
- cerr<<"split_iters ="<< split_iters<<endl;
- cerr<<"alpha ="<< alpha<<endl;
- //////////////////////////
-
- cerr<<"\n\nLoad parallel corpus...\n";
- if (! openParallelCorpora(input_file))
- exit(1);
-
- cerr<<"Load grammar file ...\n";
- aGrammar * agrammar = load_grammar(grammar_file);
- agrammar->SetGoalNT(goal_sym);
- agrammar->setMaxSplit(max_split);
- agrammar->set_alpha(alpha);
-
- srand(123);
-
- GrammarPtr g( agrammar);
- Hypergraph hg;
-
- int data_size = src_corpus.size();
- int cnt_unparsed =0;
- for (int i =0; i <split_iters; i++){
-
- cerr<<"Split Nonterminals, iteration "<<(i+1)<<endl;
- agrammar->PrintAllRules(output_file+".s" + itos(i+1));
- agrammar->splitAllNonterminals();
-
- //vector<string> src_corpus;
- //vector<string> tgt_corpus;
-
- for (int j=0; j<prob_iters; j++){
- cerr<<"reset grammar score\n";
- agrammar->ResetScore();
- // cerr<<"done reset grammar score\n";
- for (int k=0; k <data_size; k++){
- string src = src_corpus[k];
-
- string tgt = tgt_corpus[k];
- cerr <<"parse sentence pair: "<<src<<" ||| "<<tgt<<endl;
-
- if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){
- cerr<<"target sentence is not parsed by the grammar!\n";
- //return 1;
- cnt_unparsed++;
- continue;
-
- }
-
- cerr<<"update edge posterior prob"<<endl;
- boost::static_pointer_cast<aGrammar>(g)->UpdateHgProsteriorProb(hg);
- hg.clear();
- if (k%1000 ==0 ) cerr<<"sentences "<<k<<endl;
- }
- cerr<<"cnt_unparased="<<cnt_unparsed<<endl;
- boost::static_pointer_cast<aGrammar>(g)->UpdateScore();
- }
- boost::static_pointer_cast<aGrammar>(g)->PrintAllRules(output_file+".e" + itos(i+1));
- }
-
-
-
-
-
-
-
-
-
- // // agrammar->ResetScore();
- // // agrammar->UpdateScore();
- // if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){
- // cerr<<"target sentence is not parsed by the grammar!\n";
- // return 1;
-
- // }
- // // hg.PrintGraphviz();
- // //hg.clear();
-
- // agrammar->PrintAllRules();
- // /*split grammar*/
- // cout<<"split NTs\n";
- // cerr<<"first of all write all nonterminals"<<endl;
- // // agrammar->printAllNonterminals();
- // cout<<"after split nonterminal"<<endl;
- // agrammar->PrintAllRules();
- // Hypergraph hg1;
- // if (! parseSentencePair(goal_sym, src, tgt, g, hg1) ){
- // cerr<<"target sentence is not parsed by the grammar!\n";
- // return 1;
-
- // }
-
- // hg1.PrintGraphviz();
-
-
- // agrammar->splitNonterminal(15);
- // cout<<"after split nonterminal"<<TD::Convert(15)<<endl;
- // agrammar->PrintAllRules();
-
-
- /*load training corpus*/
-
-
- /*for each sentence pair in training corpus*/
-
- // forest.PrintGraphviz();
- /*calculate expected count*/
-
-}
diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp
deleted file mode 100644
index 967a601d..00000000
--- a/gi/scfg/abc/tmp.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <iostream>
-#include <set>
-#include <vector>
-using namespace std;
-
-int x = 5;
-
-class A{A(){x++;}};
-// {
-// int a_;
-
-// };
-
-class B: public A{
-
- int b_;
-};
-
-int main(){
-
- cout<<"Hello World";
- set<int> s;
-
- s.insert(1);
- s.insert(2);
-
- x++;
- cout<<"x="<<x<<endl;
-
- vector<int> t;
- t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4);
- for(vector<int>::iterator it = t.begin(); it != t.end(); it++){
- if (*it ==2) t.erase(it);
- cout <<*it<<endl;
- }
-}