summaryrefslogtreecommitdiff
path: root/gi
diff options
context:
space:
mode:
authorlinh.kitty <linh.kitty@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 20:20:55 +0000
committerlinh.kitty <linh.kitty@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-13 20:20:55 +0000
commitf305e7b0e23b952fb4b7299b2607176ab7409ef9 (patch)
tree8d8d10484b7f10d1bc2a5f28b694490773ca8e6e /gi
parentc807e0b514f21a80df0268c686c7ba70fe39611a (diff)
linh added
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@241 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi')
-rw-r--r--gi/scfg/abc/Release/IConv.d3
-rw-r--r--gi/scfg/abc/Release/Util.d8
-rw-r--r--gi/scfg/abc/Release/agrammar.d193
-rwxr-xr-xgi/scfg/abc/Release/dict_testbin0 -> 1485797 bytes
-rw-r--r--gi/scfg/abc/Release/grammar13
-rw-r--r--gi/scfg/abc/Release/grammar.pr13
-rw-r--r--gi/scfg/abc/Release/makefile66
-rw-r--r--gi/scfg/abc/Release/process_grammar.pl36
-rwxr-xr-xgi/scfg/abc/Release/scfgbin0 -> 4277125 bytes
-rw-r--r--gi/scfg/abc/Release/scfg.d209
-rw-r--r--gi/scfg/abc/Release/sources.mk27
-rw-r--r--gi/scfg/abc/Release/subdir.mk59
-rw-r--r--gi/scfg/abc/Release/tmp.grammar2
l---------gi/scfg/abc/Release/toy-grammar1
-rwxr-xr-xgi/scfg/abc/a.outbin0 -> 22639 bytes
-rw-r--r--gi/scfg/abc/agrammar.cc378
-rw-r--r--gi/scfg/abc/agrammar.h68
-rw-r--r--gi/scfg/abc/old_agrammar.cc383
-rw-r--r--gi/scfg/abc/old_agrammar.h45
-rw-r--r--gi/scfg/abc/scfg.cpp152
-rw-r--r--gi/scfg/abc/tmp.cpp36
21 files changed, 1692 insertions, 0 deletions
diff --git a/gi/scfg/abc/Release/IConv.d b/gi/scfg/abc/Release/IConv.d
new file mode 100644
index 00000000..082cb15b
--- /dev/null
+++ b/gi/scfg/abc/Release/IConv.d
@@ -0,0 +1,3 @@
+IConv.d IConv.o: ../../utils/IConv.cc ../../utils/IConv.hpp
+
+../../utils/IConv.hpp:
diff --git a/gi/scfg/abc/Release/Util.d b/gi/scfg/abc/Release/Util.d
new file mode 100644
index 00000000..586d4d60
--- /dev/null
+++ b/gi/scfg/abc/Release/Util.d
@@ -0,0 +1,8 @@
+Util.d Util.o: ../../utils/Util.cc ../../utils/Util.h \
+ ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
+
+../../utils/Util.h:
+
+../../utils/UtfConverter.h:
+
+../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/agrammar.d b/gi/scfg/abc/Release/agrammar.d
new file mode 100644
index 00000000..6cf14f0d
--- /dev/null
+++ b/gi/scfg/abc/Release/agrammar.d
@@ -0,0 +1,193 @@
+agrammar.d agrammar.o: ../agrammar.cc \
+ /home/tnguyen/ws10smt/decoder/rule_lexer.h \
+ /home/tnguyen/ws10smt/decoder/trule.h \
+ /export/ws10smt/software/include/boost/shared_ptr.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \
+ /export/ws10smt/software/include/boost/config.hpp \
+ /export/ws10smt/software/include/boost/config/user.hpp \
+ /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \
+ /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \
+ /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \
+ /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \
+ /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \
+ /export/ws10smt/software/include/boost/config/select_platform_config.hpp \
+ /export/ws10smt/software/include/boost/config/platform/linux.hpp \
+ /export/ws10smt/software/include/boost/config/posix_features.hpp \
+ /export/ws10smt/software/include/boost/config/suffix.hpp \
+ /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \
+ /export/ws10smt/software/include/boost/assert.hpp \
+ /export/ws10smt/software/include/boost/checked_delete.hpp \
+ /export/ws10smt/software/include/boost/throw_exception.hpp \
+ /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \
+ /export/ws10smt/software/include/boost/detail/workaround.hpp \
+ /export/ws10smt/software/include/boost/exception/exception.hpp \
+ /export/ws10smt/software/include/boost/current_function.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
+ /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \
+ /export/ws10smt/software/include/boost/memory_order.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \
+ /home/tnguyen/ws10smt/decoder/sparse_vector.h \
+ /home/tnguyen/ws10smt/decoder/fdict.h \
+ /home/tnguyen/ws10smt/decoder/dict.h \
+ /export/ws10smt/software/include/boost/functional/hash.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/hash.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \
+ /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \
+ /export/ws10smt/software/include/boost/limits.hpp \
+ /export/ws10smt/software/include/boost/integer/static_log2.hpp \
+ /export/ws10smt/software/include/boost/integer_fwd.hpp \
+ /export/ws10smt/software/include/boost/cstdint.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \
+ /export/ws10smt/software/include/boost/detail/container_fwd.hpp \
+ /home/tnguyen/ws10smt/decoder/wordid.h \
+ /home/tnguyen/ws10smt/decoder/filelib.h \
+ /home/tnguyen/ws10smt/decoder/gzstream.h \
+ /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \
+ /home/tnguyen/ws10smt/decoder/grammar.h \
+ /home/tnguyen/ws10smt/decoder/lattice.h \
+ /home/tnguyen/ws10smt/decoder/array2d.h ../../utils/Util.h \
+ ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
+
+/home/tnguyen/ws10smt/decoder/rule_lexer.h:
+
+/home/tnguyen/ws10smt/decoder/trule.h:
+
+/export/ws10smt/software/include/boost/shared_ptr.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp:
+
+/export/ws10smt/software/include/boost/config.hpp:
+
+/export/ws10smt/software/include/boost/config/user.hpp:
+
+/export/ws10smt/software/include/boost/config/select_compiler_config.hpp:
+
+/export/ws10smt/software/include/boost/config/compiler/gcc.hpp:
+
+/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp:
+
+/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp:
+
+/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp:
+
+/export/ws10smt/software/include/boost/config/select_platform_config.hpp:
+
+/export/ws10smt/software/include/boost/config/platform/linux.hpp:
+
+/export/ws10smt/software/include/boost/config/posix_features.hpp:
+
+/export/ws10smt/software/include/boost/config/suffix.hpp:
+
+/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp:
+
+/export/ws10smt/software/include/boost/assert.hpp:
+
+/export/ws10smt/software/include/boost/checked_delete.hpp:
+
+/export/ws10smt/software/include/boost/throw_exception.hpp:
+
+/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp:
+
+/export/ws10smt/software/include/boost/detail/workaround.hpp:
+
+/export/ws10smt/software/include/boost/exception/exception.hpp:
+
+/export/ws10smt/software/include/boost/current_function.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
+
+/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp:
+
+/export/ws10smt/software/include/boost/memory_order.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp:
+
+/home/tnguyen/ws10smt/decoder/sparse_vector.h:
+
+/home/tnguyen/ws10smt/decoder/fdict.h:
+
+/home/tnguyen/ws10smt/decoder/dict.h:
+
+/export/ws10smt/software/include/boost/functional/hash.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/hash.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp:
+
+/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp:
+
+/export/ws10smt/software/include/boost/limits.hpp:
+
+/export/ws10smt/software/include/boost/integer/static_log2.hpp:
+
+/export/ws10smt/software/include/boost/integer_fwd.hpp:
+
+/export/ws10smt/software/include/boost/cstdint.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/extensions.hpp:
+
+/export/ws10smt/software/include/boost/detail/container_fwd.hpp:
+
+/home/tnguyen/ws10smt/decoder/wordid.h:
+
+/home/tnguyen/ws10smt/decoder/filelib.h:
+
+/home/tnguyen/ws10smt/decoder/gzstream.h:
+
+/home/tnguyen/ws10smt/decoder/tdict.h:
+
+../agrammar.h:
+
+/home/tnguyen/ws10smt/decoder/grammar.h:
+
+/home/tnguyen/ws10smt/decoder/lattice.h:
+
+/home/tnguyen/ws10smt/decoder/array2d.h:
+
+../../utils/Util.h:
+
+../../utils/UtfConverter.h:
+
+../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/dict_test b/gi/scfg/abc/Release/dict_test
new file mode 100755
index 00000000..1ba94218
--- /dev/null
+++ b/gi/scfg/abc/Release/dict_test
Binary files differ
diff --git a/gi/scfg/abc/Release/grammar b/gi/scfg/abc/Release/grammar
new file mode 100644
index 00000000..75fac3a0
--- /dev/null
+++ b/gi/scfg/abc/Release/grammar
@@ -0,0 +1,13 @@
+[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
+[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
+[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629
+[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
+[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
+[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0
diff --git a/gi/scfg/abc/Release/grammar.pr b/gi/scfg/abc/Release/grammar.pr
new file mode 100644
index 00000000..e4e327cf
--- /dev/null
+++ b/gi/scfg/abc/Release/grammar.pr
@@ -0,0 +1,13 @@
+[X] ||| . ||| . ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] . ||| [1] . ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] anciano ||| [1] old man ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] anciano . ||| [1] old man . ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] feo ||| ugly [1] ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] feo . ||| ugly [1] . ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] gato ||| [1] cat ||| MinusLogP=2.56494935746154
+[X] ||| [X,1] gato . ||| [1] cat . ||| MinusLogP=2.56494935746154
+[X] ||| el ||| the ||| MinusLogP=2.56494935746154
+[X] ||| el [X,1] ||| the [1] ||| MinusLogP=2.56494935746154
+[X] ||| el [X,1] . ||| the [1] . ||| MinusLogP=2.56494935746154
diff --git a/gi/scfg/abc/Release/makefile b/gi/scfg/abc/Release/makefile
new file mode 100644
index 00000000..25949e74
--- /dev/null
+++ b/gi/scfg/abc/Release/makefile
@@ -0,0 +1,66 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+#-include ../makefile.init
+
+RM := rm -rf
+
+# All of the sources participating in the build are defined here
+-include sources.mk
+-include subdir.mk
+-include objects.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(strip $(C++_DEPS)),)
+-include $(C++_DEPS)
+endif
+ifneq ($(strip $(CC_DEPS)),)
+-include $(CC_DEPS)
+endif
+ifneq ($(strip $(C_DEPS)),)
+-include $(C_DEPS)
+endif
+ifneq ($(strip $(CPP_DEPS)),)
+-include $(CPP_DEPS)
+endif
+ifneq ($(strip $(CXX_DEPS)),)
+-include $(CXX_DEPS)
+endif
+ifneq ($(strip $(C_UPPER_DEPS)),)
+-include $(C_UPPER_DEPS)
+endif
+endif
+
+#-include ../makefile.defs
+
+# Add inputs and outputs from these tool invocations to the build variables
+
+# All Target
+all: scfg
+
+# Tool invocations
+
+# scfg.o: ../scfg.cpp
+# @echo 'Building file: $<'
+# @echo 'Invoking: GCC C++ Compiler'
+# g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+# @echo 'Finished building: $<'
+# @echo ' '
+
+# Link the scfg executable from the objects listed in $(OBJS), going through
+# the libtool wrapper four directories up.
+# - Library search paths (-L/-R) are absolute /export/ws10smt/... locations,
+#   so this rule only links on a machine that has that tree.
+# - $(USER_OBJS) is listed as a prerequisite but is not passed on the link
+#   command line below.
+# NOTE(review): -lz appears before the objects and libcdec.a; with a linker
+# that resolves libraries left to right this may leave zlib symbols
+# unresolved -- confirm on the target toolchain.
+scfg: $(OBJS) $(USER_OBJS)
+ @echo 'Building target: $@'
+ @echo 'Invoking: GCC C++ Linker'
+ /bin/sh ../../../../libtool --tag=CXX --mode=link g++ -g -O2 -lz -L/export/ws10smt/software/lib -R/export/ws10smt/software/lib -L/export/ws10smt/software/srilm-1.5.10/lib/i686 -o scfg $(OBJS) -L/export/ws10smt/software/lib -lgtest -pthread ../../../../decoder/libcdec.a -lboost_program_options -loolm -ldstruct -lmisc
+ @echo 'Finished building target: $@'
+ @echo ' '
+#g++ -I/home/tnguyen/ws10smt/gi/scfg/cdec/ -I/export/ws10smt/software/srilm-1.5.10/include/ -L/home/tnguyen/ws10smt/decoder -lpthread -ldl -lm $(OBJS) $(USER_OBJS) $(LIBS) -o"scfg"
+# Other Targets
+# Delete every build product: objects, generated dependency files and the
+# scfg binary. The variable references are separated by explicit spaces so the
+# last word of one list can never fuse with the first word of the next
+# (e.g. "./scfg.o./Util.d"), which would silently leave both files behind.
+# The leading '-' keeps make going when a file is already missing.
+clean:
+ -$(RM) $(OBJS) $(C++_DEPS) $(EXECUTABLES) $(CC_DEPS) $(C_DEPS) $(CPP_DEPS) $(CXX_DEPS) $(C_UPPER_DEPS) scfg
+ -@echo ' '
+
+.PHONY: all clean dependents
+.SECONDARY:
+
+-include ../makefile.targets
diff --git a/gi/scfg/abc/Release/process_grammar.pl b/gi/scfg/abc/Release/process_grammar.pl
new file mode 100644
index 00000000..f82a8e5a
--- /dev/null
+++ b/gi/scfg/abc/Release/process_grammar.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/env perl
+#
+# process_grammar.pl <grammar_file>
+#
+# Reads a grammar whose lines have the form
+#   LHS ||| source ||| target ||| features
+# and prints every rule to stdout with its feature field replaced by
+#   MinusLogP=log(N)
+# where N is the number of rules in the file that share the rule's LHS
+# (whitespace in the LHS is ignored when counting).
+#
+# The file is read twice: once to count rules per LHS, once to emit output.
+
+use warnings;
+use strict;
+
+die "Usage: $0 <grammar_file>\n" unless @ARGV;
+my $grammar_file = $ARGV[0];
+
+my %nt_count; # maps nt --> number of rules whose lhs is nt
+
+# Pass 1: count the rules for each left-hand side.
+# Three-argument open keeps the file name from being parsed as a mode/pipe.
+open(my $in, '<', $grammar_file) or die "Can't open file $grammar_file: $!";
+
+while (my $line = <$in>) {
+    chomp $line;
+
+    # Keep only the text before the first "|||" and drop all whitespace.
+    $line =~ s/\|\|\|.*//g;
+    $line =~ s/\s//g;
+
+    $nt_count{$line}++;
+}
+
+close($in);
+
+# Pass 2: re-read the grammar and emit each rule with its new feature.
+open($in, '<', $grammar_file) or die "Can't open file $grammar_file: $!";
+
+while (my $line = <$in>) {
+    chomp $line;
+
+    (my $nt = $line) =~ s/\|\|\|.*//g;
+    $nt =~ s/\s//g;
+
+    # Strip everything after the third "|||"; lines without three separators
+    # are left unchanged by this substitution.
+    $line =~ s/(.+\|\|\|.+\|\|\|.+\|\|\|).+/$1/g;
+    print $line . " MinusLogP=" . (log($nt_count{$nt})) . "\n";
+}
+
+close($in);
diff --git a/gi/scfg/abc/Release/scfg b/gi/scfg/abc/Release/scfg
new file mode 100755
index 00000000..4b6cfb19
--- /dev/null
+++ b/gi/scfg/abc/Release/scfg
Binary files differ
diff --git a/gi/scfg/abc/Release/scfg.d b/gi/scfg/abc/Release/scfg.d
new file mode 100644
index 00000000..ae7a87bb
--- /dev/null
+++ b/gi/scfg/abc/Release/scfg.d
@@ -0,0 +1,209 @@
+scfg.d scfg.o: ../scfg.cpp /home/tnguyen/ws10smt/decoder/lattice.h \
+ /home/tnguyen/ws10smt/decoder/wordid.h \
+ /home/tnguyen/ws10smt/decoder/array2d.h \
+ /home/tnguyen/ws10smt/decoder/tdict.h ../agrammar.h \
+ /home/tnguyen/ws10smt/decoder/grammar.h \
+ /export/ws10smt/software/include/boost/shared_ptr.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp \
+ /export/ws10smt/software/include/boost/config.hpp \
+ /export/ws10smt/software/include/boost/config/user.hpp \
+ /export/ws10smt/software/include/boost/config/select_compiler_config.hpp \
+ /export/ws10smt/software/include/boost/config/compiler/gcc.hpp \
+ /export/ws10smt/software/include/boost/config/select_stdlib_config.hpp \
+ /export/ws10smt/software/include/boost/config/no_tr1/utility.hpp \
+ /export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp \
+ /export/ws10smt/software/include/boost/config/select_platform_config.hpp \
+ /export/ws10smt/software/include/boost/config/platform/linux.hpp \
+ /export/ws10smt/software/include/boost/config/posix_features.hpp \
+ /export/ws10smt/software/include/boost/config/suffix.hpp \
+ /export/ws10smt/software/include/boost/config/no_tr1/memory.hpp \
+ /export/ws10smt/software/include/boost/assert.hpp \
+ /export/ws10smt/software/include/boost/checked_delete.hpp \
+ /export/ws10smt/software/include/boost/throw_exception.hpp \
+ /export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp \
+ /export/ws10smt/software/include/boost/detail/workaround.hpp \
+ /export/ws10smt/software/include/boost/exception/exception.hpp \
+ /export/ws10smt/software/include/boost/current_function.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
+ /export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp \
+ /export/ws10smt/software/include/boost/memory_order.hpp \
+ /export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp \
+ /home/tnguyen/ws10smt/decoder/lattice.h \
+ /home/tnguyen/ws10smt/decoder/trule.h \
+ /home/tnguyen/ws10smt/decoder/sparse_vector.h \
+ /home/tnguyen/ws10smt/decoder/fdict.h \
+ /home/tnguyen/ws10smt/decoder/dict.h \
+ /export/ws10smt/software/include/boost/functional/hash.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/hash.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp \
+ /export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp \
+ /export/ws10smt/software/include/boost/limits.hpp \
+ /export/ws10smt/software/include/boost/integer/static_log2.hpp \
+ /export/ws10smt/software/include/boost/integer_fwd.hpp \
+ /export/ws10smt/software/include/boost/cstdint.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp \
+ /export/ws10smt/software/include/boost/functional/hash/extensions.hpp \
+ /export/ws10smt/software/include/boost/detail/container_fwd.hpp \
+ /home/tnguyen/ws10smt/decoder/bottom_up_parser.h \
+ /home/tnguyen/ws10smt/decoder/grammar.h \
+ /home/tnguyen/ws10smt/decoder/hg.h \
+ /home/tnguyen/ws10smt/decoder/small_vector.h \
+ /home/tnguyen/ws10smt/decoder/prob.h \
+ /home/tnguyen/ws10smt/decoder/logval.h \
+ /home/tnguyen/ws10smt/decoder/hg_intersect.h ../../utils/ParamsArray.h \
+ ../../utils/Util.h ../../utils/UtfConverter.h ../../utils/ConvertUTF.h
+
+/home/tnguyen/ws10smt/decoder/lattice.h:
+
+/home/tnguyen/ws10smt/decoder/wordid.h:
+
+/home/tnguyen/ws10smt/decoder/array2d.h:
+
+/home/tnguyen/ws10smt/decoder/tdict.h:
+
+../agrammar.h:
+
+/home/tnguyen/ws10smt/decoder/grammar.h:
+
+/export/ws10smt/software/include/boost/shared_ptr.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/shared_ptr.hpp:
+
+/export/ws10smt/software/include/boost/config.hpp:
+
+/export/ws10smt/software/include/boost/config/user.hpp:
+
+/export/ws10smt/software/include/boost/config/select_compiler_config.hpp:
+
+/export/ws10smt/software/include/boost/config/compiler/gcc.hpp:
+
+/export/ws10smt/software/include/boost/config/select_stdlib_config.hpp:
+
+/export/ws10smt/software/include/boost/config/no_tr1/utility.hpp:
+
+/export/ws10smt/software/include/boost/config/stdlib/libstdcpp3.hpp:
+
+/export/ws10smt/software/include/boost/config/select_platform_config.hpp:
+
+/export/ws10smt/software/include/boost/config/platform/linux.hpp:
+
+/export/ws10smt/software/include/boost/config/posix_features.hpp:
+
+/export/ws10smt/software/include/boost/config/suffix.hpp:
+
+/export/ws10smt/software/include/boost/config/no_tr1/memory.hpp:
+
+/export/ws10smt/software/include/boost/assert.hpp:
+
+/export/ws10smt/software/include/boost/checked_delete.hpp:
+
+/export/ws10smt/software/include/boost/throw_exception.hpp:
+
+/export/ws10smt/software/include/boost/exception/detail/attribute_noreturn.hpp:
+
+/export/ws10smt/software/include/boost/detail/workaround.hpp:
+
+/export/ws10smt/software/include/boost/exception/exception.hpp:
+
+/export/ws10smt/software/include/boost/current_function.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/shared_count.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/bad_weak_ptr.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_has_sync.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
+
+/export/ws10smt/software/include/boost/detail/sp_typeinfo.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/sp_convertible.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_pool.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/spinlock_sync.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/yield_k.hpp:
+
+/export/ws10smt/software/include/boost/memory_order.hpp:
+
+/export/ws10smt/software/include/boost/smart_ptr/detail/operator_bool.hpp:
+
+/home/tnguyen/ws10smt/decoder/lattice.h:
+
+/home/tnguyen/ws10smt/decoder/trule.h:
+
+/home/tnguyen/ws10smt/decoder/sparse_vector.h:
+
+/home/tnguyen/ws10smt/decoder/fdict.h:
+
+/home/tnguyen/ws10smt/decoder/dict.h:
+
+/export/ws10smt/software/include/boost/functional/hash.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/hash.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/hash_fwd.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/hash_float.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/float_functions.hpp:
+
+/export/ws10smt/software/include/boost/config/no_tr1/cmath.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/limits.hpp:
+
+/export/ws10smt/software/include/boost/limits.hpp:
+
+/export/ws10smt/software/include/boost/integer/static_log2.hpp:
+
+/export/ws10smt/software/include/boost/integer_fwd.hpp:
+
+/export/ws10smt/software/include/boost/cstdint.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/detail/hash_float_generic.hpp:
+
+/export/ws10smt/software/include/boost/functional/hash/extensions.hpp:
+
+/export/ws10smt/software/include/boost/detail/container_fwd.hpp:
+
+/home/tnguyen/ws10smt/decoder/bottom_up_parser.h:
+
+/home/tnguyen/ws10smt/decoder/grammar.h:
+
+/home/tnguyen/ws10smt/decoder/hg.h:
+
+/home/tnguyen/ws10smt/decoder/small_vector.h:
+
+/home/tnguyen/ws10smt/decoder/prob.h:
+
+/home/tnguyen/ws10smt/decoder/logval.h:
+
+/home/tnguyen/ws10smt/decoder/hg_intersect.h:
+
+../../utils/ParamsArray.h:
+
+../../utils/Util.h:
+
+../../utils/UtfConverter.h:
+
+../../utils/ConvertUTF.h:
diff --git a/gi/scfg/abc/Release/sources.mk b/gi/scfg/abc/Release/sources.mk
new file mode 100644
index 00000000..6c7070aa
--- /dev/null
+++ b/gi/scfg/abc/Release/sources.mk
@@ -0,0 +1,27 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+C_UPPER_SRCS :=
+C_SRCS :=
+CPP_SRCS :=
+O_SRCS :=
+ASM_SRCS :=
+S_SRCS :=
+C++_SRCS :=
+CXX_SRCS :=
+CC_SRCS :=
+OBJ_SRCS :=
+OBJS :=
+C++_DEPS :=
+EXECUTABLES :=
+CC_DEPS :=
+C_DEPS :=
+CPP_DEPS :=
+CXX_DEPS :=
+C_UPPER_DEPS :=
+
+# Every subdirectory with source files must be described here
+SUBDIRS := \
+. \
+
diff --git a/gi/scfg/abc/Release/subdir.mk b/gi/scfg/abc/Release/subdir.mk
new file mode 100644
index 00000000..49080b36
--- /dev/null
+++ b/gi/scfg/abc/Release/subdir.mk
@@ -0,0 +1,59 @@
+
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables
+CPP_SRCS += \
+../../utils/Util.cc \
+../agrammar.cc \
+../scfg.cpp
+
+
+OBJS += \
+./Util.o \
+./agrammar.o \
+./scfg.o
+
+
+CPP_DEPS += \
+./Util.d \
+./agrammar.d \
+./scfg.d
+
+# Each subdirectory must supply rules for building sources it contributes
+# %.o: ../%.cpp
+# @echo 'Building file: $<'
+# @echo 'Invoking: GCC C++ Compiler'
+# g++ -g -p -g3 -Wall -c -fmessage-length=0 -I../../openfst-1.1/src/include/ -L../../openfst-1.1/src/lib/ -lfst -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+#
+# @echo ' '
+
+# Compile a .cc file from ../../utils into an object in this directory.
+# -MMD -MP -MF write a <name>.d dependency file next to the object, and -MT
+# names that .d file as the target of the generated dependency rule.
+# Unlike the ../%.cpp and ../%.cc rules, this one passes no -I include paths
+# and uses -g -p -g3 instead of -O3 -g3.
+%.o: ../../utils/%.cc
+ @echo 'Building file: $<'
+ @echo 'Invoking: GCC C++ Compiler'
+ g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+# Compile a .c file from ../../utils into an object in this directory.
+# The command invokes g++ (not gcc) with the same flags as the ../../utils
+# .cc rule, so these sources are built by the C++ driver.
+%.o: ../../utils/%.c
+ @echo 'Building file: $<'
+ @echo 'Invoking: GCC C++ Compiler'
+ g++ -g -p -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+# Compile a .cpp file from the parent directory into an object here, with
+# -O3 and full debug info. Headers are searched in ../../utils/ and in three
+# absolute locations (a user's decoder checkout, the shared software include
+# tree and SRILM), so the rule is tied to that machine layout.
+# NOTE(review): -lpthread -ldl -lm are linker inputs; because -c stops before
+# linking they presumably have no effect here -- confirm before removing.
+%.o: ../%.cpp
+ @echo 'Building file: $<'
+ @echo 'Invoking: GCC C++ Compiler'
+ g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
+# Compile a .cc file from the parent directory into an object here. The
+# command is the same as the one used for ../%.cpp sources: -O3 -g3, the
+# ../../utils/ include path plus three absolute include directories, and a
+# .d dependency file written next to the object via -MMD -MP -MF.
+%.o: ../%.cc
+ @echo 'Building file: $<'
+ @echo 'Invoking: GCC C++ Compiler'
+ g++ -O3 -g3 -Wall -c -fmessage-length=0 -I../../utils/ -I/home/tnguyen/ws10smt/decoder -I/export/ws10smt/software/include -I/export/ws10smt/software/srilm-1.5.10/include -lpthread -ldl -lm -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+ @echo 'Finished building: $<'
+ @echo ' '
+
diff --git a/gi/scfg/abc/Release/tmp.grammar b/gi/scfg/abc/Release/tmp.grammar
new file mode 100644
index 00000000..9df1b77d
--- /dev/null
+++ b/gi/scfg/abc/Release/tmp.grammar
@@ -0,0 +1,2 @@
+[A] ||| [B] [C] . ||| [B] [C]. ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0
+[A] ||| [B] asd . ||| [B] asd . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 \ No newline at end of file
diff --git a/gi/scfg/abc/Release/toy-grammar b/gi/scfg/abc/Release/toy-grammar
new file mode 120000
index 00000000..50dea8df
--- /dev/null
+++ b/gi/scfg/abc/Release/toy-grammar
@@ -0,0 +1 @@
+/export/ws10smt/toy-grammar/ \ No newline at end of file
diff --git a/gi/scfg/abc/a.out b/gi/scfg/abc/a.out
new file mode 100755
index 00000000..0467acf0
--- /dev/null
+++ b/gi/scfg/abc/a.out
Binary files differ
diff --git a/gi/scfg/abc/agrammar.cc b/gi/scfg/abc/agrammar.cc
new file mode 100644
index 00000000..585255e3
--- /dev/null
+++ b/gi/scfg/abc/agrammar.cc
@@ -0,0 +1,378 @@
+#include <algorithm>
+#include <utility>
+#include <map>
+
+#include "rule_lexer.h"
+#include "filelib.h"
+#include "tdict.h"
+#include "agrammar.h"
+#include "../utils/Util.h"
+
+// Structural comparison of two rules: they match when the left-hand side and both
+// symbol sequences (f_ and e_) are identical. Scores are not compared.
+bool equal(TRulePtr const & rule1, TRulePtr const & rule2){
+  const bool same_shape = rule1->lhs_ == rule2->lhs_
+                       && rule1->f_.size() == rule2->f_.size()
+                       && rule1->e_.size() == rule2->e_.size();
+  if (!same_shape) return false;
+
+  // sizes agree, so a single-range element-wise comparison is sufficient
+  return std::equal(rule1->f_.begin(), rule1->f_.end(), rule2->f_.begin())
+      && std::equal(rule1->e_.begin(), rule1->e_.end(), rule2->e_.begin());
+}
+
+//const vector<TRulePtr> Grammar::NO_RULES;
+
+// Remove from v every rule that equal() considers the same as `rule`, logging each
+// removal to stdout. v is left untouched when no entry matches.
+void aRemoveRule(vector<TRulePtr> & v, const TRulePtr & rule){
+  for (size_t i = 0; i < v.size(); ){
+    if (equal(v[i], rule)){
+      cout<<"erase rule from vector:"<<rule->AsString()<<endl;
+      // do not advance: the following element has just moved into slot i,
+      // and stepping past it would leave adjacent duplicates behind
+      v.erase(v.begin() + i);
+    } else {
+      ++i;
+    }
+  }
+}
+
+// RuleBin implementation that keeps its rules in a plain vector.
+struct aTextRuleBin : public RuleBin {
+  // Number of rules currently stored.
+  int GetNumRules() const { return rules_.size(); }
+
+  // i-th stored rule; i is not range checked.
+  TRulePtr GetIthRule(int i) const { return rules_[i]; }
+
+  // Append a rule to the bin.
+  void AddRule(TRulePtr t) { rules_.push_back(t); }
+
+  // Erase the first stored rule that equal() matches against t; no-op when absent.
+  void RemoveRule(TRulePtr t){
+    vector<TRulePtr>::iterator pos = rules_.begin();
+    while (pos != rules_.end() && !equal(*pos, t))
+      ++pos;
+    if (pos != rules_.end())
+      rules_.erase(pos);
+  }
+
+  // Arity of the first rule. The bin must not be empty.
+  int Arity() const { return rules_.front()->Arity(); }
+
+  // Write every rule to stderr, one per line.
+  void Dump() const {
+    for (vector<TRulePtr>::const_iterator it = rules_.begin(); it != rules_.end(); ++it)
+      cerr << (*it)->AsString() << endl;
+  }
+ private:
+  vector<TRulePtr> rules_;
+};
+
+// One node of the grammar trie; children are keyed by the symbol that leads to them.
+// NOTE(review): rb_ is an owning raw pointer released in the destructor, but no copy
+// constructor or assignment operator is declared, so copying a node that already
+// holds a rule bin would end in a double delete.
+struct aTextGrammarNode : public GrammarIter {
+  aTextGrammarNode() : rb_(NULL) {}
+  ~aTextGrammarNode() { delete rb_; }
+
+  // Child reached through `symbol`, or NULL when this node has no such child.
+  const GrammarIter* Extend(int symbol) const {
+    const map<WordID, aTextGrammarNode>::const_iterator child = tree_.find(symbol);
+    return (child != tree_.end()) ? &child->second : NULL;
+  }
+
+  // Rule bin attached to this node; NULL when none has been created.
+  const RuleBin* GetRules() const { return rb_; }
+
+  map<WordID, aTextGrammarNode> tree_;
+  aTextRuleBin* rb_;
+};
+
+struct aTGImpl { // private implementation object of aTextGrammar
+ aTextGrammarNode root_; // root of the rule trie
+};
+
+// Both constructors now give every scalar member a defined value. max_split_ and
+// goalID used to stay uninitialized until setMaxSplit()/SetGoalNT() were called, so
+// splitNonterminal() could read garbage. The defaults (2 splits, goal id 0) are
+// placeholders; callers are still expected to configure both explicitly.
+aTextGrammar::aTextGrammar() :
+  max_span_(10),
+  max_split_(2),
+  pimpl_(new aTGImpl),
+  goalID(0) {}
+
+// Same as the default constructor, then loads the rules found in `file`.
+aTextGrammar::aTextGrammar(const string& file) :
+  max_span_(10),
+  max_split_(2),
+  pimpl_(new aTGImpl),
+  goalID(0) {
+  ReadFromFile(file);
+}
+
+// Entry point of the rule trie.
+const GrammarIter* aTextGrammar::GetRoot() const {
+  const aTextGrammarNode & root = pimpl_->root_;
+  return &root;
+}
+
+// Store the dictionary id of the goal nonterminal label.
+void aTextGrammar::SetGoalNT(const string & goal_str){
+  const WordID goal = TD::Convert(goal_str);
+  goalID = goal;
+}
+// Fill ntrule_map with one NTRule per distinct nonterminal mentioned by `rule`,
+// keyed by the positive label id. The lhs entry is always (re)written; rhs
+// nonterminals (negative symbols in f_) are added only when not yet present.
+void getNTRule( const TRulePtr & rule, map<WordID, NTRule> & ntrule_map){
+  const WordID lhs_id = rule->lhs_ * -1;
+  ntrule_map[lhs_id] = NTRule(rule, lhs_id);
+
+  for (size_t pos = 0; pos < rule->f_.size(); ++pos){
+    const WordID sym = rule->f_.at(pos);
+    if (sym >= 0) continue;                                  // not a nonterminal
+    const WordID nt_id = sym * -1;
+    if (ntrule_map.find(nt_id) != ntrule_map.end()) continue; // already recorded
+    ntrule_map[nt_id] = NTRule(rule, nt_id);
+  }
+}
+// Register a rule in every index kept by the grammar: the unary tables or the trie,
+// the per-lhs list (lhs_rules_) and the per-nonterminal list (nt_rules_).
+void aTextGrammar::AddRule(const TRulePtr& rule) {
+  if (!rule->IsUnary()) {
+    // walk (creating nodes as needed) along the f_ symbols, then store the rule there
+    aTextGrammarNode* node = &pimpl_->root_;
+    for (size_t k = 0; k < rule->f_.size(); ++k)
+      node = &node->tree_[rule->f_[k]];
+    if (!node->rb_)
+      node->rb_ = new aTextRuleBin;
+    node->rb_->AddRule(rule);
+  } else {
+    rhs2unaries_[rule->f().front()].push_back(rule);
+    unaries_.push_back(rule);
+  }
+
+  // index by left-hand side (key is the positive label id)
+  lhs_rules_[rule->lhs_ * -1].push_back(rule);
+
+  // index by every nonterminal the rule mentions
+  map<WordID, NTRule> per_nt;
+  getNTRule(rule, per_nt);
+  map<WordID, NTRule>::const_iterator entry = per_nt.begin();
+  while (entry != per_nt.end()){
+    nt_rules_[entry->first].push_back(entry->second);
+    ++entry;
+  }
+}
+
+// Remove a rule from every index of the grammar (unary tables or trie, lhs_rules_,
+// nt_rules_). Rules are matched with equal(), i.e. by lhs/f_/e_, not by score.
+// Removing a rule that is not present is a no-op apart from the log line.
+void aTextGrammar::RemoveRule(const TRulePtr & rule){
+  cout<<"Remove rule: "<<rule->AsString()<<endl;
+  if (rule->IsUnary()) {
+    aRemoveRule(rhs2unaries_[rule->f().front()], rule);
+    aRemoveRule(unaries_, rule);
+  } else {
+    // Look the path up with find(): operator[] would create empty trie nodes for a
+    // rule that was never added, and the bin pointer at the end could be NULL.
+    aTextGrammarNode* cur = &pimpl_->root_;
+    for (size_t i = 0; cur != NULL && i < rule->f_.size(); ++i){
+      map<WordID, aTextGrammarNode>::iterator next = cur->tree_.find(rule->f_[i]);
+      cur = (next == cur->tree_.end()) ? NULL : &next->second;
+    }
+    if (cur != NULL && cur->rb_ != NULL)
+      cur->rb_->RemoveRule(rule);
+  }
+
+  //remove rules from lhs_rules_
+  aRemoveRule(lhs_rules_[rule->lhs_ * -1] , rule);
+
+  // Also drop the rule from the per-nonterminal index. Otherwise a later split of
+  // another nonterminal mentioned by this rule would start from the stale entry and
+  // re-create rules that still reference the removed symbols.
+  vector<WordID> mentioned(1, rule->lhs_ * -1);
+  for (size_t i = 0; i < rule->f_.size(); ++i)
+    if (rule->f_[i] < 0) mentioned.push_back(rule->f_[i] * -1);
+  for (size_t m = 0; m < mentioned.size(); ++m){
+    map<WordID, vector<NTRule> >::iterator entry = nt_rules_.find(mentioned[m]);
+    if (entry == nt_rules_.end()) continue;
+    vector<NTRule> & list = entry->second;
+    for (size_t i = 0; i < list.size(); ){
+      if (equal(list[i].rule_, rule)) list.erase(list.begin() + i);
+      else ++i;
+    }
+  }
+}
+
+// Remove a nonterminal together with every rule recorded for it in nt_rules_.
+void aTextGrammar::RemoveNonterminal(WordID wordID){
+  // take a snapshot first: the index entry is erased before the rules are removed
+  const vector<NTRule> doomed = nt_rules_[wordID];
+  nt_rules_.erase(wordID);
+
+  for (vector<NTRule>::const_iterator it = doomed.begin(); it != doomed.end(); ++it)
+    RemoveRule(it->rule_);
+}
+
+// Set how many new nonterminals splitNonterminal() creates per split.
+void aTextGrammar::setMaxSplit(int max_split){
+  this->max_split_ = max_split;
+}
+
+
+
+
+// For every rule that mentions nt_old, add one new rule per way of replacing each
+// occurrence of nt_old (on the lhs and/or in f_) by a member of 'nts'. The existing
+// rules are not removed here.
+//
+// Scores: each new rule starts from the old scores, gets +/- epsilon to break the
+// symmetry between the copies, and is increased by (#replaced rhs occurrences) *
+// log(nts.size()).
+//
+// Fixes relative to the previous version:
+//  * the random draw used integer division (rand()/RAND_MAX), which is 0 for
+//    practically every call, so epsilon was always subtracted;
+//  * the number of new rules was computed with floating-point pow() and truncated;
+//  * the two copies of the perturbation code were merged.
+void aTextGrammar::AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts){
+
+  // iterate over a snapshot: AddRule() below appends to nt_rules_
+  vector<NTRule> rules = nt_rules_[nt_old];
+
+  const double epsilon = 0.001;
+  const int n_splits = static_cast<int>(nts.size());
+  const double log_nts_size = log(static_cast<double>(nts.size()));
+
+  for (size_t i=0; i<rules.size(); i++){
+    const NTRule & old_rule = rules[i];
+    // positions of nt_old inside the rule: -1 stands for the lhs, otherwise an index into f_
+    const vector<int> & ntPos = old_rule.ntPos_;
+    if (ntPos.empty()) continue;   // nothing to substitute in this rule
+
+    // nts.size() ^ ntPos.size() substitutions, computed in integer arithmetic
+    int cnt_newrules = 1;
+    for (size_t k=0; k<ntPos.size(); k++) cnt_newrules *= n_splits;
+
+    const bool splits_lhs = (ntPos[0] == -1);
+    const size_t first_rhs = splits_lhs ? 1 : 0;        // first ntPos entry that refers to f_
+    const size_t n_rhs = ntPos.size() - first_rhs;      // number of rhs occurrences replaced
+    // once this many rules of one lhs were perturbed in one direction, later ones go the other way
+    const int max_same_sign = splits_lhs
+        ? cnt_newrules / (2 * static_cast<int>(ntPos.size()))
+        : cnt_newrules / 2;
+
+    // per-lhs counters of how many rules got +epsilon / -epsilon
+    map<WordID, int> cnt_addepsilon;
+    map<WordID, int> cnt_minusepsilon;
+    cnt_addepsilon[old_rule.rule_->lhs_] = 0;
+    cnt_minusepsilon[old_rule.rule_->lhs_] = 0;
+    for (size_t j =0; j<nts.size(); j++) { cnt_addepsilon[nts[j] ] = 0; cnt_minusepsilon[nts[j] ] = 0;}
+
+    for (int j=0; j<cnt_newrules; j++){ //each j represents a new rule
+      // decode j as ntPos.size() digits in base nts.size(): digit k selects the
+      // replacement used at occurrence k
+      vector<int> j_vector(ntPos.size(), 0);
+      int j_tmp = j;
+      for (size_t k=0; k<ntPos.size(); k++){
+        j_vector[k] = j_tmp % n_splits;
+        j_tmp /= n_splits;
+      }
+
+      TRulePtr newrule(new TRule());
+      newrule->e_ = old_rule.rule_->e_;
+      newrule->f_ = old_rule.rule_->f_;
+      newrule->lhs_ = old_rule.rule_->lhs_;
+      newrule->arity_ = old_rule.rule_->arity_;
+      newrule->scores_ = old_rule.rule_->scores_;
+
+      if (splits_lhs)
+        newrule->lhs_ = nts[j_vector[0]] * -1;
+
+      //score has to randomly add/minus a small epsilon to break the balance
+      if (n_splits > 1 && n_rhs > 0){
+        if (cnt_addepsilon[newrule->lhs_] >= max_same_sign)
+          newrule->scores_ -= epsilon;
+        else if (cnt_minusepsilon[newrule->lhs_] >= max_same_sign)
+          newrule->scores_ += epsilon;
+        else {
+          // floating-point division, so the draw really is uniform in [0,1]
+          const double random = rand() / static_cast<double>(RAND_MAX);
+          if (random > .5){
+            newrule->scores_ += epsilon;
+            cnt_addepsilon[newrule->lhs_]++;
+          }
+          else{
+            newrule->scores_ -= epsilon;
+            cnt_minusepsilon[newrule->lhs_]++;
+          }
+        }
+      }
+
+      // substitute the selected split nonterminal at every rhs occurrence
+      for (size_t k=first_rhs; k<ntPos.size(); k++)
+        newrule->f_[ntPos[k]] = nts[j_vector[k]] * -1;
+      newrule->scores_ += n_rhs * log_nts_size;
+
+      this->AddRule (newrule);
+    }//add new rules for each grammar rules
+
+  } //iterate through all grammar rules
+
+}
+
+
+// Split nonterminal wordID into max_split_ new nonterminals named "<label>+<i>"
+// (i = 0 .. max_split_-1): add the substituted rules, then remove wordID and its old
+// rules. When wordID is the goal symbol, unary rules goal -> split_i are added.
+void aTextGrammar::splitNonterminal(WordID wordID){
+
+  //first add the split nonterminals to the TD dictionary
+  const string old_str = TD::Convert(wordID);
+
+  vector<WordID> v_splits;
+  for (int i =0; i< this->max_split_; i++){
+    const string split_str = old_str + "+" + itos(i);
+    v_splits.push_back(TD::Convert(split_str));
+  }
+
+  AddSplitNonTerminal(wordID, v_splits);
+  RemoveNonterminal(wordID);
+
+  if (wordID == goalID){ //add rule X-> X1; X->X2,... if X is the goal NT
+    const double minus_log_p = log(static_cast<double>(v_splits.size()));
+    for (size_t i =0; i<v_splits.size(); i++){
+      TRulePtr rule (new TRule());
+      rule->lhs_ = goalID * -1;
+      rule->f_.push_back(v_splits[i] * -1);
+      rule->e_.push_back(0);
+      // The rule has exactly one rhs nonterminal. arity_ was never set before, so
+      // AddRule()'s IsUnary() test saw whatever TRule() left in it.
+      // NOTE(review): assumes arity_ counts rhs nonterminals - confirm against TRule.
+      rule->arity_ = 1;
+
+      rule->scores_.set_value(FD::Convert("MinusLogP"), minus_log_p );
+      AddRule(rule);
+    }
+  }
+}
+
+
+
+// Print every rule to stdout, grouped by left-hand side in lhs_rules_ order.
+void aTextGrammar::PrintAllRules() const{
+  for (map<WordID, vector<TRulePtr> >::const_iterator group = lhs_rules_.begin();
+       group != lhs_rules_.end(); ++group){
+    const vector<TRulePtr> & rules = group->second;
+    for (vector<TRulePtr>::const_iterator r = rules.begin(); r != rules.end(); ++r)
+      cout<<(*r)->AsString()<<"\t"<<endl;
+  }
+}
+
+
+// Print to stdout every rule recorded for nonterminal nt; prints nothing when nt has
+// no entry in nt_rules_.
+void aTextGrammar::PrintNonterminalRules(WordID nt) const{
+  const map<WordID, vector<NTRule> >::const_iterator found = nt_rules_.find(nt);
+  if (found != nt_rules_.end()){
+    const vector<NTRule> & entries = found->second;
+    for (size_t i = 0; i < entries.size(); ++i)
+      cout<<entries[i].rule_->AsString()<<endl;
+  }
+}
+
+// Rule-reader callback: `extra` carries the aTextGrammar that receives the rule.
+static void AddRuleHelper(const TRulePtr& new_rule, void* extra) {
+  aTextGrammar* const grammar = static_cast<aTextGrammar*>(extra);
+  grammar->AddRule(new_rule);
+}
+
+// Read the rules in `filename` and add each of them to this grammar.
+void aTextGrammar::ReadFromFile(const string& filename) {
+  ReadFile reader(filename);
+  RuleLexer::ReadRules(reader.stream(), AddRuleHelper, this);
+}
+
+// True when `distance` does not exceed max_span_; i and j are not inspected.
+bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const {
+  return distance <= max_span_;
+}
+
diff --git a/gi/scfg/abc/agrammar.h b/gi/scfg/abc/agrammar.h
new file mode 100644
index 00000000..8a7186bf
--- /dev/null
+++ b/gi/scfg/abc/agrammar.h
@@ -0,0 +1,68 @@
+#ifndef AGRAMMAR_H_
+#define AGRAMMAR_H_
+
+#include "grammar.h"
+
+
+using namespace std;
+
+class aTGImpl;
+// A rule together with the positions at which one particular nonterminal occurs in it.
+struct NTRule{
+
+  // Empty record; nt_ is given a defined value (it used to be left uninitialized).
+  NTRule() : nt_(0) {}
+
+  // Record every position of nonterminal `nt` (positive label id) in `rule`.
+  NTRule(const TRulePtr & rule, WordID nt) : rule_(rule), nt_(nt) {
+    if (rule->lhs_ * -1 == nt)
+      ntPos_.push_back(-1);
+
+    for (size_t i=0; i< rule->f().size(); i++)
+      if (rule->f().at(i) * -1 == nt)
+        ntPos_.push_back(static_cast<int>(i));
+  }
+
+  TRulePtr rule_;
+  WordID nt_; //the labelID of the nt (WordID>0);
+
+  vector<int> ntPos_; //positions of nt_: -1 means the lhs, 0..f_.size()-1 index into f_
+  //e.g. for the rule NP -> DET NP with nt_ = id of NP, ntPos_ = (-1, 1)
+
+};
+
+struct aTextGrammar : public Grammar { // text grammar that can also split a nonterminal into several new ones
+ aTextGrammar();
+ aTextGrammar(const std::string& file); // builds the grammar and reads the rules from `file`
+ void SetMaxSpan(int m) { max_span_ = m; }
+
+ virtual const GrammarIter* GetRoot() const;
+ void AddRule(const TRulePtr& rule);
+ void ReadFromFile(const std::string& filename);
+ virtual bool HasRuleForSpan(int i, int j, int distance) const;
+ const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; // NOTE(review): no definition in agrammar.cc - confirm it is provided elsewhere
+
+ void AddSplitNonTerminal(WordID nt_old, vector<WordID> & nts); // adds the rules obtained by replacing nt_old with members of nts
+ void setMaxSplit(int max_split);
+ void splitNonterminal(WordID wordID); // splits wordID into max_split_ new nonterminals and removes wordID
+
+ void PrintAllRules() const;
+ void PrintNonterminalRules(WordID nt) const;
+ void SetGoalNT(const string & goal_str);
+ private:
+
+ void RemoveRule(const TRulePtr & rule);
+ void RemoveNonterminal(WordID wordID);
+
+ int max_span_;
+ int max_split_; // number of new nonterminals created per split
+ boost::shared_ptr<aTGImpl> pimpl_;
+ map <WordID, vector<TRulePtr> > lhs_rules_;// rules grouped by lhs; key WordID >0
+ map <WordID, vector<NTRule> > nt_rules_; // for each nonterminal, the rules that mention it
+
+ // map<WordID, vector<WordID> > grSplitNonterminals;
+ WordID goalID; // id of the goal nonterminal, set by SetGoalNT()
+};
+
+
+#endif
diff --git a/gi/scfg/abc/old_agrammar.cc b/gi/scfg/abc/old_agrammar.cc
new file mode 100644
index 00000000..33d70dfc
--- /dev/null
+++ b/gi/scfg/abc/old_agrammar.cc
@@ -0,0 +1,383 @@
+#include "agrammar.h"
+#include "Util.h"
+
+#include <algorithm>
+#include <utility>
+#include <map>
+
+#include "rule_lexer.h"
+#include "filelib.h"
+#include "tdict.h"
+#include <iostream>
+#include <fstream>
+
+map<WordID, vector<WordID> > grSplitNonterminals;
+//const vector<TRulePtr> Grammar::NO_RULES;
+
+
+// vector<TRulePtr> substituteF(TRulePtr & rule, WordID wordID, vector<WordID> & v){
+// vector<TRulePtr> vRules; //outputs
+
+// vector<WordID> f = rule->f();
+// vector<vector<WordID> > newfvector;
+// for (int i =0; i< f.size(); i++){
+// if (f[i] == wordID){
+// newfvector.push_back(v);
+// }
+// else
+// newfvector.push_back(vector<WordID> (1, f[i]));
+// }
+
+// //now creates new rules;
+
+
+// return vRules;
+// }
+
+
+// RuleBin implementation that keeps its rules in a plain vector (old version, with
+// in-place substitution support).
+struct aTextRuleBin : public RuleBin {
+  int GetNumRules() const {
+    return rules_.size();
+  }
+  TRulePtr GetIthRule(int i) const {
+    return rules_[i];
+  }
+  void AddRule(TRulePtr t) {
+    rules_.push_back(t);
+  }
+  // Arity of the first rule. The bin must not be empty.
+  int Arity() const {
+    return rules_.front()->Arity();
+  }
+  void Dump() const {
+    for (size_t i = 0; i < rules_.size(); ++i)
+      cerr << rules_[i]->AsString() << endl;
+  }
+
+  // Copy of the stored rules.
+  vector<TRulePtr> getRules(){ return rules_;}
+
+  // Replace the f_ of every stored rule by f_path. A rule whose lhs nonterminal has
+  // an entry in grSplitNonterminals is replaced by one copy per split nonterminal.
+  // (This is not the substituteF procedure found in the cdec code.)
+  //
+  // Fixes relative to the previous version: new rules are actually allocated (a
+  // default-constructed TRulePtr was dereferenced), nothing is erased from rules_
+  // while iterating over it, the result no longer relies on inserting at an iterator
+  // of another container, the new rules receive f_path like the kept ones, and the
+  // lhs lookup/assignment uses the sign convention of the rest of this file
+  // (positive ids as map keys, negative ids inside rules).
+  void substituteF(vector<WordID> & f_path, map<WordID, vector<WordID> > & grSplitNonterminals){
+    vector<TRulePtr> updated;
+    for (vector<TRulePtr>::iterator it = rules_.begin() ; it != rules_.end(); it++){
+      assert(f_path.size() == (*it)->f_.size());
+
+      const map<WordID, vector<WordID> >::const_iterator split =
+          grSplitNonterminals.find((*it)->lhs_ * -1);
+      if (split == grSplitNonterminals.end()){
+        (*it)->f_ = f_path;
+        updated.push_back(*it);
+        continue;
+      }
+
+      // split the lhs NT: one new rule per replacement nonterminal
+      const vector<WordID> & new_lhs = split->second;
+      for (vector<WordID>::const_iterator vit = new_lhs.begin(); vit != new_lhs.end(); vit++){
+        TRulePtr newrule(new TRule());
+        newrule->e_ = (*it)->e_;
+        newrule->f_ = f_path;
+        newrule->lhs_ = *vit * -1;
+        newrule->scores_ = (*it)->scores_;
+        newrule->arity_ = (*it)->arity_;
+        updated.push_back(newrule);
+      }
+    }
+    rules_.swap(updated);
+  }
+
+private:
+  vector<TRulePtr> rules_;
+};
+
+
+
+// One node of the grammar trie (old version). Children are keyed by the symbol that
+// leads to them; rb_ is owned by the node.
+struct aTextGrammarNode : public GrammarIter {
+  aTextGrammarNode() : rb_(NULL) {}
+
+  // Copy the structure (nonterminals_ and subtree) but start with an empty rule bin:
+  // the rules themselves are not copied over.
+  aTextGrammarNode(const aTextGrammarNode & a){
+    nonterminals_ = a.nonterminals_;
+    tree_ = a.tree_;
+    rb_ = new aTextRuleBin(); //cp constructor: don't cp the set of rules over
+  }
+
+  // Assignment mirrors the copy constructor for the rule bin. Without it the implicit
+  // assignment copied the raw rb_ pointer, so two nodes deleted the same rule bin.
+  aTextGrammarNode & operator=(const aTextGrammarNode & a){
+    if (this != &a){
+      path_ = a.path_;
+      nonterminals_ = a.nonterminals_;
+      tree_ = a.tree_;
+      delete rb_;
+      rb_ = new aTextRuleBin();
+    }
+    return *this;
+  }
+
+  ~aTextGrammarNode() {
+    delete rb_;
+  }
+
+  // Child reached through `symbol`, or NULL when this node has no such child.
+  const GrammarIter* Extend(int symbol) const {
+    map<WordID, aTextGrammarNode>::const_iterator i = tree_.find(symbol);
+    if (i == tree_.end()) return NULL;
+    return &i->second;
+  }
+
+  // Rule bin attached to this node; NULL when none has been created.
+  const RuleBin* GetRules() const {
+    return rb_;
+  }
+
+  void DFS();
+
+  void visit (); //todo: make this as a function pointer
+
+  vector <WordID > path_; //vector of f_ nonterminals/terminals from the top to the current node;
+  set<WordID> nonterminals_; //Linh added: the set of nonterminals extending the current node; WordID is the label in the dict, i.e. WordID>0
+  map<WordID, aTextGrammarNode> tree_;
+  aTextRuleBin* rb_;
+
+  void print_path(){ //for debug only
+    cout<<"path="<<endl;
+    for (size_t i =0; i< path_.size(); i++)
+      cout<<path_[i]<<" ";
+    cout<<endl;
+  }
+};
+
+// Pre-order traversal: visit this node, then recurse into every child. The trie has
+// no cycles, so no visited-marking is needed.
+void aTextGrammarNode::DFS(){
+  visit();
+
+  map<WordID, aTextGrammarNode>::iterator child = tree_.begin();
+  while (child != tree_.end()){
+    child->second.DFS();
+    ++child;
+  }
+}
+
+
+// Process one trie node during the split traversal:
+//  * a child whose symbol is not a nonterminal listed in grSplitNonterminals just
+//    receives its path_ (this node's path_ plus the child's symbol);
+//  * a child that is listed gets one structural copy per split nonterminal (each with
+//    an empty rule bin) and is removed afterwards;
+//  * a node left without children has the rules in its bin rewritten via substituteF().
+//
+// Fixes relative to the previous version: the subtree copy no longer goes through the
+// implicit node assignment (which shared, and later double-deleted, the rule bin),
+// and a childless node without a rule bin is skipped instead of dereferencing NULL.
+void aTextGrammarNode::visit( ){
+
+  cout<<"start visit()"<<endl;
+
+  cout<<"got grSplitNonterminals"<<endl;
+
+  //iterate through next terminals/nonterminals in tree_
+  vector<WordID> tobe_removedNTs; //children that were split and therefore have to be removed from tree_
+
+  // inserting into a std::map does not invalidate `it`
+  for (map<WordID, aTextGrammarNode>::iterator it = tree_.begin() ; it != tree_.end(); it++){
+    cout<<"in visit(): inside for loop: wordID=="<<it->first<<endl;
+
+    map<WordID, vector<WordID> >::const_iterator git = grSplitNonterminals.find(it->first * -1 );
+
+    if (git == grSplitNonterminals.end() || it->first >0){ //the next symbol is not to be split
+      cout<<"not split\n";
+      it->second.path_ = path_;
+      it->second.path_.push_back(it->first);
+      cout<<"in visit() tree_[it->first ].path_= ";
+      it->second.print_path();
+      continue;
+    }
+
+    cout<<"tmp2";
+    const vector<WordID> vsplits = git->second;
+    cout<<"tmp3";
+    cout <<"got vsplits"<<endl;
+    for (size_t i =0 ; i<vsplits.size(); i++){
+      const WordID split_sym = vsplits[i] * -1;
+      aTextGrammarNode & fresh = tree_[split_sym];
+      //cp the subtree to the new nonterminal, starting with an empty rule bin
+      fresh.nonterminals_ = it->second.nonterminals_;
+      fresh.tree_ = it->second.tree_;
+      delete fresh.rb_;
+      fresh.rb_ = new aTextRuleBin();
+      fresh.path_ = path_; //update the path of the subtree
+      fresh.path_.push_back(split_sym);
+      fresh.print_path();
+    }
+
+    //remove the old node:
+    tobe_removedNTs.push_back(it->first);
+
+  }
+
+  for (size_t i =0; i<tobe_removedNTs.size(); i++)
+    tree_.erase(tobe_removedNTs[i]);
+
+  if (tree_.size() ==0){ //the last (terminal/nonterminal
+    cout<<"inside visit(): the last terminal/nonterminal"<<endl;
+    if (rb_ != NULL)
+      rb_->substituteF(path_, grSplitNonterminals);
+
+  }
+  cout<<"visit() end"<<endl;
+}
+
+struct aTGImpl { // private implementation object of aTextGrammar
+ aTextGrammarNode root_; // root of the rule trie
+};
+
+// Both constructors now give max_split_ a defined value; it used to stay
+// uninitialized until setMaxSplit() was called. The default of 2 is a placeholder;
+// callers are still expected to call setMaxSplit().
+aTextGrammar::aTextGrammar() : max_span_(10), pimpl_(new aTGImpl), max_split_(2) {}
+
+// Same as the default constructor, then loads the rules found in `file`.
+aTextGrammar::aTextGrammar(const std::string& file) :
+  max_span_(10),
+  pimpl_(new aTGImpl),
+  max_split_(2) {
+  ReadFromFile(file);
+}
+
+
+// Entry point of the rule trie.
+const GrammarIter* aTextGrammar::GetRoot() const {
+  const aTextGrammarNode & root = pimpl_->root_;
+  return &root;
+}
+
+
+// Record a nonterminal in nonterminals_. Nonterminals arrive as negative ids and are
+// stored under the positive label id with value 1; an existing entry is left as is.
+// Ids that are not negative are ignored.
+void aTextGrammar::addNonterminal(WordID wordID){
+  if (wordID >= 0) return;   // not a nonterminal
+
+  const WordID label = wordID * -1;
+  if (nonterminals_.find(label) == nonterminals_.end())
+    nonterminals_[label] = 1;
+}
+
+
+
+// Add a rule to the unary tables or to the trie, recording every nonterminal it
+// mentions (lhs and rhs) in nonterminals_ along the way.
+void aTextGrammar::AddRule(const TRulePtr& rule) {
+  this->addNonterminal(rule->lhs_);
+
+  if (!rule->IsUnary()) {
+    aTextGrammarNode* node = &pimpl_->root_;
+    for (size_t k = 0; k < rule->f_.size(); ++k){
+      const WordID sym = rule->f_[k];
+      if (sym < 0){
+        node->nonterminals_.insert(sym * -1); // nonterminals that extend the current node
+        this->addNonterminal(sym);
+      }
+      node = &node->tree_[sym];
+    }
+    if (!node->rb_)
+      node->rb_ = new aTextRuleBin;
+    node->rb_->AddRule(rule);
+    return;
+  }
+
+  rhs2unaries_[rule->f().front()].push_back(rule);
+  unaries_.push_back(rule);
+  if (rule->f().front() < 0)
+    this->addNonterminal(rule->f().front());
+}
+
+// Rule-reader callback: `extra` carries the aTextGrammar that receives the rule.
+static void aAddRuleHelper(const TRulePtr& new_rule, void* extra) {
+  aTextGrammar* const grammar = static_cast<aTextGrammar*>(extra);
+  grammar->AddRule(new_rule);
+}
+
+
+// Read the rules in `filename` and add each of them to this grammar.
+void aTextGrammar::ReadFromFile(const string& filename) {
+  ReadFile reader(filename);
+  RuleLexer::ReadRules(reader.stream(), aAddRuleHelper, this);
+}
+
+// True when `distance` does not exceed max_span_; i and j are not inspected.
+bool aTextGrammar::HasRuleForSpan(int i, int j, int distance) const {
+  return distance <= max_span_;
+}
+
+
+////Linh added
+
+// Set how many new nonterminals splitNonterminal() creates per split.
+void aTextGrammar::setMaxSplit(int max_split){
+  this->max_split_ = max_split;
+}
+
+
+// Print "<id>\t<label>" to stdout for every nonterminal whose flag in nonterminals_
+// is positive.
+void aTextGrammar::printAllNonterminals() const{
+  map<WordID, int>::const_iterator entry = nonterminals_.begin();
+  for (; entry != nonterminals_.end(); ++entry){
+    if (entry->second <= 0) continue;
+    cout <<entry->first<<"\t"<<TD::Convert(entry->first)<<endl;
+  }
+}
+
+
+// Split nonterminal wordID into max_split_ new nonterminals named "<label>+<i>",
+// register them, mark wordID inactive, and rewrite the trie via DFS().
+//
+// This version fixes the statements that did not compile (rule.lhs_, '->' applied to
+// a WordID, the undeclared 'it'), allocates each new rule instead of dereferencing an
+// empty TRulePtr, and looks nonterminals up by their positive id, which is how
+// grSplitNonterminals is keyed.
+// TODO: the unary rules collected in 'newrules' are still not added to the grammar,
+// and unary rules with only one side split are not handled.
+void aTextGrammar::splitNonterminal(WordID wordID){
+
+  //first add the split nonterminals to the TD dictionary
+  string old_str = TD::Convert(wordID);
+
+  vector<WordID> v_splits;//split nonterminal wordID into the list of nonterminals in v_splits
+  for (int i =0; i< this->max_split_; i++){
+    string split_str = old_str + "+" + itos(i);
+    WordID splitID = TD::Convert(split_str);
+    v_splits.push_back(splitID);
+    nonterminals_[splitID] = 1;
+  }
+
+  grSplitNonterminals[wordID] = v_splits;
+  //set wordID to be an inactive nonterminal
+  nonterminals_[wordID] = 0;
+
+  //print split nonterminals of wordID
+  cout<<"print split nonterminals\n";
+  for (size_t i =0; i<v_splits.size(); i++)
+    cout<<v_splits[i]<<"\t"<<TD::Convert(v_splits[i])<<endl;
+
+  //now update the unary rules
+  vector<TRulePtr> newrules;
+  for (size_t i =0; i < unaries_.size(); i++){
+    TRulePtr rule = unaries_[i];
+    const WordID lhs = rule->lhs_;
+    // symbols inside rules are negative for nonterminals, map keys are positive
+    const map<WordID, vector<WordID> >::const_iterator rhs_split =
+        grSplitNonterminals.find(rule->f().front() * -1);
+    const map<WordID, vector<WordID> >::const_iterator lhs_split =
+        grSplitNonterminals.find(lhs * -1);
+    if (rhs_split == grSplitNonterminals.end() || lhs_split == grSplitNonterminals.end())
+      continue; // at least one side is not being split
+
+    const vector<WordID> & rhs_nonterminals = rhs_split->second;
+    const vector<WordID> & lhs_nonterminals = lhs_split->second;
+    for (size_t k =0; k <rhs_nonterminals.size(); k++)
+      for (size_t j =0; j <lhs_nonterminals.size(); j++){
+        TRulePtr newrule(new TRule());
+        newrule->e_ = rule->e_;
+        newrule->f_ = rule->f_;
+        newrule->f_[0] = rhs_nonterminals[k] * -1;
+        newrule->lhs_ = lhs_nonterminals[j] * -1;
+        newrule->scores_ = rule->scores_;
+        newrule->arity_ = rule->arity_;
+        newrules.push_back (newrule);
+      }
+  }
+
+  pimpl_->root_.DFS();
+
+}
+
+
+// void aTextGrammar::splitNonterminal0(WordID wordID){
+
+// TextGrammarNode* cur = &pimpl_->root_;
+// for (int i = 0; i < rule->f_.size(); ++i)
+// cur = &cur->tree_[rule->f_[i]];
+
+// }
+
+void aTextGrammar::splitAllNonterminals(){ // placeholder: intentionally empty, nothing is split yet
+
+
+}
+
diff --git a/gi/scfg/abc/old_agrammar.h b/gi/scfg/abc/old_agrammar.h
new file mode 100644
index 00000000..d68c2548
--- /dev/null
+++ b/gi/scfg/abc/old_agrammar.h
@@ -0,0 +1,45 @@
+#ifndef _AGRAMMAR_H_
+#define _AGRAMMAR_H_
+
+#include "grammar.h"
+
+using namespace std;
+
+class aTGImpl;
+
+struct aTextGrammar : public Grammar { // old version of the splittable text grammar
+ aTextGrammar();
+ aTextGrammar(const std::string& file); // builds the grammar and reads the rules from `file`
+ void SetMaxSpan(int m) { max_span_ = m; }
+
+ virtual const GrammarIter* GetRoot() const;
+ void AddRule(const TRulePtr& rule);
+ void ReadFromFile(const std::string& filename);
+ virtual bool HasRuleForSpan(int i, int j, int distance) const;
+ const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const; // NOTE(review): no definition in old_agrammar.cc - confirm it is provided elsewhere
+
+ void setMaxSplit(int max_split);
+
+ void printAllNonterminals() const;
+ void addNonterminal(WordID wordID); // records wordID when it is a nonterminal (negative id)
+
+ void splitAllNonterminals(); // currently an empty stub
+ void splitNonterminal(WordID wordID);
+
+ // inline map<WordID, vector<WordID> > & getSplitNonterminals(){return splitNonterminals_;}
+ // map<WordID, vector<WordID> > splitNonterminals_;
+ private:
+ int max_span_;
+ boost::shared_ptr<aTGImpl> pimpl_;
+ int max_split_; // number of new nonterminals created per split
+
+ map<WordID, int> nonterminals_; //list of nonterminals of the grammar if nonterminals_[WordID] > 0 the nonterminal WordID is found in the grammar
+
+
+
+};
+
+
+
+
+#endif
diff --git a/gi/scfg/abc/scfg.cpp b/gi/scfg/abc/scfg.cpp
new file mode 100644
index 00000000..4d094488
--- /dev/null
+++ b/gi/scfg/abc/scfg.cpp
@@ -0,0 +1,152 @@
+#include "lattice.h"
+#include "tdict.h"
+#include "agrammar.h"
+#include "bottom_up_parser.h"
+#include "hg.h"
+#include "hg_intersect.h"
+#include "../utils/ParamsArray.h"
+
+
+using namespace std;
+
+typedef aTextGrammar aGrammar;
+// Allocate a grammar and load its rules from grammar_filename. The caller owns the
+// returned object.
+aGrammar * load_grammar(string & grammar_filename){
+  cerr<<"start_load_grammar "<<grammar_filename<<endl;
+  return new aGrammar(grammar_filename);
+}
+
+// Turn a sentence into a lattice with one position per word, each position holding a
+// single arc built as LatticeArc(word id, 0.0, 1).
+Lattice convertSentenceToLattice(const string & str){
+  std::vector<WordID> word_ids;
+  TD::ConvertSentence(str, &word_ids);
+
+  Lattice lattice;
+  lattice.resize(word_ids.size());
+  for (size_t pos = 0; pos < word_ids.size(); ++pos){
+    const LatticeArc arc(word_ids[pos], 0.0, 1);
+    lattice[pos].push_back(arc);
+  }
+  return lattice;
+}
+
+// Parse `src` with grammar g into hg, then intersect hg with `tgt`.
+// Returns false when the source does not parse; otherwise returns the result of the
+// intersection.
+bool parseSentencePair(const string & goal_sym, const string & src, const string & tgt, GrammarPtr & g, Hypergraph &hg){
+  // parse the source side
+  const Lattice source_lattice = convertSentenceToLattice(src);
+  vector<GrammarPtr> grammars(1, g);
+  ExhaustiveBottomUpParser parser(goal_sym, grammars);
+
+  const bool parsed = parser.Parse(source_lattice, &hg);
+  if (!parsed){
+    cerr<<"source sentence does not parse by the grammar!"<<endl;
+    return false;
+  }
+
+  // constrain the forest to the target side
+  const Lattice target_lattice = convertSentenceToLattice(tgt);
+  return HG::Intersect(target_lattice, &hg);
+}
+
+
+
+
+int main(int argc, char** argv){ // experimental driver: loads a grammar, parses one hard-coded sentence pair, then (currently unreachable) splits nonterminals
+
+ ParamsArray params(argc, argv);
+ params.setDescription("scfg models");
+
+ params.addConstraint("grammar_file", "grammar file ", true); // optional
+
+ params.addConstraint("input_file", "parallel input file", true); //optional
+
+ if (!params.runConstraints("scfg")) {
+ return 0;
+ }
+ cerr<<"get parametters\n\n\n";
+
+ string input_file = params.asString("input_file", "parallel_corpora"); // NOTE(review): read but not used below
+ string grammar_file = params.asString("grammar_file", "./grammar.pr");
+
+
+ string src = "el gato ."; // hard-coded test pair
+
+ string tgt = "the cat .";
+
+
+ string goal_sym = "X";
+ srand(123); // fixed seed for rand()
+ /*load grammar*/
+
+
+ aGrammar * agrammar = load_grammar(grammar_file);
+ agrammar->SetGoalNT(goal_sym);
+ cout<<"before split nonterminal"<<endl;
+ GrammarPtr g( agrammar); // g takes the pointer; agrammar is still used directly below
+
+ Hypergraph hg;
+ if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){
+ cerr<<"target sentence is not parsed by the grammar!\n";
+ return 1;
+
+ }
+ hg.PrintGraphviz();
+
+ if (! parseSentencePair(goal_sym, src, tgt, g, hg) ){ // NOTE(review): same pair parsed a second time into the same hg - confirm this is intended
+ cerr<<"target sentence is not parsed by the grammar!\n";
+ return 1;
+
+ }
+ hg.PrintGraphviz();
+ //hg.clear();
+
+ if (1==1) return 1; // NOTE(review): unconditional exit with status 1; everything below is unreachable
+
+ agrammar->PrintAllRules();
+ /*split grammar*/
+ cout<<"split NTs\n";
+ cerr<<"first of all write all nonterminals"<<endl;
+ // agrammar->printAllNonterminals();
+ agrammar->setMaxSplit(2);
+ agrammar->splitNonterminal(4); // hard-coded nonterminal id
+ cout<<"after split nonterminal"<<endl;
+ agrammar->PrintAllRules();
+ Hypergraph hg1;
+ if (! parseSentencePair(goal_sym, src, tgt, g, hg1) ){
+ cerr<<"target sentence is not parsed by the grammar!\n";
+ return 1;
+
+ }
+
+ hg1.PrintGraphviz();
+
+
+ agrammar->splitNonterminal(15); // hard-coded nonterminal id
+ cout<<"after split nonterminal"<<TD::Convert(15)<<endl;
+ agrammar->PrintAllRules();
+
+
+ /*load training corpus*/
+
+
+ /*for each sentence pair in training corpus*/
+
+ // forest.PrintGraphviz();
+ /*calculate expected count*/
+
+}
diff --git a/gi/scfg/abc/tmp.cpp b/gi/scfg/abc/tmp.cpp
new file mode 100644
index 00000000..967a601d
--- /dev/null
+++ b/gi/scfg/abc/tmp.cpp
@@ -0,0 +1,36 @@
+#include <iostream>
+#include <set>
+#include <vector>
+using namespace std;
+
+int x = 5;
+
+// Scratch class whose (private) constructor bumps the global counter x.
+class A{
+  A(){
+    ++x;
+  }
+};
+// {
+// int a_;
+
+// };
+
+// Scratch class derived from A with a single private int member.
+class B: public A{
+  int b_;
+};
+
+// Scratch program: prints a greeting and the incremented global x, then removes every
+// 2 from a small vector while iterating and prints the elements that remain.
+int main(){
+
+  cout<<"Hello World";
+  set<int> s;
+
+  s.insert(1);
+  s.insert(2);
+
+  x++;
+  cout<<"x="<<x<<endl;
+
+  vector<int> t;
+  t.push_back(2); t.push_back(1); t.push_back(2); t.push_back(3); t.push_back(2); t.push_back(4);
+  for(vector<int>::iterator it = t.begin(); it != t.end(); ){
+    if (*it == 2){
+      // erase() invalidates `it`; continue from the iterator it returns
+      it = t.erase(it);
+    } else {
+      cout <<*it<<endl;
+      ++it;
+    }
+  }
+}