--- samples/kftt.30k/alignment/alignment.sh.orig 2013-07-30 14:14:40.000000000 +0900 +++ samples/kftt.30k/alignment/alignment.sh 2013-10-27 18:22:32.000000000 +0900 @@ -1,11 +1,11 @@ #!/bin/sh -cicada=../../.. +bindir=@PREFIX@/bin ## We perform symmetized posterior constrained training, and ## perform smoothing by naive Bayes. -exec ${cicada}/scripts/cicada-alignment.py \ +exec ${bindir}/cicada-alignment.py \ --f ../data/train.ja.bz2 \ --e ../data/train.en.bz2 \ --symmetric \ --- samples/kftt.30k/ngram/expgram.sh.orig 2013-07-30 14:14:34.000000000 +0900 +++ samples/kftt.30k/ngram/expgram.sh 2013-10-27 18:23:31.000000000 +0900 @@ -1,17 +1,12 @@ #!/bin/sh -expgram= - -if test "$expgram" = ""; then - echo "where is your expgram?" - exit 1 -fi +bindir=@PREFIX@/bin ### Following is a quick example of LM estimation on a small data set. -$expgram/progs/expgram_counts_extract --corpus ../data/train.en.bz2 --output ngram.5.en.counts --order 5 --threads 4 +${bindir}/expgram_counts_extract --corpus ../data/train.en.bz2 --output ngram.5.en.counts --order 5 --threads 4 -$expgram/progs/expgram_counts_estimate --ngram ngram.5.en.counts --output ngram.5.en.lm --shard 4 +${bindir}/expgram_counts_estimate --ngram ngram.5.en.counts --output ngram.5.en.lm --shard 4 ### A standard way is: -# $expgram/scripts/expgram.py --corpus ../data/train.en --output ngram.5.en --threads 4 +# ${bindir}/expgram.py --corpus ../data/train.en --output ngram.5.en --threads 4 --- samples/kftt.30k/s2t/data/preprocess.sh.orig 2013-08-02 10:22:41.000000000 +0900 +++ samples/kftt.30k/s2t/data/preprocess.sh 2013-10-27 19:52:56.000000000 +0900 @@ -1,12 +1,8 @@ #!/bin/sh -stanford= -cicada=../../../.. - -if test "$stanford" = ""; then - echo "where is your stanford parser?" - exit 1 -fi +stanford=@PREFIX@/share/java/stanford-parser +version=3.2.0 +bindir=@PREFIX@/bin # Here, we use stanford-parser to parse training data in English # cicada_filter_penntreebank to transform into hypergraph. @@ -18,7 +14,7 @@ bzcat ../../data/train.en.bz2 | \ java \ -mx12g \ - -cp $stanford/stanford-parser.jar:$stanford/stanford-parser-3.2.0-models.jar \ + -cp $stanford/stanford-parser.jar:$stanford/stanford-parser-${version}-models.jar \ -tLPP edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams \ -tokenized -sentences newline \ -escaper edu.stanford.nlp.process.PTBEscapingProcessor \ @@ -29,10 +25,10 @@ -outputFormatOptions includePunctuationDependencies \ edu/stanford/nlp/models/lexparser/englishFactored.ser.gz \ - | \ -$cicada/progs/cicada_filter_penntreebank \ +${bindir}/cicada_filter_penntreebank \ --map ../../data/train.en.bz2 \ --normalize | \ -$cicada/progs/cicada \ +${bindir}/cicada \ --input-forest \ --threads 8 \ --operation binarize:direction=left,order=2 \ --- samples/kftt.30k/s2t/model/extract.sh.orig 2013-07-31 09:19:40.000000000 +0900 +++ samples/kftt.30k/s2t/model/extract.sh 2013-10-27 19:53:42.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-extract.py \ +exec $bindir/cicada-extract.py \ --f ../../data/train.ja.bz2 \ --e ../../data/train.en.bz2 \ --a ../../alignment/model/aligned.posterior-itg \ --- samples/kftt.30k/s2t/model/index.sh.orig 2013-07-31 09:19:37.000000000 +0900 +++ samples/kftt.30k/s2t/model/index.sh 2013-10-27 19:54:07.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-index.py \ +exec $bindir/cicada-index.py \ --model-dir . \ \ --ghkm \ --- samples/kftt.30k/s2t/test/test.sh.orig 2013-08-02 10:23:48.000000000 +0900 +++ samples/kftt.30k/s2t/test/test.sh 2013-10-27 19:55:42.000000000 +0900 @@ -1,16 +1,11 @@ #!/bin/sh -cicada=../../../.. -weights= - -if test "$weights" = ""; then - echo "where is your weights? (for instance ../tune/learn.10.weights)" - exit 1 -fi +bindir=@PREFIX@/bin +weights=../tune/learn.10.weights ### generate config file -$cicada/progs/cicada_filter_config \ +$bindir/cicada_filter_config \ --input ../tune/cicada.config \ --output cicada.config \ --weights "weights=$weights" \ @@ -18,9 +13,9 @@ --file "file=-" ### perform translation -$cicada/progs/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en +$bindir/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en ### evaluation -$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref +$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref --- samples/kftt.30k/s2t/tune/config.sh.orig 2013-08-02 10:24:29.000000000 +0900 +++ samples/kftt.30k/s2t/tune/config.sh 2013-10-27 19:56:16.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-config.py \ +exec $bindir/cicada-config.py \ --tree-grammar ../model/ghkm-index \ --max-span 10 \ --goal '[ROOT]' \ --- samples/kftt.30k/s2t/tune/tune.sh.orig 2013-09-19 17:38:55.000000000 +0900 +++ samples/kftt.30k/s2t/tune/tune.sh 2013-10-27 19:56:32.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../../ +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-learn.py \ +exec $bindir/cicada-learn.py \ --srcset ../../data/tune.ja \ --refset ../../data/tune.en.ref \ --config cicada.config \ --- samples/kftt.30k/scfg/model/extract.sh.orig 2013-07-30 14:14:16.000000000 +0900 +++ samples/kftt.30k/scfg/model/extract.sh 2013-10-27 19:57:17.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-extract.py \ +exec $bindir/cicada-extract.py \ --f ../../data/train.ja.bz2 \ --e ../../data/train.en.bz2 \ --a ../../alignment/model/aligned.posterior-itg \ --- samples/kftt.30k/scfg/model/index.sh.orig 2013-07-30 14:14:21.000000000 +0900 +++ samples/kftt.30k/scfg/model/index.sh 2013-10-27 19:57:11.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-index.py \ +exec $bindir/cicada-index.py \ --model-dir . \ \ --scfg \ --- samples/kftt.30k/scfg/test/test.sh.orig 2013-07-30 13:00:31.000000000 +0900 +++ samples/kftt.30k/scfg/test/test.sh 2013-10-27 19:57:55.000000000 +0900 @@ -1,16 +1,11 @@ #!/bin/sh -cicada=../../../.. -weights= - -if test "$weights" = ""; then - echo "where is your weights? (for instance ../tune/learn.10.weights)" - exit 1 -fi +bindir=@PREFIX@/bin +weights=../tune/learn.10.weights ### generate config file -$cicada/progs/cicada_filter_config \ +$bindir/cicada_filter_config \ --input ../tune/cicada.config \ --output cicada.config \ --weights "weights=$weights" \ @@ -18,9 +13,9 @@ --file "file=-" ### perform translation -$cicada/progs/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en +$bindir/cicada --config cicada.config --threads 4 --debug < ../../data/dev.ja > dev.ja-en ### evaluation -$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref +$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref --- samples/kftt.30k/scfg/tune/config.sh.orig 2013-07-29 13:01:44.000000000 +0900 +++ samples/kftt.30k/scfg/tune/config.sh 2013-10-27 19:58:21.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-config.py \ +exec $bindir/cicada-config.py \ --grammar ../model/scfg-index \ --max-span 15 \ --straight \ --- samples/kftt.30k/scfg/tune/tune.sh.orig 2013-09-19 17:38:30.000000000 +0900 +++ samples/kftt.30k/scfg/tune/tune.sh 2013-10-27 19:58:32.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-learn.py \ +exec $bindir/cicada-learn.py \ --srcset ../../data/tune.ja \ --refset ../../data/tune.en.ref \ --config cicada.config \ --- samples/kftt.30k/t2s/data/preprocess.sh.orig 2013-07-30 15:15:17.000000000 +0900 +++ samples/kftt.30k/t2s/data/preprocess.sh 2013-10-27 20:00:35.000000000 +0900 @@ -4,20 +4,9 @@ # Note that the Japanese side was segmented by mecab (https://code.google.com/p/mecab/). # Thus, it is a natural choice to use cabocha (https://code.google.com/p/cabocha/). -cicada=../../../.. -mecab= -cabocha= - -if test "$mecab" = ""; then - echo "where is your mecab?" - exit 1 -fi - -if test "$cabocha" = ""; then - echo "where is your cabocha?" - exit 1 -fi - +bindir=@PREFIX@/bin +mecab=$bindir/mecab +cabocha=$bindir/cabocha for data in train dev tune; do @@ -34,12 +23,12 @@ awk '{for (i=1;i<=NF;++i) {printf "%s\t*\n", $i } print "EOS";}' | \ $mecab -p | \ $cabocha -f1 -I 1 | \ - $cicada/progs/cicada_filter_dependency \ + $bindir/cicada_filter_dependency \ --cabocha \ --func \ --forest \ --head | \ - $cicada/progs/cicada \ + $bindir/cicada \ --input-forest \ --threads 4 \ --operation binarize:direction=cyk,order=1 \ --- samples/kftt.30k/t2s/model/extract.sh.orig 2013-07-30 15:26:16.000000000 +0900 +++ samples/kftt.30k/t2s/model/extract.sh 2013-10-27 20:01:00.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-extract.py \ +exec $bindir/cicada-extract.py \ --f ../../data/train.ja.bz2 \ --e ../../data/train.en.bz2 \ --a ../../alignment/model/aligned.posterior-itg \ --- samples/kftt.30k/t2s/model/index.sh.orig 2013-07-30 14:14:12.000000000 +0900 +++ samples/kftt.30k/t2s/model/index.sh 2013-10-27 20:01:08.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-index.py \ +exec $bindir/cicada-index.py \ --root-dir . \ --model-dir . \ \ --- samples/kftt.30k/t2s/test/test.sh.orig 2013-07-30 13:00:41.000000000 +0900 +++ samples/kftt.30k/t2s/test/test.sh 2013-10-27 20:01:41.000000000 +0900 @@ -1,16 +1,11 @@ #!/bin/sh -cicada=../../../.. -weights= - -if test "$weights" = ""; then - echo "where is your weights? (for instance ../tune/learn.10.weights)" - exit 1 -fi +bindir=@PREFIX@/bin +weights=../tune/learn.10.weights ### generate config file -$cicada/progs/cicada_filter_config \ +$bindir/cicada_filter_config \ --input ../tune/cicada.config \ --output cicada.config \ --weights "weights=$weights" \ @@ -18,9 +13,9 @@ --file "file=-" ### perform translation -$cicada/progs/cicada --config cicada.config --threads 4 --debug --input ../data/dev.forest.ja.gz > dev.ja-en +$bindir/cicada --config cicada.config --threads 4 --debug --input ../data/dev.forest.ja.gz > dev.ja-en ### evaluation -$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref +$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref --- samples/kftt.30k/t2s/tune/config.sh.orig 2013-07-29 13:52:34.000000000 +0900 +++ samples/kftt.30k/t2s/tune/config.sh 2013-10-27 20:02:02.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-config.py \ +exec $bindir/cicada-config.py \ --tree-grammar ../model/ghkm-index \ --goal '[x]' \ --glue '[x]' \ --- samples/kftt.30k/t2s/tune/tune.sh.orig 2013-09-19 17:38:49.000000000 +0900 +++ samples/kftt.30k/t2s/tune/tune.sh 2013-10-27 20:02:19.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-learn.py \ +exec $bindir/cicada-learn.py \ --srcset ../data/tune.forest.ja.gz \ --refset ../../data/tune.en.ref \ --config cicada.config \ --- samples/kftt.30k/t2t/model/extract.sh.orig 2013-08-01 08:37:16.000000000 +0900 +++ samples/kftt.30k/t2t/model/extract.sh 2013-10-27 20:02:40.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-extract.py \ +exec $bindir/cicada-extract.py \ --f ../../data/train.ja \ --e ../../data/train.en \ --a ../../alignment/model/aligned.posterior-itg \ --- samples/kftt.30k/t2t/model/index.sh.orig 2013-07-31 09:36:12.000000000 +0900 +++ samples/kftt.30k/t2t/model/index.sh 2013-10-27 20:02:49.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-index.py \ +exec $bindir/cicada-index.py \ --model-dir . \ \ --tree \ --- samples/kftt.30k/t2t/test/test.sh.orig 2013-07-31 09:35:38.000000000 +0900 +++ samples/kftt.30k/t2t/test/test.sh 2013-10-27 20:03:19.000000000 +0900 @@ -1,16 +1,11 @@ #!/bin/sh -cicada=../../../.. -weights= - -if test "$weights" = ""; then - echo "where is your weights? (for instance ../tune/learn.10.weights)" - exit 1 -fi +bindir=@PREFIX@/bin +weights=../tune/learn.10.weights ### generate config file -$cicada/progs/cicada_filter_config \ +$bindir/cicada_filter_config \ --input ../tune/cicada.config \ --output cicada.config \ --weights "weights=$weights" \ @@ -18,9 +13,9 @@ --file "file=-" ### perform translation -$cicada/progs/cicada --config cicada.config --threads 4 --debug --input ../../t2s/data/dev.forest.ja.gz > dev.ja-en +$bindir/cicada --config cicada.config --threads 4 --debug --input ../../t2s/data/dev.forest.ja.gz > dev.ja-en ### evaluation -$cicada/progs/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref +$bindir/cicada_eval --tstset dev.ja-en --refset ../../data/dev.en.ref --- samples/kftt.30k/t2t/tune/config.sh.orig 2013-07-31 09:52:14.000000000 +0900 +++ samples/kftt.30k/t2t/tune/config.sh 2013-10-27 20:03:41.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-config.py \ +exec $bindir/cicada-config.py \ --tree-grammar ../model/tree-index \ --goal '[ROOT]' \ --glue '[x]' \ --- samples/kftt.30k/t2t/tune/tune.sh.orig 2013-09-19 17:38:41.000000000 +0900 +++ samples/kftt.30k/t2t/tune/tune.sh 2013-10-27 20:03:55.000000000 +0900 @@ -1,8 +1,8 @@ #!/bin/sh -cicada=../../../.. +bindir=@PREFIX@/bin -exec $cicada/scripts/cicada-learn.py \ +exec $bindir/cicada-learn.py \ --srcset ../../t2s/data/tune.forest.ja.gz \ --refset ../../data/tune.en.ref \ --config cicada.config \ --- samples/s2t/sample.sh.orig 2013-08-03 10:37:31.000000000 +0900 +++ samples/s2t/sample.sh 2013-10-27 20:07:14.000000000 +0900 @@ -1,6 +1,7 @@ #!/bin/sh -cicada=../.. +bindir=@PREFIX@/bin +exdir=@PREFIX@/share/examples/cicada ## ## @@ -18,16 +19,16 @@ ## apply features ## output kbests -exec $cicada/progs/cicada \ - --input $cicada/samples/scfg/input.txt \ +exec $bindir/cicada \ + --input $exdir/scfg/input.txt \ --goal '[ROOT]' \ - --tree-grammar $cicada/samples/s2t/grammar.bin:max-span=10 \ + --tree-grammar $exdir/s2t/grammar.bin:max-span=10 \ --tree-grammar "glue:goal-source=[ROOT],goal-target=[ROOT],non-terminal-source=[x],non-terminal-target=[x],straight=true,invert=false" \ --grammar "insertion:non-terminal=[x]" \ - --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \ + --feature-function "ngram:file=$exdir/scfg/ngram.bin" \ --feature-function word-penalty \ --feature-function rule-penalty \ --feature-function glue-tree-penalty \ --operation compose-tree-cky \ - --operation apply:prune=true,size=100,weights=$cicada/samples/s2t/weights \ - --operation output:file=-,kbest=10,weights=$cicada/samples/s2t/weights + --operation apply:prune=true,size=100,weights=$exdir/s2t/weights \ + --operation output:file=-,kbest=10,weights=$exdir/s2t/weights --- samples/scfg/sample.sh.orig 2013-07-30 14:00:13.000000000 +0900 +++ samples/scfg/sample.sh 2013-10-27 20:07:02.000000000 +0900 @@ -1,15 +1,16 @@ #!/bin/sh -cicada=../.. +bindir=@PREFIX@/bin +exdir=@PREFIX@/share/examples/cicada -exec $cicada/progs/cicada \ - --input $cicada/samples/scfg/input.txt \ - --grammar $cicada/samples/scfg/grammar.bin \ +exec $bindir/cicada \ + --input $exdir/scfg/input.txt \ + --grammar $exdir/scfg/grammar.bin \ --grammar "glue:straight=true,inverted=false,non-terminal=[x],goal=[s]" \ --grammar "insertion:non-terminal=[x]" \ - --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \ + --feature-function "ngram:file=$exdir/scfg/ngram.bin" \ --feature-function word-penalty \ --feature-function rule-penalty \ --operation compose-cky \ - --operation apply:prune=true,size=100,weights=$cicada/samples/scfg/weights \ - --operation output:file=-,kbest=10,weights=$cicada/samples/scfg/weights + --operation apply:prune=true,size=100,weights=$exdir/scfg/weights \ + --operation output:file=-,kbest=10,weights=$exdir/scfg/weights --- samples/t2s/sample.sh.orig 2013-08-19 16:18:37.000000000 +0900 +++ samples/t2s/sample.sh 2013-10-27 20:08:28.000000000 +0900 @@ -1,6 +1,7 @@ #!/bin/sh -cicada=../.. +bindir=@PREFIX@/bin +exdir=@PREFIX@/share/examples/cicada ## ## 1. pentreebank is converted into hypergraph @@ -20,20 +21,20 @@ ## apply features ## output kbests -$cicada/progs/cicada_filter_penntreebank \ - --input $cicada/samples/t2s/input.txt \ +$bindir/cicada_filter_penntreebank \ + --input $exdir/t2s/input.txt \ --normalize \ | \ -$cicada/progs/cicada \ +$bindir/cicada \ --input - \ --input-forest \ --goal '[ROOT]' \ - --tree-grammar $cicada/samples/t2s/grammar.bin \ + --tree-grammar $exdir/t2s/grammar.bin \ --tree-grammar fallback \ - --feature-function "ngram:file=$cicada/samples/scfg/ngram.bin" \ + --feature-function "ngram:file=$exdir/scfg/ngram.bin" \ --feature-function word-penalty \ --feature-function rule-penalty \ --operation binarize:direction=cyk,order=1 \ --operation compose-tree \ - --operation apply:prune=true,size=1000,weights=$cicada/samples/t2s/weights \ - --operation output:file=-,kbest=10,weights=$cicada/samples/t2s/weights + --operation apply:prune=true,size=1000,weights=$exdir/t2s/weights \ + --operation output:file=-,kbest=10,weights=$exdir/t2s/weights