--- extract.sh.orig +++ extract.sh @@ -15,18 +15,41 @@ fi dir=`cd $(dirname $0) && pwd` . $dir/conf.sh +PERL=@PERL@ export LANG=C export LC_ALL=C export EXOPLIB=$dir/lib export EXOPDIC=$dir/dic +cmd=`basename $0` + +usage() { + echo "Usage: $cmd [options] file_name [topic_str]" + echo "Options: -h, --help Show this message" + echo " -d, --dictionary DIR Specify the dictionary directory" + echo " -m, --model BASE Specify the base path of model files" + echo " -t, --tmpdir DIR Specify the temporary directory" + exit -1 +} + +while test -n "$1"; do + case "$1" in + -h|--help) usage ;; + -d|--dictionary) dictionary="$2"; shift 2 ;; + -m|--model) model="$2"; shift 2 ;; + -t|--tmpdir) TMPDIR="$2"; shift 2 ;; + *) file_name="$1"; topic_str="$2"; break ;; + esac +done + tmp=${TMPDIR:-.} tsvfile=$tmp/extract.$$.tsv +euc_file=$tmp/extract.$$.euc #nkf -e -W --fb-skip < $1 > $1.euc # 指定文字コードは環境によって表記が違うためiconv -lで確認 -iconv -c -f UTF-8 -t EUC-JP < $1 > $1.euc -perl -I $EXOPLIB $EXOPLIB/in2tsv.pl $1.euc $2 > $tsvfile -$dir/_extract.sh $model $tsvfile | perl -I $EXOPLIB $EXOPLIB/tsv2out.pl +iconv -c -f UTF-8 -t EUC-JP < $file_name > $euc_file +$PERL -I $EXOPLIB $EXOPLIB/in2tsv.pl $euc_file $topic_str > $tsvfile +$dir/_extract.sh $model $tsvfile | $PERL -I $EXOPLIB $EXOPLIB/tsv2out.pl rm -f $tsvfile