#!/bin/bash
#
# by Chih-Wei Huang
#                        31 July 1998
#                        19 February 1999
#                        21 August 1999
#              Updated    3 January 2000
#

# Directories searched for the external tools used by this script.
BINPATH="/bin:/usr/bin:/usr/local/bin"
# For the start-up phase the directory the script was started from is
# searched as well.
PATH="$BINPATH:$(pwd)"

# Name the script was invoked under; further down it selects both the
# encoding and the conversion to run.  Parameter expansion (instead of an
# unquoted `basename $0`) keeps this working when the path contains blanks.
SCRIPTNAME=${0##*/}

# At least one argument is required.
if [ "$#" -eq 0 ]; then
    echo "Usage: $SCRIPTNAME [options] file1 [file2, ...]"
    exit 1
fi


### Common Functions ###

# Check that the source file $1 exists, falling back to "$1.sgml".
#
# Arguments: $1 - file name as given on the command line
# Globals:   src - set to "$1.sgml" when only that variant exists
# Outputs:   a message on stdout when neither variant exists
# Returns:   NOTE the inverted convention the callers rely on:
#            0 = file is missing (caller should skip it), 1 = file is usable.
function checksrcfile()
{
    # The operands are quoted so that names containing blanks or glob
    # characters are tested as a single path instead of breaking `[`.
    if [ ! -e "$1" ]; then
        if [ ! -e "$1.sgml" ]; then
            echo "File $1 doesn't exist!"
            return 0
        fi
        src=$1.sgml
    fi
    return 1
}

# Encode one SGML file into the temporary directory.
#
# Arguments: $1 - input file
#            $2 - output path, relative to $TMP
# Globals:   DOS2UNIX, MB2A, ENC, TMP (all read)
#
# The input is piped through sed, then $DOS2UNIX, then "$MB2A -e -c $ENC".
# All expansions are quoted so paths containing blanks stay single words.
function do_encoding()
{
    # Filter out some buggy macros for LaTeX output
    sed 's/<nidx>/<!--nidx>/g; s/<\/nidx>/<\/nidx-->/g' "$1" | \
    "$DOS2UNIX" | "$MB2A" -e -c "$ENC" -o "$TMP/$2"
}

# Encode a source file, and the SGML files it refers to, into $TMP.
#
# Arguments: $1 - source file
# Globals:   name - set to the base name of $1 without ".sgml"; the callers
#                   use it after this function returns
#            PERL, TMP (read)
#
# When perl is available, every double-quoted "....sgml" string found in $1
# is treated as an included file: its directory is recreated below $TMP and
# the file, if it exists, is encoded to the same relative path.
function encoding()
{
    local inc f d

    name=$(basename "$1" .sgml)
    do_encoding "$1" "$name.sgml"

    # Encode the including SGML files
    if [ -n "$PERL" ]; then
        inc=$("$PERL" -ne 'if (/"(.*\.sgml)"/) { print "$1 " }' "$1")
        # $inc is a blank-separated list, so it is split on purpose.
        for f in $inc; do
            d=$(dirname "$f")
            [ -d "$TMP/$d" ] || mkdir -p "$TMP/$d"
            [ -f "$f" ] && do_encoding "$f" "$f"
        done
    fi
}

# Remove the temporary directory and restore the saved umask.
#
# Globals: DEBUG - when "1" the temporary directory is kept
#          TMP   - directory to remove
#          UMASK - umask value to restore
function cleartmp()
{
    # Quoted, and skipped when TMP is empty, so that exactly the temporary
    # directory is removed even if its path contains blanks.
    if [ "$DEBUG" != "1" ] && [ -n "$TMP" ]; then
        rm -rf -- "$TMP"
    fi
    if [ -n "$UMASK" ]; then
        umask "$UMASK"
    fi
}


###  SGMLCHECK  ###

# Run sgmlcheck on the encoded copy of every file in $filelist.
#
# Globals: filelist, options, TMP (read); src and name are set as a side
#          effect of checksrcfile and encoding.
function run_zhsgmlcheck()
{
    # $filelist and $options are blank-separated lists and are split on
    # purpose.  The loop variable has to be called src: checksrcfile
    # replaces it with the ".sgml" variant when needed.
    for src in $filelist; do
        # checksrcfile returns 0 when the file is missing.
        if checksrcfile "$src"; then
            continue
        fi

        encoding "$src"

        # Enter the temporary directory; skip the file rather than run the
        # tool in the wrong directory if that fails.
        pushd "$TMP" > /dev/null || continue
        sgmlcheck $options "$name.sgml"
        popd > /dev/null
    done
}

###  SGML2HTML  ###

# Convert every file in $filelist to HTML.
#
# sgml2html runs on the encoded copy inside $TMP.  Each resulting page gets a
# Content-Type META tag carrying $ENC and a "ZH-SGML-Tools" generator name,
# is passed through "$MB2A -d" and is written to the directory the script
# was started from.
#
# Globals: filelist, options, TMP, ENC, MB2A (read); src and name are set as
#          a side effect of checksrcfile and encoding.
# Exits with status 2 when sgml2html produced neither NAME.html nor NAME.htm.
function run_zhsgml2html()
{
    local file
    local -a list

    # The loop variable has to be called src: checksrcfile may replace it.
    for src in $filelist; do
        # checksrcfile returns 0 when the file is missing.
        if checksrcfile "$src"; then
            continue
        fi

        encoding "$src"

        pushd "$TMP" > /dev/null || continue
        sgml2html $options "$name.sgml"

        # Collect the generated pages with a glob instead of parsing `ls`.
        if [ -e "$name.html" ]; then
            list=("$name"*.html)
        elif [ -e "$name.htm" ]; then
            list=("$name"*.htm)
        else
            # Leave the temporary directory and clean it up before giving up
            # so that it is not left behind.
            popd > /dev/null
            cleartmp
            exit 2
        fi
        popd > /dev/null

        for file in "${list[@]}"; do
            sed "s/<HEAD>/<HEAD><META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text\/html; charset=$ENC\">/g" "$TMP/$file" | \
            sed "s/CONTENT=\"SGML-Tools/CONTENT=\"ZH-SGML-Tools/g" | \
            "$MB2A" -d -o "$file"
        done
    done
}

###  SGML2LATEX  ###

# Convert every file in $filelist to LaTeX and, on request, DVI/PostScript.
#
# Options handled here (all others are passed on to sgml2latex):
#   --output=tex|dvi|ps|all  outputs to produce
#   --clean                  remove .aux/.log/.toc/.cjk and every one of
#                            .tex/.dvi/.ps that was not requested
#   --stop                   do not insert \nonstopmode into the .tex file
#   --font=NAME              font name (CJK default: ming for big5, else song)
#   --zhtex=NAME             Chinese TeX system; "CJK" asks for the CJK package
#
# Globals: filelist, options, TMP, ENC, CJKENC, MB2A (read); TEXINPUTS is
#          extended and exported; src and name are set as a side effect of
#          checksrcfile and encoding.  FONT and ZHTEX are not reset here, so
#          values already present (e.g. from the environment) are honoured.
function run_zhsgml2latex()
{
    sgmloptions=""
    TEX=0
    DVI=0
    PS=0
    CLEAN=0
    STOP=0

    # $options is a blank-separated list and is split on purpose.
    for opt in $options; do
        case $opt in
            --output=tex)
            TEX=1
            ;;

            --output=dvi)
            DVI=1
            ;;

            --output=ps)
            PS=1
            ;;

            --output=all)
            TEX=1; DVI=1; PS=1
            ;;

            --clean)
            CLEAN=1
            ;;

            --stop)
            STOP=1
            ;;

            --font*)
            # Everything after the first "=".
            FONT=${opt#*=}
            ;;

            --zhtex*)
            ZHTEX=${opt#*=}
            ;;

            *)
            sgmloptions="$sgmloptions $opt"
            ;;
        esac
    done

    # DVI/PS output needs a Chinese-capable TeX: either the CJK package
    # (CJK.sty plus latex, and bg5latex as well for big5) or chilatex.
    if [ "$DVI" = "1" ] || [ "$PS" = "1" ]; then
        HAVE_CJK=0; HAVE_CHITEX=0
        if [ -n "$(kpsewhich CJK.sty)" ] && [ -n "$(type -p latex)" ] \
            && { [ "$ENC" != "big5" ] || [ -n "$(type -p bg5latex)" ]; }; then
            HAVE_CJK=1
        fi
        if [ -n "$(type -p chilatex)" ]; then
            HAVE_CHITEX=1
        fi

        # CJK is used when it was asked for or when chilatex is missing.
        if [ "$HAVE_CJK" = 1 ] \
            && { [ "$ZHTEX" = "CJK" ] || [ "$HAVE_CHITEX" = 0 ]; }; then
            ZHTEX=CJK
        elif [ "$HAVE_CHITEX" = 1 ]; then
            ZHTEX=ChiTeX
        else
            echo "Neither chilatex nor CJK was found! Create .tex file only..."
            DVI=0; PS=0
        fi
        TEX=1
    fi

    # kpsexpand receives the literal string $TEXINPUTS and expands it itself.
    TEXINPUTS="$(kpsexpand '$TEXINPUTS'):/usr/lib/sgml-tools//"
    export TEXINPUTS

    # The loop variable has to be called src: checksrcfile may replace it.
    for src in $filelist; do
        # checksrcfile returns 0 when the file is missing.
        if checksrcfile "$src"; then
            continue
        fi

        encoding "$src"

        pushd "$TMP" > /dev/null || continue
        sgml2latex --output=tex $sgmloptions "$name.sgml"
        popd > /dev/null

        if [ -e "$TMP/$name.tex" ]; then
            # Rebuild the .tex file in the start directory: first line of
            # the generated file, optional extra lines, then the decoded
            # remainder.  "head -n 1" / "tail -n +2" replace the obsolete
            # "head -1" / "tail +2" spellings.
            head -n 1 "$TMP/$name.tex" > "$name.tex"
            if [ "$STOP" != "1" ]; then
                printf '%s\n' '\nonstopmode' >> "$name.tex"
            fi
            if [ "$ZHTEX" != "CJK" ] && [ -n "$FONT" ]; then
                # Writes a backslash followed by the font name.
                printf '\\%s\n' "$FONT" >> "$name.tex"
            fi
            tail -n +2 "$TMP/$name.tex" | \
            sed 's/\\usepackage{babel}/\\usepackage[english]{babel}/g' | \
            "$MB2A" -d >> "$name.tex"

            if [ "$ZHTEX" = "CJK" ]; then
                if [ "$ENC" = "big5" ]; then
                    : "${FONT:=ming}"
                else
                    : "${FONT:=song}"
                fi
                # The Perl filter below is fed to perl on stdin and edits
                # the .tex file in place.  The here-document is unquoted:
                # $CJKENC and $FONT are filled in by the shell, \$ and \\
                # are shell escapes, and <<- strips the leading tabs.
                perl -w -pi - "$name.tex" <<-EOF
		#!/usr/bin/perl -w -p
		#
		#  pre-CJK-LaTeX.pl.in v0.1
		#
		#  Use with {bg5,sgml2latex to preprocess a Big5 or GB document
		#  before feeding it to CJK for LaTeX.
		#
		#    Written by: Anthony Fok <foka@debian.org>, 1999-12-30
		#    GNU General Public License, version 2 or later
		#
		#  Note that this filter is not fool-proof.  Error(s) may occur.

		BEGIN {
		    \$verbatim = 0;
		    \$enc = '$CJKENC';
		    \$font = '$FONT';
		    if (\$enc eq 'Bg5') {
			\$space = '@@';
		    } else { # For GB and GBK
			\$space = '';
		    }
		}

		#  CJK document prologue
		s/^(\\\\usepackage\\{null\\})/\$1
		\\\\usepackage\\{CJK\\}
		\\\\usepackage\\{indentfirst\\}
		\\\\renewcommand\\{\\\\baselinestretch\\}\\{1.3\\}
		\\\\renewcommand\\{\\\\CJKboldshift\\}\\{0.024em\\}/;

		s/^(\\\\begin\\{document\\})\$/\$1
		\\\\begin\\{CJK*\\}\\{\$enc\\}\\{\$font\\}
		\\\\CJKtilde%
		\\\\CJKcaption\\{\$enc\\}%
		\\\\settowidth\\{\\\\parindent\\}\\{\$space\\}%/;

		#  CJK document trailer
		s/^(\\\\end\\{document\\})\$/\\\\newpage\\n\\\\end\\{CJK*\\}\\n\$1/;

		#  A failed attempt to change ~ to \\nbs{}
		### while (s/^((?:[\\x00-\\x7f]|[\\x80-\\xff].)+)~/\$1\\\\nbs\\{\\}/) {}

		if (/^\\\\begin\\{verbatim\\}\$/) {
		    \$verbatim = 1;
		    next;
		}
		if (/^\\\\end\\{verbatim\\}\$/) {
		    \$verbatim = 0;
		    next;
		}

		if (\$verbatim == 0) {
		    #  Change " " to "~": from ASCII to Chinese.
		    #  The second rule is to undo things like {\\bfseries~n}.
		    #  If possible, try to combine these two rules into one.
		    s#([\\w\\(\\)/]+) (?=[\\x80-\\xff].)#\$1~#g;
		    s/(\\\\[A-Za-z]+)~(?=[\\x80-\\xff].)/\$1 /g;

		    #  Change " " to "~": from Chinese to ASCII
		    while (s/^((?:[\\x00-\\x7f]*[\\x80-\\xff].)+)\\s(?=[\\x00-\\x7f])/\$1~/) {}

		    # Make "*TeX" prints pretty
		    s/(?<!\\\\)\\b(TeX|LaTeX|LaTeXe)\\b/\\\\\$1/;
		    s/(?<!\\\\)\\b(Chi)(TeX|LaTeX|LaTeXe)\\b/\$1\\\\\$2/;
		}
		EOF
            fi

            if [ "$DVI" = "1" ] || [ "$PS" = "1" ]; then
                if [ "$ZHTEX" = "CJK" ]; then
                    # The CJK variants are run three times in a row.
                    if [ "$ENC" = "big5" ]; then
                        bg5latex "$name.tex"
                        bg5latex "$name.tex"
                        bg5latex "$name.tex"
                    else
                        latex "$name.tex"
                        latex "$name.tex"
                        latex "$name.tex"
                    fi
                else
                    # Only run once for chitex 6.0.9p5
                    chilatex "-$ENC" "$name.tex"
                fi
                if [ -e "$name.dvi" ] && [ "$PS" = "1" ]; then
                    dvips -o "$name.ps" "$name.dvi"
                fi
            fi

            if [ "$CLEAN" = "1" ]; then
                rm -f "$name.aux" "$name.log" "$name.toc" "$name.cjk"
                # Drop every output format that was not asked for.
                if [ "$TEX" = "0" ]; then rm -f "$name.tex"; fi
                if [ "$DVI" = "0" ]; then rm -f "$name.dvi"; fi
                if [ "$PS" = "0" ]; then rm -f "$name.ps"; fi
            fi
        fi
    done
}

###  SGML2TXT  ###

# Convert every file in $filelist to plain text by way of HTML and lynx.
#
# Options handled here:
#   --width=N   handed to lynx with the first "-" removed (-width=N)
#   --noidx     do not generate the index part
#   --filter -f --man -m --pass -P --dosnames -h --split -s   ignored
# All other options are passed on to sgml2html.
#
# Globals: filelist, options, TMP, MB2A (read); src and name are set as a
#          side effect of checksrcfile and encoding.
# Exits with status 1 when lynx is missing and with status 2 when sgml2html
# produced no HTML file for the index.
function run_zhsgml2txt()
{
    LYNX=$(command -v lynx)
    if [ -z "$LYNX" ]; then
        echo "lynx not found!"
        # Do not leave the temporary directory behind.
        cleartmp
        exit 1
    fi

    WIDTH=""
    NOIDX=0
    # Start from an empty list; otherwise a value left over in the
    # environment would be handed to sgml2html.
    sgmloptions=""

    # $options is a blank-separated list and is split on purpose.
    for opt in $options; do
        case $opt in
            --width*)
            # "--width=N" -> "-width=N"
            WIDTH=${opt#-}
            ;;

            --noidx)
            NOIDX=1
            ;;

            # ignore these options
            --filter | -f | \
            --man | -m | \
            --pass | -P | \
            --dosnames | -h | \
            --split | -s)
            ;;

            *)
            sgmloptions="$sgmloptions $opt"
            ;;
        esac
    done


    # The loop variable has to be called src: checksrcfile may replace it.
    for src in $filelist; do
        # checksrcfile returns 0 when the file is missing.
        if checksrcfile "$src"; then
            continue
        fi

        encoding "$src"

        pushd "$TMP" > /dev/null || continue

        # Generate the index
        if [ "$NOIDX" = "0" ]; then
            sgml2html $sgmloptions "$name.sgml"
            if [ ! -e "$name.html" ]; then
                popd > /dev/null
                cleartmp
                exit 2
            fi
            "$MB2A" -d -f "$name.html" -o "$name.idx.html"
            # Split the dump after every line of underscores; the piece
            # NAME-03 is the one used in the final text.
            # $WIDTH stays unquoted so that an empty value disappears.
            "$LYNX" $WIDTH -dump -nolist "$name.idx.html" | \
            csplit -s --prefix="$name-" - /______/+1 '{*}'
        else
            # Empty placeholder so that the final cat finds the file.
            touch "$name-03"
        fi

        # Generate the main content
        sgml2html --split=0 $sgmloptions "$name.sgml" > /dev/null
        "$MB2A" -d -f "$name.html" -o "$name.txt.html"
        "$LYNX" $WIDTH -dump "$name.txt.html" | \
        csplit -s --prefix="$name." --keep-files - /______/+1 '{1}'
        popd > /dev/null

        # Concatenate the result
        cat "$TMP/$name.00" "$TMP/$name.01" "$TMP/$name-03" "$TMP/$name.02" > "$name.txt"
    done
}

###  SGML2LYX  ###

# Convert every file in $filelist to LyX.
#
# sgml2lyx runs on the encoded copy inside $TMP; the result is passed
# through "$MB2A -d" and written to the directory the script was started
# from.
#
# Globals: filelist, options, TMP, MB2A (read); src and name are set as a
#          side effect of checksrcfile and encoding.
# Exits with status 2 when sgml2lyx produced no NAME.lyx.
function run_zhsgml2lyx()
{
    # The loop variable has to be called src: checksrcfile may replace it.
    for src in $filelist; do
        # checksrcfile returns 0 when the file is missing.
        if checksrcfile "$src"; then
            continue
        fi

        encoding "$src"

        pushd "$TMP" > /dev/null || continue
        sgml2lyx $options "$name.sgml"
        if [ ! -e "$name.lyx" ]; then
            # Leave the temporary directory and clean it up before giving up
            # so that it is not left behind.
            popd > /dev/null
            cleartmp
            exit 2
        fi
        popd > /dev/null

        "$MB2A" -d -f "$TMP/$name.lyx" -o "$name.lyx"
    done
}

###  Main Program  ###

# Start from a known state: no aliases, umask 022 (the previous value is kept
# in UMASK so that cleartmp can restore it) and no locale variables.
unalias -a
UMASK=$(umask)
umask 022

unset LANG
unset LC_CTYPE
unset LC_ALL

# mb2a is looked up before PATH is narrowed down to $BINPATH below.
MB2A=$(type -p mb2a)
if [ -z "$MB2A" ]; then
    echo "mb2a utility not found!"
    exit 1
fi

PATH=$BINPATH

# dos2unix is optional; cat is a pass-through replacement.
DOS2UNIX=$(type -p dos2unix)
if [ -z "$DOS2UNIX" ]; then
    DOS2UNIX=cat
fi

# perl is optional as well; PERL stays empty when it is missing.
PERL=$(type -p perl)

options=""
filelist=""
DEBUG=0

# Private scratch directory.  mktemp picks an unpredictable name and fails
# if the directory cannot be created, unlike a plain "mkdir /tmp/name.$$".
TMP=$(mktemp -d /tmp/zh-sgmltools.XXXXXX) || {
    echo "Cannot create temporary directory!"
    exit 1
}
# Remove the scratch directory on every exit path (kept with -d for
# debugging).  The signal traps only turn the signal into a normal exit, so
# the script stops instead of carrying on without its scratch directory.
trap '[ "$DEBUG" = "1" ] || rm -rf -- "$TMP"' EXIT
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 143' TERM

# Sort the arguments: -d enables debugging, anything else starting with "-"
# is an option for the conversion, the rest are file names.  A case pattern
# is used because "echo $opt" prints nothing for -n or -e, which made such
# options look like file names.
for opt in "$@"; do
    case $opt in
        -d)
        DEBUG=1
        ;;

        -*)
        options="$options $opt"
        ;;

        *)
        filelist="$filelist $opt"
        ;;
    esac
done

# determine the encoding
case $SCRIPTNAME in
    bg5sgml*)
    ENC=big5
    CJKENC=Bg5
    ;;

    gbsgml*)
    ENC=gb2312
    CJKENC=GB
    ;;
esac

# run the appropriate function
case $SCRIPTNAME in
    bg5sgmlcheck|gbsgmlcheck)
    run_zhsgmlcheck
    ;;

    bg5sgml2html|gbsgml2html)
    run_zhsgml2html
    ;;

    bg5sgml2latex|gbsgml2latex)
    run_zhsgml2latex
    ;;

    bg5sgml2txt|gbsgml2txt)
    run_zhsgml2txt
    ;;

    bg5sgml2lyx|gbsgml2lyx)
    run_zhsgml2lyx
    ;;

    *)
    echo "Unknown functions, Aborted."
    exit 1
    ;;
esac


cleartmp

exit 0
