[comp.sys.m6809] texcon.c

jimomura@lsuc.UUCP (12/09/87)

     I've been working on 'texcon.c' for about a week and
I almost have it working.  Texcon does most of the important
text file conversions you run into, which makes transferring
files between systems less of a hassle.

EOL conversions:

     Texcon can add LF, strip LF, add CR, strip CR,
convert CR to LF and convert LF to CR.  That makes it capable
of Unix to OS-9 and back, or MS-DOS to OS-9 and back.

High bit strip:

     Obvious.  If you need an explanation go read about
parity. :-)

TAB expansion and compression:

     Ahem.  This is only partly working and is the
problem I'm having.  I have coded both, but only the
TAB expansion works.  For some reason, my TAB compression
isn't working.  If you find a solution, please post it
so I can have it too.

/* texcon.c */

/* Utility to convert many common textfile types.
 *
 * 1987/12/05
 * Public Domain
 *
 * By Jim Omura
 * 2A King George's Drive
 * Toronto, Ontario, Canada
 */

/* Usage:
 *
 * texcon [infile] [outfile] [-?achlr] [-s[=nn]] [-t[=nn]]
 * default (no change)
 *
 * ? help
 *
 * a add:
 *     -al = add LF after CR, -ac = add CR before LF
 * c LF to CR
 * h High Bit Strip
 * l CR to LF
 * r remove:
 *     -rl = remove LF, -rc = remove CR
 * s tabs to spaces, default 4
 * t spaces to tabs, default 4
 *
 * Limits:
 *
 *     Tab conversion ignores the current column: a TAB always
 *     stands for the same fixed number of spaces, wherever it
 *     falls on the line.
 */

#include <stdio.h>
#include <modes.h>

/* Value compared against the results of open() and creat() to detect
 * a failure.
 */
#define ERROR   -1

#define FALSE   0

/* Path numbers.  STDIN and STDOUT are the defaults for inpath and
 * outpath; STDERR is not referenced by the code in this file.
 */
#define STDIN   0
#define STDOUT  1
#define STDERR  2

#define TRUE    1

/* ASCII characters */

#define CR    0x0d
#define HTAB  0x09
#define LF    0x0a
#define SPCC  0x20  /* Space */

/* Program Specific Definitions: */

/* Values of modeflag: change one EOL character into the other, add
 * the missing one, or remove one.
 */
#define CHGMODE 0
#define ADDMODE 1
#define REMMODE 2

/* MAXSPC is the upper limit applied to numspc; it equals the length
 * of the 20-space string that tabspcs points at.
 * CRFLAG and LFFLAG are the values of eolmflag (set by -c and -l).
 * SPCFLAG and TABFLAG are the values of tabmflag (set by -s and -t).
 */
#define MAXSPC   20
#define CRFLAG   1
#define LFFLAG   2
#define SPCFLAG  1
#define TABFLAG  2

/* main -- parse the command line, then copy inpath to outpath one
 * character at a time, applying the requested conversions.
 *
 * argc, argv: the usual argument count and vector.  An argument that
 *     starts with '-' is a cluster of option letters.  The first other
 *     argument is opened as the input file, the second is created as
 *     the output file, and any further file names are ignored.
 *
 * Never returns: the program leaves through exit(), with status 0 at
 * end of input or after -?, and status 1 when a file cannot be opened.
 */
main(argc,argv)

int  argc;
char **argv;

{

/* Declare Local Variables */

    char   crlf[2];       /* CR/LF pair; its first byte doubles as a lone CR */
    int    eolmflag;      /* Modify EOL flag: FALSE, CRFLAG or LFFLAG */
    char   *eollf;        /* End of line LF char (points into crlf) */
    int    hbflag;        /* High Bit Strip Flag */
    char   inbuff[1];     /* The character being processed */
    int    inpath;
    int    modeflag;      /* Mode Flag 0 = change, 1 = add, 2 = remove */
    int    numspc;        /* TAB->SPC SPC->TAB conversion factor */
    int    outpath;
    int    report;        /* Result of read() */
    int    spccntr;       /* Spaces read but not yet written (-t only) */
    char   tabchr[1];     /* TAB Character */
    int    tabmflag;      /* TAB/SPC modify flag */
    char   *tabspcs;      /* MAXSPC spaces, used as a source for write() */

/* Set Constants: */

    crlf[0] = CR;     /* It didn't work as a string constant, so ... */
    crlf[1] = LF;     /* ... I've changed it to a variable. */
    eollf = &crlf[1];
    *tabchr = HTAB;
    tabspcs = "                    "; /* 20 spaces */

/* Set Defaults: */

/* Flags: */

    eolmflag = FALSE;
    hbflag   = FALSE;
    modeflag = CHGMODE;  /* = 0 */
    numspc   = 4;
    spccntr  = 0;
    tabmflag = FALSE;

/* Paths: */

    inpath   = STDIN;
    outpath  = STDOUT;

    for( ;; )

    {

        if(argc < 2)
        {
            break; /* *** Compiler Bug? *** */
        }

        ++argv;
        --argc;

        if(**argv == '-')
        {

            /* Walk the letters after the dash.  'continue' moves on to
             * the next letter; leaving the switch with 'break' ends
             * this argument.
             */
            for (;;)
            {
                ++*argv;

                switch((int) **argv)
                {
                    case '\0':
                        break;      /* Next argv */
                    case '?':
                        shorthelp();
                        exit(0);
                    case 'a':
                        modeflag = ADDMODE;
                        continue;
                    case 'c':
                        eolmflag = CRFLAG;
                        continue;
                    case 'h':
                        hbflag = TRUE;
                        continue;
                    case 'l':
                        eolmflag = LFFLAG;
                        continue;
                    case 'r':
                        modeflag = REMMODE;
                        continue;
                    case 's':
                        tabmflag = SPCFLAG;
                        ++*argv;

                        if (**argv == '=')
                        {
                            ++*argv;
                        }

                        /* Only a number replaces the default.  A bare
                         * -s used to run atoi() on an empty string and
                         * set the width to 0.
                         */
                        if (**argv >= '0' && **argv <= '9')
                        {
                            numspc = atoi(*argv);
                        }
                        break;      /* Rest of this argv is skipped */
                    case 't':
                        tabmflag = TABFLAG;
                        ++*argv;

                        if (**argv == '=')
                        {
                            ++*argv;
                        }

                        /* Same rule as -s: keep the default unless a
                         * number follows.
                         */
                        if (**argv >= '0' && **argv <= '9')
                        {
                            numspc = atoi(*argv);
                        }
                        break;      /* Rest of this argv is skipped */
                    default:
                        continue;    /* Next Character */
                }   /* Endswitch */

                break;    /* Next argv */

            }   /* Endloop dash handler */

            continue;  /* next argv */

        }   /* Endif dash switches */

        if (inpath == STDIN)
        {
            inpath = open(*argv,S_IREAD);

            if (inpath == ERROR)
            {
                fprintf(stderr,"Input file error\n");
                exit(1);    /* Report the failure to the caller */
            }   /* Endif open error */

            continue;    /* Next argv */

        }   /* Endif inpath specified */

        if (outpath == STDOUT)
        {
            outpath = creat(*argv,S_IWRITE);

            if (outpath == ERROR)
            {
                fprintf(stderr,"texcon:  Output file error\n");
                exit(1);    /* Report the failure to the caller */
            }

            continue;    /* Next argv */

        }   /* Endif outpath specified */

    }   /* Endloop arghandler */

/* Enforce Variable Limits: */

    /* tabspcs holds MAXSPC spaces, so no write may ask for more. */
    if (numspc > MAXSPC)
    {
        numspc = MAXSPC;
    }

    /* With a width below 1 the -t counter could never reach the width
     * and would grow past the end of tabspcs.
     */
    if (numspc < 1)
    {
        numspc = 1;
    }

/* Invert CR/LF flags for Removal functions */

    /* The CR case below acts when eolmflag is LFFLAG and the LF case
     * when it is CRFLAG, so -rl has to end up as CRFLAG to drop LFs.
     * Every other setting (including none) ends up dropping CRs.
     */
    if(modeflag == REMMODE)
    {
        if (eolmflag == LFFLAG)
        {
            eolmflag = CRFLAG;
        }
        else
        {
            eolmflag = LFFLAG;
        }
    }   /* endif Remove mode */

/* Process the file: */

    for(;;)
    {

        report = read(inpath,inbuff,1);  /* Get the Character */

        if (report < 1)
        {
            /* End of input (or read error): write out any spaces
             * still held back by -t before leaving.
             */
            if (spccntr > 0)
            {
                write(outpath,tabspcs,spccntr);
            }
            exit(0);
        }

        if (hbflag)
        {
            inbuff[0] = inbuff[0] & 0x7f;    /* Strip High Bit */
        }

        switch(tabmflag)
        {
            case TABFLAG:
                if (*inbuff == SPCC)
                {
                    /* Hold spaces back; a full run becomes one TAB. */
                    ++spccntr;
                    if (spccntr == numspc)
                    {
                        write(outpath,tabchr,1);
                        spccntr = 0;
                    }
                    continue;    /* Get next char. */
                }
                else
                {
                    /* A run shorter than numspc is written unchanged. */
                    if(spccntr > 0)
                    {
                        write(outpath,tabspcs,spccntr);
                        spccntr = 0;
                    }
                }
                break;  /* Continue with EOL conversion test */
            case SPCFLAG:

                if (*inbuff == HTAB)
                {
                    write(outpath,tabspcs,numspc);
                    continue;
                }
                else
                {
                    break;
                }
            default:
                break;  /* No TAB/SPC conversion requested */
        }

        switch((int) *inbuff)
        {
            case CR:
                if (eolmflag == LFFLAG)  /* LF is target */
                {
                    switch (modeflag)
                    {
                        case CHGMODE:
                            write(outpath,eollf,1);
                            continue;
                        case ADDMODE:
                            write(outpath,crlf,2);
                            continue;
                        default:       /* REMMODE == strip CR */
                            continue;
                    }
                }
                else
                {
                    write(outpath,inbuff,1);
                }
                continue;
            case LF:
                if (eolmflag == CRFLAG)  /* CR is target */
                {
                    switch (modeflag)
                    {
                        case CHGMODE:
                            write(outpath,crlf,1);  /* First byte = CR */
                            continue;
                        case ADDMODE:
                            write(outpath,crlf,2);
                            continue;
                        default:        /* REMMODE == strip LF */
                            continue;
                    }
                }
                else
                {
                    write(outpath,inbuff,1);
                }
                continue;
            default:
                write(outpath,inbuff,1);
        }

    }   /* Endloop expand */
    exit(0);

}   /* End of main() */

/* ------------------------------------ */

/* shorthelp -- write the usage summary to stderr.
 *
 * Takes no arguments and sets no return value.  The text is kept in
 * a table that ends with a null pointer and is written one entry at
 * a time.
 */
shorthelp()
{
    static char *helptext[] =
    {
        "texcon: Text file converter\n\n",
        "Usage: texcon [inpath] [outpath] [-?achlr]",
        " [-s[=nn]] [-t[=nn]]\n\n",
        " ? = This help message\n",
        " a = add mode:\n",
        "  -ac = add CR before LF, -al = add LF after CR\n",
        " c = LF to CR\n",
        " h = High Bit Strip\n",
        " l = CR to LF\n",
        " r = remove mode:\n",
        "  -rc = remove CR, -rl = remove LF\n",
        " s TABs to SPACEs -- default = 4\n",
        " t SPACEs to TABs -- default = 4\n",
        NULL
    };
    char **entry;

    for (entry = helptext; *entry != NULL; ++entry)
    {
        fputs(*entry, stderr);
    }
}   /* End of shorthelp() */

/* End of texcon.c */
-- 
Jim Omura, 2A King George's Drive, Toronto, (416) 652-3880
ihnp4!utzoo!lsuc!jimomura
Byte Information eXchange: jimomura

jimomura@lsuc.uucp (Jim Omura) (12/13/87)

     This is the first fully working version of 'texcon.c'.  My
thanks to Scott Shaheen who took the time to fix the tab/space
conversion.

Cheers! -- Jim O.

/* texcon.c */

/* Utility to convert many common textfile types.
 *
 * 1987/12/05
 * Public Domain
 *
 * By Jim Omura
 * 2A King George's Drive
 * Toronto, Ontario, Canada
 *
 * and Scott Shaheen (BIX 'sshaheen')
 */

/* Usage:
 *
 * texcon [infile] [outfile] [-?achlr] [-s[=nn]] [-t[=nn]]
 * default (no change)
 *
 * ? help
 *
 * a add:
 *     -al = add LF after CR, -ac = add CR before LF
 * c LF to CR
 * h High Bit Strip
 * l CR to LF
 * r remove:
 *     -rl = remove LF, -rc = remove CR
 * s tabs to spaces, default 4
 * t spaces to tabs, default 4
 *
 * Limits:
 *
 */

#include <stdio.h>
#include <modes.h>

/* Value compared against the results of open() and creat() to detect
 * a failure.
 */
#define ERROR   -1

#define FALSE   0

/* Path numbers.  STDIN and STDOUT are the defaults for inpath and
 * outpath; STDERR is not referenced by the code in this file.
 */
#define STDIN   0
#define STDOUT  1
#define STDERR  2

#define TRUE    1

/* ASCII characters */

#define CR    0x0d
#define HTAB  0x09
#define LF    0x0a
#define SPCC  0x20  /* Space */

/* Program Specific Definitions: */

/* Values of modeflag: change one EOL character into the other, add
 * the missing one, or remove one.
 */
#define CHGMODE 0
#define ADDMODE 1
#define REMMODE 2

/* MAXSPC is the upper limit applied to numspc; it equals the length
 * of the 20-space string that tabspcs points at.
 * CRFLAG and LFFLAG are the values of eolmflag (set by -c and -l).
 * SPCFLAG and TABFLAG are the values of tabmflag (set by -s and -t).
 */
#define MAXSPC   20
#define CRFLAG   1
#define LFFLAG   2
#define SPCFLAG  1
#define TABFLAG  2

/* main -- parse the command line, then copy inpath to outpath one
 * character at a time, applying the requested conversions.  TAB
 * conversion in both directions follows the column position, with
 * tab stops every numspc columns.
 *
 * argc, argv: the usual argument count and vector.  An argument that
 *     starts with '-' is a cluster of option letters.  The first other
 *     argument is opened as the input file, the second is created as
 *     the output file, and any further file names are ignored.
 *
 * Never returns: the program leaves through exit(), with status 0 at
 * end of input or after -?, and status 1 when a file cannot be opened.
 */
main(argc,argv)

int  argc;
char **argv;

{

/* Declare Local Variables */

    char   crlf[2];       /* CR/LF pair; its first byte doubles as a lone CR */
    int    eolmflag;      /* Modify EOL flag: FALSE, CRFLAG or LFFLAG */
    char   *eollf;        /* End of line LF char (points into crlf) */
    int    hbflag;        /* High Bit Strip Flag */
    char   inbuff[1];     /* The character being processed */
    int    inpath;
    int    modeflag;      /* Mode Flag 0 = change, 1 = add, 2 = remove */
    int    numspc;        /* TAB->SPC SPC->TAB conversion factor */
    int    outpath;
    int    report;        /* Result of read() */
    int    spccntr;       /* Spaces read but not yet written (-t only) */
    char   tabchr[1];     /* TAB Character */
    int    tabmflag;      /* TAB/SPC modify flag */
    char   *tabspcs;      /* MAXSPC spaces, used as a source for write() */
    long   j, k;    /* j is position in line , k is tab position count */

/* Set Constants: */

    crlf[0] = CR;     /* It didn't work as a string constant, so ... */
    crlf[1] = LF;     /* ... I've changed it to a variable. */
    eollf = &crlf[1];
    *tabchr = HTAB;
    tabspcs = "                    "; /* 20 spaces */

/* Set Defaults: */

/* Flags: */

    eolmflag = FALSE;
    hbflag   = FALSE;
    modeflag = CHGMODE;  /* = 0 */
    numspc   = 4;
    spccntr  = 0;
    tabmflag = FALSE;
    j        = 0;
    k        = 0;

/* Paths: */

    inpath   = STDIN;
    outpath  = STDOUT;

    for( ;; )

    {

        if(argc < 2)
        {
            break; /* *** Compiler Bug? *** */
        }

        ++argv;
        --argc;

        if(**argv == '-')
        {

            /* Walk the letters after the dash.  'continue' moves on to
             * the next letter; leaving the switch with 'break' ends
             * this argument.
             */
            for (;;)
            {
                ++*argv;

                switch((int) **argv)
                {
                    case '\0':
                        break;      /* Next argv */
                    case '?':
                        shorthelp();
                        exit(0);
                    case 'a':
                        modeflag = ADDMODE;
                        continue;
                    case 'c':
                        eolmflag = CRFLAG;
                        continue;
                    case 'h':
                        hbflag = TRUE;
                        continue;
                    case 'l':
                        eolmflag = LFFLAG;
                        continue;
                    case 'r':
                        modeflag = REMMODE;
                        continue;
                    case 's':
                        tabmflag = SPCFLAG;
                        ++*argv;

                        /* The width is replaced only by "=nn".  A '='
                         * with no number after it keeps the default
                         * instead of setting the width to 0.
                         */
                        if (**argv == '=')
                        {
                            ++*argv;
                            if (**argv >= '0' && **argv <= '9')
                            {
                                numspc = atoi(*argv);
                            }
                        }
                        break;      /* Rest of this argv is skipped */

                    case 't':
                        tabmflag = TABFLAG;
                        ++*argv;

                        /* Same rule as -s. */
                        if (**argv == '=')
                        {
                            ++*argv;
                            if (**argv >= '0' && **argv <= '9')
                            {
                                numspc = atoi(*argv);
                            }
                        }
                        break;      /* Rest of this argv is skipped */

                    default:
                        continue;    /* Next Character */
                }   /* Endswitch */

                break;    /* Next argv */

            }   /* Endloop dash handler */

            continue;  /* next argv */

        }   /* Endif dash switches */

        if (inpath == STDIN)
        {
            inpath = open(*argv,S_IREAD);

            if (inpath == ERROR)
            {
                fprintf(stderr,"Input file error\n");
                exit(1);    /* Report the failure to the caller */
            }   /* Endif open error */

            continue;    /* Next argv */

        }   /* Endif inpath specified */

        if (outpath == STDOUT)
        {
            outpath = creat(*argv,S_IWRITE);

            if (outpath == ERROR)
            {
                fprintf(stderr,"texcon:  Output file error\n");
                exit(1);    /* Report the failure to the caller */
            }

            continue;    /* Next argv */

        }   /* Endif outpath specified */

    }   /* Endloop arghandler */

/* Enforce Variable Limits: */

    /* tabspcs holds MAXSPC spaces, so no write may ask for more. */
    if (numspc > MAXSPC)
    {
        numspc = MAXSPC;
    }

    /* A width of 0 gave the column tests below nothing to match:
     * -s then wrote spaces without end, and -t let spccntr grow past
     * the end of tabspcs.
     */
    if (numspc < 1)
    {
        numspc = 1;
    }

/* Invert CR/LF flags for Removal functions */

    /* The CR case below acts when eolmflag is LFFLAG and the LF case
     * when it is CRFLAG, so -rl has to end up as CRFLAG to drop LFs.
     * Every other setting (including none) ends up dropping CRs.
     */
    if(modeflag == REMMODE)
    {
        if (eolmflag == LFFLAG)
        {
            eolmflag = CRFLAG;
        }
        else
        {
            eolmflag = LFFLAG;
        }
    }   /* endif Remove mode */

/* Process the file: */

    for(;;)
    {

        report = read(inpath,inbuff,1);  /* Get the Character */

        if (report < 1)
        {
            /* End of input (or read error): write out any spaces
             * still held back by -t before leaving.
             */
            if (spccntr > 0)
            {
                write(outpath,tabspcs,spccntr);
            }
            exit(0);
        }

        /* Strip before anything looks at the character, so that the
         * column count and the TAB tests see the byte that will
         * actually be handled.
         */
        if (hbflag)
        {
            inbuff[0] = inbuff[0] & 0x7f;    /* Strip High Bit */
        }

        /* Column bookkeeping: every character except TAB moves one
         * column, and k goes up each time j lands on a tab stop.
         */
        if (*inbuff != HTAB)
        {
            j++;
            if (j == numspc * (k + 1))
            {
                k++;
            }
        }

        switch(tabmflag)
        {
            case TABFLAG:
                if (*inbuff == SPCC)
                {
                    /* Hold the space back.  If it lands on a tab
                     * stop, the held run is replaced by one TAB.
                     */
                    ++spccntr;
                    if (j == numspc * k)
                    {
                        write(outpath,tabchr,1);
                        spccntr = 0;
                    }
                    continue;    /* Get next char. */
                }
                else
                {
                    /* Kept from the original fix: a TAB is written
                     * when the column before this one sat on a tab
                     * stop and more than one space is held back.
                     */
                    if (((j - 1) == numspc * k) && spccntr > 1)
                    {
                        write(outpath,tabchr,1);
                        spccntr = 0;
                    }
                    else if (spccntr > 0)
                    {
                        /* Run stopped short of a tab stop: write it
                         * unchanged.  Nothing is written when no
                         * spaces are held.
                         */
                        write(outpath,tabspcs,spccntr);
                        spccntr = 0;
                    }
                }
                break;  /* Continue with EOL conversion test */
            case SPCFLAG:

                if (*inbuff == HTAB)
                {
                    /* Always at least one space, then pad up to the
                     * next tab stop.
                     */
                    write(outpath,tabspcs,1);
                    j++;
                    while (j < numspc * (k + 1))
                    {
                        write(outpath,tabspcs,1);
                        j++;
                    }
                    k++;
                    continue;
                }
                else
                {
                    break;
                }
            default:
                break;
        }

        switch((int) *inbuff)
        {
            case CR:
                j=k=0;      /* New line: column count starts again */
                if (eolmflag == LFFLAG)  /* LF is target */
                {
                    switch (modeflag)
                    {
                        case CHGMODE:
                            write(outpath,eollf,1);
                            continue;
                        case ADDMODE:
                            write(outpath,crlf,2);
                            continue;
                        default:       /* REMMODE == strip CR */
                            continue;
                    }
                }
                else
                {
                    write(outpath,inbuff,1);
                }
                continue;
            case LF:
                j=k=0;      /* New line: column count starts again */
                if (eolmflag == CRFLAG)  /* CR is target */
                {
                    switch (modeflag)
                    {
                        case CHGMODE:
                            write(outpath,crlf,1);  /* First byte = CR */
                            continue;
                        case ADDMODE:
                            write(outpath,crlf,2);
                            continue;
                        default:        /* REMMODE == strip LF */
                            continue;
                    }
                }
                else
                {
                    write(outpath,inbuff,1);
                }
                continue;
            default:
                write(outpath,inbuff,1);
        }

    }   /* Endloop expand */
    exit(0);

}   /* End of main() */

/* ------------------------------------ */

/* shorthelp -- write the usage summary to stderr.
 *
 * Takes no arguments and sets no return value.  The text is kept in
 * a table whose entries are written in order.
 */
shorthelp()
{
    static char *usage[] =
    {
        "texcon: Text file converter\n\n",
        "Usage: texcon [inpath] [outpath] [-?achlr]",
        " [-s[=nn]] [-t[=nn]]\n\n",
        " ? = This help message\n",
        " a = add mode:\n",
        "  -ac = add CR before LF, -al = add LF after CR\n",
        " c = LF to CR\n",
        " h = High Bit Strip\n",
        " l = CR to LF\n",
        " r = remove mode:\n",
        "  -rc = remove CR, -rl = remove LF\n",
        " s TABs to SPACEs -- default = 4\n",
        " t SPACEs to TABs -- default = 4\n"
    };
    int count;
    int i;

    count = sizeof(usage) / sizeof(usage[0]);

    for (i = 0; i < count; i++)
    {
        fputs(usage[i], stderr);
    }
}   /* End of shorthelp() */

/* End of texcon.c */
-- 
Jim Omura, 2A King George's Drive, Toronto, (416) 652-3880
ihnp4!utzoo!lsuc!jimomura
Byte Information eXchange: jimomura