[comp.sources.misc] v14i007: pc2unix, file translation programs

magnus@thep.lu.se (Magnus Olsson) (07/16/90)

Posting-number: Volume 14, Issue 7
Submitted-by: magnus@thep.lu.se (Magnus Olsson)
Archive-name: pc2unix/part01

To the moderator of Comp.sources.misc:

I wrote the following two small filters since I do a lot of file transfer
between a PC and a Unix workstation. The filters convert files between the
formats used for text files by Unix and MSDOS. They also take care of 
messy things like control characters and extended ASCII characters.

Both programs are public domain. I hope they will prove themselves useful.


+=====================================================+
| Magnus Olsson		     	| \e+ 	   /_	      |
| Dept. of Theoretical Physics 	|  \  Z	  / q	      |
| University of Lund	     	|   >----<	      |
| Solvegatan 14 a		|  /	  \===== g    |
| S-223 62 LUND, Sweden 	| /e-	   \q	      | 
+===============================+=====================+


---- Cut Here and unpack ----
#!/bin/sh
# shar:	Shell Archiver  (v1.22)
#	Packed Wed Jul 11 16:13:42 MET DST 1990 by lena!magnus
#	from directory /usr/users/magnus/hack
#
#	Run the following text with /bin/sh to create:
#	  pc2unix.c
#	  unix2pc.c
#	  pc2unix.doc
#
sed 's/^X//' << 'SHAR_EOF' > pc2unix.c &&
X/******************************************
X ***            pc2unix.c               ***
X ***					***
X ***   A filter to convert text files   ***
X ***     from MSDOS to Unix format.     ***
X ***      (to be run under Unix)        ***
X ***					***
X ***      Public domain software	***
X ***					***
X ***            Written by 		***
X *** Magnus Olsson (magnus@thep.lu.se)	***
X ***          July 11, 1990		***
X ******************************************/
X
X/* 
X   Switches: 
X   -c: Pass control characters through unchanged
X        (default: Replace with ^+letter)
X   -h: Put a header in front of each input file (not stdin)
X   -d: Replace extended characters (> 126) with a dot
X   -e: Pass extended characters through unchanged
X 	(default: Replace with the char code in parentheses)
X*/
X
X#include <stdio.h>
X
X/* ASCII codes of the characters that get special treatment */
X#define CTRL_Z 26
X#define TAB 9
X#define CR  13
X#define LF  10
X
X/* Boolean values used for the option flags below */
X#define TRUE 1
X#define FALSE 0
X
X/* Option flags; all start out FALSE and are set by the switches in main() */
Xint pass_ctrl = FALSE,
X    /* -c: pass control characters through unchanged */
X    pass_ext  = FALSE,
X    /* -e: pass extended characters (> 126) through unchanged */
X    ext_dot   = FALSE,
X    /* -d: replace extended characters with a dot */
X    header    = FALSE;
X    /* -h: print a header line before each named input file */
X
X
Xmain (argc,argv)
X    int argc;
X    char **argv;
X    
X{
X    int i,arg;
X    
X    FILE *f;
X
X    arg = 0;
X    /* Look for switches */
X    while (++arg < argc) {
X     	if (argv [arg][0] == '-') /* Was it a switch? */
X	    switch (argv [arg][1]) {
X	        case 'c': pass_ctrl = TRUE; break;
X		case 'e': pass_ext  = TRUE; break;
X		case 'd': ext_dot = TRUE; break;
X		case 'h': header = TRUE; break;
X		default:  fprintf (stderr,"Error: Bad switch %s\n",argv [arg]);
X			  exit (1);
X	    }
X	else
X	    break;
X    }	    
X	    
X    if (arg >= argc) /* No filenames as parameters */
X    	process (stdin);
X    else    
X        for (i = arg; i < argc; ++i) {
X	    if (header)
X                printf ("\n\n<<< %s >>>\n\n",argv [i]);
X	    if (! (f = fopen (argv [i],"r"))) {
X	        fprintf (stderr,"Error: Couldn't open file %s\n",argv [i]);
X		exit (1);
X	    }
X    	    process (f);
X    	    fclose (f);
X        }
X}
X
X	
X
Xprocess (f)
X    FILE *f;
X
X{
X    unsigned char ch; /* Must be unsigned to handle extended ASCII codes */
X	   
X    ch = 0; 
X    while (ch != CTRL_Z && ! feof (f)) {
X    	ch = getc (f);
X    	if (ch != CR && ch != CTRL_Z) {
X	    if (! pass_ctrl && ch < ' ' && ch != LF && ch != TAB) 
X 		printf ("^%c",ch + 'A' - 1);
X	    else if (! pass_ext && ch > 126) 
X	        if (ext_dot)
X		    putchar ('.');
X		else
X	            printf ("(%d)",ch);
X            else
X		putchar (ch);
X        }
X    }
X}
SHAR_EOF
chmod 0644 pc2unix.c || echo "restore of pc2unix.c fails"
sed 's/^X//' << 'SHAR_EOF' > unix2pc.c &&
X/******************************************
X ***            unix2pc.c               ***
X ***					***
X ***   A filter to convert text files   ***
X ***     from Unix to MSDOS format.     ***
X ***      (to be run under Unix)        ***
X ***					***
X ***      Public domain software	***
X ***					***
X ***            Written by 		***
X *** Magnus Olsson (magnus@thep.lu.se)	***
X ***          July 11, 1990		***
X ******************************************/
X
X/* 
X   Switches: 
X   -c: Pass ^Z through unchanged
X   -h: Put a header in front of each input file (not stdin)
X*/
X
X#include <stdio.h>
X
X/* ASCII codes of the characters that get special treatment */
X#define CTRL_Z 26
X#define CR  13
X#define LF  10
X
X/* Boolean values used for the option flags below */
X#define TRUE 1
X#define FALSE 0
X
X/* Option flags; both start out FALSE and are set by the switches in main() */
Xint pass_ctrl = FALSE,
X    /* -c: pass ^Z through unchanged */
X    header    = FALSE;
X    /* -h: print a header line before each named input file */
X
X
Xmain (argc,argv)
X    int argc;
X    char **argv;
X    
X{
X    int i,arg;
X    
X    FILE *f;
X
X    arg = 0;
X    /* Look fo switches */
X    while (++arg < argc) {
X	if (argv [arg][0] == '-') /* Was it a switch? */
X	    switch (argv [arg][1]) {
X		case 'c': pass_ctrl = TRUE; break;
X		case 'h': header = TRUE; break;
X		default:  fprintf (stderr,"Error: Bad switch %s\n",argv [arg]);
X			  exit (1);
X	    }
X	else
X	    break;
X    }	    
X	    
X    if (arg >= argc) /* No filenames as parameters */
X    	process (stdin);
X    else    
X        for (i = arg; i < argc; ++i) {
X	    if (header)
X                printf ("\n\n<<< %s >>>\n\n",argv [i]);
X	    if (! (f = fopen (argv [i],"r"))) {
X	        fprintf (stderr,"Error: Couldn't open file %s\n",argv [i]);
X		exit (1);
X	    }
X    	    process (f);
X    	    fclose (f);
X        }
X}
X
X	
X
Xprocess (f)
X    FILE *f;
X
X{
X    int ch; 
X	   
X    ch = 0; 
X    while ((ch = getc (f)) > -1) {
X        switch (ch) {
X	    case CTRL_Z: if (pass_ctrl) 
X	    		     putchar (ch);
X			 else
X			     printf ("^Z");
X	    case LF:     putchar (CR);
X	    default:     putchar (ch);
X	}
X    }
X    putchar (CTRL_Z); /* Add EOF marker */
X}
SHAR_EOF
chmod 0644 unix2pc.c || echo "restore of unix2pc.c fails"
sed 's/^X//' << 'SHAR_EOF' > pc2unix.doc &&
Xpc2unix and unix2pc --- filters for text file conversion Unix <--> MSDOS
X
XBy Magnus Olsson (magnus@thep.lu.se)
X
X===============================================================================
X
XThese two small filters are useful if you, like I do, transfer text files
Xbetween PC's and Unix machines (for example, the documentation for
XComp.sources.ibm.pc programs). 
X
XAs is well known, MSDOS and Unix use different formats for text files. Under
XUnix, each line ends with a newline character (ASCII 10) while under MSDOS,
Xit ends in a carriage return (ASCII 13) + a line feed. MSDOS uses a special
Xcharacter (ASCII 26) as an end of file marker, while Unix doesn't. Also, MSDOS
Xuses an extended character set (ASCII codes > 127), which most Unixes can't
Xhandle.
X
XNormally, your transfer program (ftp, kermit etc) will automatically convert
Xtext files to the correct format, but sometimes they don't, for example if the
Xtext file is part of a .ARC file which is then unpacked under the 'wrong'
Xoperating system. In that case, you may need these filters.
X
XHow to install them:
X===================
X
XJust do 
Xcc -o unix2pc unix2pc.c
Xand
Xcc -o pc2unix pc2unix.c
X
Xand copy the executables to somewhere in your path.
X
X
XHow to use them:
X===============
X
XIf you don't give these commands any parameters, they will act as filters,
Xreading their input from stdin and writing to stdout. If you specify one or
Xmore filenames on the command line, input will be read from the file(s) and
Xthe converted text will be written on stdout (the files will be concatenated). 
X
XYou may also specify one or more of the following switches (*before* any
Xfilenames):
X
XFor pc2unix:
X
X-c: Changes the way control characters are treated. If this switch is
X    specified, all control characters will be passed through the filter. The
X    default is to replace all control characters except tabs and line feeds
X    with a symbolic representation (like ^C for ASCII 3).
X    
X-d: Replace all extended characters (ASCII > 126) with a dot, instead of
X    replacing them with their character code in parentheses (the default).
X
X-e: Pass all extended characters unchanged through the filter.
X
X-h: Put a header with the file name before each file in the output if files
X    are specified on the command line.
X
X
XFor unix2pc:
X
X-c: Pass all ASCII 26 characters unchanged through the filter. The default
X    is to replace them with the string "^Z". (This is the EOF character in
X    MSDOS).
X
X-h: This switch has the same meaning as for pc2unix.
X
SHAR_EOF
chmod 0644 pc2unix.doc || echo "restore of pc2unix.doc fails"
exit 0