[net.sources] encode/decode programs

ken@boring.UUCP (08/02/85)

References:
Sender: ken@mcvax.UUCP (Ken Yap)
Reply-To: ken@mcvax.UUCP (Ken Yap)
Followup-To: net.wanted.sources
Distribution: 
Organization: Centrum voor Wiskunde en Informatica, Amsterdam
Keywords: 

Here are a couple of filters written by Robert Elz to turn binary streams
into printable characters. They do better than uuencode/uudecode at the
cost of taking about half as much time again.

When everybody has a copy of compress/uncompress and these programs, I
hope the size of large postings, e.g. BinHexed sources, can be
reduced.

	Ken

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	Makefile
#	encode.1
#	encode.c
#	decode.c
# This archive created: Fri Aug  2 14:07:42 1985
# By:	Ken Yap ()
export PATH; PATH=/bin:$PATH
if test -f 'Makefile'
then
	echo shar: will not over-write existing file "'Makefile'"
else
cat << \SHAR_EOF > 'Makefile'
# Build rules for the encode and decode filters.
# "decode" is the first target, so a bare "make" builds only decode;
# use "make encode" to build the encoder.
CFLAGS = -O

# decode is compiled and linked in one step from its single source file.
decode:		decode.c
		cc -O -o decode decode.c

# encode links against getopt.o, which has to be supplied separately:
# this archive holds only Makefile, encode.1, encode.c and decode.c.
encode:		encode.o getopt.o
		cc -o encode encode.o getopt.o
SHAR_EOF
if test 119 -ne "`wc -c < 'Makefile'`"
then
	echo shar: error transmitting "'Makefile'" '(should have been 119 characters)'
fi
fi # end of overwriting check
if test -f 'encode.1'
then
	echo shar: will not over-write existing file "'encode.1'"
else
cat << \SHAR_EOF > 'encode.1'
.TH ENCODE 1 "1 August 1985"
.UC 4
.SH NAME
encode,decode \- encode/decode a binary file for transmission via mail
.SH SYNOPSIS
.B encode
[ -wn ]
.br
.B decode
.SH DESCRIPTION
.I Encode
and
.I decode
are filters used to send a binary file via uucp (or other) mail.
They are most useful for sending compressed files.
.PP
.I Encode
takes standard input and
produces an encoded version on the standard output.
The encoding uses only printing ASCII characters.
The optional 'w' flag takes the next argument as the
number of characters per line (default 72).
Line breaks can be suppressed by specifying a length of 0.
.PP
.I Decode
reads an encoded file
and regenerates the original stream on standard output.
.SH SEE\ ALSO
compress(1), uncompress(1), uuencode(1C), uuencode(5), mail(1)
.SH AUTHOR
Robert Elz with modifications by Ken Yap
.SH BUGS
Runs about half again as slow as uuencode/uudecode.
.br
The file is expanded by 23% (78 bits become 96)
causing it to take longer to transmit.
SHAR_EOF
if test 986 -ne "`wc -c < 'encode.1'`"
then
	echo shar: error transmitting "'encode.1'" '(should have been 986 characters)'
fi
fi # end of overwriting check
if test -f 'encode.c'
then
	echo shar: will not over-write existing file "'encode.c'"
else
cat << \SHAR_EOF > 'encode.c'
#include <stdio.h>

/*
 * Replace stdio's putchar with one that keeps track of the output
 * column: "pos" counts the characters written on the current line,
 * and putnl() is called before writing once "pos" has reached
 * "linelength".  The argument and the whole expansion are
 * parenthesized so that the macro behaves as a single expression
 * wherever it is used; the argument appears once in each branch.
 */
#undef	putchar
#define	putchar(c)	(pos < linelength ? (putc((c), stdout), pos++) : (putnl(), putc((c), stdout), pos++))

#ifdef SCCSID
static char	*SccsId = "@(#)encode.c	1.2	4/22/85";
#endif /* SCCSID */

/*
 * Produce a 7 bit printable encoding of stdin on stdout.
 *
 * Encoding uses ascii chars from ' ' .. 'z'
 * (040 .. 0172) (0x20 - 0x7a) inclusive
 *
 * Method is to expand 3 chars -> 4 6 bit ones.
 * Then collect 13 6 bit chars, and spread the 13th over
 * the preceding 12, so that each of the 12 chars is now
 * 6.5 bits.  These 2 6.5 bit chars are a little hard
 * to represent on most common machines (one of these days
 * sane hosts will have 1/2 bits just for this program)
 * so we take a pair of them, and represent that in 13 bits.
 * 13 bits (max value 8191) can be represented as
 *	A * 91 + B
 * where A < 91, B < 91  (91^2 == 8281, so it fits!)
 *
 * Each of A and B is encoded as a character by adding 32
 * to make it printable (ie: 0x20).
 *
 * The termination conditions are foul beyond belief.  Don't
 * monkey with them!
 *
 * If you think its a fluke that 040 .. 0171 just happen to
 * be the chars that Piet Beertema's uucp 'f' protocol transmits
 * as single bytes, you're insane.  0172 chars are produced
 * with lower frequency than any other (given random data)
 * so the doubling that occurs with that we will just suffer.
 * (A newer 'f' proto, sometime, will probably not use 0172)
 *
 * This is public domain software, donated by Robert Elz.
 * Permission is granted to use and copy this program, but not
 * for profit, provided this notice is retained.
 *
 * Original by
 *	Robert Elz (..!mulga!kre)
 * Added option to insert newlines periodically.
 *	Ken Yap (..!rochester!ken)
 */

/*
 * the following pair of characters cannot legally occur
 * in normal output (since 90*91 + 90 == 8280, which > 2^13)
 * so we use them to indicate that the data that follows is the
 * terminator.  The character immediately following this
 * pair is the length of the (expanded) terminator (which
 * otherwise might be indeterminable)
 */
#define	ENDMARK1	((90*91 + 90) / 91 + ' ')	/* 90 + ' ', i.e. 'z' */
#define	ENDMARK2	((90*91 + 90) % 91 + ' ')	/* 90 + ' ', i.e. 'z' */

/* output characters per line (-w option); 0 or less disables line breaks */
int linelength		= 72;
/* characters written since the last newline produced by putnl() */
int pos			= 0;

/*
 * encode: read stdin and write its printable encoding to stdout.
 *
 * Options:
 *	-w n	put n characters on each output line (default 72);
 *		an n of 0 or less suppresses line breaks.
 *
 * Input is handed to encode() three bytes at a time.  After EOF,
 * encode() is called once more with the 0, 1 or 2 bytes left over
 * (even when there are none) and flushout() writes the terminator.
 * Exits 0 on success, 1 on a usage error.
 */
main(argc, argv)
	int argc;
	char *argv[];
{
	register int c;
	register int n;		/* number of bytes currently held in b3 */
	char b3[3];
	extern char *optarg;
	int getopt();

	while ((c = getopt (argc, argv, "w:")) != EOF) {
		switch (c)
		{
		case 'w':
			linelength = atoi(optarg);
			break;
		default:
			fprintf(stderr, "usage: %s [-wn]\n", argv[0]);
			exit(1);
		}
	}

	n = 0;
	while ((c = getchar()) != EOF) {
		b3[n++] = c;
		if (n == 3) {
			encode(b3, 3);
			n = 0;
		}
	}

	/*
	 * The fill level is kept in an int because encode() is called
	 * without a prototype and declares its second parameter as int;
	 * a pointer difference would be passed with the wrong type on
	 * machines where it is wider than int.
	 */
	encode(b3, n);
	flushout();
	exit(0);
}

static char b13[13];	/* 6 bit values collected for the next dumpcode() */
static int cnt = 0;	/* number of values currently held in b13 */

/*
 * Split up to three input bytes into four 6 bit values and append
 * them to b13.  Whenever b13 already holds 13 values and another one
 * arrives, the 13 are first written out with dumpcode().
 *
 *	c	the input bytes
 *	n	how many of c[0..2] are valid: 3 for a full group,
 *		0, 1 or 2 for the final group at end of input
 *
 * For a short group the fourth 6 bit value carries n instead of
 * data; the decoder uses it as the number of bytes to emit for the
 * group.  Bytes beyond n are taken as zero rather than read from the
 * caller's buffer, which holds stale bytes from the previous group
 * or, for very short input, was never written at all.  The decoder
 * never looks at those bits, so this only makes the encoded output
 * depend on the input alone.
 */
encode(c, n)
	register char *c;
	int n;
{
	register char *p;
	register int i = cnt;
	register int j;
	char b3[3];
	char b4[4];

	/* private, zero padded copy of the valid input bytes */
	for (j = 0; j < 3; j++)
		b3[j] = j < n ? c[j] : 0;

	p = b4;

	p[0] = (b3[0] >> 2) & 0x3f;
	p[1] = ((b3[0] & 0x3) << 4) | ((b3[1] >> 4) & 0xf);
	p[2] = ((b3[1] & 0xF) << 2) | ((b3[2] >> 6) & 0x3);
	if (n == 3)
		p[3] = b3[2] & 0x3f;
	else
		p[3] = n;

	/*
	 * Append the four values.  A full b13 is only dumped when the
	 * next value turns up, so up to 13 values can still be pending
	 * when flushout() is called.
	 */
	c = &b13[i];
	for (j = 4; --j >= 0; i++) {
		if (i == 13) {
			dumpcode(b13, 13);
			c = b13;
			i = 0;
		}
		*c++ = *p++;
	}
	cnt = i;
}

/*
 * Write the terminator: the two end marker characters, one character
 * giving the number of 6 bit values still pending in b13 (offset by
 * ' '), and then those values themselves.  If line breaking is on and
 * the current output line is not empty, finish it with a newline.
 */
flushout()
{
	register int pending = cnt;

	putchar(ENDMARK1);
	putchar(ENDMARK2);
	putchar(' ' + pending);

	dumpcode(b13, pending);

	if (pos > 0) {
		if (linelength > 0)
			putnl();
	}
}

dumpcode(p, n)
	register char *p;
	register int n;
{
	register last;
	register c;

	if (n == 13)
		n--, last = p[12];
	else if (n & 1)
		last = (1 << (6-1));
	else
		last = 0;

	for ( ; n > 0; n -= 2) {
		c = *p++ << 6;
		c |= *p++;
		if (last & (1 << (6-1)))
			c |= (1 << 12);
		last <<= 1;

		/*
		 * note: 91^2 > 2^13, 90^2 < 2^13, (91 + ' ') is printable
		 */

		/* oh for a compiler that would only do one division... */
		putchar((c / 91) + ' ');
		putchar((c % 91) + ' ');
	}
}

/*
 * End the current output line and reset the column counter.
 * Does nothing unless a positive line length is configured.
 */
putnl()
{
	if (linelength >= 1) {
		pos = 0;
		putc('\n', stdout);
	}
}
SHAR_EOF
if test 3902 -ne "`wc -c < 'encode.c'`"
then
	echo shar: error transmitting "'encode.c'" '(should have been 3902 characters)'
fi
fi # end of overwriting check
if test -f 'decode.c'
then
	echo shar: will not over-write existing file "'decode.c'"
else
cat << \SHAR_EOF > 'decode.c'
#include <stdio.h>

#ifdef SCCSID
static char	*SccsId = "@(#)decode.c	1.2	4/22/85";
#endif /* SCCSID */

/*
 * This program is the inverse of encode
 *
 * It collects runs of 12 characters, combines pairs of those
 * to form 6 13 bit numbers, extracts the top bit of each of
 * those to make a 13th 6 bit character, and splits each of
 * the remaining 6 12 bit numbers to form 12 6 bit ones.
 *
 * The strings of 6 bit numbers are collected into groups of
 * 4 and converted into 3 8 bit characters.
 *
 * Now all that would be trivial, if we didn't need to worry
 * about ending all this correctly.  About 1/2 of the following
 * program wouldn't be here if the ending didn't matter....
 *
 * This is public domain software, donated by Robert Elz.
 * Permission is granted to use and copy this program, but not
 * for profit, provided this notice is retained.
 *
 * Original by
 *	Robert Elz (..!mulga!kre)
 * Modified to ignore harmless control characters (\n, \r and \f).
 * Other control characters result in a non-zero exit code.
 *	Ken Yap (..!rochester!ken)
 */

/*
 * the following pair of characters can never occur as a pair
 * in legal input (since (90 * 91 + 90) > 2^13) - they are
 * noticed at the beginning of a 12 char block, and serve to
 * indicate that this block is the terminator.  The character
 * immediately following is the (expanded) terminator length.
 */
#define	ENDMARK1	((90*91 + 90) / 91)	/* == 90 */
#define	ENDMARK2	((90*91 + 90) % 91)	/* == 90 */

/*
 * decode: read the output of encode on stdin and write the original
 * byte stream to stdout.
 *
 * Input characters outside ' ' .. ' '+90 are skipped; any of them
 * other than \n, \r and \f is counted and makes the exit status 1.
 *
 * Accepted characters are stored, less ' ', in blocks of 12 that
 * alternate between b12 and c12.  A completed block is held back and
 * only passed to pack12() once the following block has been completed
 * as well, or at EOF.  At EOF the held block is passed with the
 * terminator length if no partial block follows it; otherwise the
 * partial block is passed with the terminator length.
 */
main()
{
	register c;
	register char *p;	/* next free slot in the block being filled */
	register i;		/* slots left in the block being filled */
	register first = 1;	/* no block has been completed yet */
	register cnt = 0;	/* terminator length read from the input;
				   separate from the static cnt used by pack12() */
	int errcnt = 0;		/* unexpected characters skipped */
	char b12[12];
	char c12[12];

	p = b12;
	i = 12;

	while ((c = getchar()) != EOF) {
		/* not one of the 91 encoding characters */
		if (c < ' ' || c >= (' ' + 91)) {
			if (c != '\n' && c != '\r' && c != '\f')
				errcnt++;
			continue;
		}
		/*
		 * Two characters of this block are stored and they are the
		 * end marker, so this character is the terminator length.
		 * (ENDMARK1 and ENDMARK2 are both 90, so the order in which
		 * they are compared makes no difference.)  Drop the marker
		 * and start filling the same block again from its beginning.
		 */
		if (i == 10 && p[-1] == ENDMARK1 && p[-2] == ENDMARK2) {
			cnt = c - ' ';
			i = 12;
			p -= 2;
			continue;
		}
		*p++ = c - ' ';
		if (--i == 0) {
			/* block complete: emit the held one, swap buffers */
			if (p == &b12[12]) {
				/* b12 just filled; c12 holds the previous block, if any */
				if (!first)
					pack12(c12, 12, 0);
				else
					first = 0;
				p = c12;
			} else {
				/* c12 just filled; b12 holds the previous block */
				pack12(b12, 12, 0);
				p = b12;
			}
			i = 12;
		}
	}

	/*
	 * EOF: emit the held block, which is in the buffer p does not
	 * point into.  It gets the terminator length only when no
	 * partial block follows it (i == 12).
	 */
	if (p >= &b12[0] && p < &b12[12]) {
		if (!first)
			pack12(c12, 12, i == 12 ? cnt : 0);
	} else
		pack12(b12, 12, i == 12 ? cnt : 0);

	/* then the partial block of 12-i characters, if there is one */
	if (i != 12) {
		if (p >= &b12[0] && p < &b12[12])
			pack12(b12, 12-i, cnt);
		else
			pack12(c12, 12-i, cnt);
	}

	exit(errcnt ? 1 : 0);
}

static char b4[4];	/* 6 bit values waiting to make up a group of four */
static int cnt = 0;	/* number of values held in b4 between calls */

/*
 * Decode one block of input and write the bytes it yields.
 *
 *	p	the block: n values, each an input character less ' '
 *	n	number of values in the block; they are consumed in pairs
 *	last	0 for an ordinary block; for the terminator block, the
 *		number of 6 bit values it contains
 *
 * Each pair is combined as p[0]*91 + p[1].  The low 12 bits give two
 * 6 bit values; bit 12 of each successive pair is shifted into one
 * further value, which is stored after the others.  When last is
 * non-zero, exactly that many values are used.
 *
 * The values are then passed through b4 in groups of four, and each
 * completed group is written as three bytes.  The final value of the
 * block is added separately and any resulting group goes through
 * pack6bit(), which applies the terminator's byte count when last is
 * non-zero.  Values that do not complete a group stay in b4 for the
 * next call.
 */
pack12(p, n, last)
	register char *p;
	register n;
	int last;
{
	register i;
	register char *q;
	char b13[13];

	{
		register c;
		register c13;	/* collects bit 12 of each pair */

		q = b13;
		c13 = 0;

		for (i = 0; i < n; i += 2) {
			c = *p++ * 91;
			c += *p++;
			c13 <<= 1;
			if (c & (1 << 12))
				c13 |= 1;
			*q++ = (c >> 6) & 0x3f;
			*q++ = c & 0x3f;
		}
		*q++ = c13;
		/* terminator: the value count comes from the input instead */
		if (last)
			q = &b13[last];
	}

	p = b13;
	n = q - p;	/* number of 6 bit values to process */
	i = cnt;
	q = &b4[cnt];

	/* every value but the last one */
	while (--n > 0) {
		*q++ = *p++;
		if (++i == 4) {
			char b3[3];
			register char *b = b4;

			/* inline expansion of pack6bit, to save calls ... */

			q = b3;
			*q++ = (b[0] << 2) | ((b[1] >> 4) & 0x3);
			*q++ = (b[1] << 4) | ((b[2] >> 2) & 0xf);
			*q = (b[2] << 6) | (b[3] & 0x3f);

			q = b3;
			/* writes three bytes and leaves i at 0 */
			while (--i > 0)
				putchar(*q++);

			q = b4;
		}
	}

	*q++ = *p++;	/* the last octet */
	++i;

	/* a terminator is always flushed, even if the group is short */
	if (last || i == 4) {
		pack6bit(b4, i, last);
		i = 0;
	}

	cnt = i;
}

/*
 * Turn a group of four 6 bit values into bytes and write them.
 *
 *	p	the group; p[0..3] are always read
 *	n	number of values actually present in the group
 *	last	non-zero when this is the final group of the input
 *
 * An ordinary group produces three bytes.  In the final group the
 * last value present, p[n-1], is the number of bytes to write; it
 * should be less than 3, and if it is not a complaint is printed and
 * all three bytes are written.
 */
pack6bit(p, n, last)
	register char *p;
	register int n;
	int last;
{
	char out[3];
	register int nbytes;
	register int k;

	nbytes = 3;
	if (last) {
		nbytes = p[n-1];
		if (nbytes >= 3) {
			fprintf(stderr, "Badly encoded file\n");
			nbytes = 3;		/* do the best we can */
		}
	}

	out[0] = (p[0] << 2) | ((p[1] >> 4) & 0x3);
	out[1] = (p[1] << 4) | ((p[2] >> 2) & 0xf);
	out[2] = (p[2] << 6) | (p[3] & 0x3f);

	for (k = 0; k < nbytes; k++)
		putchar(out[k]);
}
SHAR_EOF
if test 3748 -ne "`wc -c < 'decode.c'`"
then
	echo shar: error transmitting "'decode.c'" '(should have been 3748 characters)'
fi
fi # end of overwriting check
#	End of shell archive
exit 0
-- 
UUCP: ..!{seismo,okstate,garfield,decvax,philabs}!mcvax!ken Voice: Ken!
Mail: Centrum voor Wiskunde en Informatica, Kruislaan 413, 1098 SJ, Amsterdam.