[net.sources] new C version of shar

perlman@wanginst.UUCP (Gary Perlman) (01/19/85)

Here is a new version of my C version of shar.
It is a lot faster in simple cases because I figured
out that fread and fwrite are much slower than the
primitive character operations of stdio.  I have
added some functionality, mostly making the shell
archive a better shell script.  I guess the following
shell archive is the best example.

Hope this is useful, and thanks to all the people who
made suggestions (see the man entry for credits).

Gary Perlman/Wang Institute/Tyngsboro, MA/01879/(617) 649-9731

#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	shar.1
#	shar.c
#	traverse.3
#	traverse.c
#	getopt.3
#	getopt.c
#	makefile
# This archive created: Fri Jan 18 17:13:39 1985
# By:	Gary Perlman (Wang Institute, Tyngsboro, MA 01879 USA)
export PATH; PATH=/bin:$PATH
echo shar: extracting "'shar.1'" '(2367 characters)'
if test -f 'shar.1'
then
	echo shar: over-writing existing file "'shar.1'"
fi
cat << \SHAR_EOF > 'shar.1'
.TH SHAR 1net "January 12, 1985"
.SH NAME
shar \- create file storage archive for extraction by /bin/sh
.SH SYNOPSIS
.B shar
[-a] [-d delim] [-p prefix] [-bcv] files
.SH DESCRIPTION
.I shar
prints its input files with special lines around them
to be used by the shell,
.I /bin/sh ,
to extract them later.
The output can be filtered through the shell to
recreate copies of the original files.
.SS Options
.TP
.B -a
All the options.
The options:
.ce
-v -c -b -p X
are implied.
.TP
.B -v
Add verbose feedback messages about what
.I shar
is doing; they are printed during extraction.
Sizes of plain files are echoed to allow a simple validity check.
.TP
.B -c
Check file size on extraction by counting characters.
An error message is reported to the person doing the
extraction if the sizes don't match.
One reason why the sizes may not match is that
.I shar
will append a newline to complete incomplete last lines.
.TP
.B -b
Extract files into basenames so that files with absolute path names
are put into the current directory.
This option has little effect when directories are archived.
.TP
.B -d delim
Use this as the ``end of file'' delimiter instead of the default.
The only reason to change it is if you suspect a file
contains the default delimiter:
.B SHAR_EOF
.TP
.B -p prefix
Use this as the prefix to each line of the archived files.
This is to make sure that special characters are not
eaten up by programs like mailers.
If this option is used,
the files will be extracted with the stream editor
.B sed
rather than
.B cat
so it is more efficient and portable to avoid setting the prefix.
.PP
.I shar
allows directories to be named, and
.I shar
prints the necessary commands
.ul
(cd & mkdir)
to create new directories and fill them.
.I shar
will emit commands to make executable plain files executable.
Note that
.I shar
is not appropriate for sending binary files,
only executable files like shell scripts will work.
.SH AUTHOR
Gary Perlman
(based on a shell version by James Gosling,
with additions motivated by
Derek Zahn,
Michael Thompson,
H. Morrow Long,
Fred Avolio,
&
Gran Uddeborg)
.SH BUGS
.I shar
does not know anything about:
links between files,
binary files.
.I shar
prints a message when it over-writes an existing file,
but I have not yet figured out what to do about it:
ask if the file should be over-written,
exit from the script, ...?
SHAR_EOF
if test 2367 -ne "`wc -c 'shar.1'`"
then
	echo shar: error transmitting "'shar.1'" '(should have been 2367 characters)'
fi
echo shar: extracting "'shar.c'" '(7030 characters)'
if test -f 'shar.c'
then
	echo shar: over-writing existing file "'shar.c'"
fi
cat << \SHAR_EOF > 'shar.c'
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

/*{
Shar puts readable text files together in a package
from which they are easy to extract.  The original version
was a shell script posted to the net, shown below:
	#Date: Mon Oct 18 11:08:34 1982
	#From: decvax!microsof!uw-beave!jim (James Gosling at CMU)
	AR=$1
	shift
	for i do
		echo a - $i
		echo "echo x - $i" >>$AR
		echo "cat >$i <<'!Funky!Stuff!'" >>$AR
		cat $i >>$AR
		echo "!Funky!Stuff!" >>$AR
	done
I rewrote this version in C to provide better diagnostics
and to run faster.  The major difference is that my version
does not affect any files because it prints to the standard
output.  Mine also has several options.

Gary Perlman/Wang Institute/Tyngsboro, MA/01879/(617) 649-9731

Many enhancements motivated by Michael Thompson.

Directory archiving motivated by Derek Zahn @ wisconsin
	His version had some problems, so I wrote a general
	routine for traversing a directory hierarchy.  It
	allows marching through a directory on old and new
	UNIX systems.

}*/

/* COMMANDS */
/*
	Names of the programs that the generated archive runs when it
	is extracted.  The archive sets PATH first (see header), so the
	bare names below are looked up through it.
	Each macro must expand to a plain string literal with no
	trailing punctuation so that it can be used inside expressions
	and as a printf argument.  CAT used to end in ';', which made
	it unusable anywhere but at the end of a statement.
*/
#define	PATH "/bin:$PATH"      /* search path for programs */
#define	CAT	"cat"              /* /bin/cat */
#define	SED "sed 's/^%s//'"    /* /bin/sed removes prefix from lines */
#define	MKDIR "mkdir"          /* /bin/mkdir */
#define	TEST "test"            /* /bin/test */
#define	WC_C "wc -c"           /* counts chars in file */
#define	ECHO "echo"            /* echo a message to extractor */

/* OPTIONS */
/* Set by main while it parses the command line; read while the archive is written. */
int 	Verbose = 0;           /* -v: emit echo commands that give feedback during extraction */
int 	Basename = 0;          /* -b: extract into basenames (leading directories dropped) */
int 	Count = 0;             /* -c: count characters to check transfer */
char	*Delim = "SHAR_EOF";   /* -d: here-document terminator put after each file */
char	Filter[100] = "cat";   /* command used to extract archived files; -p overwrites it with a sed command */
char	*Prefix = NULL;        /* -p: line prefix to avoid funny chars; NULL when no prefix is used */

#define	USAGE "[-a] [-p prefix] [-d delim] [-bcv] files > archive"   /* printed after "USAGE: shar" */
#define	OPTSTRING "ap:d:bcv"   /* option letters for getopt; ':' follows options that take an argument */

/*
	main: parse the options, print the archive header, archive every
	named file (traverse walks directories and calls shar for each
	entry), then print the footer.  The archive goes to the standard
	output, diagnostics to the standard error.
	Exit status: 0 on success, 1 for a usage error (bad option, no
	input files, over-long prefix), 2 if header finds an unreadable
	input file.  shar itself exits if a file cannot be opened.
*/
int
main (argc, argv)
int 	argc;
char	**argv;
	{
	extern	int 	optind;
	extern	char	*optarg;
	int 	errflg = 0;
	int 	shar ();
	int 	C;
	while ((C = getopt (argc, argv, OPTSTRING)) != EOF)
		switch (C)
			{
			case 'v': Verbose++; break;
			case 'c': Count++; break;
			case 'b': Basename++; break;
			case 'd': Delim = optarg; break;
			case 'a': /* all the options */
				optarg = "X";
				Verbose++;
				Count++;
				Basename++;
				/* fall through to set prefix */
			case 'p':
				/*
					Filter is a fixed-size buffer: refuse a prefix that
					would not fit instead of writing past its end.
					sizeof (SED) over-estimates the fixed part of the
					command, so the test is on the safe side.
				*/
				if (strlen (optarg) + sizeof (SED) > sizeof (Filter))
					{
					fprintf (stderr, "shar: Prefix too long\n");
					errflg++;
					break;
					}
				(void) sprintf (Filter, SED, Prefix = optarg);
				break;
			default: errflg++;
			}
	if (errflg || optind == argc)
		{
		if (optind == argc)
			fprintf (stderr, "shar: No input files\n");
		fprintf (stderr, "USAGE: shar %s\n", USAGE);
		exit (1);
		}
	if (header (argc, argv, optind))
		exit (2);
	while (optind < argc)
		traverse (argv[optind++], shar);
	footer ();
	exit (0);
	}

/*
	header: check that every input file is readable and, if so,
	print the comment block that starts the archive.
	argv[optind] .. argv[argc-1] are the input file names.
	Returns 0 after printing the header, otherwise the number of
	unreadable files (each one is reported on the standard error
	and nothing is printed on the standard output).
*/
int
header (argc, argv, optind)
int 	argc;
char	**argv;
int 	optind;
	{
	int 	i;
	int 	problems = 0;
	time_t	clock;             /* time () and ctime () work on time_t, not long */
	char	*ctime ();
	char	*getenv ();
	char	*NAME = getenv ("NAME");
	char	*ORG = getenv ("ORGANIZATION");
	for (i = optind; i < argc; i++)
		if (access (argv[i], 4)) /* check read permission */
			{
			fprintf (stderr, "shar: Can't read '%s'\n", argv[i]);
			problems++;
			}
	if (problems) return (problems);
	/*	I have given up on putting a cut line in the archive.
		Too many people complained about having to remove it.
		puts ("-----cut here-----cut here-----cut here-----cut here-----");
	*/
	puts ("#!/bin/sh");
	puts ("# This is a shell archive, meaning:");
	puts ("# 1. Remove everything above the #!/bin/sh line.");
	puts ("# 2. Save the resulting text in a file.");
	puts ("# 3. Execute the file with /bin/sh (not csh) to create the files:");
	for (i = optind; i < argc; i++)
		printf ("#\t%s\n", argv[i]);
	(void) time (&clock);
	/* ctime supplies the newline that ends this line */
	printf ("# This archive created: %s", ctime (&clock));
	if (NAME)
		{
		printf ("# By:\t%s", NAME);
		if (ORG) /* no empty "()" when ORGANIZATION is not set */
			printf (" (%s)", ORG);
		putchar ('\n');
		}
	printf ("export PATH; PATH=%s\n", PATH);
	return (0);
	}

/*
	footer: print the two lines that end every archive, a closing
	comment and the command that makes the script exit successfully.
*/
footer ()
	{
	static	char	*trailer[] =
		{
		"#\tEnd of shell archive",
		"exit 0",
		NULL
		};
	char	**line;
	for (line = trailer; *line != NULL; line++)
		puts (*line);
	}

/*
	archive: print the here-document that recreates one plain file.
	input is the file to read, output the name it is extracted to.
	The text is framed by the Filter command line and the Delim
	line; when Prefix is set it is put in front of every line.
	If the file does not end in a newline, one is added so that
	Delim starts a line of its own, and the archive tells the
	extractor about it.
	Returns 0 on success, 1 if input cannot be opened.
*/
int
archive (input, output)
char	*input, *output;
	{
	char	buf[BUFSIZ];
	FILE	*ioptr;
	int 	len;
	int 	atbol = 1;         /* next chunk read starts a new line */
	int 	incomplete = 0;
	if ((ioptr = fopen (input, "r")) == NULL)
		{
		fprintf (stderr, "shar: Can't open '%s'\n", input);
		return (1);
		}
	printf ("%s << \\%s > '%s'\n", Filter, Delim, output);
	if (Prefix)
		{
		/*
			fgets hands back a line longer than BUFSIZ in pieces.
			Only the first piece may get the prefix, or sed would
			leave the extra copies in the extracted file.
		*/
		while (fgets (buf, BUFSIZ, ioptr))
			{
			if (atbol) fputs (Prefix, stdout);
			fputs (buf, stdout);
			len = (int) strlen (buf);
			if (len > 0)
				atbol = (buf[len - 1] == '\n');
			}
		/* an empty file leaves atbol set, so buf is never examined unread */
		incomplete = !atbol;
		}
	else incomplete = (copyout (ioptr) != '\n');
	/* thanks to H. Morrow Long (ittvax!long) for the next fix */
	if (incomplete)
		putchar ('\n'); /* Delim must begin new line! */
	puts (Delim);
	if (incomplete)
		printf ("echo shar: a missing newline was added to \"'%s'\"\n", input);
	(void) fclose (ioptr);
	return (0);
	}

/*
	Copyout sends everything left on ioptr to the standard output,
	one character at a time, and returns the last character it
	wrote.  If nothing was read it returns a newline.  The caller
	uses the result to decide whether a newline has to be added
	before the end of file delimiter; remembering the character
	is the only extra work compared with a plain copy.
*/
copyout (ioptr)
register	FILE	*ioptr;
	{
	register	int 	ch;
	register	int 	prev = '\n';
	for (ch = getc (ioptr); ch != EOF; ch = getc (ioptr))
		{
		putchar (ch);
		prev = ch;
		}
	return (prev);
	}

#define	FSIZE     statbuf.st_size
/*
	shar: the function traverse calls for every file it visits.
	file is the path name, type is 'd' for a directory and 'f' for
	anything else, pos is 0 before the file is processed and 1
	after.  It prints the shell commands that recreate the file:
		directory, pos 0: mkdir if missing, then cd into it
		directory, pos 1: cd back to the parent
		plain file, pos 0: over-write notice and the here-document
		plain file, pos 1: optional size check, chmod +x if executable
	"." is ignored.  Always returns 0; exits if archive fails.
	NOTE(review): names are put between single quotes unescaped, so
	a name that contains a single quote produces a broken script.
*/
int
shar (file, type, pos)
char	*file;
int 	type;
int 	pos;
	{
	struct	stat	statbuf;
	char	*basefile = file;
	long	size;              /* FSIZE in a type printf can print with %ld */
	if (!strcmp (file, ".")) return (0);
	if (stat (file, &statbuf)) FSIZE = 0;
	size = (long) FSIZE;
	if (Basename)
		{
		while (*basefile) basefile++; /* go to end of name */
		while (basefile > file && *(basefile-1) != '/') basefile--;
		}
	if (pos == 0)
		{
		if (type == 'd')
			{
			printf ("if %s ! -d '%s'\n", TEST, basefile);
			printf ("then\n");
			if (Verbose)
				printf ("	echo shar: creating directory \"'%s'\"\n", basefile);
			printf ("	%s '%s'\n", MKDIR, basefile);
			printf ("fi\n");
			if (Verbose)
				printf ("echo shar: entering directory \"'%s'\"\n", basefile);
			printf ("cd '%s'\n", basefile);
			}
		else /* type == 'f' */
			{
			if (Verbose)
				printf ("echo shar: extracting \"'%s'\" '(%ld characters)'\n",
					basefile, size);
			printf ("if %s -f '%s'\n", TEST, basefile);
			printf ("then\n");
			printf ("	echo shar: over-writing existing file \"'%s'\"\n", basefile);
			printf ("fi\n");
			if (archive (file, basefile)) exit (-1);
			}
		}
	else /* pos == 1 */
		{
		if (type == 'd')
			{
			if (Verbose)
				printf ("echo shar: done with directory \"'%s'\"\n", basefile);
			/* the shell command is cd; chdir is only the system call */
			printf ("cd ..\n");
			}
		else /* type == 'f' (plain file) */
			{
			if (Count)
				{
				/*
					wc reads the file from its standard input so that
					it prints the count alone; given a file argument
					it also prints the name, which is not a number
					that test can compare.
				*/
				printf ("if %s %ld -ne \"`%s < '%s'`\"\n",
					TEST, size, WC_C, basefile);
				puts ("then");
				printf ("	echo shar: error transmitting \"'%s'\" ", basefile);
				printf ("'(should have been %ld characters)'\n", size);
				puts ("fi");
				}
			if (access (file, 1) == 0) /* executable -> chmod +x */
				printf ("chmod +x '%s'\n", basefile);
			}
		}
	return (0);
	}
SHAR_EOF
if test 7030 -ne "`wc -c 'shar.c'`"
then
	echo shar: error transmitting "'shar.c'" '(should have been 7030 characters)'
fi
echo shar: extracting "'traverse.3'" '(1050 characters)'
if test -f 'traverse.3'
then
	echo shar: over-writing existing file "'traverse.3'"
fi
cat << \SHAR_EOF > 'traverse.3'
.TH TRAVERSE 3WI "December 16, 1984"
.SH NAME
traverse \- recursively traverse a directory
.SH SYNOPSIS
.nf
traverse (path, func)
char	*path;
int 	(*func) ();

func (path, filetype, position)
char	*path;
.fi
.SH DESCRIPTION
traverse
applies its argument function func to its argument file pathname path.
If path is a directory,
then traverse applies func to all its entries.
.PP
The argument func should take three parameters:
a file name,
a file type,
and a position.
The call looks like this for directories:
.ce
(*func) (path, 'd', position);
and like this for other files:
.ce
(*func) (path, 'f', position);
The position
is 0 when path is first encountered
and 1 when traverse is done.
This is used to allow processing before and after
a directory is processed.
.SH EXAMPLE
.nf
list (name, type, pos)
char	*name;
	{
	if (type == 'd')
		printf ("%s %s\en", pos ? "Leaving" : "Entering", name);
	else /* type == 'f' */
		printf ("	%s\en", name);
	}
.fi
.SH AUTHOR
Gary Perlman
.SH BUGS
There are no diagnostics when directories cannot be searched.
SHAR_EOF
if test 1050 -ne "`wc -c 'traverse.3'`"
then
	echo shar: error transmitting "'traverse.3'" '(should have been 1050 characters)'
fi
echo shar: extracting "'traverse.c'" '(1769 characters)'
if test -f 'traverse.c'
then
	echo shar: over-writing existing file "'traverse.c'"
fi
cat << \SHAR_EOF > 'traverse.c'
/*LINTLIBRARY*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/dir.h>

#ifdef MAXNAMLEN

/* <sys/dir.h> defines MAXNAMLEN: presumably it also supplies DIR, opendir, readdir and closedir -- TODO confirm */
#define	namedir(entry) (entry->d_name)
#define	MAXNAME 256

#else

/* No MAXNAMLEN: emulate the directory routines by reading the directory as an ordinary file with stdio */
#define	DIR	FILE
#define	MAXNAME (DIRSIZ+2)
#define	opendir(path) fopen (path, "r")
#define closedir(dirp) fclose (dirp)
/*
	readdir: emulation that reads the next directory entry from a
	directory opened as a stdio stream.  Entries whose d_ino is
	zero are skipped.  Returns a pointer to a static entry that the
	next call overwrites, or NULL when dirp is NULL or nothing more
	can be read.
*/
struct direct *
readdir (dirp)
DIR 	*dirp;
	{
	static	struct	direct	slot;
	if (dirp != NULL)
		{
		while (fread (&slot, sizeof (struct direct), 1, dirp) != 0)
			{
			if (slot.d_ino != 0)
				return (&slot);
			}
		}
	return (NULL);
	}
char	*strncpy ();
/*
	namedir: return the name stored in a directory entry as a
	string.  The result is a static buffer that the next call
	overwrites.  At most DIRSIZ characters are copied into a zero
	filled buffer of DIRSIZ+2, so the result always has a
	terminating null even when the entry's name has none.
*/
char *
namedir (entry)
struct	direct	*entry;
	{
	static	char	copy[MAXNAME];
	(void) strncpy (copy, entry->d_name, DIRSIZ);
	return (copy);
	}

#endif

#include <sys/stat.h>
/* isdir needs a struct stat called buf in the scope where it is used; a failed stat counts as "not a directory" */
#define	isdir(path) (stat(path, &buf) ? 0 : (buf.st_mode&S_IFMT)==S_IFDIR)

/*
	traverse: call func for path and, when path is a directory, for
	everything below it.  func is called as (*func) (path, type, pos)
	with type 'd' for a directory and 'f' otherwise, once with pos 0
	before and once with pos 1 after the entries are visited.
	The walk is done by changing into the directory and returning
	with chdir (".."), so entries are passed to func by their names
	relative to their own directory.  "." and ".." are not visited.
	A directory that cannot be entered or opened is skipped silently.
*/
traverse (path, func)
char	*path;
int 	(*func) ();
	{
	struct	stat	buf;
	int 	kind = isdir (path) ? 'd' : 'f';
	(*func) (path, kind, 0);
	if (kind == 'd' && chdir (path) == 0)
		{
		DIR 	*dp = opendir (".");
		if (dp != NULL)
			{
			struct	direct	*ent;
			while ((ent = readdir (dp)) != NULL)
				{
				char	child[MAXNAME];
				(void) strcpy (child, namedir (ent));
				if (strcmp (child, ".") == 0 || strcmp (child, "..") == 0)
					continue;
				traverse (child, func);
				}
			(void) closedir (dp);
			}
		(void) chdir ("..");
		}
	(*func) (path, kind, 1);
	}

#ifdef STANDALONE

/* number of directories currently entered, i.e. tabs to print before a name */
static	Indent = 0;
/*
	tryverse: demonstration callback for traverse.  On the first
	visit (pos 0) it prints the name after one tab per enclosing
	directory, with a trailing '/' for directories, and goes one
	level deeper for a directory.  On the second visit it only
	comes back up one level after a directory.
*/
tryverse (file, type, pos)
char	*file;
	{
	int 	tabs;
	if (pos != 0)
		{
		if (type == 'd') Indent--;
		}
	else
		{
		for (tabs = Indent; tabs > 0; tabs--) putchar ('\t');
		if (type != 'd')
			puts (file);
		else
			{
			printf ("%s/\n", file);
			Indent++;
			}
		}
	}

/* test driver: list the tree below the first argument, or below "." when there is none */
main (argc, argv) char **argv;
	{
	int 	tryverse ();
	char	*root = ".";
	if (argc > 1)
		root = argv[1];
	traverse (root, tryverse);
	}
#endif
SHAR_EOF
if test 1769 -ne "`wc -c 'traverse.c'`"
then
	echo shar: error transmitting "'traverse.c'" '(should have been 1769 characters)'
fi
echo shar: extracting "'getopt.3'" '(2755 characters)'
if test -f 'getopt.3'
then
	echo shar: over-writing existing file "'getopt.3'"
fi
cat << \SHAR_EOF > 'getopt.3'
.TH GETOPT 3 local
.DA 25 March 1982
.SH NAME
getopt \- get option letter from argv
.SH SYNOPSIS
.ft B
int getopt(argc, argv, optstring)
.br
int argc;
.br
char **argv;
.br
char *optstring;
.sp
extern char *optarg;
.br
extern int optind;
.ft
.SH DESCRIPTION
.I Getopt
returns the next option letter in
.I argv
that matches a letter in
.IR optstring .
.I Optstring
is a string of recognized option letters;
if a letter is followed by a colon, the option is expected to have
an argument that may or may not be separated from it by white space.
.I Optarg
is set to point to the start of the option argument on return from
.IR getopt .
.PP
.I Getopt
places in
.I optind
the
.I argv
index of the next argument to be processed.
Because
.I optind
is external, it is normally initialized (to one, the index of the first argument) automatically
before the first call to 
.IR getopt .
.PP
When all options have been processed (i.e., up to the first
non-option argument),
.I getopt
returns
.BR EOF .
The special option
.B \-\-
may be used to delimit the end of the options;
.B EOF
will be returned, and
.B \-\-
will be skipped.
.SH SEE ALSO
getopt(1)
.SH DIAGNOSTICS
.I Getopt
prints an error message on
.I stderr
and returns a question mark
.RB ( ? )
when it encounters an option letter not included in
.IR optstring .
.SH EXAMPLE
The following code fragment shows how one might process the arguments
for a command that can take the mutually exclusive options
.B a
and
.BR b ,
and the options
.B f
and
.BR o ,
both of which require arguments:
.PP
.RS
.nf
main(argc, argv)
int argc;
char **argv;
{
	int c;
	extern int optind;
	extern char *optarg;
	\&.
	\&.
	\&.
	while ((c = getopt(argc, argv, "abf:o:")) != EOF)
		switch (c) {
		case 'a':
			if (bflg)
				errflg++;
			else
				aflg++;
			break;
		case 'b':
			if (aflg)
				errflg++;
			else
				bproc();
			break;
		case 'f':
			ifile = optarg;
			break;
		case 'o':
			ofile = optarg;
			break;
		case '?':
		default:
			errflg++;
			break;
		}
	if (errflg) {
		fprintf(stderr, "Usage: ...");
		exit(2);
	}
	for (; optind < argc; optind++) {
		\&.
		\&.
		\&.
	}
	\&.
	\&.
	\&.
}
.RE
.PP
A template similar to this can be found in
.IR /usr/pub/template.c .
.SH HISTORY
Written by Henry Spencer, working from a Bell Labs manual page.
Behavior believed identical to the Bell version.
.SH BUGS
It is not obvious how
`\-'
standing alone should be treated;  this version treats it as
a non-option argument, which is not always right.
.PP
Option arguments are allowed to begin with `\-';
this is reasonable but reduces the amount of error checking possible.
.PP
.I Getopt
is quite flexible but the obvious price must be paid:  there is much
it could do that it doesn't, like
checking mutually exclusive options, checking type of
option arguments, etc.
SHAR_EOF
if test 2755 -ne "`wc -c 'getopt.3'`"
then
	echo shar: error transmitting "'getopt.3'" '(should have been 2755 characters)'
fi
echo shar: extracting "'getopt.c'" '(1437 characters)'
if test -f 'getopt.c'
then
	echo shar: over-writing existing file "'getopt.c'"
fi
cat << \SHAR_EOF > 'getopt.c'
/* got this off net.sources */
#include <stdio.h>

/*
 * get option letter from argument vector
 */
int	opterr = 1,		/* nonzero: print diagnostics on stderr */
	optind = 1,		/* index into parent argv vector */
	optopt;			/* character checked for validity */
char	*optarg;		/* argument associated with option */

#define BADCH	(int)'?'
#define EMSG	""
/*
 * Report a bad option (unless the caller cleared opterr) and make
 * getopt return BADCH.  Wrapped in do/while so that it behaves as a
 * single statement wherever it is used.
 */
#define tell(s)	do { \
		if (opterr) { \
			fputs(*nargv,stderr);fputs(s,stderr); \
			fputc(optopt,stderr);fputc('\n',stderr); \
		} \
		return(BADCH); \
	} while (0)

/*
 * getopt: return the next option letter of nargv that appears in ostr.
 * A letter followed by ':' in ostr takes an argument, which may be
 * attached to the letter or be the next element of nargv; optarg is
 * set to point to it.  optind is left as the index of the next element
 * to look at.
 * Returns EOF at the first element that is not an option, at a lone
 * "-" and after "--" (which is skipped).  Returns BADCH ('?') for a
 * letter that is not in ostr or that lacks its argument; optopt then
 * holds the offending letter.
 * Once EOF has been returned the scan position is reset, so a further
 * call looks at nargv[optind] afresh instead of taking the characters
 * of the argument that ended the options for option letters.
 */
int
getopt(nargc,nargv,ostr)
int	nargc;
char	**nargv,
	*ostr;
{
	static char	*place = EMSG;	/* option letter processing */
	register char	*oli;		/* option letter list index */
	char	*index();

	if(!*place) {			/* update scanning pointer */
		if(optind >= nargc || *(place = nargv[optind]) != '-' || !*++place) {
			place = EMSG;	/* do not leave place inside a non-option */
			return(EOF);
		}
		if (*place == '-') {	/* found "--" */
			++optind;
			place = EMSG;
			return(EOF);
		}
	}				/* option letter okay? */
	if ((optopt = (int)*place++) == (int)':' || !(oli = index(ostr,optopt))) {
		if(!*place) ++optind;
		tell(": illegal option -- ");
	}
	if (*++oli != ':') {		/* don't need argument */
		optarg = NULL;
		if (!*place) ++optind;
	}
	else {				/* need an argument */
		if (*place) optarg = place;	/* no white space */
		else if (nargc <= ++optind) {	/* no arg */
			place = EMSG;
			tell(": option requires an argument -- ");
		}
		else optarg = nargv[optind];	/* white space */
		place = EMSG;
		++optind;
	}
	return(optopt);			/* dump back option letter */
}
SHAR_EOF
if test 1437 -ne "`wc -c 'getopt.c'`"
then
	echo shar: error transmitting "'getopt.c'" '(should have been 1437 characters)'
fi
echo shar: extracting "'makefile'" '(161 characters)'
if test -f 'makefile'
then
	echo shar: over-writing existing file "'makefile'"
fi
cat << \SHAR_EOF > 'makefile'
CFLAGS=-O
shar: shar.o traverse.o getopt.o
	cc -o shar shar.o traverse.o getopt.o
# Rebuild the distribution archive with the program just built:
# ./shar, not whichever "shar" happens to come first in PATH.
archive: shar
	./shar -cv shar.[c1] traverse.[c3] getopt.[c3] makefile > shar.sh
SHAR_EOF
if test 161 -ne "`wc -c 'makefile'`"
then
	echo shar: error transmitting "'makefile'" '(should have been 161 characters)'
fi
#	End of shell archive
exit 0

avolio@grendel.UUCP (Frederick M. Avolio) (01/30/85)

Close....  Very close... :-) For best results change *all* "chdir" (to
the  shell  *not*  the  system  call)  to  "cd".  You got all but one,
Gary... (see line 240 in shar.c).

240c240
< 			printf ("chdir ..\n");
---
> 			printf ("cd ..\n");
-- 
Fred Avolio      {decvax,seismo}!grendel!avolio      301/731-4100 x4227