[alt.sources.d] Beyond shar

peter@ficc.uu.net (Peter da Silva) (05/02/90)

I also agree that shars are getting out of hand. "shar" was a good idea for
its time, but it's gotten too big, too fast. I think it's long past the
time for a standard text archive format on Usenet.

I vote for the Software Tools format:

-h- filename date true_path_name
file
-h- nextfilename date true_path_name
...

Sometimes you see things like this:
-h- filename date true_path_name
file
-t- filename date true_path_name

I think there's room to turn this into something we can all live with.
Adding the file size, and prefixing all lines with some character, should
make things a bit safer.

Martin Minow did an implementation of this. It's kind of bare bones, but
it should work as a starting place. I'm appending it to this message.

In shar format. :->

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix@uunet.uu.net if you want that tool.
# If this archive is complete, you will see the following message at the end:
#		"End of shell archive."
# Contents:  archc.c archx.c readme.txt
# Wrapped by peter@ficc.uu.net on Wed May  2 11:49:01 1990
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'archc.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'archc.c'\"
else
echo shar: Extracting \"'archc.c'\" \(7785 characters\)
sed "s/^X//" >'archc.c' <<'END_OF_FILE'
X/*
X *			A R C H I V E
X *
X * Create an archive
X *
X */
X
X/*)BUILD	$(TKBOPTIONS) = {
X			TASK	= ...ARC
X		}
X*/
X
X#ifdef	DOCUMENTATION
X
Xtitle	archc	text file archive creation
Xindex		text file archive creation
X
Xsynopsis
X
X	archc file[s] >archive
X
Xdescription
X
X	Archc manages archives (libraries) of source files, allowing
X	a large number of small files to be stored without using
X	excessive system resources.  It copies the set of named
X	files to standard output in archive format.
X
X	The archx program will recreate the files from an archive.
X
X	Note: there are no checks against the same file appearing
X	twice in an archive.
X
Xarchive file format
X
X	Archive files are standard text files.  Each archive element is
X	preceded by a line of the format:
X	.s.nf
X	-h-	file.name	date	true_path_name
X	.s.f
X	Note that there is no line or byte count.  To prevent problems,
X	a '-' at the beginning of a record within a user file or embedded
X	archive will be "quoted" by doubling it.  The date and true filename
X	fields are ignored.  On Dec operating systems, file.name is
X	forced to lowercase.  Certain bytes at the beginning of a record are
X	also prefixed by '-' to prevent mailers from treating them
X	as commands.
X
Xdiagnostics
X
X	Diagnostic messages should be self-explanatory
X
Xauthor
X
X	Martin Minow
X
X#endif
X
X#include	<stdio.h>
X#include	<ctype.h>
X
X#define unix
X#undef vms
X
X#ifdef vms
X#include		<ssdef.h>
X#include		<stsdef.h>
X#define	IO_SUCCESS	(SS$_NORMAL | STS$M_INHIB_MSG)
X#define	IO_ERROR	SS$_ABORT
X#endif
X/*
X * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file
X */
X#ifndef	IO_SUCCESS
X#define	IO_SUCCESS	0
X#endif
X#ifndef	IO_ERROR
X#define	IO_ERROR	1
X#endif
X#define EOS		0
X#define	FALSE		0
X#define	TRUE		1
X
Xchar		text[513];		/* Working text			*/
Xchar		name[81];		/* Current archive member name	*/
Xchar		pathname[81];		/* Output for argetname()	*/
Xchar		*timetext;		/* Time of day text		*/
Xint		verbose		= TRUE; /* TRUE for verbosity		*/
XFILE		*infd;			/* Input file			*/
X
Xmain(argc, argv)
Xint		argc;			/* Arg count			*/
Xchar		*argv[];		/* Arg vector			*/
X{
X	register int		i;	/* Random counter		*/
X	register char		*fn;	/* File name pointer		*/
X	register char		*argp;	/* Arg pointer			*/
X	int			nfiles;
X	extern char		*ctime();
X	extern long		time();
X	long			timval;
X
X	time(&timval);
X	timetext = ctime(&timval);
X	timetext[24] = EOS;
X#ifdef vms
X	argc = getredirection(argc, argv);
X#endif
X	if (argc <= 1)
X	    fprintf(stderr, "No files to archive?\n");
X#ifdef	unix
X	for (i = 1; i < argc; i++) {
X	    if ((infd = fopen(argv[i], "r")) == NULL)
X		perror(argv[i]);
X	    else {
X		strcpy(pathname, argv[i]);
X		import();
X		fclose(infd);
X	    }
X	}
X#else
X	/*
X	 * Decus C supports fwild/fnext for explicit processing
X	 * of wild-card filenames.
X	 */
X	for (i = 1; i < argc; i++) {
X	    if ((infd = fwild(argv[i], "r")) == NULL)
X		perror(argv[i]);
X	    else {
X		for (nfiles = 0; fnext(infd) != NULL; nfiles++) {
X		    fgetname(infd, pathname);
X		    import();
X		}
X		fclose(infd);
X		if (nfiles == 0)
X		    fprintf(stderr, "No files match \"%s\"\n", argv[i]);
X	    }
X	}
X#endif
X}
X
Ximport()
X/*
X * Add the file open on infd (with file name in pathname) to
X * the archive.
X *
X * A "-h-" header line is written first, then the file is copied to
X * stdout.  A line that starts with '-', '.' or '~' gets an extra '-'
X * in front of it.  The record count goes to stderr when verbose is set.
X */
X{
X	unsigned int	nrecords;	/* Lines copied			*/
X	int		newline;	/* TRUE at the start of a line	*/
X	register char	*tp;
X
X	fixname();
X	nrecords = 0;
X	newline = TRUE;
X	printf("-h- %s\t%s\t%s\n", name, timetext, pathname);
X	while (fgets(text, sizeof text, infd) != NULL) {
X	    /*
X	     * fgets() hands back a long line in several pieces.  Only
X	     * the piece that really starts a line may be quoted or
X	     * counted; quoting a later piece would put a stray '-'
X	     * in the middle of the user's text.
X	     */
X	    if (newline) {
X		switch (text[0]) {
X		case '-':
X		case '.':
X		case '~':
X		    putchar('-');			/* Quote	*/
X		    break;
X		default:
X		    break;
X		}
X		nrecords++;
X	    }
X	    fputs(text, stdout);
X	    for (tp = text; *tp != EOS; tp++)
X		;
X	    if (tp > text)
X		newline = (tp[-1] == '\n');
X	}
X	/*
X	 * If the file did not end with a newline, supply one so that
X	 * the next header starts at the beginning of a line.
X	 */
X	if (!newline)
X	    putchar('\n');
X	if (ferror(infd)) {
X	    perror(name);
X	    fprintf(stderr, "Error when importing a file\n");
X	}
X	if (verbose) {
X	    fprintf(stderr, "%u records read from %s\n",
X		nrecords, pathname);
X	}
X}
X
Xfixname()
X/*
X * Get file name (in pathname), stripping off device:[directory]
X * and ;version.  The archive name ("file.ext") is written to name[].
X * On a dec operating system, name is forced to lowercase.
X */
X{
X	register char	*tp;
X	extern char	*strrchr();
X#ifndef	unix
X	register char	*ip;
X	char		bracket;
X	extern char	*strchr();
X#endif
X
X#ifdef	unix
X	/*
X	 * name is after all directory information
X	 */
X	if ((tp = strrchr(pathname, '/')) != NULL)
X	    tp++;
X	else
X	    tp = pathname;
X	strcpy(name, tp);
X#else
X	strcpy(name, pathname);
X	if ((tp = strrchr(name, ';')) != NULL)
X		*tp = EOS;
X	/*
X	 * The tail of name[] is moved down over its own head below.
X	 * Source and destination overlap, so the bytes are copied by
X	 * hand, front to back, instead of with strcpy().
X	 */
X	while ((tp = strchr(name, ':')) != NULL) {
X		for (ip = name, tp++; (*ip = *tp) != EOS; ip++, tp++)
X		    ;
X	}
X	switch (name[0]) {
X	case '[':	bracket = ']';
X			break;
X	case '<':	bracket = '>';
X			break;
X	case '(':	bracket = ')';
X			break;
X	default:	bracket = EOS;
X			break;
X	}
X	if (bracket != EOS) {
X	    if ((tp = strchr(name, bracket)) == NULL) {
X		fprintf(stderr, "? Illegal file name \"%s\"\n",
X		    pathname);
X	    }
X	    else {
X		for (ip = name, tp++; (*ip = *tp) != EOS; ip++, tp++)
X		    ;
X	    }
X	}
X	for (tp = name; *tp != EOS; tp++) {
X	    if (isupper(*tp))
X		*tp = tolower(*tp);
X	}
X#endif
X}
X
X#ifdef	unix
Xchar *
Xstrrchr(stng, chr)
Xregister char	*stng;
Xregister char	chr;
X/*
X * Find the last occurrence of chr in stng and return a pointer to
X * it, or NULL if chr does not occur.
X * (Some Unix systems supply this routine under another name.)
X */
X{
X	register char	*last;
X
X	/*
X	 * The terminating EOS is examined as well, so asking for EOS
X	 * yields a pointer to the end of the string.
X	 */
X	for (last = NULL; ; stng++) {
X	    if (*stng == chr)
X		last = stng;
X	    if (*stng == EOS)
X		break;
X	}
X	return (last);
X}
X#endif
X
X/*
X * getredirection() is intended to aid in porting C programs
X * to VMS (Vax-11 C) which does not support '>' and '<'
X * I/O redirection.  With suitable modification, it may be
X * useful for other portability problems as well.
X */
X
Xstatic int
Xgetredirection(argc, argv)
Xint		argc;
Xchar		**argv;
X/*
X * Process vms redirection arg's.  Exit if any error is seen.
X * If getredirection() processes an argument, it is erased
X * from the vector.  getredirection() returns a new argc value.
X * When vms is not defined, nothing is done and argc is returned
X * unchanged.
X *
X * Warning: do not try to simplify the code for vms.  The code
X * presupposes that getredirection() is called before any data is
X * read from stdin or written to stdout.
X *
X * Normal usage is as follows:
X *
X *	main(argc, argv)
X *	int		argc;
X *	char		*argv[];
X *	{
X *		argc = getredirection(argc, argv);
X *	}
X */
X{
X#ifdef	vms
X	register char		*ap;	/* Argument pointer	*/
X	int			i;	/* argv[] index		*/
X	int			j;	/* Output index		*/
X	int			file;	/* File_descriptor 	*/
X
X	for (j = i = 1; i < argc; i++) {   /* Do all arguments	*/
X	    switch (*(ap = argv[i])) {
X	    case '<':			/* <file		*/
X		if (freopen(++ap, "r", stdin) == NULL) {
X		    perror(ap);		/* Can't find file	*/
X		    exit(IO_ERROR);	/* Is a fatal error	*/
X		}
X		/*
X		 * Without this break control ran on into the '>'
X		 * case and created an output file named after the
X		 * tail of the input file name.
X		 */
X		break;			/* Exit case test	*/
X
X	    case '>':			/* >file or >>file	*/
X		if (*++ap == '>') {	/* >>file		*/
X		    /*
X		     * If the file exists, and is writable by us,
X		     * call freopen to append to the file (using the
X		     * file's current attributes).  Otherwise, create
X		     * a new file with "vanilla" attributes as if
X		     * the argument was given as ">filename".
X		     * access(name, 2) returns 0 if we can write on
X		     * the specified file.
X		     */
X		    if (access(++ap, 2) == 0) {
X			if (freopen(ap, "a", stdout) != NULL)
X			    break;	/* Exit case statement	*/
X			perror(ap);	/* Error, can't append	*/
X			exit(IO_ERROR);	/* After access test	*/
X		    }			/* If file accessible	*/
X		}
X		/*
X		 * On vms, we want to create the file using "standard"
X		 * record attributes.  creat(...) creates the file
X		 * using the caller's default protection mask and
X		 * "variable length, implied carriage return"
X		 * attributes. dup2() associates the file with stdout.
X		 */
X		if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1
X		 || dup2(file, fileno(stdout)) == -1) {
X		    perror(ap);		/* Can't create file	*/
X		    exit(IO_ERROR);	/* is a fatal error	*/
X		}			/* If '>' creation	*/
X		break;			/* Exit case test	*/
X
X	    default:
X		argv[j++] = ap;		/* Not a redirector	*/
X		break;			/* Exit case test	*/
X	    }
X	}				/* For all arguments	*/
X	return (j);
X#else
X	/*
X	 * Note: argv[] is referenced to fool the Decus C
X	 * syntax analyser, suppressing an unneeded warning
X	 * message.
X	 */
X	return (argv[0], argc);		/* Just return as seen	*/
X#endif
X}
X
X
X
END_OF_FILE
if test 7785 -ne `wc -c <'archc.c'`; then
    echo shar: \"'archc.c'\" unpacked with wrong size!
fi
# end of 'archc.c'
fi
if test -f 'archx.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'archx.c'\"
else
echo shar: Extracting \"'archx.c'\" \(7837 characters\)
sed "s/^X//" >'archx.c' <<'END_OF_FILE'
X/*
X *			A R C H X
X *
X * Archive extraction
X *
X */
X
X/*
X * Note: the )BUILD comment is extracted by a Decus C tool to construct
X * system-dependent compiler command lines.
X *
X * Text inside #ifdef DOCUMENTATION is converted to runoff by a
X * Decus C tool.
X */
X
X/*)BUILD	$(TKBOPTIONS) = {
X			TASK	= ...ARX
X		}
X*/
X
X#ifdef	DOCUMENTATION
X
Xtitle	archx	text file archiver extraction
Xindex		text file archiver extraction
X
Xsynopsis
X
X	archx archive_files
X
Xdescription
X
X	Archx manages archives (libraries) of source files, allowing
X	a large number of small files to be stored without using
X	excessive system resources.  Archx extracts all files from
X	an archive.
X
X	If no archive_name file is given, the standard input is read.
X	Archive header records are echoed to the standard output.
X
Xarchive file format
X
X	Archive files are standard text files.  Each archive element is
X	preceded by a line of the format:
X	.s.nf
X	-h-	file.name	date	true_name
X	.s.f
X	Note that there is no line or byte count.  To prevent problems,
X	a '-' at the beginning of a record within a user file or embedded
X	archive will be "quoted" by doubling it.  The date and true filename
X	fields are ignored.  On some operating systems, file.name is
X	forced to lowercase.  The archive builder (archc) may prefix
X	other characters by '-'.
X
X	If the first non-blank line of an input file does not
X	begin with "-h-", the text will be appended to "archx.tmp".
X	This is needed if archives are distributed by mail
X	and arrive with initial routing and subject information.
X
Xdiagnostics
X
X	Diagnostic messages should be self-explanatory
X
Xauthor
X
X	Martin Minow
X
Xbugs
X
X#endif
X
X#include	<stdio.h>
X#include	<ctype.h>
X#ifdef vms
X#include		<ssdef.h>
X#include		<stsdef.h>
X#define	IO_SUCCESS	(SS$_NORMAL | STS$M_INHIB_MSG)
X#define	IO_ERROR	SS$_ABORT
X#endif
X/*
X * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file
X */
X#ifndef	IO_SUCCESS
X#define	IO_SUCCESS	0
X#endif
X#ifndef	IO_ERROR
X#define	IO_ERROR	1
X#endif
X
X#define EOS		0
X#define	FALSE		0
X#define	TRUE		1
X
X/*
X * The following status codes are returned by gethdr()
X */
X#define DONE	0
X#define	GOTCHA	1
X#define	NOGOOD	2
X
Xchar		text[513];		/* Working text line		*/
Xchar		name[81];		/* Current archive member name	*/
Xchar		filename[81];		/* Working file name		*/
Xchar		arfilename[81];		/* Archive file name		*/
Xchar		fullname[81];		/* Output for argetname()	*/
Xint		verbose		= TRUE;	/* TRUE for verbosity		*/
Xint		first_archive;		/* For mail header skipping	*/
X
Xmain(argc, argv)
Xint		argc;			/* Arg count			*/
Xchar		*argv[];		/* Arg vector			*/
X{
X	register int		i;	/* Random counter		*/
X	int			status;	/* Exit status			*/
X
X#ifdef	vms
X	argc = getredirection(argc, argv);
X#endif
X	status = IO_SUCCESS;
X	if (argc == 1)
X	    process();
X	else {
X	    for (i = 1; i < argc; i++) {
X		if (freopen(argv[i], "r", stdin) != NULL)
X		    process();
X		else {
X		    perror(argv[i]);
X		    status = IO_ERROR;
X		}
X	    }
X	}
X	exit(status);
X}
X
Xprocess()
X/*
X * Process archive open on stdin
X *
X * Each "-h-" header starts a new output file named by the header.
X * Text that turns up where a header was expected is appended to
X * "archx.tmp" instead.  If an output file cannot be opened, its
X * contents are skipped.
X */
X{
X	register FILE		*outfd;
X	register int		i;
X
X	text[0] = EOS;
X	while ((i = gethdr()) != DONE) {
X	    if (i == GOTCHA) {
X		if ((outfd = fopen(name, "w")) == NULL) {
X		    perror(name);
X		    fprintf(stderr, "Can't create \"%s\"\n", name);
X		    arskip();
X		    continue;
X		}
X	    }
X	    else {				/* NOGOOD		*/
X		fprintf(stderr, "Missing -h-, writing to archx.tmp\n");
X		fprintf(stderr, "Current text line: %s", text);
X		strcpy(name, "archx.tmp");
X		if ((outfd = fopen(name, "a")) == NULL) {
X		    perror(name);
X		    fprintf(stderr, "Cannot append to %s\n", name);
X		    arskip();
X		    continue;
X		}
X		/*
X		 * arexport() starts by reading a fresh line, so the
X		 * line already sitting in text[] has to be saved
X		 * here or it never reaches archx.tmp.
X		 */
X		fputs(text, outfd);
X	    }
X	    arexport(outfd);
X	    /*
X	     * Buffered output is written by fclose(), so a failure
X	     * here means the file is incomplete.
X	     */
X	    if (fclose(outfd) == EOF)
X		perror(name);
X	}
X}
X
Xint
Xgethdr()
X/*
X * If text is null, read a record, returning to signal input state:
X *	DONE	Eof read
X *	NOGOOD	-h- wasn't first non-blank line.  Line is in text[]
X *	GOTCHA	-h- found, parsed into name.
X *
X * A member name too long for name[] is truncated to fit.
X */
X{
X	register char	*tp;
X	register char	*np;
X
Xagain:	if (text[0] == EOS
X	 && fgets(text, sizeof text, stdin) == NULL)
X	    return (DONE);
X	if (text[0] == '\n' && text[1] == EOS) {
X	    text[0] = EOS;
X	    goto again;
X	}
X	if (text[0] != '-'
X	 || text[1] != 'h'
X	 || text[2] != '-')
X	    return (NOGOOD);
X	for (tp = &text[3]; isspace(*tp); tp++)
X	    ;
X	/*
X	 * Copy the name up to the next white space.  Stop at EOS too
X	 * (a header with nothing after the name and no newline would
X	 * otherwise send the scan off the end of text[]), and never
X	 * store past the end of name[].
X	 */
X	np = name;
X	while (*tp != EOS && !isspace(*tp)) {
X	    if (np < &name[sizeof name - 1])
X		*np++ = *tp;
X	    tp++;
X	}
X	*np = EOS;
X	return (GOTCHA);
X}
X
Xarskip()
X/*
X * Discard input up to the next "-h-" header line, which is left in
X * text[].  At end of file text[] is set empty instead.
X */
X{
X	for (;;) {
X	    if (fgets(text, sizeof text, stdin) == NULL) {
X		text[0] = EOS;			/* EOF signal		*/
X		return;
X	    }
X	    if (text[0] != '-' || text[1] != 'h' || text[2] != '-')
X		continue;			/* Not a header yet	*/
X	    return;
X	}
X}
X
Xarexport(outfd)
Xregister FILE	*outfd;
X/*
X * Read secret archive format, writing archived data to outfd.
X * Clean out extraneous <cr>,<lf>'s
X *
X * Copying stops at end of file (text[] is then left empty) or at a
X * line starting with "-h", which is left in text[] for gethdr().
X * On any other line a leading '-' is a quote and is removed.
X */
X{
X	register char	*tp;
X	register char	*out;		/* Where to start writing	*/
X	unsigned int	nrecords;
X	int		newline;	/* TRUE at the start of a line	*/
X	int		endline;	/* This piece ends its line	*/
X
X	printf("Creating \"%s\", ", name);
X	nrecords = 0;
X	newline = TRUE;
X	while (fgets(text, sizeof text, stdin) != NULL) {
X	    tp = &text[strlen(text)];
X	    endline = (tp > text && tp[-1] == '\n');
X	    if (tp > &text[1] && *--tp == '\n' && *--tp == '\r') {
X		*tp++ = '\n';
X		*tp = EOS;
X	    }
X	    /*
X	     * fgets() hands back a long line in several pieces.  A
X	     * '-' only means "quote" or "header" on the piece that
X	     * starts a line; later pieces are copied untouched.
X	     */
X	    out = text;
X	    if (newline) {
X		if (text[0] == '-') {
X		    if (text[1] == 'h')
X			goto gotcha;
X		    out++;			/* Drop the quote	*/
X		}
X		nrecords++;
X	    }
X	    fputs(out, outfd);
X	    newline = endline;
X	}
X	text[0] = EOS;
Xgotcha:	printf("%u records\n", nrecords);
X	if (ferror(stdin) || ferror(outfd))
X	    printf("Creation of \"%s\" completed with error\n", name);
X}
X
X/*
X * getredirection() is intended to aid in porting C programs
X * to VMS (Vax-11 C) which does not support '>' and '<'
X * I/O redirection.  With suitable modification, it may be
X * useful for other portability problems as well.
X */
X
X#ifdef	vms
Xstatic int
Xgetredirection(argc, argv)
Xint		argc;
Xchar		**argv;
X/*
X * Process vms redirection arg's.  Exit if any error is seen.
X * If getredirection() processes an argument, it is erased
X * from the vector.  getredirection() returns a new argc value.
X *
X * Warning: do not try to simplify the code for vms.  The code
X * presupposes that getredirection() is called before any data is
X * read from stdin or written to stdout.
X *
X * Normal usage is as follows:
X *
X *	main(argc, argv)
X *	int		argc;
X *	char		*argv[];
X *	{
X *		argc = getredirection(argc, argv);
X *	}
X */
X{
X	register char		*ap;	/* Argument pointer	*/
X	int			i;	/* argv[] index		*/
X	int			j;	/* Output index		*/
X	int			file;	/* File_descriptor 	*/
X
X	for (j = i = 1; i < argc; i++) {   /* Do all arguments	*/
X	    switch (*(ap = argv[i])) {
X	    case '<':			/* <file		*/
X		if (freopen(++ap, "r", stdin) == NULL) {
X		    perror(ap);		/* Can't find file	*/
X		    exit(IO_ERROR);	/* Is a fatal error	*/
X		}
X		/*
X		 * Without this break control ran on into the '>'
X		 * case and created an output file named after the
X		 * tail of the input file name.
X		 */
X		break;			/* Exit case test	*/
X
X	    case '>':			/* >file or >>file	*/
X		if (*++ap == '>') {	/* >>file		*/
X		    /*
X		     * If the file exists, and is writable by us,
X		     * call freopen to append to the file (using the
X		     * file's current attributes).  Otherwise, create
X		     * a new file with "vanilla" attributes as if
X		     * the argument was given as ">filename".
X		     * access(name, 2) returns 0 if we can write on
X		     * the specified file.
X		     */
X		    if (access(++ap, 2) == 0) {
X			if (freopen(ap, "a", stdout) != NULL)
X			    break;	/* Exit case statement	*/
X			perror(ap);	/* Error, can't append	*/
X			exit(IO_ERROR);	/* After access test	*/
X		    }			/* If file accessible	*/
X		}
X		/*
X		 * On vms, we want to create the file using "standard"
X		 * record attributes.  creat(...) creates the file
X		 * using the caller's default protection mask and
X		 * "variable length, implied carriage return"
X		 * attributes. dup2() associates the file with stdout.
X		 */
X		if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1
X		 || dup2(file, fileno(stdout)) == -1) {
X		    perror(ap);		/* Can't create file	*/
X		    exit(IO_ERROR);	/* is a fatal error	*/
X		}			/* If '>' creation	*/
X		break;			/* Exit case test	*/
X
X	    default:
X		argv[j++] = ap;		/* Not a redirector	*/
X		break;			/* Exit case test	*/
X	    }
X	}				/* For all arguments	*/
X	return (j);
X}
X#endif
X
END_OF_FILE
if test 7837 -ne `wc -c <'archx.c'`; then
    echo shar: \"'archx.c'\" unpacked with wrong size!
fi
# end of 'archx.c'
fi
if test -f 'readme.txt' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'readme.txt'\"
else
echo shar: Extracting \"'readme.txt'\" \(1420 characters\)
sed "s/^X//" >'readme.txt' <<'END_OF_FILE'
XThis is a suggested replacement for shar.  It is based on
Xthe archive program in Kernighan and Plauger's Software Tools,
Xbut has been heavily simplified.
X
XIt has the following advantages over shar:
X
X1. it is not tied to Unix -- thus VMS users can unpack files without
X   excessive effort.  Archc and archx should run without change on
X   all Unix and Unix lookalike systems, as well as on VMS (VaxC)
X   and all PDP-11 Decus C systems.  It has been in use for over 6
X   years.
X
X2. it does not execute the distributed image, but interprets it.  This
X   means that trojan horses cannot be concealed in distributions.
X
X3. The distribution file can be edited without damaging the archive.
X   (Also, embedded archives can be handled).
X
XIt has the following disadvantages:
X
X1. It is not as flexible as shar -- it cannot create directories or
X   access any other Unix system services.
X
X2. There is no checksum capability (it appears impossible to implement
X   checksumming in a system-independent manner).
X
XTo use, save this message.  Then, use your favorite editor to extract
Xarchx.c (delimited by lines beginning with "-h-" in column 1).  Then
Xcompile archx and run it using the command:
X	archx <this_file>
XIt should produce readme.txt, archx.c, and archc.c.
XManual pages can be produced by extracting the text delimited by
X	#ifdef DOCUMENTATION
X	...
X	#endif
X
XPlease report problems to the author:
X
XMartin Minow
Xdecvax!minow
X
END_OF_FILE
if test 1420 -ne `wc -c <'readme.txt'`; then
    echo shar: \"'readme.txt'\" unpacked with wrong size!
fi
# end of 'readme.txt'
fi
echo shar: End of shell archive.
exit 0
-- 
 _--_|\  `-_-' Peter da Silva. +1 713 274 5180.      <peter@ficc.uu.net>
/      \  'U`  Have you hugged your wolf today?  <peter@sugar.hackercorp.com>
\_.--._/       Disclaimer: commercial solicitation by email to this address
      v                    is acceptable.

jfh@rpp386.cactus.org (John F. Haugh II) (05/03/90)

In article <H9631XCxds13@ficc.uu.net> peter@ficc.uu.net (Peter da Silva) writes:
>I also agree that shars are getting out of hand. "shar" was a good idea for
>its time, but it's gotten too big, too fast. I think it's long past the
>time for a standard text archive format on Usenet.
>
>I vote for the Software Tools format:

The reason shar's were such a hot idea was that you could unwrap
shar's with tools you knew you had on the receiving side already.
The tools pre-dated the sharchive format ...

The problem some people are pointing out with the new shars is
that they are overly complex and make unarchiving on non-UNIX
machines more difficult than they need be.  A simple, exact,
format needs to be specified that is =compatible= with the tools
which already exist.  Then you can create interpreters for this
compatible format.  But don't create a completely incompatible
format expecting people to follow, because they won't ...

I am certain Warren Tucker and the other busybodies are well
intended in their efforts, however, sometimes less is more.  And
this just happens to be one of those times, in my humble opinion.
-- 
John F. Haugh II                             UUCP: ...!cs.utexas.edu!rpp386!jfh
Ma Bell: (512) 832-8832                           Domain: jfh@rpp386.cactus.org

wht@n4hgf.uucp (Warren Tucker) (05/04/90)

In article <18275@rpp386.cactus.org> jfh@rpp386.cactus.org (John F. Haugh II) writes:
>In article <H9631XCxds13@ficc.uu.net> peter@ficc.uu.net (Peter da Silva) writes:
>>I also agree that shars are getting out of hand. "shar" was a good idea for
>>
>The reason shar's were such a hot idea was that you could unwrap
>shar's with tools you knew you had on the receiving side already.
>The tools pre-dated the sharchive format ...

Like sh?  I still have that tool.  Shar 3.21, cshar, shar2 and _ALL_ the
other shars work well with the bourne shell.  Stuff useful to systems
other than UNIX should not be packed with shar3.  I would be very sorry
if auth had been packed with VMS DCL wrappers, since I _don't_ think it
works there.  Pack VMS source with DCL, insurance records with IBM tape
labels, guns with gelatin and UNIX sources with shell archives.

Really guys, to drive the point one more bloody inch into the ground,
if you are worried about the color and shape of the envelope, the
letter inside is really gonna keep you busy analyzing for aeons.
Maybe you will answer the halting question and prove correctness
in the process.

If your machine can't handle the shar format, nothing I put in them will
do more than make your compiler vomit or your command line interpreter
say "NO SUCH COMMAND: MAKE".

Anyone who thought the _alt.sources_ shar thingie was an attempt to
supplant all known shars or to shake the hypochondriac compulsive fringe
is being deluded by their own pathetic sense of self-importance.

If Haugh wants to call me a busybody, I call him a brown shirt.  Now we
are equally equal to sewage, square in flame vs.  flame.  If he flames
me again, he shall remain one or more up forever.  If a few want to make
a religious issue of a collection of bits, count me out.  I've had my
full say here.  I promise not to post on this subject again unless
someone brings up a new, cogent idea.
 
------------------------------------------------------------------
Warren Tucker, TuckerWare gatech!n4hgf!wht or wht%n4hgf@gatech.edu
McCarthyism did to cinema what ANSI did to C,  cast a great number
of characters into the void.

peter@ficc.uu.net (Peter da Silva) (05/06/90)

In article <682@n4hgf.uucp> wht@n4hgf.UUCP (Warren Tucker) writes:
> If your machine can't handle the shar format, nothing I put in them will
> do more than make your compiler vomit or your command line interpreter
> say "NO SUCH COMMAND: MAKE".

-h- article Sat May  5 18:40:07 CDT 1990 .article
I hate to disabuse someone so obviously sure of themselves, but getting a
decent personal computer like (say, just for the sake of argument) an Amiga
to eat the stuff in a typical UNIX program is pretty much a snap. There's
a freely-redistributable bourne shell clone for the Amiga... but, alas, it
lacks here documents. Anything in a typical Makefile, though, is just grist
for the mill.

At worst, you'd have to modify ${CC} and ${CFLAGS}, and maybe ${LIBS}.

Sure, we can unpack shars with unshar programs, but as shars get more and
more out of hand it gets more and more of a pain to keep mangling things.

Why not just switch to something that was (once upon a time, anyway) common,
real simple, and documented in any competent programmer's library (anyone who
doesn't have "Software Tools" needs to get it, IMHO). The Software Tools
archive format.
-t-
-- 
`-_-' Peter da Silva. +1 713 274 5180.      <peter@ficc.uu.net>
 'U`  Have you hugged your wolf today?  <peter@sugar.hackercorp.com>
@FIN  Commercial solicitation *is* accepted by email to this address.

tneff@bfmny0.UU.NET (Tom Neff) (05/06/90)

In article <--830L1ggpc2@ficc.uu.net> peter@ficc.uu.net (Peter da Silva) writes:
>Sure, we can unpack shars with unshar programs, but as shars get more and
>more out of hand it gets more and more of a pain to keep mangling things.

With proper standardization it should not be necessary for an unshar
program to handle all kinds of weird shars.  If a portable shar/unshar
pair were promulgated that incorporates detailed file information for
unshar itself, and minimal self-extraction capability for UNIX-like
environments without the unshar program, that should be enough.

>Why not just switch to something that was (once upon a time, anyway) common,
>real simple, and documented in any competent programmer's library (anyone who
>doesn't have "Software Tools" needs to get it, IMHO). The Software Tools
>archive format.

Because it doesn't self extract.  I hate to sound obvious, but if the
Software Tools format were intrinsically superior in practice we'd
probably already be using it.  (Although not to package our news article
text, hehe)

Also, my memory may be faulty but the last time I looked at ST it seemed
to me there was no provision for recording modes or ownership.  Peter may
correct me on this.

I would repeat my suggestion, which is to adopt a minimal shar that has
header comments of the form

	#%# Name=.article Mode=0644 Mtime=641968578 Owner=tneff
	#%# Checksum=1943729 Type=text

and issue shar/unshar programs that generate and decode them.  The user
can ignore some or all header info with option switches to unshar.

peter@ficc.uu.net (Peter da Silva) (05/07/90)

In article <15462@bfmny0.UU.NET> tneff@bfmny0.UU.NET (Tom Neff) writes:
> With proper standardization it should not be necessary for an unshar
> program to handle all kinds of weird shars.

I think that's optimistic. Consider all the companies maintaining backwards
compatible bugs in their software because people ignored *enforceable*
standards!

[ I said... why not Software Tools format? ]
> Because it doesn't self extract.

Neither does a shar on anything but UNIX, nor a DCL-shar on anything but
VMS, etc...

> Also, my memory may be faulty but the last time I looked at ST it seemed
> to me there was no provision for recording modes or ownership.

Modes? You mean like Read, Write, Extend, Delete, Execute, Set-User-ID,
Set-Group-ID, etc...? Or do you mean like Stream-LF vs Fortran Carriage
Control vs Fixed Record versus Card Image etc...?

Ownership? What's that? Or does that relate to ACLs...?

Things like that are inherently O/S specific, and if they're important
include a shell script or DCL command file to set them. That can also be
automatically generated.

How about making everything not between a -h- and -t- a comment, and make
the file look like this:

sed -e 1d -e s/^X// > file << '-t- file'
-h- file Sun May  6 17:09:59 CDT 1990 ./file
Xstuff
Xand more stuff
-t- file
etcetera...

As for coded comments...

I much prefer having an automatically-generated shar-mode.sh file appended
to the archive, like the MANIFEST is in some sharchives right now.
-- 
`-_-' Peter da Silva. +1 713 274 5180.      <peter@ficc.uu.net>
 'U`  Have you hugged your wolf today?  <peter@sugar.hackercorp.com>
@FIN  Commercial solicitation *is* accepted by email to this address.

jfh@rpp386.cactus.org (John F. Haugh II) (05/07/90)

In article <682@n4hgf.uucp> wht@n4hgf.UUCP (Warren Tucker) writes:
>Like sh?  I still have that tool.  Shar 3.21, cshar, shar2 and _ALL_ the
>other shars work well with the bourne shell.  Stuff useful to systems
>other than UNIX should not be packed with shar3.

Such software chauvinism is inappropriate in today's environment.  Much
of the software on the net is "useful" to systems other than UNIX.

I would suggest, then, that everyone do as I did and dump Warren's
shar sources in the trash can.  Software which doesn't have to be
O/S specific, but which is anyway, is very poorly designed.  I have
no place in my heart for intentionally ill-designed software.
-- 
John F. Haugh II                             UUCP: ...!cs.utexas.edu!rpp386!jfh
Ma Bell: (512) 832-8832                           Domain: jfh@rpp386.cactus.org

rsalz@bbn.com (Rich Salz) (05/07/90)

In <--830L1ggpc2@ficc.uu.net> peter@ficc.uu.net (Peter da Silva) writes:
>Why not just switch to something that was (once upon a time, anyway) common,
>...  The Software Tools archive format.
It doesn't protect against leading characters that can cause problems, like
	[SPACE] . ~

/r$
-- 
Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.

peter@ficc.uu.net (Peter da Silva) (05/08/90)

In article <2499@litchi.bbn.com> rsalz@bbn.com (Rich Salz) writes:
> In <--830L1ggpc2@ficc.uu.net> peter@ficc.uu.net (Peter da Silva) writes:
> >Why not just switch to something that was (once upon a time, anyway) common,
> >...  The Software Tools archive format.

> It doesn't protect against leading characters that can cause problems, like
> 	[SPACE] . ~

So? Make it so. It's been widely distributed in source format. I'm sure that
anyone reading this can find the Ratfor or Pascal source in half a day at
most, simply by asking their colleagues if they have a copy of K&P.
-- 
`-_-' Peter da Silva. +1 713 274 5180.      <peter@ficc.uu.net>
 'U`  Have you hugged your wolf today?  <peter@sugar.hackercorp.com>
@FIN  Commercial solicitation *is* accepted by email to this address.

jand@kuling.UUCP (Jan Dj{rv) (05/08/90)

In article <18275@rpp386.cactus.org> jfh@rpp386.cactus.org (John F. Haugh II) writes:
>
>The problem some people are pointing out with the new shars is
>that they are overly complex and make unarchiving on non-UNIX
>machines more difficult than they need be.

I think the new complex shar's are fine. Let me explain why. At my work we
have some different UNIX machines, but we are not connected to the outside
world except on a rented telephone line (4800 baud). So I'm reading news
and ftp'ing on other machines. When I want to take home some source or
binary these shar's are a wonderful tool, they compress uudecode and split
the input so I can mail it home. I can't do that with a simple shar.

>
>I am certain Warren Tucker and the other busybodies are well
>intended in their efforts, however, sometimes less is more.  And
>this just happens to be one of those times, in my humble opinion.

If a standard for sharchives were to emerge, why not put in a '-standard'
option to the complex shar's? Let the moderators reject source wrapped
in a nonstandard way. Then everybody would be happy (no?).

But don't say these new shar's are a bad thing. Some of us need them.

	Jan D.

pete@stc.co.uk (Peter Kendell) (05/26/90)

In <18123@well.sf.ca.us> jef@well.sf.ca.us (Jef Poskanzer) writes:

>In the referenced message, tneff@bfmny0.UU.NET (Tom Neff) wrote:
>}                       UCT transmission savings are illusory since most
>}newsfeeds are compressed anyway.

Absolutely.

>The reason I don't like compressed tar files is that you have to have
>all parts before you can begin to unpack and peruse.

The reason *I* don't like compressed tar files is that not all 
machines (Curse Intel for ever!) can handle 16-bit compression.

Leave compression to the newsfeed. My upstream site *knows* to 
use 12 bits.
-- 
----------------------------------------------------------------------------
|		  Peter Kendell <pete@stc.co.uk>	        	   |
|				...mcvax!ukc!stc!pete		           |
----------------------------------------------------------------------------

xanthian@zorch.SF-Bay.ORG (Kent Paul Dolan) (05/29/90)

In article <18123@well.sf.ca.us> Jef Poskanzer <jef@well.sf.ca.us> writes:
>In the referenced message, tneff@bfmny0.UU.NET (Tom Neff) wrote:
>}                       UCT transmission savings are illusory since most
>}newsfeeds are compressed anyway.
>
>Most newsfeeds are compressed, yes.  But most news is transmitted over
>uncompressed feeds.  Why?  Because NNTP feeds are not compressed.
>
>The reason I don't like compressed tar files is that you have to have
>all parts before you can begin to unpack and peruse.

Beyond that, shar is widely used to port sources away from Unix (granted
zoo is more appropriate, shar's _are_ human readable), and "tar" is not
often available on the target systems.

I have, for example, many files ported to my Amiga in shar format, and
an "unshar" program that imitates the actions of "sh" well enough to unpack
most common shar output.

Kent, the man from xanth.
(xanthian@zorch.sf-bay.org)

stevem@sauron.Columbia.NCR.COM (Steve McClure) (05/30/90)

In article <1990May29.043552.15964@zorch.SF-Bay.ORG> xanthian@zorch.SF-Bay.ORG (Kent Paul Dolan) writes:
[ stuff deleted ]
>
>Beyond that, shar is widely used to port sources away from Unix (granted
>zoo is more appropriate, shar's _are_ human readable), and "tar" is not
>often available on the target systems.
>
>I have, for example, many files ported to my Amiga in shar format, and
>an "unshar" program that imitates the actions of "sh" well enough to unpack
>most common shar output.

I use tar to transport files between my Amiga and Unix boxes.  Works great
with my tape drive.  There is a pdtar on FF316 or so.  It is supposed to be
generic enough to port to individual systems although I have not seen an MS-DOS
binary yet.


-- 
----------------------------------------------------------------------
Steve		email: Steve.McClure@Columbia.NCR.COM	803-791-7054
The above are my opinions, which NCR doesn't really care about anyway!
CAUSER's Amiga BBS! | 803-796-3127 | 8pm-8am 8n1 | 300/1200/2400

wkt@rodos2.cs.adfa.oz.au (Warren Toomey) (06/01/90)

In article [...], xanthian@zorch.SF-Bay.ORG (Kent Paul Dolan) writes:
> 
> Beyond that, shar is widely used to port sources away from Unix (granted
> zoo is more appropriate, shar's _are_ human readable), and "tar" is not
> often available on the target systems.
> 
> I have, for example, many files ported to my Amiga in shar format, and
> an "unshar" program that imitates the actions of "sh" well enough to unpack
> most common shar output.

Sorry, I just stumbled onto this thread, viz. shar files being a security
hole. Below is a quick little hack of an unshar, which can unshar files
that used cat, sed & gres, but doesn't invoke a shell. There are two main
options: -t to show a table of contents (nice) & -x to extract a particular
file. It was originally posted in the [comp.os.minix] newsgroup, but compiles
under Unix & QuickC aka MS-DOS (you need getopt!). It's primitive, but
security holes are minimal.

	Warren Toomey
------

echo x - unshar.c
sed '/^X/s///' > unshar.c << '/'
X/* unshar - extract files from a shell archive	Author: Warren Toomey */
X
X
X/* Unshar - extract files from shell archive 
X *
X * Written by Warren Toomey [wkt@cs.adfa.oz.au@munnari.oz@uunet.uu.net] You may
X * freely copy or give away this source as long as this notice remains
X * intact. 
X *
X * Definitions used by unshar 
X */
X
X
X#include <stdio.h>
X#include <stdlib.h>
X#include <string.h>
X
X/* Methods of unsharing; UNKNOWN is also firstword()'s "no match" result */
X#define UNKNOWN	0
X#define BRUTAL	1
X
X/* Whitespace indicators: the "how" argument of fgetline()/getline() */
X#define WHITE	0
X#define NOWHITE 1
X
X/* Leading character indicators: the "how" argument of extract() */
X#define NOX	0
X#define YESX	1
X
X/* Emulation types available; each value is an index into token[] */
X
X#define NUMTOKS    4		/* Must change NUMTOKS to equal the */
X /* UNKNOWN is slot 0 *//* number of entries in token[] */
X#define SED	   1
X#define GRES 	   2
X#define CAT	   3
X
X/* The list of emulation types: command names matched by firstword(). */
Xstatic char *token[NUMTOKS]=
X{
X  "",
X  "sed",
X  "gres",
X  "cat"
X};
X
X
X/* Misc. constants */
X#define BUFSIZE	512		/* Size of line buffer */
X
X/* Global variables */
Xint table;			/* Nonzero: list names, don't extract */
Xint verbose;			/* Nonzero: print debugging trace */
Xint numext;			/* Number of entries used in exfile[] */
Xint binfile;			/* Set by fgetline() on a char > 127 */
Xchar *exfile[100];		/* Files to extract (-x arguments) */
X
X
X#define getline(x,y)	fgetline(stdin,x,y)	/* fgetline() on stdin */
X
Xint fgetline(zin, how, buf)	/* Get a line from a file */
XFILE *zin;
Xint how;			/* Ignore leading whitespace if */
Xchar *buf;			/* how == NOWHITE */
X{
X  int ch = 0;
X
X  *buf = 0;			/* Null the buffer */
X  if (how == NOWHITE) {		/* If skip any whitespace */
X	while (((ch = fgetc(zin)) == ' ') || (ch == '\t'));
X	if (ch == EOF) return(EOF);	/* Returning EOF or 0 */
X	if (ch == '\n') return (0);
X	*buf++ = ch;		/* Put char in buffer */
X  }
X  while ((ch = fgetc(zin)) != '\n') {	/* Now get the line */
X	if (ch == EOF) {
X		*buf = 0;
X		return(EOF);
X	}
X	if (ch > 127) {
X		binfile = 1;
X		return(0);
X	}
X	*buf++ = ch;
X  }
X
X  *buf = 0;			/* Finally null-terminate the buffer */
X  return(0);			/* and return */
X}
X
X
X
Xchar *getstring(buf)		/* Get the next string from the buffer */
Xchar *buf;			/* ignoring any quotes */
X{
X  char out[BUFSIZE];
X  char *temp = out;
X  char inquotes = 0, ok = 1;
X  while ((*buf == ' ') || (*buf == '\t'))
X	buf++;			/* Skip whitespace */
X
X  if (verbose) printf("In getstring...\n");
X  *temp = 0;
X  while (ok) {			/* Parse line */
X	switch (*buf) {
X	    case '\"':
X	    case '\'':
X		buf++;
X		inquotes = !inquotes;	/* Toggle inquotes */
X		break;
X	    case 0:
X	    case '\n':		/* Stop on <, >, NULL */
X	    case '>':		/* \n, and sometimes */
X	        case '<':	ok = 0;	break;	/* space & tab */
X	    case '\t':
X	    case ' ':
X		if (!inquotes) ok = 0;
X	    case '\\':
X		if (!inquotes) {/* Ignore backquotes */
X			buf++;
X			break;
X		}
X	    default:
X		*temp++ = *buf++;	/* Copy chars :-) */
X	}
X  }
X  *temp = 0;
X  if (verbose) printf("Returning *%s*\n", out);
X  return(out);
X}
X
X
Xint firstword(buf)			/* Return token value of first word */
Xchar *buf;			/* in the buffer. Assume no leading */
X{				/* whitespace in the buffer */
X  int i;
X
X  for (i = 1; i < NUMTOKS; i++)
X	if (strncmp(buf, token[i], strlen(token[i])) == 0) return(i);
X
X  return(UNKNOWN);
X}
X
X
Xint mustget(s1)			/* Return 1 if s1 is in the list of  */
Xchar *s1;			/* files to extract. Return 0 if not */
X{
X  int i;
X
X  if (numext == 0) return(0);
X  for (i = 0; i < numext; i++)
X	if (!strcmp(s1, exfile[i])) return(1);
X  return(0);
X}
X
X
Xvoid extract(how, file, end, lead)	/* Extract file, up until end word */
Xint how;			/* If how==YESX, then ignore lead   */
Xchar *file;			/* character on every line */
Xchar *end;
Xint lead;
X{
X  FILE *zout;
X  char line[BUFSIZE];
X  char *temp;
X  int ch;
X
X  zout = fopen(file, "w");	/* Open output file */
X  if (zout == NULL) {
X	perror("unshar1");
X	return;
X  }
X  while (1) {
X	binfile = 0;
X	ch = getline(WHITE, line);	/* Get a line of file */
X	temp = line;
X	if (binfile || (ch == EOF)) {
X		fprintf(zout, "%s\n", line);
X		fclose(zout);
X		return;
X	}
X	if ((how == YESX) && (*temp == lead)) temp++;	/* Skip any lead */
X
X	if (strcmp(temp, end) == 0) {	/* If end word */
X		fclose(zout);	/* close the file */
X		return;
X	}
X	fprintf(zout, "%s\n", temp);
X  }
X}
X
X
Xvoid getnames(buf, file, word)	/* Get the file & end word */
Xchar *buf, *file, *word;	/* from the buffer */
X{
X  char *temp;
X
X  temp = buf;
X  if (verbose) printf("Getnames: buf is %s\n", buf);
X
X  while (*temp != 0) {		/* Scan along buffer */
X	switch (*temp) {	/* Get file or end word */
X	    case '>':
X		strcpy(file, getstring(++temp));	/* Get the file name */
X		break;
X	    case '<':
X		if (*(++temp) == '<') ++temp;	/* Skip 2nd < */
X		strcpy(word, getstring(temp));	/* Get next word */
X		break;
X	    default:
X		temp++;
X	}
X  }
X}
X
X
X
Xvoid disembowel()
X{				/* Unshar brutally! */
X  char buf[BUFSIZE];		/* Line buffer */
X  char file[BUFSIZE];		/* File name */
X  char word[BUFSIZE];		/* Word buffer */
X  int ch, x;
X
X  if (verbose) printf("Entering disembowel\n");
X  x = 'X';			/* Leading X character */
X  while (1) {
X	binfile = 0;
X	ch = getline(NOWHITE, buf);	/* Get a line from file */
X	if (ch == EOF) return;
X	if (binfile) continue;
X
X	switch (firstword(buf)) {	/* Extract, depending on first word */
X	    case CAT:
X		if (verbose) printf("About to do getnames\n");
X		getnames(buf, file, word);
X		if (table == 0) {
X			if ((numext == 0) || (mustget(file))) {
X				printf("unshar: Extracting  %s\n", file);
X				if (verbose)
X					printf("        stopping at %s\n", word);
X				extract(NOX, file, word, x);
X			}
X		} else
X			printf("  %s\n", file);
X		break;
X	    case GRES:
X	    case SED:
X		if (verbose) printf("About to do getnames\n");
X		getnames(buf, file, word);
X		if (table == 0) {
X			if ((numext == 0) || (mustget(file))) {
X				printf("unshar: Extracting  %s\n", file);
X				if (verbose)
X					printf("        stopping at %s\n", word);
X				extract(YESX, file, word, x);
X			}
X		} else
X			printf("  %s\n", file);
X		break;
X	    default:
X		break;
X	}
X  }
X}
X
X
X
Xusage()
X{
X  fprintf(stderr, "Usage: unshar [-t] [-b] [-v] [-xfile] [file(s)]\n");
X  exit(0);
X}
X
X
Xmain(argc, argv)
Xint argc;
Xchar *argv[];
X{
X  extern int optind;
X  extern char *optarg;
X  int i, c, first;
X
X  FILE *zin;			/* Dummy file descriptor */
X  int method;			/* Method of unsharing */
X
X  method = BRUTAL;		/* Only BRUTAL currently available */
X  table = 0;			/* Don't generate a table */
X  verbose = 0;			/* Nor be very verbose */
X  numext = 0;			/* Initially no files to extract */
X
X
X  while ((c = getopt(argc, argv, "x:tbv")) != EOF) switch (c) {
X	    case 't':
X		table = 1;	/* Get the various options */
X		break;
X	        case 'b':	method = BRUTAL;	break;
X	        case 'v':	verbose = 1;	break;
X	    case 'x':
X		exfile[numext] = (char *) malloc(strlen(optarg) + 1);
X		strcpy(exfile[numext++], optarg);
X		break;
X	    default:
X		usage();
X	}
X
X  if (argc == 1)
X	first = argc;		/* Find first file argument */
X  else
X	for (first = 1; first < argc; first++)
X		if (argv[first][0] != '-') break;
X
X  if (first == argc) {		/* If no file argument *//* use stdin only */
X	switch (method) {
X	    case BRUTAL:
X		disembowel();	/* Unshar brutally! */
X		break;
X	    default:
X		fprintf(stderr, "unshar: Unknown method of unsharing\n");
X		exit(1);
X	}
X  } else
X	for (i = first; i < argc; i++) {	/* open stdio with every
X						 * file */
X		if (table) printf("%s:\n", argv[i]);
X		fclose(stdin);
X		if ((zin = fopen(argv[i], "r")) == NULL) {
X			perror("unshar2");
X			exit(1);
X		}
X		switch (method) {
X		    case BRUTAL:
X			disembowel();	/* Unshar brutally! */
X			break;
X		    default:
X			fprintf(stderr, "unshar: Unknown method of unsharing\n");
X			exit(1);
X		}
X	}
X  exit(0);
X}
/
    Warren Toomey VK2XWT, really enjoying this.
     Deep in the bowels of ADFA Comp Science.
  `Happy birthday to you, Happy birthday to you!'