[comp.os.minix] tarscan.c -- a tarfile lister/splitter

paula@bcsaic.UUCP (Paul Allen) (11/22/89)

I mentioned this program a week or so ago and got a little mail
about it, so here it is.  Tarscan is descended from a program I
wrote a long time ago to list the file names and Subject: lines
of News archive tapes.  The primary motivation for adding features 
and porting it to Minix was the need to move ~20Mb or so of Minix
archives to my home machine via floppies.  I also wanted to be able
to store Minix backups as something more convenient than huge
multi-floppy tarfiles.  I wanted each floppy to be a stand-alone 
tar file, but I didn't want to have to hand-craft each one.  So I 
went to my archives and dragged out my old tarscan program.  I had 
posted it to net.sources (or whatever it was called back before 
the Great Renaming) and Rich $alz sent me back a 'cleaned-up' 
version.  It was Rich's pretty version that I started with this 
time around.

This new version of tarscan can do two things: list the contents
of a tarfile with any Subject: lines, and split the tarfile up
into multiple independent tarfiles that don't exceed a given size.
The input tarfile is expected to be on stdin.

The -v flag turns on the listing feature.  The listing goes to
stdout.  If you also ask for the tarfile to be split up, the
listing contains a header line marking the beginning of each output
volume.  If a Subject: line is found in the first block of a file,
it is printed after the file's name in the listing.

The -f option specifies a filename for the tarfile to be written
on.  This can be a device name, like /dev/at0.  The -f flag will
generally be used in conjunction with the -s flag.

The -s option specifies the maximum number of 512-byte blocks to
be written to the output tar files.  If the output file is a
device, tarscan prompts for each volume to be inserted.  If
the output is a regular file, tarscan uses the specified name
as a base on which to append sequential numbers, starting with
000.

Problems, things to watch for:

I had trouble getting tarscan to reliably parse the output of Sun's
tar program, and I never really investigated it.  I used John
Gilmore's public-domain tar instead, and had no trouble with it.

Tarscan will not break a member of a tar archive in the middle.
Thus, there will always be some wasted space at the end of each
volume.  I haven't tried to find out what happens if you feed it
a tar file that contains a member that is larger than the specified
output volume size.  I don't think it will be pretty!  :-)

I developed this code on my Sun under 4.0.1 and used it to move
a whole lot of stuff to my home machine.  I then moved the tarscan
code over to Minix and fixed the problems related to word-size and
got the device stuff working.  The version I'm posting compiles and
appears to run as expected on both my Sun and PC Minix 1.3d.  If
you have trouble with it, I'd like to hear about it.

Tarscan needs getopt.c.  It's simple enough that I haven't bothered
with a Makefile or a man page.

Enjoy!

Paul L. Allen                       | pallen@atc.boeing.com
Boeing Advanced Technology Center   | ...!uw-beaver!bcsaic!pallen
---------------------------- cut here ----------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	tarscan.c
# This archive created: Tue Nov 21 23:19:36 1989
export PATH; PATH=/bin:$PATH
echo shar: extracting "'tarscan.c'" '(5871 characters)'
if test -f 'tarscan.c'
then
	echo shar: will not over-write existing file "'tarscan.c'"
else
sed 's/^X//' << \SHAR_EOF > 'tarscan.c'
X/*
X**  TARSCAN
X**  News archive tape scanner/splitter.
X**
X**  Reads a tar news archive, optionally splitting it into smaller
X**  archives or listing the contained files and their Subject lines
X**  on stdout.  Useful for creating a multi-floppy archive when
X**  you might want to retrieve just one file from the 27th floppy
X**  of an archive.  Each volume is a complete stand-alone tar archive.
X**
X**  Arguments:
X**
X**	One or more of:
X**
X**	-v		Write file names and Subject lines on stdout.
X**			(Mainly for news articles or mail.)  If the
X**			-s switch is present, tarscan will announce
X**			volume changes on stdout.
X**
X**	-f filename	Copy the input tarfile to the named file.
X**
X**	-s size		Only used with -f.  Break the tarfile into
X**			individual tarfiles that don't exceed 'size'
X**			tape blocks.  If 'filename' is a device,
X**			the user will be prompted to change media
X**			when appropriate.  If 'filename' is not a
X**			device, then the input tarfile is copied
X**			into a series of files with 'filename' as
X**			a base and sequential numbers as a suffix.
X**			(Beware of 14-character filenames!)
X**
X**	Original news archive Subject lister by Paul Allen.
X**	Major style beautification by Rich $alz.
X**	Tarfile splitting logic added by Paul Allen.
X**
X**	I consider this code to be in the public domain.
X**
X*/
X#include <stdio.h>
X#include <fcntl.h>
X
X#define TBLOCK		512
X#define NAMSIZ		100
X#define CLUMP		1
X
Xextern char	*strcpy();
X
Xextern char	*optarg;
Xextern int	 optind;
X
Xtypedef union {			/* one TBLOCK-byte block as returned by myread() */
X    char	 dummy[TBLOCK];	/* makes the union at least TBLOCK bytes long */
X    struct {			/* view of a block that holds a member header */
X	char	 name[NAMSIZ];	/* member name; main() gives up unless the last byte is 0 */
X	char	 mode[8];	/* not referenced elsewhere in this file */
X	char	 uid[8];	/* not referenced elsewhere in this file */
X	char	 gid[8];	/* not referenced elsewhere in this file */
X	char	 size[12];	/* main() accumulates the octal digits found here */
X	char	 mtime[12];	/* not referenced elsewhere in this file */
X	char	 chksum[8];	/* not referenced elsewhere in this file */
X	char	 linkflag;	/* when this is '1' main() treats the size as 0 */
X	char	 linkname[NAMSIZ];	/* not referenced elsewhere in this file */
X    } D;
X} HEADER;
X
X#define OCTAL(x) (((x) >= '0') && ((x) <= '7'))
X
Xvoid
Xusage ()
X{
X    fprintf (stderr, "Usage: tarscan [-v] [-f filename [-s size] ] <tarfile\n");
X    exit (1);
X}
X
Xstatic HEADER *
Xmyread(mt)
X    int			 mt;
X{
X    static HEADER	 buff[CLUMP];
X    static int		 pos = CLUMP;
X    int			 len;
X
X    if (pos == CLUMP) {
X	if ((len=read(mt, (char *)buff, sizeof buff)) == 0) {
X	    return ((HEADER *) 0);
X	} else if (len == -1) {
X	    perror ("read");
X	    exit (1);
X	}
X	pos = 0;
X    }
X    return(&buff[pos++]);
X}
X
X
Xmain(ac, av)
X    int			 ac;
X    char		*av[];
X{
X    register HEADER	*H;
X    register char	*p;
X    register char	*q;
X    register char	*r;
X    register char	*t;
X    char		 subject[256];
X    int			 Nblocks;
X    register int	 first;
X    long int		 size;
X    char		 Name[NAMSIZ];
X    int			 outblocks;
X    int			 outvol = 0;
X    int			 outfile = -1;
X    int			 volume_size;
X    char		 zbuff [TBLOCK];
X    char		 outname [100];
X    char		 t_outname [100];
X    int			 verbose = 0;
X    int			 make_copy = 0;
X    int			 limit_size = 0;
X    int			 start;
X    int			 c;
X    int			 device;
X    int			 ttyfd;
X
X    for (p=zbuff,q=zbuff+TBLOCK; p<q; (*p++)=0) ;
X
X    while ((c=getopt(ac, av, "vf:s:")) != -1)
X	switch (c) {
X	case 'v':
X	    verbose = 1;
X	    fprintf (stderr, "verbose flag\n");
X	    break;
X	case 'f':
X	    make_copy = 1;
X	    strcpy (outname, optarg);
X	    device = strncmp ("/dev/", outname, 5) == 0;
X	    fprintf (stderr, "make copy on %s\n", outname);
X	    break;
X	case 's':
X	    limit_size = 1;
X	    volume_size = atoi(optarg);
X	    fprintf (stderr, "limit output volumes to %d blocks\n", volume_size);
X	    break;
X	default:
X	    usage ();
X	    exit (1);
X	}
X
X    if ((verbose + make_copy + limit_size) == 0) {
X	usage ();
X	exit (1);
X    }
X    if (limit_size && !make_copy) {
X	usage ();
X	exit (1);
X    }
X
X    if (make_copy && device) {
X	ttyfd = open ("/dev/tty", O_RDONLY);
X	if (ttyfd == -1) {
X	    perror ("open tty");
X	    exit(1);
X	}
X    }
X
X    if (make_copy) {
X        if (!limit_size) {
X	    if (device) {
X	        outfile = open (outname, O_WRONLY);
X	    } else {
X	        outfile = creat (outname, 0777);
X	    }
X	    if (outfile == -1) {
X	        perror ("open output file");
X	        exit (1);
X	    }
X        } else {
X	    outblocks = volume_size;
X        }
X    }
X    for (first = 0, Nblocks = 0; H = myread(0); ) {
X	if (Nblocks) {
X	    Nblocks--;
X	    if (first && verbose) {
X		for (p=H->D.name, q=p+TBLOCK, start=1; p<q; p++) {
X		    if (start && (strncmp(p, "Subject", 7)==0)) {
X			for (t=subject, r=p+8; r<q && *r != '\n'; r++) *(t++) = *r;
X			*t = 0;
X			printf ("	%s", subject);
X			break;
X		    }
X		    start = *p == '\n';
X		}
X		printf ("\n");
X	    }
X	    first = 0;
X	}
X	else if (H->D.name[0]) {
X	    if (H->D.name[NAMSIZ - 1]) {
X		H->D.name[NAMSIZ - 1] = 0;
X		fprintf(stderr,
X			"Name \"%s\" is more than %d bytes long, I give up!\n",
X			H->D.name,
X			NAMSIZ);
X		exit(1);
X	    }
X	    size = 0L;
X	    if (H->D.linkflag != '1')
X		for (p = H->D.size,q=H->D.size+12; p<q ; p++) {
X		    if (OCTAL(*p)) size = (size << 3) + *p - '0';
X		}
X	    Nblocks = size / TBLOCK + ((size % TBLOCK) > 0);
X	    first = Nblocks > 0;
X	    if (make_copy && ((outblocks+Nblocks+3) > volume_size)) {
X		if (outfile > 0) {
X		    write (outfile, zbuff, TBLOCK);
X		    write (outfile, zbuff, TBLOCK);
X		    close (outfile);
X		}
X		if (device) {
X		    fprintf (stderr, "Insert volume %d and hit any key\n", outvol);
X		    if (read (ttyfd, &c, 1) != 1) {
X			perror("read keyboard");
X			exit(1);
X		    }
X		    outfile = open (outname, O_WRONLY);
X		} else {
X		    sprintf (t_outname, "%s%03d", outname , outvol);
X	            outfile=creat(t_outname, 0777);
X		}
X		if (outfile == -1) {
X		    perror ("open output file");
X		    exit(1);
X	        } 
X		if (verbose && limit_size) 
X		    if (device) 
X			printf ("\n\nVolume %d on %s\n\n", outvol, outname);
X		    else
X			printf ("\n\nTarfile: %s\n\n", t_outname);
X		outvol++;
X		outblocks = 0;
X	    }
X	    if (verbose) printf ("%s", H->D.name);
X	} 
X	if (make_copy) {
X	    if (write(outfile, (char *) H, TBLOCK) != TBLOCK) { 
X		perror ("write tape block");
X		exit (1);
X	    }
X	    outblocks++;
X	}
X    }    
X
X    exit(0);
X}
SHAR_EOF
if test 5871 -ne "`wc -c < 'tarscan.c'`"
then
	echo shar: error transmitting "'tarscan.c'" '(should have been 5871 characters)'
fi
fi # end of overwriting check
#	End of shell archive
exit 0

-- 
------------------------------------------------------------------------
Paul L. Allen                       | pallen@atc.boeing.com
Boeing Advanced Technology Center   | ...!uw-beaver!bcsaic!pallen