[net.bugs.4bsd] bug in 4.3bsd file.c

ken@turtlevax.UUCP (07/17/86)

The 4.3bsd code for file.c has a bug in that compressed files are not
properly recognized.  Laziness is the cause of the bug, manifested
by not properly parenthesizing compound logical expressions.  A context
diff follows:

*** /usr/src/usr.bin/file.c.old	Thu Jul 17 03:06:02 1986
--- /usr/src/usr.bin/file.c	Thu Jul 17 03:07:35 1986
***************
*** 174,182 ****
  		return;
  	}
  
! 	if (buf[0] == '#' && buf[1] == '!' && shellscript(buf+2, &mbuf))
  		return;
! 	if (buf[0] == '\037' && buf[1] == '\235') {
  		if (buf[2]&0x80)
  			printf("block ");
  		printf("compressed %d bit code data\n", buf[2]&0x1f);
--- 174,182 ----
  		return;
  	}
  
! 	if ((buf[0] == '#') && (buf[1] == '!') && shellscript(buf+2, &mbuf))
  		return;
! 	if ((buf[0] == '\037') && (buf[1] == '\235')) {
  		if (buf[2]&0x80)
  			printf("block ");
  		printf("compressed %d bit code data\n", buf[2]&0x1f);
-- 
Ken Turkowski @ CIMLINC, Menlo Park, CA
UUCP: {amd,decwrl,hplabs,seismo}!turtlevax!ken
ARPA: turtlevax!ken@DECWRL.DEC.COM

donn@utah-cs.UUCP (Donn Seeley) (07/18/86)

Uh, Turk, the original code's lack of parentheses was quite proper.  I
suspect that you had another problem -- that you weren't running the
binary which was generated from the source file.  The change to
identify compressed files was made after the 4.3 beta distribution; run
'what' on the old binary to see if you had the old (rev. 4.10) or the
new (4.12) code.  The 4.3 distribution 'file' works fine on compressed
files (I tried it at Berkeley; Utah's still running 4.3 beta, alas).

There WAS a nasty parenthesis bug in the 4.10 version, but it was a
problem with the code for distinguishing character devices from block
devices!  The bug was fixed in 4.12, with the immortal sccs comment,
'who wrote this code anyway?'

Donn Seeley    University of Utah CS Dept    donn@utah-cs.arpa
40 46' 6"N 111 50' 34"W    (801) 581-5668    decvax!utah-cs!donn

chris@umcp-cs.UUCP (07/19/86)

In article <3857@utah-cs.UUCP> donn@utah-cs.UUCP (Donn Seeley) writes:
>The change to identify compressed files was made after the 4.3 beta
>distribution;

After about 2 July 1985?  Hey, that was *my* hack....

Did anyone include the code to recognise tar files?  How about CVL
format picture files?

Just for grins, below are all our local hacks (not all mine).  I
dropped recognition of compacted files from said hacks, as compact
is `out of favour'.  I have to apologise in advance for the
condition (or lack thereof) of these; indeed, the entire `file'
program is in need of a rewrite, but I have not the motivation.

RCS file: RCS/file.c,v
retrieving revision 1.1
diff -c2 -r1.1 file.c
*** /tmp/,RCSt1002676	Fri Jul 18 23:55:17 1986
--- file.c	Sat Oct 12 12:06:02 1985
***************
*** 34,37 ****
--- 34,40 ----
  	"alias", "breaksw", "endsw", "foreach", "limit",  "onintr",
  	"repeat", "setenv", "source", "path", "home", 0 };
+ char *simpl[] = {
+ 	"int", "real", "string", "char", "proc", "ext", "entry", "define",
+ 	"INT", "REAL", "STRING", "CHAR", "PROC", "EXT", "ENTRY", "DEFINE", 0};
  int	ifile;
  
***************
*** 132,136 ****
  		return;
  	}
! 	switch(*(int *)buf) {
  
  	case 0413:
--- 135,140 ----
  		return;
  	}
! whatsit:
! 	switch(*(u_short *)buf) {
  
  	case 0413:
***************
*** 145,148 ****
--- 149,157 ----
  		return;
  
+ #ifdef	Z80MAGIC		/* weirdo Z80 fake executables */
+ 	case Z80MAGIC:
+ 		printf("Z-80 ");
+ #endif
+ 
  	case 0407:
  exec:
***************
*** 153,156 ****
--- 162,167 ----
  		if (mbuf.st_mode & S_ISVTX)
  			printf("sticky ");
+ 		if (((short *)buf)[1])
+ 			printf("PDP-11 ");
  		printf("executable");
  		if(((int *)buf)[4] != 0) {
***************
*** 162,165 ****
--- 173,196 ----
  		return;
  
+ 	/* Test for executable from wrong type of machine. */
+ #define SWAP(x)	(((x&0377)<<8)|((x>>8)&0377))
+ 	case 0:
+ 		switch (((short *)buf)[1]) {
+ 			case SWAP(0407): case SWAP(0410): case SWAP(0413):
+ #ifdef VAX
+ 			case SWAP(0414): case SWAP(0415): case SWAP(0416):
+ 				printf("PYRAMID executable\n");
+ #else
+ #  ifdef PYRAMID
+ 			case SWAP(0411):
+ 				printf("VAX executable\n");
+ #  else
+ 				printf("screwed up executable\n");
+ #  endif PYRAMID
+ #endif VAX
+ 				return;
+ 		}
+ 		break;
+ 
  	case 0177555:
  		printf("very old archive\n");
***************
*** 173,176 ****
--- 204,265 ----
  		printf("cpio data\n");
  		return;
+ 
+ 	case 044520:    /* 'PICT' w parity on the 'T' */
+ 		if ((((short *)buf)[1] & 0177777) == 0152103) {
+ 			printf("CVL format picture\n");
+ 			return;
+ 		}
+ 
+ 	default: {	/* test for compressed files */
+ 		int fds1[2], fds2[2], pid, wpid;
+ 
+ 		if (buf[0] != '\037' || buf[1] != '\235')
+ 			break;
+ 		printf("compressed");
+ 		fflush(stdout);
+ 		if (pipe(fds1))
+ 			goto cantexpand;
+ 		if (pipe(fds2)) {
+ 			close(fds1[0]);
+ 			close(fds1[1]);
+ 			goto cantexpand;
+ 		}
+ 		if ((pid = fork()) < 0) {
+ 			close(fds2[0]);
+ 			close(fds2[1]);
+ 			close(fds1[0]);
+ 			close(fds1[1]);
+ 			goto cantexpand;
+ 		}
+ 		if (pid == 0) {
+ 			close(ifile);
+ 			dup2(fds1[0], 0);
+ 			close(fds1[0]);
+ 			close(fds1[1]);
+ 			dup2(fds2[1], 1);
+ 			close(fds2[0]);
+ 			close(fds2[1]);
+ 			execl("/usr/ucb/uncompress", "uncompress", (char *)0);
+ 			execl("/usr/ucb/compress", "compress", "-d", (char *)0);
+ 			execlp("uncompress", "uncompress", (char *)0);
+ 			_exit(1);
+ 		}
+ 		(void) write(fds1[1], buf, in);
+ 		close(fds1[0]);
+ 		close(fds1[1]);
+ 		close(fds2[1]);
+ 		in = read(fds2[0], buf, BUFSIZ);
+ 		close(fds2[0]);
+ 		while ((wpid = wait((int *)0)) > 0 && wpid != pid)
+ 			;
+ 		if (in > 0) {
+ 			putchar(' ');
+ 			goto whatsit;
+ 		}
+ 	cantexpand:
+ 		putchar('\n');
+ 		return;
+ 		}
+ 
  	}
  
***************
*** 185,188 ****
--- 274,281 ----
  		return;
  	}
+ 	if (is_tar()) {		/* 21-Oct-83 FLB */
+ 		printf("tar archive\n");
+ 		return;
+ 	}
  	if (mbuf.st_size % 512 == 0) {	/* it may be a PRESS file */
  		lseek(ifile, -512L, 2);	/* last block */
***************
*** 245,249 ****
--- 338,349 ----
  	}
  notfort:
+ 	/* test for simpl source code: FLB 6/2/81 */
  	i=0;
+ 	if (scom() && lookup(simpl)) {
+ 		printf("simpl program text");
+ 		goto outa;
+ 	}
+ 
+ 	i=0;
  	if(ascom() == 0)goto notas;
  	j = i-1;
***************
*** 395,398 ****
--- 495,524 ----
  	return(1);
  }
+ 
+ scom(){
+ 	char cc;
+ 	while((cc = buf[i]) == ' ' || cc == '\t' || cc == '\n')if(i++ >= in)return(0);
+ 	if(buf[i] == '/' && buf[i+1] == '*'){
+ 		i += 2;
+ 		while(buf[i] != '*' || buf[i+1] != '/'){
+ 			if(buf[i] == '\\')i += 2;
+ 			else i++;
+ 			if(i >= in)return(0);
+ 		}
+ 		if((i += 2) >= in)return(0);
+ 	}
+ 	if(buf[i] == '/' && buf[i+1] == '+'){
+ 		i += 2;
+ 		while(buf[i] != '+' || buf[i+1] != '/'){
+ 			if(buf[i] == '\\')i += 2;
+ 			else i++;
+ 			if(i >= in)return(0);
+ 		}
+ 		if((i += 2) >= in)return(0);
+ 	}
+ 	if(buf[i] == '\n')if(scom() == 0)return(0);
+ 	return(1);
+ }
+ 
  ascom(){
  	while(buf[i] == '/'){
***************
*** 492,494 ****
--- 618,655 ----
  	} while (i < n);
  	return (0);
+ }
+ 
+ /*
+  * Determine whether the file is a tar file.
+  * 25-Oct-83 FLB
+  * Recoded 2 Jul 85 ACT
+  */
+ is_tar()
+ {
+ #define TBLOCK	512	/* This stuff is copied from tar.c. */
+ #define NAMSIZ	100
+ 	register struct header {
+ 		char name[NAMSIZ];
+ 		char mode[8];
+ 		char uid[8];
+ 		char gid[8];
+ 		char size[12];
+ 		char mtime[12];
+ 		char chksum[8];
+ 		char linkflag;
+ 		char linkname[NAMSIZ];
+ 	} *bp;
+ 	register comp_chksum;
+ 	register char *cp;
+ 	int header_chksum;
+ 
+ 	bp = (struct header *)buf;
+ 	sscanf(bp->chksum, "%8o", &header_chksum);
+ 	for (cp = bp->chksum; cp < &bp->chksum[8];)
+ 		*cp++ = ' ';
+ 	comp_chksum = 0;
+ 	for (cp = buf; cp < &buf[TBLOCK];)
+ 		comp_chksum += *cp++;
+ 
+ 	return (comp_chksum == header_chksum);
  }
-- 
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 454 1516)
UUCP:	seismo!umcp-cs!chris
CSNet:	chris@umcp-cs		ARPA:	chris@mimsy.umd.edu

ken@turtlevax.UUCP (07/22/86)

In article <3857@utah-cs.UUCP> donn@utah-cs.UUCP (Donn Seeley) writes:
>Uh, Turk, the original code's lack of parentheses was quite proper.  I
>suspect that you had another problem -- that you weren't running the
>binary which was generated from the source file.

You seem to be right: our source and binary were not in sync.
Recompiling the original code seems to give the desired results.
-- 
Ken Turkowski @ CIMLINC, Menlo Park, CA
UUCP: {amd,decwrl,hplabs,seismo}!turtlevax!ken
ARPA: turtlevax!ken@DECWRL.DEC.COM