[comp.sys.ibm.pc] unix-like file utility for MSDOS

martin@cod.NOSC.MIL (Douglas W. Martin) (04/30/88)

     Has anyone written a program like /usr/bin/file that runs on the IBM-pc?
This utility under UNIX identifies the type of information in a file;
i.e. "ascii text", "executable code", "data", "csh commands", etc.
Often file extensions give no clue as to what type of information is in a file.
For example, config.sys is ascii, and ansi.sys is not.
Any help with finding this program is appreciated.

Doug Martin
Naval Ocean Systems Center, San Diego, Ca.
UUCPmail: {akgua,allegra,decvax,ihnp4,ucbvax}!sdcsvax!noscvax!martin
MILNET:   martin@NOsc.mil

fox@alice.marlow.reuters.co.uk (Paul Fox) (05/04/88)

In article <1080@cod.NOSC.MIL> martin@cod.NOSC.MIL (Douglas W. Martin) writes:
>
>
>     Has anyone written a program like /usr/bin/file that runs on the IBM-pc?
>This utility under UNIX identifies the type of information in a file;
>i.e. "ascii text", "executable code", "data", "csh commands", etc.
>Often file extensions give no clue as to what type of information is in a file.
>For example, config.sys is ascii, and ansi.sys is not.
>Any help with finding this program is appreciated.

Here's my version. It runs under MS C4 and should run under other compilers,
perhaps with minor changes.

-----------cut here-----------------
# include	<stdio.h>
# include	<ctype.h>
# include	<string.h>
# include	<sys/types.h>
# include	<sys/stat.h>

/*--------------------------------------------------------
 *   file.c - similar to unix file(1). (C) P. Fox 1988
 *--------------------------------------------------------*/

/* Truth values, and a generic zero used as a null pointer value. */
#define	FALSE	0
#define	TRUE	1
#define	NIL	0

/* Number of leading bytes of each file that main() reads for inspection. */
#define	SIZE	1024

/* Result of the stat() call made by main() for the current argument. */
struct stat	stat_buf;

/* Stream main() opens on the current argument. */
FILE		*fp;

/* Leading bytes of the current file, followed by 32 bytes of slack. */
unsigned char	lbuf[SIZE + 32];

main(argc, argv)
char	**argv;
{
	int		i, n;
	int	*ip;

	for (i = 1; i < argc; i++) {
		printf("%s:%*s", argv[i], 20-strlen(argv[i]), "");
		n = stat(argv[i], &stat_buf);
		if (n < 0) {
			perror("");
			continue;
			}
		if (stat_buf.st_mode & S_IFDIR) {
			printf("Directory.\n");
			continue;
			}
		if (stat_buf.st_mode & S_IFCHR) {
			printf("Character special.\n");
			continue;
			}
		if ((fp = fopen(argv[i], "rb")) == NIL) {
			perror("");
			continue;
			}
		memset(lbuf, NULL, SIZE);
		n = fread(lbuf, 1, SIZE, fp);
		fclose(fp);
		ip = (int *) lbuf;
		if (n == 0)
			printf("Null file.");
		else if (lbuf[0] == 0xf0)
			printf("Object library.");
		else if (lbuf[0] == 0x80)
			printf("Object file.");
		else if (lbuf[0] == 0x4d && lbuf[1] == 0x5a)
			printf(".exe file.");
		else if (lbuf[0] == 0x31 && lbuf[1] == 0xbe)
			printf("Microsoft WORD document.");
		else if (lbuf[0] == 0x1a)
			printf("ARC file.");
		else if (lbuf[0] == 0xff && lbuf[1] == 0xff && lbuf[2] == 0xff &&
				lbuf[3] == 0xff) 
			printf("Device driver.");
		else if (c_file(n))
			printf("C source code.");
		else {
			unsigned char	*cp;
			for (cp = lbuf; cp < lbuf+n; cp++)
				if (!(isprint(*cp) || isspace(*cp)))
					break;
			if (cp >= lbuf+n)
				printf("ASCII Text file.");
			else
				printf("Binary data.");
			}
		putchar('\n');
		}
	exit(0);
}
/* c_file() answers TRUE once it has counted more than this many hits. */
#define	KEYWD_LIMIT	3

/* A word c_file() looks for, together with its length in characters. */
struct k {
	char	*name;
	int	length;
	};

/* Table of words searched by c_file(); an entry with a null name ends it. */
struct k keywds[] = {
	{ "main",	4 },
	{ "stdio",	5 },
	{ "int",	3 },
	{ "unsigned",	8 },
	{ "register",	8 },
	{ "long",	4 },
	{ "char",	4 },
	{ "typedef",	7 },
	{ "if",		2 },
	{ "return",	6 },
	{ "while",	5 },
	{ "struct",	6 },
	{ "union",	5 },
	{ (char *) NIL }
	};
/* True for a character that can be part of a C identifier. */
# define	IDENT_CH(c)	(isalnum(c) || (c) == '_')

/*
 * c_file - guess whether the first n bytes of lbuf are C source.
 *
 * Walks the buffer counting "hits" and returns TRUE as soon as more
 * than KEYWD_LIMIT have been seen, FALSE if the end is reached first.
 * A hit is any of:
 *   - a line beginning with '{' or '}';
 *   - a line beginning with '#', optional blanks, then "include",
 *     "define" or anything starting with "if";
 *   - a complete comment;
 *   - a word from keywds[] standing on its own, i.e. not glued to
 *     other identifier characters on either side.
 * A comment still open at the end of the buffer returns TRUE at once.
 *
 * Only lines preceded by a newline inside the buffer are recognised as
 * line starts, so the very first line is not tested for '{', '}' or '#'.
 *
 * The scan looks a few bytes beyond lbuf+n.  That stays inside lbuf
 * because of its 32 bytes of slack, and assumes the bytes after the
 * first n are zero (main() clears the buffer before each read).
 */
int
c_file(n)
int	n;
{	register unsigned char	*cp;
	unsigned char	*dp;
	int	nkeywds = 0;
	int	len;
	struct k *kp;

# define	END	(lbuf+n)
	for (cp = lbuf; cp < END; cp++) {
		if (*cp == '\n' && (cp[1] == '{' || cp[1] == '}')) {
			if (++nkeywds > KEYWD_LIMIT)
				return TRUE;
			cp++;
			continue;
			}
		if (*cp == '\n' && cp[1] == '#') {
			/*
			 * Step over the newline, the '#' and any blanks that
			 * separate the '#' from the directive name before
			 * comparing; comparing at the newline never matches.
			 */
			for (dp = cp + 2; dp < END && (*dp == ' ' || *dp == '\t'); dp++)
				;
			if (strncmp((char *) dp, "include", 7) == 0)
				len = 7;
			else if (strncmp((char *) dp, "define", 6) == 0)
				len = 6;
			else if (strncmp((char *) dp, "if", 2) == 0)
				len = 2;
			else
				len = 0;
			if (len == 0) {
				/* Unknown directive: resume just after the '#'. */
				cp++;
				continue;
				}
			if (++nkeywds > KEYWD_LIMIT)
				return TRUE;
			/* Resume after the name so "if" is not counted twice. */
			cp = dp + len - 1;
			continue;
			}
		if (*cp == '/' && cp[1] == '*') {
			for (cp += 2; cp < END; cp++)
				if (*cp == '*' && cp[1] == '/') {
					cp ++;
					break;
					}
			if (cp>= END || ++nkeywds > KEYWD_LIMIT)
				return TRUE;
			continue;
			}
		/* Only the first character of a word can start a keyword. */
		if (!isalpha(*cp) || (cp != lbuf && IDENT_CH(cp[-1])))
			continue;
		for (kp = keywds; kp->name; kp++)
			if (strncmp((char *) cp, kp->name, kp->length) == 0 &&
			    !IDENT_CH(cp[kp->length])) {
				/* Whole word only: "character" is not "char". */
				if (++nkeywds > KEYWD_LIMIT)
					return TRUE;
				cp += kp->length - 1;
				break;
				}
		}
	return FALSE;
}