martin@cod.NOSC.MIL (Douglas W. Martin) (04/30/88)
Has anyone written a program like /usr/bin/file that runs on the IBM PC? Under UNIX, this utility identifies the type of information in a file, e.g. "ascii text", "executable code", "data", "csh commands", etc. File extensions often give no clue as to what type of information a file contains. For example, config.sys is ASCII, and ansi.sys is not. Any help with finding this program is appreciated. Doug Martin, Naval Ocean Systems Center, San Diego, Ca. UUCPmail: {akgua,allegra,decvax,ihnp4,ucbvax}!sdcsvax!noscvax!martin MILNET: martin@NOsc.mil
fox@alice.marlow.reuters.co.uk (Paul Fox) (05/04/88)
/*
 * Usenet article text that preceded the program (kept verbatim):
 *
 * In article <1080@cod.NOSC.MIL> martin@cod.NOSC.MIL (Douglas W. Martin) writes:
 * >
 * >
 * > Has anyone written a program like /usr/bin/file that runs on the IBM-pc?
 * >This utility under UNIX identifies the type of information in a file;
 * >i.e. "ascii text", "executable code", "data", "csh commands", etc.
 * >Often file extensions give no clue as to what type of information is in a file.
 * >For example, config.sys is ascii, and ansi.sys is not.
 * >Any help with finding this program is appreciated.
 *
 * Here's my version. Runs under MS C4. Should run under others maybe
 * with minor changes.
 * -----------cut here-----------------
 */
# include <stdio.h>
# include <ctype.h>
# include <string.h>
# include <sys/types.h>
# include <sys/stat.h>

/*--------------------------------------------------------
 *  file.c - similar to unix file(1).  (C) P. Fox 1988
 *
 *  For every path on the command line, print the path
 *  followed by a one-line guess at what the file contains.
 *  The guess is based on stat() and on the first SIZE
 *  bytes of the file.
 *--------------------------------------------------------*/

struct stat stat_buf;

# define SIZE           1024    /* number of leading bytes that are examined */
# define TRUE           1
# define FALSE          0
# define NIL            0
# define NAME_WIDTH     20      /* column width the file name is padded to   */

/* Compare the whole file-type field; fall back to S_IFMT where the
 * S_ISxxx() test macros are not supplied by <sys/stat.h>.            */
# ifndef S_ISDIR
#  define S_ISDIR(m)    (((m) & S_IFMT) == S_IFDIR)
# endif
# ifndef S_ISCHR
#  define S_ISCHR(m)    (((m) & S_IFMT) == S_IFCHR)
# endif

FILE    *fp;

/* Leading bytes of the file being examined.  The 32 bytes of padding
 * stay zero, so one-byte look-ahead past the data is always in bounds. */
unsigned char lbuf[SIZE+32];

int c_file(int n);

/* Return non-zero when c may appear inside a C identifier. */
static int is_ident_char(int c)
{
        return isalnum(c) || c == '_';
}

/* Return non-zero when the len bytes at p equal word and lie before end. */
static int starts_with(const unsigned char *p, const unsigned char *end,
                       const char *word, size_t len)
{
        return p < end && (size_t) (end - p) >= len && memcmp(p, word, len) == 0;
}

/* Return the description for the n bytes currently held in lbuf. */
static const char *describe_buffer(int n)
{
        const unsigned char *cp;

        if (n == 0) {
                return "Null file.";
        }
        if (lbuf[0] == 0xf0) {
                return "Object library.";
        }
        if (lbuf[0] == 0x80) {
                return "Object file.";
        }
        if (lbuf[0] == 0x4d && lbuf[1] == 0x5a) {       /* "MZ" */
                return ".exe file.";
        }
        if (lbuf[0] == 0x31 && lbuf[1] == 0xbe) {
                return "Microsoft WORD document.";
        }
        if (lbuf[0] == 0x1a) {
                return "ARC file.";
        }
        if (lbuf[0] == 0xff && lbuf[1] == 0xff &&
            lbuf[2] == 0xff && lbuf[3] == 0xff) {
                return "Device driver.";
        }
        if (c_file(n)) {
                return "C source code.";
        }

        /* Anything that is neither printable nor white space makes it binary. */
        for (cp = lbuf; cp < lbuf + n; cp++) {
                if (!isprint(*cp) && !isspace(*cp)) {
                        return "Binary data.";
                }
        }
        return "ASCII Text file.";
}

/*
 * Classify each file named in argv[1..argc-1] and print one line per file.
 * Failures of stat(), fopen() or fread() are reported with perror() and the
 * next file is tried.  Always returns 0.
 */
int main(int argc, char **argv)
{
        int     i;

        for (i = 1; i < argc; i++) {
                int     n;
                int     pad = NAME_WIDTH - (int) strlen(argv[i]);

                /* "%*s" needs an int; a negative width would pad long names. */
                if (pad < 0) {
                        pad = 0;
                }
                printf("%s:%*s", argv[i], pad, "");

                if (stat(argv[i], &stat_buf) < 0) {
                        perror("");
                        continue;
                }
                if (S_ISDIR(stat_buf.st_mode)) {
                        printf("Directory.\n");
                        continue;
                }
                if (S_ISCHR(stat_buf.st_mode)) {
                        printf("Character special.\n");
                        continue;
                }

                fp = fopen(argv[i], "rb");
                if (fp == NULL) {
                        perror("");
                        continue;
                }
                memset(lbuf, 0, sizeof lbuf);
                n = (int) fread(lbuf, 1, SIZE, fp);
                if (ferror(fp)) {
                        /* Do not report an unreadable file as empty. */
                        perror("");
                        fclose(fp);
                        continue;
                }
                fclose(fp);

                printf("%s\n", describe_buffer(n));
        }
        return 0;
}

# define KEYWD_LIMIT    3       /* more than this many hits means "C source" */

/* Words that count as evidence of C source; a NULL name ends the table. */
struct k {
        char    *name;
        int     length;
        } keywds[] = {
        { "main",       4 },
        { "stdio",      5 },
        { "int",        3 },
        { "unsigned",   8 },
        { "register",   8 },
        { "long",       4 },
        { "char",       4 },
        { "typedef",    7 },
        { "if",         2 },
        { "return",     6 },
        { "while",      5 },
        { "struct",     6 },
        { "union",      5 },
        { NULL,         0 }
        };

/*
 * Decide whether the first n bytes of lbuf look like C source.
 *
 * One hit is counted for each of:
 *   - a line whose first character is '{' or '}',
 *   - a line starting with '#' followed (after optional blanks) by
 *     "include", "if..." or "define",
 *   - a complete comment,
 *   - a whole word found in keywds[].
 *
 * Returns TRUE once more than KEYWD_LIMIT hits are seen, or when a comment
 * is still open at the end of the data; otherwise returns FALSE.
 */
int c_file(int n)
{
        const unsigned char     *cp;
        const unsigned char     *end = lbuf + n;
        const struct k          *kp;
        int                     nkeywds = 0;

        for (cp = lbuf; cp < end; cp++) {
                const unsigned char *line = NULL;

                /* A line begins after every newline and at the buffer start. */
                if (*cp == '\n') {
                        line = cp + 1;
                } else if (cp == lbuf) {
                        line = cp;
                }

                if (line != NULL && line < end) {
                        if (*line == '{' || *line == '}') {
                                if (++nkeywds > KEYWD_LIMIT) {
                                        return TRUE;
                                }
                                cp = line;
                                continue;
                        }
                        if (*line == '#') {
                                /* Step over '#' and blanks ("# include" style). */
                                cp = line + 1;
                                while (cp < end && (*cp == ' ' || *cp == '\t')) {
                                        cp++;
                                }
                                if (cp >= end) {
                                        break;
                                }
                                if (starts_with(cp, end, "include", 7) ||
                                    starts_with(cp, end, "if", 2) ||
                                    starts_with(cp, end, "define", 6)) {
                                        if (++nkeywds > KEYWD_LIMIT) {
                                                return TRUE;
                                        }
                                }
                                /* cp is on the directive's first letter; the next
                                 * step lands mid-word, so "#if" is not recounted
                                 * as the keyword "if". */
                                continue;
                        }
                }

                if (*cp == '/' && cp[1] == '*') {
                        for (cp += 2; cp < end; cp++) {
                                if (*cp == '*' && cp[1] == '/') {
                                        cp++;
                                        break;
                                }
                        }
                        /* A comment still open at the end of the data is
                         * accepted as C source outright. */
                        if (cp >= end || ++nkeywds > KEYWD_LIMIT) {
                                return TRUE;
                        }
                        continue;
                }

                /* Only look for keywords at the start of a word. */
                if (!isalpha(*cp) || (cp != lbuf && is_ident_char(cp[-1]))) {
                        continue;
                }
                for (kp = keywds; kp->name != NULL; kp++) {
                        const unsigned char *next;

                        if (!starts_with(cp, end, kp->name, (size_t) kp->length)) {
                                continue;
                        }
                        /* Reject prefixes such as "int" in "integer". */
                        next = cp + kp->length;
                        if (next < end && is_ident_char(*next)) {
                                continue;
                        }
                        if (++nkeywds > KEYWD_LIMIT) {
                                return TRUE;
                        }
                        cp += kp->length - 1;
                        break;
                }
        }
        return FALSE;
}