[comp.sources.atari.st] v02i089: strings -- Find text strings in files

koreth@panarthea.ebay.sun.com (Steven Grimm) (10/11/89)

Submitted-by: uunet!unido!infbs!tubsibr!hafer (Udo Hafermann)
Posting-number: Volume 2, Issue 89
Archive-name: strings

[This arrived packaged with the "yaff" program. -sg]

#!/bin/sh
# shar:	Shell Archiver  (v1.22)
#
#	Run the following text with /bin/sh to create:
#	  DEBUG.H
#	  GETOPT.C
#	  GETOPT.H
#	  MAKEFILE
#	  STRINGS.C
#	  STRINGS.MAN
#
sed 's/^X//' << 'SHAR_EOF' > DEBUG.H &&
X/*
X * debug.h -	debugging macros
X *
X */
X
X/*
X * DD(value, format) creates a printf-statement which dumps
X * the given value in the specified format, prefixed with the
X * source file name and line number, e.g.
X *
X *	DD(offset, %ld)		prints	[strings.c(123): offset=42]
X *
X * The format is written without quotes.  The expansion already ends
X * in a semicolon, so DD() is used without one.  Needs <stdio.h>.
X *
X * When NDEBUG is defined (with any value, or with none at all),
X * DD() expands to nothing.
X */
X
X#ifdef NDEBUG
X#define	DD(value, format)
X#else
X#define	DD(value, format)	printf("[" __FILE__ "(%d): " #value "=" #format "]\n", __LINE__, value);
X#endif
X
X/* End of debug.h */
SHAR_EOF
chmod 0600 DEBUG.H || echo "restore of DEBUG.H fails"
sed 's/^X//' << 'SHAR_EOF' > GETOPT.C &&
X/*
X**  Return options and their values from the command line.
X**
X**  This comes from the AT&T public-domain getopt published in mod.sources.
X*/
X
X/*  adapted for MWC by hafer@infbs
X*/
X
X#include <stdio.h>
X
X#define TYPE	int
X/*
X * NOTE(review): TYPE is not referenced anywhere else in this file.
X *
X * ERR(s, c) prints argv[0], the message s and the character c,
X * followed by a newline, to stderr -- unless opterr is 0.  It uses
X * the identifier argv of the function it is expanded in, and it
X * expands to a complete braced if-statement, which is why getopt()
X * writes it without a trailing semicolon.
X */
X#define ERR(s, c)	if(opterr){fprintf(stderr, "%s%s%c\n", argv[0], s, c);}
X
Xextern int strcmp();
Xextern char *index();	/* used by getopt() to look the option letter up in
X			   opts; presumably the BSD name for strchr() as
X			   provided by the target library -- confirm */
X
X/* external vars: getopt's interface to its caller (declared in getopt.h) */
X
Xint	opterr = 1;	/* if reset to 0, getopt is silent on errors	*/
Xint	optind = 1;	/* argument number of current option		*/
Xint	optopt;		/* current option character			*/
Xchar	*optarg;	/* pointer to extra argument of current option	*/
X
Xint
Xgetopt(argc, argv, opts)
Xint	argc;
Xchar	**argv, *opts;
X{
X	static int sp = 1;
X	register int c;
X	register char *cp;
X
X	if(sp == 1)
X		if(optind >= argc ||
X		   argv[optind][0] != '-' || argv[optind][1] == '\0')
X			return(EOF);
X		else if(strcmp(argv[optind], "--") == 0) {
X			optind++;
X
X		}
X	optopt = c = argv[optind][sp];
X	if(c == ':' || (cp=index(opts, c)) == NULL) {
X		ERR(": illegal option -- ", c)
X		if(argv[optind][++sp] == '\0') {
X			optind++;
X			sp = 1;
X		}
X		return('?');
X	}
X	if(*++cp == ':') {
X		if(argv[optind][sp+1] != '\0')
X			optarg = &argv[optind++][sp+1];
X		else if(++optind >= argc) {
X			ERR(": option requires an argument -- ", c)
X			sp = 1;
X			return('?');
X		} else
X			optarg = argv[optind++];
X		sp = 1;
X	} else {
X		if(argv[optind][++sp] == '\0') {
X			sp = 1;
X			optind++;
X		}
X		optarg = NULL;
X	}
X	return(c);
X}
X
X
SHAR_EOF
chmod 0600 GETOPT.C || echo "restore of GETOPT.C fails"
sed 's/^X//' << 'SHAR_EOF' > GETOPT.H &&
X
X/*
X * getopt.h -	declarations for using getopt (needs <stdio.h>)
X */
X 
Xextern	int	opterr;	/* if reset to 0, getopt is silent on errors	*/
Xextern	int	optind;	/* argument number following current option;
X			   when getopt returns EOF because it met a word
X			   that is not an option, the index of that word */
Xextern	int	optopt;	/* current option character			*/
Xextern	char	*optarg;/* pointer to extra argument of current option;
X			   NULL after an option that takes no argument	*/
X
Xextern	int	getopt(/* int argc, char **argv, char *opts */);
X	/* for each call, returns next option letter or EOF.
X	   returns '?' for a letter that is not in opts and for an
X	   option whose expected parameter is missing; optopt then
X	   holds the offending letter. */
X	/* opts is a list of permissible option letters, where a letter
X	   may be followed by a colon to indicate an expected parameter */
SHAR_EOF
chmod 0600 GETOPT.H || echo "restore of GETOPT.H fails"
sed 's/^X//' << 'SHAR_EOF' > MAKEFILE &&
X
XPROGRAM= strings.ttp
XOBJ= strings.o getopt.o
X
X# -DNDEBUG turns the DD() debugging macro of debug.h into nothing.
XCFLAGS=-DNDEBUG
X# Linker flags; passed to the link command below.
XLDFLAGS = -s
X
X# Link the program from its object files.  The explicit command
X# replaces make's built-in link rule, so $(LDFLAGS) has to be
X# named here to have any effect.
X$(PROGRAM):	$(OBJ)
X	cc $(LDFLAGS) -o $(PROGRAM) $(OBJ)
X
Xstrings.o:	strings.c debug.h getopt.h
X
Xgetopt.o:	getopt.c getopt.h
X
X
X
SHAR_EOF
chmod 0600 MAKEFILE || echo "restore of MAKEFILE fails"
sed 's/^X//' << 'SHAR_EOF' > STRINGS.C &&
X
X/*
X *	strings.c -	extract strings from binary file
X *	hafer@infbs	06-AUG-89
X */
X
X#include <stdio.h>
X#include "debug.h"
X#include "getopt.h"
X
Xextern	char *index();
X
X#define	BUFLEN		1024
X#define	MINDEFAULT	4
X
Xtypedef	struct {		/* header that main() reads from the start of
X				   the input file unless -a is given	*/
X	int	magic;		/* main() rejects the file unless this is
X				   0x601a				*/
X	long	text;		/* printed under "text" by -v; added to
X				   sizeof(header) to find the place where
X				   scanning starts			*/
X	long	data;		/* printed under "data" by -v; the number
X				   of bytes that are scanned		*/
X	long	bss;		/* printed under "bss" by -v, otherwise
X				   not used				*/
X	long	symtab;		/* printed under "symtab" by -v, otherwise
X				   not used				*/
X	long	reserved1, reserved2;	/* not used by this program	*/
X	int	flags;		/* not used by this program		*/
X} header;	/* NOTE(review): sizeof(header) serves as the length of the
X		   header in the file, so this layout presumably assumes
X		   a 16-bit int and no padding, as with the original
X		   compiler -- confirm when porting */
X
Xstatic	char	buffer[BUFLEN];	/* the string currently being collected	*/
Xstatic	int	bufcnt,		/* number of characters in buffer	*/
X		aflag,		/* -a: scan the whole file, no header	*/
X		lflag,		/* -l: print literally, without quotes
X				   and escapes				*/
X		pflag,		/* -d, -o, -x: print the position in
X				   front of each string			*/
X		nflag,		/* -n: '\n' and '\r' end a string too	*/
X		vflag,		/* -v: print the sizes from the header	*/
X		minlen = MINDEFAULT;	/* -m: shortest string printed	*/
Xstatic	char	*options = "adoxlnuvm:c:",	/* option letters for
X						   getopt(); also shown
X						   by usage()		*/
X		*whoami = "strings",	/* program name for messages;
X					   replaced by argv[0] if that
X					   is not empty			*/
X		*format = "%8ld ",	/* printf format for positions and
X					   for the -v sizes; -x and -o
X					   select hex and octal		*/
X		extrachars[BUFLEN],	/* additional characters accepted
X					   as printable (-c and -u)	*/
X		*umlaute = "\204\224\201\216\231\232\236",	/* the characters added by -u; presumably a-, o-, u-umlaut (small and capital) and sharp s in the Atari ST character set, which the original comment showed as 8-bit characters -- confirm */
X		*filename;		/* the file being scanned	*/
Xstatic	long	pos,		/* number of bytes scanned so far	*/
X		length,		/* number of bytes to scan; -1 with -a,
X				   so that the scan runs to end of file	*/
X		offset;		/* file position at which scanning starts;
X				   added to pos when a position is printed */
X
Xstatic	void	addchar(ch)
Xint	ch;
X{
X	/* adds a char to buffer */
X	if (bufcnt < BUFLEN-1) {
X		buffer[bufcnt++] = ch;
X	}
X}
X
Xstatic	void	printbuf()
X{
X	/* prints buffer */
X	buffer[bufcnt] = '\0';
X	if (bufcnt >= minlen) {
X		if (pflag)
X			printf (format, pos+offset);
X		if (lflag)
X			printf ("%s\n", buffer);
X		else
X			printf ("\"%s\"\n", buffer);
X	}
X	bufcnt = 0;
X}
X
Xstatic	void	usage()
X{
X	fprintf (stderr, "Usage: %s [-%s] <filename>\n", whoami, options);
X	exit (-1);
X}
X		
Xmain (argc, argv)
Xint	argc;
Xchar	**argv;
X{
X	int	ch;
X	FILE	*fp;
X	header	h;
X
X	if (*argv[0])
X		whoami = argv[0];
X	opterr = 0;
X	while (EOF != getopt(argc, argv, options)) {
X		if (optopt == 'a') {
X			aflag = -1;
X		} else if (optopt == 'l') {
X			lflag = -1;
X		} else if (optopt == 'x') {
X			pflag = -1;
X			format = "%8lx ";
X		} else if (optopt == 'o') {
X			pflag = -1;
X			format = "%8lo ";
X		} else if (optopt == 'd') {
X			pflag = -1;
X		} else if (optopt == 'n') {
X			nflag = -1;
X		} else if (optopt == 'v') {
X			vflag = -1;
X		} else if (optopt == 'c') {
X			strcat (extrachars, optarg);
X		} else if (optopt == 'u') {
X			strcat (extrachars, umlaute);
X		} else if (optopt == 'm') {
X			minlen = atoi (optarg);
X			if (minlen == 0)
X				minlen = MINDEFAULT;
X		} else {
X			usage();
X		}
X	}
X	/* check whether there is exactly one argument left:	*/
X	if (argc-optind != 1) {
X		usage();
X	}
X	filename = argv[optind];
X	if (!(fp = fopen(filename, "rb"))) {
X		fprintf (stderr, "%s: cannot open file '%s'\n",
X			whoami, filename);
X		exit (-33);
X	}
X
X	if (aflag) {
X		offset = 0;
X		length = -1;
X	} else {
X		if (!fread(&h, sizeof(h), 1, fp) || h.magic != 0x601a) {
X			fprintf (stderr, "%s: unknown format: '%s'\n",
X				whoami, filename);
X				exit (-1);
X		}
X		if (vflag) {
X			printf ("    text     data      bss   symtab\n");
X			printf (format, h.text);
X			printf (format, h.data);
X			printf (format, h.bss);
X			printf (format, h.symtab);
X			printf ("\n");
X		}
X		offset = sizeof(h) + h.text;
X		length = h.data;
XDD(offset, %ld)
XDD(length, %ld)
X		fseek (fp, offset, 0);
X	}
X
X	/* off we go:	*/
X	for (pos; pos != length; pos++) {
X		ch = getc(fp);
X		if (feof(fp)) {
X			/* this shouldn't happen if the header is valid... */
X			fclose (fp);
X			exit(0);
X		} else if (ch == '\0' || 
X				(nflag && (ch == '\n' || ch == '\r'))) {
X			printbuf();
X		} else if (ch == '"' || ch == '\\') {
X			if (lflag) {
X				addchar(ch);
X			} else {
X				addchar('\\');
X				addchar(ch);
X			}
X		} else if (ch>=' ' && ch<127) {
X			addchar(ch);
X		} else if (ch=='\n' || ch=='\t') {
X			if (lflag) {
X				addchar(ch);
X			} else {
X				addchar('\\');
X				addchar((ch=='\n' ? 'n' : 't'));
X			}
X		} else if (*extrachars && index(extrachars, ch)) {
X			addchar(ch);
X		} else {
X			bufcnt = 0;
X		}
X	}	
X	exit(0);
X}
SHAR_EOF
chmod 0600 STRINGS.C || echo "restore of STRINGS.C fails"
sed 's/^X//' << 'SHAR_EOF' > STRINGS.MAN &&
XSTRINGS(1)			USER COMMANDS
X
X
XNAME
X	strings  -  extract strings from binary file
X
XSYNOPSIS
X	strings [-adoxlnuv] [-m <number>] [-c chars] filename
X
XDESCRIPTION
X	Extracts and prints sequences of printable characters, terminated by
X	a zero byte, from the data segment of the specified file, which is
X	assumed to be a GEMDOS executable.
X	Strings are listed in C-format, i.e. as they would be accepted
X	as string constants in C.  Strings shorter than 4 characters are
X	ignored.  Printable characters are '\t', '\n', and ASCII 32 to 126.
X
X	Options:
X
X	-a	All:  Search the complete file (regardless of its format).
X	-d	Decimal: Precede each string with its position in the file.
X	-o	Octal: Same as -d, only in octal.
X	-x	Hexadecimal: Same as -d, only in hex.
X	-l	Literal: Print strings literally (i.e., not in C-format).
X	-n	Newline: Accept '\n' and '\r' as string terminators.
X	-m <no>	Minimum length:  Ignore strings shorter than <no> characters.
X	-c <ch>	Characters: Regard the specified chars as printable.
X	-u	Umlauts: Regard the "umlaut" characters as printable
X		(a, o and u with diaeresis, small and capital, and sharp s).
X	-v	Verbose:  List the segment sizes.
X
XRESTRICTIONS
X	Printing of strings is limited to the internal buffer size of 1024
X	characters.
X
XBUGS
X	When printing in C-format, characters greater than 126 (which are
X	accepted only with the -c or -u options) are not escaped as '\ddd'.
X
XAUTHOR
X	hafer@infbs.uucp	August 1989
X
SHAR_EOF
chmod 0600 STRINGS.MAN || echo "restore of STRINGS.MAN fails"
exit 0