[alt.sources] Strings

pcg@aber-cs.UUCP (Piercarlo Grandi) (04/17/89)

Here is another old source I have done, especially useful to Microport
SystemV/AT users, that implements the strings(1) command for COFF files.

Strings(1) extracts whatever looks like an ASCII string from a data file
or an executable. If the file looks like an executable, its text portion
is not scanned.

This strings(1) clone works on COFF files. It is also more flexible
than BSD strings(1), and more easily customized to other formats and
tasks.

-------------------- cut here -----------------------------------
#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	strings.1
#	strings.c
# This archive created: Sun Mar 26 17:40:10 1989
# Put /bin in front of the command search path used by the rest of the
# archive.
export PATH; PATH=/bin:$PATH
# Unpack strings.1 unless a file of that name already exists.
if test -f 'strings.1'
then
	echo shar: will not over-write existing file "'strings.1'"
else
# The quoted delimiter (\SHAR_EOF) makes the shell copy the manual page
# verbatim, with no parameter or command substitution, up to the line
# holding SHAR_EOF alone.
cat << \SHAR_EOF > 'strings.1'
.TH STRINGS 1
.ad b
.SH NAME
strings \- print strings in a file, or a COFF executable
.SH SYNOPSIS
.B strings
.BR - [ acop ]
.BR - nnnn
.RB [ - ]
[ file ... ]
.SH DESCRIPTION
This program reads the given
.IR file s
(or the standard input if none is given)
and prints out all strings in it whose length is at least
.IR nnnn ,
by default 4.
.LP
By default the file is tested to see if it is an executable or relocatable,
or an archive, and if so only the strings in the data section(s) are
considered. Note that if it is an archive then strings are printed for
.B all
the members of the archive.
.LP
The meaning of the options is:
.IP "\fB-a\fP"
Print all strings in the file regardless of its type.
.IP "\fB-c\fP"
Print only NUL terminated strings (or NL terminated ones), i.e. only
bona fide C strings.
.IP "\fB-o\fP"
Print before each string its offset in hex.
.IP "\fB-p\fP"
Print before each string the pathname of the file it is in.
.IP "\fB-\fP"
After this null option all the arguments are assumed to be filenames;
it can be used if the first filename begins with a dash.
.SH AUTHOR
(C) 1988 Piercarlo Grandi.
.SH BUGS
Just like the BSD
.IR strings (1)
options may be specified only before any file name.
.LP
The maximum string size is a compile time constant; longer strings are
truncated when printed (this size is typically 512, so it is not a problem).
.LP
The file offset is printed in hex, and thus not exactly compatible
with BSD
.IR strings (1).
SHAR_EOF
fi # end of overwriting check
# Unpack strings.c unless a file of that name already exists.
if test -f 'strings.c'
then
	echo shar: will not over-write existing file "'strings.c'"
else
# The quoted delimiter (\SHAR_EOF) makes the shell copy the C source
# verbatim, with no parameter or command substitution, up to the line
# holding SHAR_EOF alone.
cat << \SHAR_EOF > 'strings.c'
/*
    $Header: /aware0/aware/piercarl/Src./Commands./strings.c,v 1.8 89/03/26 17:39:13 piercarl Exp $
*/

static char Notice[] =
    "Copyright (C) 1988 Piercarlo Grandi. All rights reserved.";

/*
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the  Free Software Foundation; either version 1, or (at your option)
    any later version.

    This  program is distributed in the hope that it will be useful, but
    WITHOUT   ANY   WARRANTY;  without  even  the  implied  warranty  of
    MERCHANTABILITY  or  FITNESS  FOR  A PARTICULAR PURPOSE. See the GNU
    General Public License for more details.

    You may have received a copy of the GNU General Public License along
    with  this  program;  if not, write to the Free Software Foundation,
    Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
    An introduction to this program.

    This  is  a  clone  of the strings(1) program present under BSD. It is
    much   more   general,   and  most  importantly  supports  COFF  style
    executables.

    It  is  easy to modify this program along two different lines, one way
    is for other types of executables, and another is for a task different
    from printing strings.

    This is possible because scanning a COFF file and printing strings are
    respectively  a  functional  and a function; the scanner may be passed
    any  suitable  function (e.g. a disassembler), and the function may be
    used with many different scanners.

    It  is  not an exact clone of strings(1); two new options are allowed,
    -p  to  print  the  pathname of the file before each string, and -c to
    print only null terminated strings, that is C strings.
*/

/*
    Storage class shorthands used by the definitions in this file:
    "public" expands to nothing (the commented out extern is only a
    reminder), "private" expands to static and "reg" to register.
*/
#define public		/* extern */
#define private		static
#define reg		register

#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
extern long		ftell();

/*
    Non zero compiles in the fprintf(stderr,...) tracing statements.
*/
#define StringsTRACE	0

/*
    Executable format selection. Only StringsCOFF is tested by the code
    visible in this file; the other three are defined but not referenced
    there.
*/
#define StringsCOFF	1		/* COFF executable format	*/
#define StringsBSD	0		/* BSD executables		*/
#define StringsV7	0		/* V7 executables		*/
#define StringsX86	0		/* Xenix/Intel executables	*/

/*
    Size in bytes of the static buffer in which StringsScan accumulates
    a candidate string.
*/
#define StringsMAX	(512)

/*
    NOTE(review): StringsAOUT is not defined anywhere in the visible
    source; an undefined name counts as 0 in #if, so a.out.h is included
    only if StringsAOUT is defined on the compiler command line. Perhaps
    it was meant to be derived from StringsBSD/StringsV7/StringsX86 --
    confirm.
*/
#if (StringsAOUT)
#   include "a.out.h"
#endif
/*
    Headers of the COFF access routines (ldopen and friends) and of the
    file and section header structures they deliver.
*/
#if (StringsCOFF)
#   include "filehdr.h"
#   include "scnhdr.h"
#   include "ldfcn.h"
#endif

/*
    The options, as set by main() from the command line. All but the
    last are boolean and start out false.

    dasha	-a: scan the whole file even if it is an executable
    dasho	-o: print the offset of each string before it
    dashp	-p: print the pathname of the file before each string
    dashc	-c: accept only strings terminated by a null character
    dashmin	-nnnn: minimum length of an accepted string, 4 by default
*/

short unsigned		dasha = 0;
short unsigned		dasho = 0;
short unsigned		dashp = 0;
short unsigned		dashc = 0;
short unsigned		dashmin = 4;

/*
    Tell whether the given path is an executable image (or an archive of
    executable images).

    file	stream already open on path; only used to fstat(2) it
    path	pathname of that same file, given to ldopen and used to
		label error messages

    Returns non zero if the file is a regular file that ldopen accepts
    and that is either an archive (TYPE is ARTYPE) or has a magic number
    satisfying ISCOFF. Returns zero in every other case, including all
    errors, which are reported with perror. When no executable format
    is compiled in, no file is ever considered an executable.
*/

private short unsigned	StringsAout(file,path)
    FILE		    *file;
    char		    *path;
{

    /*
	Our first test is that the given file is a regular file, because
	we  MUST  be able to lseek(2) an executable (admittedly we could
	relax this precondition, but the great care and effort needed is
	probably not worth the utility).
    */

checkForFile:

    {
	struct stat		status;

	if (fstat(fileno(file),&status) < 0)
	{
	    perror(path);
	    return 0;
	}

#	if (StringsTRACE)
	    fprintf(stderr,"Aout: mode 0%o\n",status.st_mode);
#	endif

	if ((status.st_mode&S_IFMT) != S_IFREG)
	    return 0;
    }

#   if (StringsCOFF)

hasCoffFormat:
    {
	extern int		errno;
	LDFILE			*ldfile;
	short unsigned		aout;

	/*
	    errno is only meaningful if it has been set by the call that
	    just failed; clear it first, so that a value left over from
	    some earlier library call is not reported as an error on a
	    file that ldopen merely refused as not being an object file.
	*/

	errno = 0;

	if ((ldfile = ldopen(path,NULL)) == NULL)
	{
	    if (errno != 0)
		perror(path);
	    return 0;
	}

#	if (StringsTRACE)
	    fprintf(stderr,"Aout: TYPE 0%o, f_magic 0%o\n",
		TYPE(ldfile),HEADER(ldfile).f_magic);
#	endif

	aout = (TYPE(ldfile) == ARTYPE) || ISCOFF(HEADER(ldfile).f_magic);
	ldaclose(ldfile);

	return aout;
    }
#   else

    /*
	No executable format has been configured: never fall off the end
	of a function that must return a value.
    */

    return 0;
#   endif
}

/*
    Hand  every  data  section  of  the executable at the given path to
    the  extractor.  The caller guarantees that this procedure is never
    invoked for the standard input.

    file	stream open on path; not referenced by this implementation
    path	pathname given to ldopen and used to label error messages
    extractor	called as (*extractor)(stream,path,size) once the stream
		of  the object file has been positioned at the start of
		a data section, size being the s_size of that section

    The file is opened with ldopen; after each pass ldclose is called and,
    unless  it  reports  SUCCESS,  the file is reopened with the previous
    handle  (presumably  moving  on  to  the next archive member, see the
    ldopen documentation). Objects whose magic number fails ISCOFF are
    skipped. Failures to read a section header or to seek to a section
    are reported with perror and that section is skipped.

    NOTE(review): the body is supplied only when StringsCOFF is non zero.
*/

private void		StringsData(file,path,extractor)
    FILE		    *file;
    char		    *path;
    void		    (*extractor)(/*FILE *,char *,long*/);
#if (StringsCOFF)
{
    LDFILE		    *object;

    object = ldopen(path,NULL);

    while (object != NULL)
    {
	if (ISCOFF(HEADER(object).f_magic))
	{
	    short unsigned	    index;
	    SCNHDR		    header;

#	    if (StringsTRACE)
		fprintf(stderr,"Data: f_nscns %u\n",HEADER(object).f_nscns);
#	    endif

	    /* Sections are numbered from one. */

	    for (index = 1; index <= HEADER(object).f_nscns; index++)
	    {
		if (ldshread(object,index,&header) != SUCCESS)
		{
		    perror(path);
		    continue;
		}

#		if (StringsTRACE)
		    fprintf(stderr,"Data: section %u, s_flags 0%lo\n",
			index,header.s_flags);
#		endif

		/*
		    Only  a  regular section that is flagged as data is
		    of interest; anything else is passed over silently.
		*/

		if ((header.s_flags & 0x0000000f) == STYP_REG
		    && (header.s_flags & STYP_DATA))
		{
#		    if (StringsTRACE)
			fprintf(stderr,"Data: s_scnptr 0x%08lx, s_size 0x%08lx\n",
			    header.s_scnptr,header.s_size);
#		    endif

		    if (ldsseek(object,index) < 0)
			perror(path);
		    else
			(*extractor)(IOPTR(object),path,header.s_size);
		}
	    }
	}

	/* Done when ldclose says so, else open again with the old handle. */

	object = (ldclose(object) == SUCCESS) ? NULL : ldopen(path,object);
    }
}
#endif

/*
    Predicate  telling  whether  a  character may be part of a string:
    white  space  and  printable  characters qualify. The argument may be
    evaluated twice, so it must be free of side effects.
*/
#define StringsGOOD(c)	(isspace(c) || isprint(c))

/*
    This  accumulates  a  good  string,  starting  from the current file
    position  for  the  given  number  of  bytes.  We  consider a string
    terminated  either  by a newline or by a non string character, or if
    strict  C type strings are wanted, by a null character, as we assume
    that non newline, non null terminated strings are spurious.
*/

private void		StringsScan(file,path,bytes)
    FILE		    *file;
    char		    *path;
    long		    bytes;
{
    static char		    string[StringsMAX];
    reg char		    *s;
    reg int		    c;
    reg short unsigned	    l;

#   if (StringsTRACE)
	fprintf(stderr,"Scan: ftell 0x%08lx, bytes 0x%08lx\n",ftell(file),bytes);
#   endif

    for (s = string, l = 0; bytes != 0 && (c = getc(file)) != EOF; --bytes)
    {
	/*
	    A potential string is terminated either by a newline or by a
	    non  string  character. If this is a non terminal character,
	    we  add it to the string as long as the string max length is
	    not overflowed.

	    Note that if we only accept null terminated strings, newline
	    is no longer considered to be a string terminator.
	*/

	if (StringsGOOD(c) && (c != '\n' || dashc))
	{
	    if (s < (string + sizeof string - 1))
		*s++ = c;
	    l++;
	}
	/*
	    The  string  has  been  terminated;  if it is non empty, and
	    longer  than  the prescribed minimum, and (only when flag -c
	    is on) zero terminated, it is a bona fide string and it will
	    be printed.

	    Notice  that with the following condition we will print also
	    zero  length strings IFF they are newline terminated; with a
	    zero  dashmin  we  would  otherwise  recognize a null string
	    between every couple of bad characters.
	*/
	else if (s != string || c == '\n')
	{
	    *s++ = '\n'; if (c == '\n') l++; *s++ = '\0';
	    if (l >= dashmin && (!dashc || c == '\0'))
	    {
		if (dashp)	printf("%s: ",path);
		if (dasho)	printf("0x%08lx: ",ftell(file)-1-l);
		fputs(string,stdout);
	    }
	    s = string, l = 0;
	}
    }

    if (ferror(stdin))
	perror(path);
}

/*
    Our  job  is  easy;  first we collect the options, then we apply the
    extractor,  directly  or thru the executable scanner, to each of the
    files whose name remain as arguments.

    Options  are  accepted  only  before the first file name; a lone "-"
    ends  them  and  is  itself discarded, so that a file name beginning
    with  a  dash  may  follow.  With  no file name the standard input is
    scanned.

    Exits  with  status  0  on success, 1 on a syntax error or if a file
    cannot be opened.
*/

/*ARGSUSED*/
public int		main(argc,argv,envp)
    int			    argc;
    char		    **argv;
    char		    **envp;
{
    char		    *path;

parseOptions:

    for
    (
	--argc,argv++;
	argc > 0 && (*argv)[0] == '-' && (*argv)[1] != '\0';
	--argc, argv++
    )
    {
	register char	  *flags;
	long		  number;

	for (flags = *argv + 1; *flags != '\0'; flags++)
	switch (*flags)
	{
	case 'o':	dasho = 1; break;
	case 'a':	dasha = 1; break;
	case 'p':	dashp = 1; break;
	case 'c':	dashc = 1; break;
	default:
	    /* The ctype functions want an unsigned char value. */
	    if (!isdigit((unsigned char) *flags))
	    {
		fputs("Syntax: strings [ -[acop] ] [ -nnnn ] [ - ] [ file ...]\n",stderr);
		exit(1);
	    }

	    /*
		Collect  the  number  in  a  long  and stop accumulating
		once  it  exceeds what a short unsigned is sure to hold,
		so  that  an  absurdly  long  number  is  clamped instead
		of wrapping around to a small minimum.
	    */

	    number = *flags-'0';
	    for (flags++; isdigit((unsigned char) *flags); flags++)
		if (number <= 65535L)
		    number = number*10 + (*flags-'0');
	    --flags;	/* the enclosing for will step over the last digit */

	    if (number < 1)
		number = 1;
	    if (number > 65535L)
		number = 65535L;
	    dashmin = (short unsigned) number;
	}
    }

    /*
	The  loop  above  also  stops,  without consuming it, at the lone
	"-"  that  marks  the end of the options; skip it here, or it would
	be taken as the name of a file to open.
    */

    if (argc > 0 && (*argv)[0] == '-' && (*argv)[1] == '\0')
	--argc, argv++;

processInputs:

    /*
	A do because we must loop at least once on stdin, even if no arg
	path is given.
    */

    do
    {
	/*
	    If  this is an actual file (not a pipe) and is an executable
	    image  (and option a is not specified) we will scan just the
	    data sections, else we will scan all the file.

	    An  assumption here we guarantee is that StringsData will be
	    called  ONLY  on  a  regular  file  to which lseek(2) can be
	    applied, and this is ensured by Aout().

	    The  standard  input  proper  has  no  pathname that could be
	    opened  again,  so  it  is  never offered to Aout() and is
	    always scanned whole; "stdin" only labels the output.
	*/

	path = (argc > 0) ? *argv : "stdin";

	if (argc > 0 && freopen(path,"r",stdin) == NULL)
	{
		perror(path);
		exit(1);
	}

	/* The byte count is just a bound large enough to mean no limit. */

	if (dasha || argc <= 0 || !StringsAout(stdin,path))
	    StringsScan(stdin,path,0x0fffffffL);
	else
	    StringsData(stdin,path,StringsScan);

	if (argc > 0)
	    --argc, argv++;
    }
    while (argc > 0);

    fclose(stdin);

    exit(0);
    /*NOTREACHED*/
}
SHAR_EOF
fi # end of overwriting check
#	End of shell archive
# Leave explicitly, so that /bin/sh never reads any text (such as a mail
# signature) that follows the archive.
exit 0
-- 
Piercarlo "Peter" Grandi           | ARPA: pcg%cs.aber.ac.uk@nsfnet-relay.ac.uk
Dept of CS, UCW Aberystwyth        | UUCP: ...!mcvax!ukc!aber-cs!pcg
Penglais, Aberystwyth SY23 3BZ, UK | INET: pcg@cs.aber.ac.uk

dbw@mtunk.ATT.COM (Dave Wood) (05/11/89)

My Sys V machine supports COFF, but doesn't have any ISCOFF macro.
Since I don't know the ldfcn(5) stuff, I don't know how to use the
supplied functions to replace it.

Suggestions?