pcg@aber-cs.UUCP (Piercarlo Grandi) (04/17/89)
Here is another old source I have done, especially useful to Microport SystemV/AT users, that implements the strings(1) command for COFF files. Strings(1) extracts those that look like ascii strings from a datafile or an executable. If the file looks like an executable, its text portion is not scanned. This strings(1) clone works on COFF files. It is also more flexible than BSD strings(1), and more easily customized to other formats and tasks. -------------------- cut here ----------------------------------- #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # strings.1 # strings.c # This archive created: Sun Mar 26 17:40:10 1989 export PATH; PATH=/bin:$PATH if test -f 'strings.1' then echo shar: will not over-write existing file "'strings.1'" else cat << \SHAR_EOF > 'strings.1' .TH STRINGS 1 .ad b .SH NAME strings \- print strings in a file, or a COFF executable .SH SYNOPSIS .B strings .BR - [ acop ] .BR - nnnn .RB [ - ] [ file ... ] .SH DESCRIPTION This program reads the given .IR file s (or the standard input if none is given) and prints out all strings in it whose length is at least .IR nnnn , by default 4. .LP By default the file is tested to see if it is an executable or relocatable, or an archive, and if so only the strings in the data section(s) are considered. Note that if it is an archive then strings are printed for .B all the members of the archive. .LP The meaning of the options is: .IP "\fB-a\fP" Print all strings in the file regardless of its type. .IP "\fB-c\fP" Print only NUL terminated strings (or NL terminated ones), i.e. only bona fide C strings. .IP "\fB-o\fP" Print before each string its offset in hex. .IP "\fB-p\fP" Print before each string the pathname of the file it is in. 
.IP "\fB-\fP" After this null option all the arguments are assumed to be filenames; it can be used if the first filename begins with a dash. .SH AUTHOR (C) 1988 Piercarlo Grandi. .SH BUGS Just like the BSD .IR strings (1) options may be specified only before any file name. .LP The maximum string size is a compile time constant; longer strings are truncated when printed (this size is typically 512, so it is not a problem). .LP The file offset is printed in hex, and thus not exactly compatible with BSD .IR strings (1). SHAR_EOF fi # end of overwriting check if test -f 'strings.c' then echo shar: will not over-write existing file "'strings.c'" else cat << \SHAR_EOF > 'strings.c' /* $Header: /aware0/aware/piercarl/Src./Commands./strings.c,v 1.8 89/03/26 17:39:13 piercarl Exp $ */ static char Notice[] = "Copyright (C) 1988 Piercarlo Grandi. All rights reserved."; /* This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You may have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* An introduction to this program. This is a clone of the strings(1) program present under BSD. It is much more general, and most importantly supports COFF style executables. It is easy to modify this program along two different lines, one way is for other types of executables, and another is for a task different from printing strings. 
This is possible because scanning a COFF file and printing strings are respectively a functional and a function; the scanner may be passed any suitable function (e.g. a disassenbler), and the function may be used with many different scanners. It is not an exact clone of strings(1); two new options are allowed, -p to print the pathname of the file before each string, and -c to print only null terminated strings, that is C strings. */ #define public /* extern */ #define private static #define reg register #include <ctype.h> #include <sys/types.h> #include <sys/stat.h> #include <stdio.h> extern long ftell(); #define StringsTRACE 0 #define StringsCOFF 1 /* COFF executable format */ #define StringsBSD 0 /* BSD executables */ #define StringsV7 0 /* V7 executables */ #define StringsX86 0 /* Xenix/Intel executables */ #define StringsMAX (512) #if (StringsAOUT) # include "a.out.h" #endif #if (StringsCOFF) # include "filehdr.h" # include "scnhdr.h" # include "ldfcn.h" #endif /* The options. All but the last are boolean. */ short unsigned dasha = 0; short unsigned dasho = 0; short unsigned dashp = 0; short unsigned dashc = 0; short unsigned dashmin = 4; /* This returns non zero if the given path accessible thru the given file is an executable image (or an archive of executable images) */ private short unsigned StringsAout(file,path) FILE *file; char *path; { /* Our first test is that the given file is a regular file, because we MUST be able to lseek(2) an executable (admittedly we could relax this precondition, but the great care adn effort needed is not probably worth the utility. 
*/ checkForFile: { struct stat stat; if (fstat(fileno(file),&stat) < 0) { perror(path); return 0; } # if (StringsTRACE) fprintf(stderr,"Aout: mode 0%o\n",stat.st_mode); # endif if ((stat.st_mode&S_IFMT) != S_IFREG) return 0; } # if (StringsCOFF) hasCoffFormat: { LDFILE *ldfile; short unsigned aout; if ((ldfile = ldopen(path,NULL)) == NULL) { extern int errno; if (errno != 0) perror(path); return 0; } # if (StringsTRACE) fprintf(stderr,"Aout: TYPE 0%o, f_magic 0%o\n", TYPE(ldfile),HEADER(ldfile).f_magic); # endif aout = (TYPE(ldfile) == ARTYPE) || ISCOFF(HEADER(ldfile).f_magic); ldaclose(ldfile); return aout; } # endif } /* This will apply the data extractor to the data section(s) of the executable with the given path and accessible thru the given file. It is guaranteed that if the file is stdin this procedure will not be called. */ private void StringsData(file,path,extractor) FILE *file; char *path; void (*extractor)(/*FILE *,char *,long*/); #if (StringsCOFF) { LDFILE *ldfile; scanAllArchiveMembers: for ( ldfile = ldopen(path,NULL); ldfile != NULL; ldfile = (ldclose(ldfile) == SUCCESS) ? 
NULL : ldopen(path,ldfile) ) if (ISCOFF(HEADER(ldfile).f_magic)) { short unsigned section; SCNHDR scnhdr; # if (StringsTRACE) fprintf(stderr,"Data: f_nscns %u\n",HEADER(ldfile).f_nscns); # endif ForAllSections: for (section = 1; section <= HEADER(ldfile).f_nscns; section++) if (ldshread(ldfile,section,&scnhdr) != SUCCESS) perror(path); else { # if (StringsTRACE) fprintf(stderr,"Data: section %u, s_flags 0%lo\n", section,scnhdr.s_flags); # endif skipNonDataSection: if ((scnhdr.s_flags & 0x0000000f) != STYP_REG || !(scnhdr.s_flags & STYP_DATA)) continue; # if (StringsTRACE) fprintf(stderr,"Data: s_scnptr 0x%08lx, s_size 0x%08lx\n", scnhdr.s_scnptr,scnhdr.s_size); # endif seekForSectionStart: if (ldsseek(ldfile,section) < 0) { perror(path); continue; } extractFromSection: (*extractor)(IOPTR(ldfile),path,scnhdr.s_size); } } } #endif /* This defines which characters are part of a good string. */ #define StringsGOOD(c) (isprint(c) || isspace(c)) /* This accumulates a good string, starting from the current file position for the given number of bytes. We consider a string terminated either by a newline or by a non string character, or if strict C type strings are wanted, by a null character, as we assume that non newline, non null terminated strings are spurious. */ private void StringsScan(file,path,bytes) FILE *file; char *path; long bytes; { static char string[StringsMAX]; reg char *s; reg int c; reg short unsigned l; # if (StringsTRACE) fprintf(stderr,"Scan: ftell 0x%08lx, bytes 0x%08lx\n",ftell(file),bytes); # endif for (s = string, l = 0; bytes != 0 && (c = getc(file)) != EOF; --bytes) { /* A potential string is terminated either by a newline or by a non string character. If this is a non terminal character, we add it to the string as long as the string max length is not overflowed. Note that if we only accept null terminated strings, newline is no longer considered to be a string terminator. 
*/ if (StringsGOOD(c) && (c != '\n' || dashc)) { if (s < (string + sizeof string - 1)) *s++ = c; l++; } /* The string has been terminated; if it is non empty, and longer than the prescribed minimum, and (only when flag -c is on) zero terminated, it is a bona fide string and it will be printed. Notice that with the following condition we will print also zero length strings IFF they are newline terminated; with a zero dashmin we would otherwise recognize a null string between every couple of bad characters. */ else if (s != string || c == '\n') { *s++ = '\n'; if (c == '\n') l++; *s++ = '\0'; if (l >= dashmin && (!dashc || c == '\0')) { if (dashp) printf("%s: ",path); if (dasho) printf("0x%08lx: ",ftell(file)-1-l); fputs(string,stdout); } s = string, l = 0; } } if (ferror(stdin)) perror(path); } /* Our job is easy; first we collect the options, then we apply the extractor, directly or thru the executable scanner, to each of the files whose name remain as arguments. */ /*ARGSUSED*/ public int main(argc,argv,envp) int argc; char **argv; char **envp; { parseOptions: for ( --argc,argv++; argc > 0 && (*argv)[0] == '-' && (*argv)[1] != '\0'; --argc, argv++ ) { register char *flags; for (flags = *argv + 1; *flags != '\0'; flags++) switch (*flags) { case 'o': dasho = 1; break; case 'a': dasha = 1; break; case 'p': dashp = 1; break; case 'c': dashc = 1; break; default: if (!isdigit(*flags)) { fputs("Syntax: strings [ -[acop] ] [ -nnnn ] [ - ] [ file ...]\n",stderr); exit(1); } dashmin = *flags-'0'; for (flags++; isdigit(*flags); flags++) dashmin = dashmin*10 + (*flags-'0'); --flags; if (dashmin < 1) dashmin = 1; } } /* A do because we must loop at least once on stdin, even if no arg path is given. */ if (argc <= 0) *argv = "stdin"; processInputs: do { /* If this is an actual file (not a pipe) and is an executable image (and option a is not specified) we will scan just the data sections, else we will scan all the file. 
An assumption here we guarantee is that StringsData will be called ONLY on a regular file to which lseek(2) can be applied, and this is ensured by Aout(). */ if (argc > 0 && freopen(*argv,"r",stdin) == NULL) { perror(argv[0]); exit(1); } if (dasha || !StringsAout(stdin,*argv)) StringsScan(stdin,*argv,0x0fffffffL); else StringsData(stdin,*argv,StringsScan); if (argc > 0) --argc, argv++; } while (argc > 0); fclose(stdin); exit(0); /*NOTREACHED*/ } SHAR_EOF fi # end of overwriting check # End of shell archive exit 0 -- Piercarlo "Peter" Grandi | ARPA: pcg%cs.aber.ac.uk@nsfnet-relay.ac.uk Dept of CS, UCW Aberystwyth | UUCP: ...!mcvax!ukc!aber-cs!pcg Penglais, Aberystwyth SY23 3BZ, UK | INET: pcg@cs.aber.ac.uk
dbw@mtunk.ATT.COM (Dave Wood) (05/11/89)
My Sys V machine supports COFF, but doesn't have any ISCOFF macro. Since I don't know the ldfcn(5) stuff, I don't know how to use the supplied functions to replace it. Suggestions?