[net.bugs.usg] directory access routines

gwyn@brl-tgr.ARPA (Doug Gwyn <gwyn>) (03/03/85)

BUG:
	There is no system-independent way to read directories
on UNIX System V or its precursors.

SOLUTION:
	Add the enclosed routines to the C library in future
releases (and update the System V System Interface Definition):

#!/bin/sh
# Self-unpacking archive format.  To unbundle, sh this file.
echo 'Credits' 1>&2
cat >'Credits' <<'END OF Credits'
This software is in the PUBLIC DOMAIN.  It is a completely rewritten
version of code first made available by the University of California at
Berkeley.  Berkeley considered this a transition aid to help convert
programs to use the new 4.2BSD directory structure (255-character file
names, etc.).  It is more generally useful than that, however, as UNIX
has long lacked a sensible interface for dealing with directories.
This implementation is provided as a public service to help UNIX sites
produce more portable source code, since this interface can now be made
available on all UNIX systems, not just those from Berkeley.

Please do not change the interface, as that defeats the whole purpose
of this package!

This implementation is provided by:

	Douglas A. Gwyn
	U.S. Army Ballistic Research Laboratory
	AMXBR-VLD-V
	Aberdeen Proving Ground, MD 21005

	(301)278-6647

	gwyn@brl-vld.arpa or {decvax,esquire,research}!brl-bmd!gwyn
END OF Credits
echo 'READ_ME' 1>&2
cat >'READ_ME' <<'END OF READ_ME'
	NOTES FOR C LIBRARY EXTENSIONS FOR DIRECTORY ACCESS


The standard C library lacks support for reading UNIX directories, so
historically programs had knowledge of UNIX directory structure hard-
wired into them.  When Berkeley changed the format of directories for
4.2BSD, it became necessary to change programs to work with the new
structure.  Fortunately Berkeley designed a small set of directory
access routines to encapsulate knowledge of the new directory format
so that user programs could deal with directory entries as an abstract
data type.  The interface to these routines is independent of the
particular implementation of directories on any given UNIX system.

Therefore it becomes possible to write portable applications that
search directories by restricting all directory access to these new
interface routines.  The sources supplied here are a total rewrite of
Berkeley's code and are in the PUBLIC DOMAIN.  They simulate the new
4.2BSD directory entries on systems having the original UNIX directory
structure.  These routines should be added to the standard C library
on ALL non-BSD UNIX systems and all existing and future code changed to
use this interface.  Once this is done, there will be no portability
problems associated with the difference in underlying directory
structures between 4.2BSD and other UNIX systems.

An additional benefit of these routines is that they buffer directory
input, which provides improved access speed.


			INSTALLATION INSTRUCTIONS


If your system predates 7th Edition UNIX you have problems.  Adapt the
instructions and source code as required, or better still upgrade your
system (a modern compiler and library can be made to run on an older
kernel with a modest amount of work).


For systems (such as pre-System III UNIXes) that do not support C's
"void" data type:

0)  Edit the file ./include/dir.h to insert the following line before
    the first occurrence of the word "void":

	typedef int	void;


For 7th Edition or later UNIXes (but NOT 4.2BSD):

1)  Copy the file ./include/dir.h to /usr/include (note that there is
    NOT a conflict with /usr/include/sys/dir.h).

2)  Copy the file ./man/directory.3c to /usr/man/p_man/man3,
    /usr/man/u_man/man3, or /usr/man/man3; then print the manual page
    via the command

	man 3 directory

    to see what the new routines are like.  (If you have a "catman"
    type of on-line manual, adapt these instructions accordingly.)

3)  Copy the files ./src/* to the "gen" subdirectory of your C library
    source directory, then remake the C library.  Alternatively you
    might just compile the new sources and insert their objects near
    the front of the C library /lib/libc.a using the archiver (seekdir
    must precede telldir and readdir).

4)  To verify installation, try compiling, linking, and running the
    program test/testdir.c.  This program searches the current
    directory "." for each file named as a program argument and prints
    `"FOO" found.' or `"FOO" not found.' where FOO is replaced by the
    name being looked for in the directory.  Try something like

	cd /usr/bin			# a multi-block directory
	WHEREVER/testdir FOO lint BAR f77 XYZZY

    which should produce the output

	"FOO" not found.
	"lint" found.
	"BAR" not found.
	"f77" found.
	"XYZZY" not found.

5)  Notify your users that all directory access must be through the
    new routines, and that documentation is available via

	man 3 directory

6)  Change all system sources that were accessing directories to use
    the new routines.  Nearly all such sources contain the line

	#include <sys/dir.h>

    so they should be easy to find.

7)  After a suitable changeover period, remove /usr/include/sys/dir.h
    to force use of the new routines for all directory access.
END OF READ_ME
echo 'include/dir.h' 1>&2
mkdir include
cat >'include/dir.h' <<'END OF include/dir.h'
/*
	<dir.h> -- definitions for 4.2BSD-compatible directory access

	last edit:	09-Jul-1983	D A Gwyn
*/

#define DIRBLKSIZ	512		/* size of directory block */
#define MAXNAMLEN	15		/* maximum filename length */
	/* NOTE:  MAXNAMLEN must be one less than a multiple of 4 */

/*
	One directory entry in 4.2BSD form, as handed back by readdir().
	The readdir() supplied with this package fills in a single static
	copy, so the contents are overwritten by the next readdir() call.
	d_name has room for MAXNAMLEN characters plus a terminating NUL.
*/
struct direct				/* data from readdir() */
	{
	long		d_ino;		/* inode number of entry */
	unsigned short	d_reclen;	/* length of this record */
	unsigned short	d_namlen;	/* length of string in d_name */
	char		d_name[MAXNAMLEN+1];	/* name of file */
	};

/*
	State of one open directory stream.  opendir() allocates it and
	closedir() frees it.  dd_buf holds the directory block most
	recently read from dd_fd; once dd_loc reaches dd_size (both start
	at 0) the buffer is used up and readdir() reads another block.
*/
typedef struct
	{
	int	dd_fd;			/* file descriptor */
	int	dd_loc;			/* offset in block */
	int	dd_size;		/* amount of valid data */
	char	dd_buf[DIRBLKSIZ];	/* directory block */
	}	DIR;			/* stream data from opendir() */

/* entry points of the directory access package */
extern DIR		*opendir();	/* returns NULL on failure */
extern struct direct	*readdir();	/* returns NULL at end or on error */
extern long		telldir();	/* position usable with seekdir() */
extern void		seekdir();
extern void		closedir();

/* rewinddir is a macro: it repositions the stream to offset 0 */
#define rewinddir( dirp )	seekdir( dirp, 0L )
END OF include/dir.h
echo 'man/directory.3c' 1>&2
mkdir man
cat >'man/directory.3c' <<'END OF man/directory.3c'
.TH DIRECTORY 3 "Standard Extension"
.SH NAME
opendir, readdir, telldir, seekdir, rewinddir, closedir \- directory operations
.SH SYNOPSIS
.B #include <dir.h>
.PP
.B DIR \(**opendir (filename)
.br
.B char \(**filename;
.PP
.B struct direct \(**readdir (dirp)
.br
.B DIR \(**dirp;
.PP
.B long telldir (dirp)
.br
.B DIR \(**dirp;
.PP
.B void seekdir (dirp, loc)
.br
.B DIR \(**dirp;
.br
.B long loc;
.PP
.B void rewinddir (dirp)
.br
.B DIR \(**dirp;
.PP
.B void closedir (dirp)
.br
.B DIR \(**dirp;
.SM
.SH DESCRIPTION
.I Opendir\^
opens the directory named by
.I filename\^
and associates a
.I directory stream\^
with it.
.I Opendir\^
returns a pointer to be used to identify the
.I directory stream\^
in subsequent operations.
The pointer
.SM
.B NULL
is returned if
.I filename\^
cannot be accessed or is not a directory,
or if it cannot
.IR malloc (3)
enough memory to hold a
.SM
.I DIR\^
structure.
.PP
.I Readdir\^
returns a pointer to the next directory entry.
It returns
.SM
.B NULL
upon reaching the end of the directory or
upon detecting an invalid
.I seekdir\^
operation.
.PP
.I Telldir\^
returns the current location associated with the named
.I directory stream.
.PP
.I Seekdir\^
sets the position of the next
.I readdir\^
operation on the
.I directory stream.
The new position reverts to the one associated with the
.I directory stream\^
when the
.I telldir\^
operation from which
.I loc\^
was obtained was performed.
Values returned by
.I telldir\^
are good only for the lifetime of
the DIR pointer from which they are derived.
If the directory is closed and then reopened,
the 
.I telldir\^
value may be invalidated
due to undetected directory compaction.
It is safe to use a previous
.I telldir\^
value immediately after a call to
.I opendir\^
and before any calls to
.I readdir.
.PP
.I Rewinddir\^
resets the position of the named
.I directory stream\^
to the beginning of the directory.
.PP
.I Closedir\^
closes the named
.I directory stream\^
and frees the
.SM
.I DIR\^
structure.
.SH EXAMPLE
Sample code which searches a directory for entry ``name'':
.PP
.br
	dirp = opendir( "." );
.br
	while ( (dp = readdir( dirp )) != NULL )
.br
		if ( strcmp( dp->d_name, name ) == 0 )
.br
			{
.br
			closedir( dirp );
.br
			return FOUND;
.br
			}
.br
	closedir( dirp );
.br
	return NOT_FOUND;
.SH "SEE ALSO"
open(2), close(2), read(2), lseek(2),
scandir(3B)
.SH WARNINGS
.I Rewinddir\^
is implemented as a macro,
so its function address cannot be taken.
.PP
All UNIX programs which examine directories
should be converted to use this package,
since directory format is no longer the same on all UNIX systems.
Unfortunately,
this package is not yet
supplied with all UNIX systems,
although public-domain source code is available
from BRL.
END OF man/directory.3c
echo 'src/Read_Me' 1>&2
mkdir src
cat >'src/Read_Me' <<'END OF src/Read_Me'
These routines implement the portable directory access interface
introduced in 4.2BSD.  They are in the public domain.

Programs that need to read directories must be changed to use these
directory access routines (see also ../include/dir.h), which should
be bundled into the standard system C library for convenience.

/usr/include/sys/dir.h should be removed to encourage conversion.
END OF src/Read_Me
echo 'src/closedir.c' 1>&2
cat >'src/closedir.c' <<'END OF src/closedir.c'
/*
	closedir -- C library extension routine

	last edit:	21-Jan-1984	D A Gwyn
*/

#include	<dir.h>

extern void	free();
extern int	close();

/*
	Release a directory stream:  close its file descriptor (the
	result of close() is deliberately discarded) and then free the
	DIR storage.  The stream must not be used after this call.
*/
void
closedir( dirp )
	DIR		*dirp;		/* stream from opendir() */
	{
	register int	fd = dirp->dd_fd;	/* fetch before freeing */

	(void)close( fd );
	free( (char *)dirp );
	}
END OF src/closedir.c
echo 'src/opendir.c' 1>&2
cat >'src/opendir.c' <<'END OF src/opendir.c'
/*
	opendir -- C library extension routine

	last edit:	09-Jul-1983	D A Gwyn
*/

#include	<dir.h>
#include	<sys/types.h>
#include	<sys/stat.h>

#ifdef	BRL
#define open	_open			/* avoid emulation */
#endif

extern char	*malloc();
extern int	open(), close(), fstat();

#define NULL	0

/*
	Open the named directory for reading and return a freshly
	allocated stream for it.  Returns NULL if the file cannot be
	opened, cannot be fstat()ed, is not a directory, or if malloc()
	fails; in the last three cases the descriptor is closed again.
*/
DIR *
opendir( filename )
	char		*filename;	/* name of directory */
	{
	struct stat	info;		/* filled in by fstat() */
	register int	desc;		/* descriptor from open() */
	register DIR	*stream;	/* -> new stream storage */

	desc = open( filename, 0 );
	if ( desc < 0 )
		return NULL;

	if ( fstat( desc, &info ) >= 0
	  && (info.st_mode & S_IFMT) == S_IFDIR
	   )	{
		stream = (DIR *)malloc( sizeof(DIR) );

		if ( stream != NULL )
			{
			stream->dd_fd = desc;
			stream->dd_size = 0;	/* empty buffer, so the */
			stream->dd_loc = 0;	/* first readdir() refills */
			return stream;
			}
		}

	(void)close( desc );		/* common failure exit */
	return NULL;
	}
END OF src/opendir.c
echo 'src/readdir.c' 1>&2
cat >'src/readdir.c' <<'END OF src/readdir.c'
/*
	readdir -- C library extension routine for non-BSD UNIX

	last edit:	09-Jul-1983	D A Gwyn
*/

#include	<dir.h>
#include	<sys/types.h>

extern char	*strncpy();
extern int	read(), strlen();

#define NULL	0

#define DIRSIZ	14
struct olddir
	{
	ino_t	od_ino; 		/* inode */
	char	od_name[DIRSIZ];	/* filename */
	};

/*
	Return the next in-use entry of an old-format (fixed 16-byte
	slot) directory, converted to a simulated 4.2BSD struct direct.

	Slots whose inode number is 0 (deleted entries) are skipped.
	Returns NULL at end of directory or when read() fails.  The
	result points at static storage that the next call overwrites.

	The byte count from read() is kept in a local and stored into
	dd_size only when it is positive.  Storing a failed read()'s -1
	there would leave the stream with a negative "valid data" count,
	which makes telldir() report a position one byte too high.
*/
struct direct *
readdir( dirp )
	register DIR		*dirp;	/* stream from opendir() */
	{
	register struct olddir	*dp;	/* -> directory data */
	register int		nread;	/* result of read() */

	for ( ; ; )
		{
		/* buffer used up:  mark it empty so it gets refilled */
		if ( dirp->dd_loc >= dirp->dd_size )
			dirp->dd_loc = dirp->dd_size = 0;

		if ( dirp->dd_size == 0 )	/* refill buffer */
			{
			nread = read( dirp->dd_fd, dirp->dd_buf,
				      DIRBLKSIZ
				    );
			if ( nread <= 0 )
				return NULL;	/* error or EOF */

			dirp->dd_size = nread;
			}

		dp = (struct olddir *)&dirp->dd_buf[dirp->dd_loc];
		dirp->dd_loc += sizeof(struct olddir);

		if ( dp->od_ino != 0 )	/* not deleted entry */
			{
			static struct direct	dir;	/* simulated */

			dir.d_ino = dp->od_ino;
			/* od_name is not NUL-terminated when it is
			   exactly DIRSIZ characters long */
			(void)strncpy( dir.d_name, dp->od_name, DIRSIZ
				     );
			dir.d_name[DIRSIZ] = '\0';
			dir.d_namlen = strlen( dir.d_name );
			/* size of the structure with d_name shrunk to
			   the name plus its NUL, rounded up to a
			   multiple of 4 */
			dir.d_reclen = sizeof(struct direct)
				     - MAXNAMLEN + 3
				     + dir.d_namlen - dir.d_namlen % 4;
			return &dir;	/* -> simulated structure */
			}
		}
	}
END OF src/readdir.c
echo 'src/seekdir.c' 1>&2
cat >'src/seekdir.c' <<'END OF src/seekdir.c'
/*
	seekdir -- C library extension routine

	last edit:	21-Jan-1984	D A Gwyn
*/

#include	<dir.h>

extern long	lseek();

#define NULL	0

/*
	Reposition a directory stream to a location previously obtained
	from telldir().  Nothing is done if the stream is already there.
	Otherwise the file is positioned at the start of the directory
	block containing loc, the buffer is discarded, and entries are
	read and thrown away until the in-block offset is reached.
*/
void
seekdir( dirp, loc )
	register DIR	*dirp;		/* stream from opendir() */
	long		loc;		/* position from telldir() */
	{
	long		within; 	/* offset of loc inside its block */

	if ( loc == telldir( dirp ) )
		return; 		/* already positioned */

	within = loc % DIRBLKSIZ;

	/* go to the start of the block, then force a refill */
	(void)lseek( dirp->dd_fd, loc - within, 0 );
	dirp->dd_size = 0;
	dirp->dd_loc = 0;

	for ( ; dirp->dd_loc < within; )	/* skip entries */
		if ( readdir( dirp ) == NULL )
			break;		/* "can't happen" */
	}
END OF src/seekdir.c
echo 'src/telldir.c' 1>&2
cat >'src/telldir.c' <<'END OF src/telldir.c'
/*
	telldir -- C library extension routine

	last edit:	09-Jul-1983	D A Gwyn
*/

#include	<dir.h>

extern long	lseek();

/*
	Report the stream's current position as a byte offset into the
	directory file:  the descriptor's file offset less the part of
	the buffer that readdir() has not yet consumed.
*/
long
telldir( dirp )
	DIR	*dirp;			/* stream from opendir() */
	{
	long	filepos;		/* current offset of dd_fd */
	long	unread; 		/* buffered bytes not yet used */

	filepos = lseek( dirp->dd_fd, 0L, 1 );
	unread = (long)dirp->dd_size - (long)dirp->dd_loc;

	return filepos - unread;
	}
END OF src/telldir.c
echo 'test/testdir.c' 1>&2
mkdir test
cat >'test/testdir.c' <<'END OF test/testdir.c'
/*
	testdir -- test for C library directory access extensions

	last edit:	09-Jul-1983	D A Gwyn
*/

#include	<dir.h>
#include	<stdio.h>

extern void	exit();
extern int	strcmp(), strlen();

/*
	Look up each program argument in the current directory and
	report whether it was found.  The directory is opened once and
	rewound after every lookup.  The exit status is the number of
	names that were not found (1 if "." cannot be opened).
*/
main( argc, argv )
	int			argc;
	char			**argv;
	{
	register DIR		*dirp;
	register struct direct	*dp;
	int			nerrs = 0;	/* total not found */
	int			i;		/* argument index */

	dirp = opendir( "." );
	if ( dirp == NULL )
		{
		(void)fprintf( stderr, "Cannot open \".\" directory\n"
			     );
		exit( 1 );
		}

	for ( i = 1; i < argc; ++i )
		{
		char		*want = argv[i];
		register int	len = strlen( want );

		/* scan until a matching entry or end of directory */
		for ( dp = readdir( dirp );
		      dp != NULL;
		      dp = readdir( dirp )
		    )
			if ( dp->d_namlen == len
			  && strcmp( dp->d_name, want ) == 0
			   )
				break;

		if ( dp != NULL )
			(void)printf( "\"%s\" found.\n", want );
		else	{
			(void)printf( "\"%s\" not found.\n", want );
			++nerrs;
			}

		rewinddir( dirp );
		}

	closedir( dirp );
	exit( nerrs );
	}
END OF test/testdir.c