ast@cs.vu.nl (Andy Tanenbaum) (12/08/87)
I have written a program to recursively compare the contents of two given
directories, file for file. The program descends the tree and reports about
files that are missing or different. Some day, if I ever get around to
producing V1.3 of MINIX, I will make a tree of the current version next to
the V1.2 tree, and then run this program to get a list of all files that
are different. Then I can make diff listings etc. In reality, the reason
I wrote it however, is that I had just copied my MINIX tree from one part
of the disk to another, and I wanted to make sure nothing was forgotten.
I am sure there are other uses as well. One could no doubt write a shell
script to do this same thing, or perhaps use find, but this program is
much faster, being able to compare two 8 megabyte trees in about 12
minutes on a Z-248.
Please post any bugs you find.
Andy Tanenbaum (ast@cs.vu.nl)
----------------------------- treecmp.c ---------------------------------
/* treecmp - compare two trees Author: Andy Tanenbaum */
/* This program recursively compares two trees and reports on differences.
* It can be used, for example, when a project consists of a large number
* of files and directories. When a new release (i.e., a new tree) has been
* prepared, the old and new tree can be compared to give a list of what has
* changed. The algorithm used is that the first tree is recursively
* descended and for each file or directory found, the corresponding one in
* the other tree checked. The two arguments are not completely symmetric
* because the first tree is descended, not the second one, but reversing
* the arguments will still detect all the differences, only they will be
* printed in a different order. The program needs lots of stack space
* because routines with local arrays are called recursively. The call is
* treecmp [-v] dir1 dir2
* The -v flag (verbose) prints the directory names as they are processed.
*/
#include <stat.h>
#define BUFSIZE 4096 /* size of file buffers */
#define MAXPATH 128 /* longest acceptable path */
#define DIRENTLEN 14 /* number of characters in a file name */
struct dirstruct { /* layout of a directory entry */
unsigned inum;
char fname[DIRENTLEN];
};
struct stat stat1, stat2; /* stat buffers */
char buf1[BUFSIZE]; /* used for comparing bufs */
char buf2[BUFSIZE]; /* used for comparing bufs */
int verbose; /* set if mode is verbose */
main(argc, argv)
int argc;
char *argv[];
{
char *p;
if (argc < 3 || argc > 4) usage();
p = argv[1];
if (argc == 4) {
if (*p == '-' && *(p+1) == 'v')
verbose++;
else
usage();
}
if (argc == 3)
compare(argv[1], argv[2]);
else
compare(argv[2], argv[3]);
exit(0);
}
compare(f1, f2)
char *f1, *f2;
{
/* This is the main comparision routine. It gets two path names as arguments
* and stats them both. Depending on the results, it calls other routines
* to compare directories or files.
*/
int type1, type2;
if (stat(f1, &stat1) < 0) {
printf("Cannot stat %s\n", f1);
return;
}
if (stat(f2, &stat2) < 0) {
printf("Missing file: %s\n", f2);
return;
}
/* Examine the types of the files. */
type1 = stat1.st_mode & S_IFMT;
type2 = stat2.st_mode & S_IFMT;
if (type1 != type2) {
printf("Type diff: %s and %s\n", f1, f2);
return;
}
/* The types are the same. */
switch(type1) {
case S_IFREG: regular(f1, f2);
break;
case S_IFDIR: directory(f1, f2);
break;
case S_IFCHR:
case S_IFBLK: break;
default: printf("Unknown file type %o\n", type1);
}
return;
}
regular(f1, f2)
char *f1, *f2;
{
/* Compare to regular files. If they are different, complain. */
int fd1, fd2, n1, n2, i;
unsigned bytes;
long count;
char *p1, *p2;
if (stat1.st_size != stat2.st_size) {
printf("Size diff: %s and %s\n", f1, f2);
return;
}
/* The sizes are the same. We actually have to read the files now. */
fd1 = open(f1, 0);
if (fd1 < 0) {
printf("Cannot open %s for reading\n", f1);
return;
}
fd2 = open(f2, 0);
if (fd2 < 0) {
printf("Cannot open %s for reading\n", f2);
return;
}
count = stat1.st_size;
while (count > 0L) {
bytes = (unsigned) (count > BUFSIZE ? BUFSIZE : count); /* rd count */
n1 = read(fd1, buf1, bytes);
n2 = read(fd2, buf2, bytes);
if (n1 != n2) {
printf("Length diff: %s and %s\n", f1, f2);
close(fd1);
close(fd2);
return;
}
/* Compare the buffers. */
i = n1;
p1 = buf1;
p2 = buf2;
while (i--) {
if (*p1++ != *p2++) {
printf("File diff: %s and %s\n", f1, f2);
close(fd1);
close(fd2);
return;
}
}
count -= n1;
}
close(fd1);
close(fd2);
}
directory(f1, f2)
char *f1, *f2;
{
/* Recursively compare two directories by reading them and comparing their
* contents. The order of the entries need not be the same.
*/
int fd1, fd2, n1, n2, ent1, ent2, i, used1 = 0, used2 = 0;
char *dir1buf, *dir2buf;
char name1buf[MAXPATH], name2buf[MAXPATH];
struct dirstruct *dp1, *dp2;
unsigned dir1bytes, dir2bytes;
extern char *malloc();
/* Allocate space to read in the directories */
dir1bytes = (unsigned) stat1.st_size;
dir1buf = malloc(dir1bytes);
if (dir1buf == 0) {
printf("Cannot process directory %s: out of memory\n", f1);
return;
}
dir2bytes = (unsigned) stat2.st_size;
dir2buf = malloc(dir2bytes);
if (dir2buf == 0) {
printf("Cannot process directory %s: out of memory\n", f2);
free(dir1buf);
return;
}
/* Read in the directories. */
fd1 = open(f1, 0);
if (fd1 > 0) n1 = read(fd1, dir1buf, dir1bytes);
if (fd1 < 0 || n1 != dir1bytes) {
printf("Cannot read directory %s\n", f1);
free(dir1buf);
free(dir2buf);
if (fd1 > 0) close(fd1);
return;
}
close(fd1);
fd2 = open(f2, 0);
if (fd2 > 0) n2 = read(fd2, dir2buf, dir2bytes);
if (fd2 < 0 || n2 != dir2bytes) {
printf("Cannot read directory %s\n", f2);
free(dir1buf);
free(dir2buf);
close(fd1);
if (fd2 > 0) close(fd2);
return;
}
close(fd2);
/* Linearly search directories */
ent1 = dir1bytes/sizeof(struct dirstruct);
dp1 = (struct dirstruct *) dir1buf;
for (i = 0; i < ent1; i++) {
if (dp1->inum != 0) used1++;
dp1++;
}
ent2 = dir2bytes/sizeof(struct dirstruct);
dp2 = (struct dirstruct *) dir2buf;
for (i = 0; i < ent2; i++) {
if (dp2->inum != 0) used2++;
dp2++;
}
if (verbose) printf("Directory %s: %d entries\n", f1, used1);
/* Check to see if any entries in dir2 are missing from dir1. */
dp1 = (struct dirstruct *) dir1buf;
dp2 = (struct dirstruct *) dir2buf;
for (i = 0; i < ent2; i++) {
if (dp2->inum == 0 || strcmp(dp2->fname, ".") == 0 ||
strcmp(dp2->fname, "..") == 0) {
dp2++;
continue;
}
check(dp2->fname, dp1, ent1, f1);
dp2++;
}
/* Recursively process all the entries in dir1. */
dp1 = (struct dirstruct *) dir1buf;
for (i = 0; i < ent1; i++) {
if (dp1->inum == 0 || strcmp(dp1->fname, ".") == 0 ||
strcmp(dp1->fname, "..") == 0) {
dp1++;
continue;
}
if (strlen(f1) + DIRENTLEN >= MAXPATH) {
printf("Path too long: %s\n", f1);
free(dir1buf);
free(dir2buf);
return;
}
if (strlen(f2) + DIRENTLEN >= MAXPATH) {
printf("Path too long: %s\n", f2);
free(dir1buf);
free(dir2buf);
return;
}
strcpy(name1buf, f1);
strcat(name1buf, "/");
strncat(name1buf, dp1->fname, DIRENTLEN);
strcpy(name2buf, f2);
strcat(name2buf, "/");
strncat(name2buf, dp1->fname, DIRENTLEN);
/* Here is the recursive call to process an entry. */
compare(name1buf, name2buf); /* recursive call */
dp1++;
}
free(dir1buf);
free(dir2buf);
}
check(s, dp1, ent1, f1)
char *s;
struct dirstruct *dp1;
int ent1;
char *f1;
{
/* See if the file name 's' is present in the directory 'dirbuf'. */
int i;
for (i = 0; i < ent1; i++) {
if (strncmp(dp1->fname, s, DIRENTLEN) == 0) return;
dp1++;
}
printf("Missing file: %s/%s\n", f1, s);
}
usage()
{
printf("Usage: treecmp [-v] dir1 dir2\n");
exit(0);
}