[gnu.utils.bug] GNU diff 1.4 opens directories twice, and closes them three times

eggert@SM.UNISYS.COM (Paul Eggert) (11/17/88)

Suppose I invoke GNU diff 1.4 with "diff -r A B".  As it traverses the
directory trees A and B, it opens each directory twice (once with open() and
once with opendir()) and closes it three times (twice with close() and once
with closedir()).  The following change causes GNU diff to open and close each
directory only once.  The change affects only performance, except (1) the fatal
error if the second directory open failed is no longer possible, and (2) no
attempt is made to open B/.../x if A/.../x cannot be opened.

This patch also removes some lint: 0 where (char*)0 is needed,
and an unused variable ``i''.

This was tested on a Sun/3-60 running SunOS 4.0.

The change comes in two parts.

===== 1.  Replace the definition of compare_files in diff.c with the following:


int
compare_files (dir0, name0, dir1, name1, depth)
     char *dir0, *dir1;
     char *name0, *name1;
     int depth;
{
  struct file_data inf[2];
  register int i;
  int val;

  /* If this is directory comparison, perhaps we have a file
     that exists only in one of the directories.
     If so, just print a message to that effect.  */

  if (! entire_new_file_flag && (name0 == 0 || name1 == 0))
    {
      char *name = name0 == 0 ? name1 : name0;
      char *dir = name0 == 0 ? dir1 : dir0;
      message ("Only in %s: %s\n", dir, name);
      /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
      return 1;
    }

  /* Mark any nonexistent file with -1 in the desc field.  */

  inf[0].desc = name0 == 0 ? -1 : 0;
  inf[1].desc = name1 == 0 ? -1 : 0;

  /* Now record the full name of each file, including nonexistent ones.  */

  if (name0 == 0)
    name0 = name1;
  if (name1 == 0)
    name1 = name0;

  inf[0].name = dir0 == 0 ? name0 : concat (dir0, "/", name0);
  inf[1].name = dir1 == 0 ? name1 : concat (dir1, "/", name1);

  /* Stat the files.  Record whether they are directories.  */

  for (i = 0; i <= 1; i++)
    {
      inf[i].stat.st_size = 0;
      inf[i].stat.st_mtime = 0;
      inf[i].dir_p = 0;

      if (inf[i].desc != -1
	  && strcmp (inf[i].name, "-"))
	{
	  char *filename = inf[i].name;

	  if (stat (filename, &inf[i].stat) < 0)
	    {
	      perror_with_name (filename);
	      val = 2;
	      goto done;
	    }

	  inf[i].dir_p = (S_IFDIR == (inf[i].stat.st_mode & S_IFMT));
	}
    }

  if (name0 == 0)
    inf[0].dir_p = inf[1].dir_p;
  if (name1 == 0)
    inf[1].dir_p = inf[0].dir_p;


  /* See if the two named files are actually the same physical file.
     If so, we know they are identical without actually reading them.  */

  if (inf[0].stat.st_ino == inf[1].stat.st_ino
      && inf[0].stat.st_dev == inf[1].stat.st_dev)
    {
      val = 0;
    }
  else if (inf[0].dir_p && inf[1].dir_p)
    {

      /* If both are directories, compare the files in them.  */

      if (depth > 0 && !recursive)
	{
	  /* But don't compare dir contents one level down
	     unless -r was specified.  */
	  message ("Common subdirectories %s and %s\n",
		   inf[0].name, inf[1].name);
	  val = 0;
	}
      else
	{
	  val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth);
	}

    }
  else if (depth > 0 && (inf[0].dir_p || inf[1].dir_p))
    {
      /* Perhaps we have a subdirectory that exists only in one directory.
	 If so, just print a message to that effect.  */

      if (inf[0].desc == -1 || inf[1].desc == -1)
	{
	  char *dir = (inf[0].desc == -1) ? dir1 : dir0;
	  message ("Only in %s: %s\n", dir, name0);
	}
      else
	{
	  /* We have a subdirectory in one directory
	     and a file in the other.  */

	  if (inf[0].dir_p)
	    message ("%s is a directory but %s is not\n",
		     inf[0].name, inf[1].name);
	  else
	    message ("%s is a directory but %s is not\n",
		     inf[1].name, inf[0].name);
	}

      val = 1;
    }
  else
    {
      if (depth == 0 && (inf[0].dir_p || inf[1].dir_p))
	{

	  /* If only one is a directory, and it was given in the command line,
	     use the file in that dir whose basename matches the other's.  */

	  int dir_arg = (inf[0].dir_p ? 0 : 1);
	  int fnm_arg = (inf[0].dir_p ? 1 : 0);
	  char *p = rindex (inf[fnm_arg].name, '/');
	  char *filename = concat (inf[dir_arg].name,  "/",
				   p ? p+1 : inf[fnm_arg].name);

	  free (inf[dir_arg].name);
	  inf[dir_arg].name = filename;
	  if (stat (inf[dir_arg].name, &inf[dir_arg].stat) < 0)
	    {
	      perror_with_name (filename);
	      val = 2;
	      goto done;
	    }

	  inf[dir_arg].dir_p
	    = (S_IFDIR == (inf[dir_arg].stat.st_mode & S_IFMT));
	  if (inf[dir_arg].dir_p)
	    {
	      error ("%s is a directory but %s is not",
		     inf[dir_arg].name, inf[fnm_arg].name);
	      val = 1;
	      goto done;
	    }

	}

      /* Both exist, and neither is a directory.
	 Open the files and record their descriptors.  */

      for (i = 0; i <= 1; i++)
	{
	  if (inf[i].desc == -1)
	    ;
	  else if (!strcmp (inf[i].name, "-"))
	    inf[i].name = "Standard Input";
	  else
	    {
	      char *filename = inf[i].name;

	      inf[i].desc = open (filename, O_RDONLY, 0);
	      if (0 > inf[i].desc)
		{
		  perror_with_name (filename);
		  if (i > 0 && inf[0].desc > 0)
		    close (inf[0].desc);
		  val = 2;
		  goto done;
		}
	    }
	}

      val = diff_2_files (inf, depth);

      if (inf[0].desc > 0)
        close (inf[0].desc);
      if (inf[1].desc > 0)
        close (inf[1].desc);
    }

  if (val == 0 && print_file_same_flag && !inf[0].dir_p)
    message ("Files %s and %s are identical\n",
	     inf[0].name, inf[1].name);

  /* Now the comparison has been done, if no error prevented it,
     and VAL is the value this function will return.  */

 done:
  fflush (stdout);

  if (dir0 != 0)
    free (inf[0].name);
  if (dir1 != 0)
    free (inf[1].name);

  return val;
}


===== 2.  Apply the following patches.


*** diff1.4/diff.c	Tue Nov  8 18:24:04 1988
--- diffnew/diff.c	Tue Nov  8 13:57:59 1988
***************
*** 72,76 ****
       char *argv[];
  {
-   int i;
    int val;
    int c;
--- 72,75 ----
***************
*** 293,297 ****
    switch_string = option_list (argv + 1, optind - 1);
  
!   val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
  
    /* Print any messages that were saved up for last.  */
--- 292,296 ----
    switch_string = option_list (argv + 1, optind - 1);
  
!   val = compare_files ((char*)0, argv[optind], (char*)0, argv[optind + 1], 0);
  
    /* Print any messages that were saved up for last.  */



*** diff1.4/diff.h	Mon Nov 14 18:00:24 1988
--- diffnew/diff.h	Mon Nov 14 17:58:06 1988
***************
*** 311,314 ****
--- 311,315 ----
  void *xcalloc();
  char *concat ();
+ char *rindex ();
  void free ();
  


*** diff1.4/dir.c	Wed Nov 16 19:23:40 1988
--- diffnew/dir.c	Wed Nov 16 19:16:06 1988
***************
*** 54,59 ****
    if (!reading)
      {
!       pfatal_with_name (dirname);
!       return;
      }
  
--- 54,60 ----
    if (!reading)
      {
!       perror_with_name (dirname);
!       dirdata.length = -1;
!       return dirdata;
      }
  
***************
*** 119,123 ****
     DEPTH is the current depth in recursion.
  
!    Returns the maximum of all the values returned by HANDLE_FILE.  */
  
  int
--- 120,125 ----
     DEPTH is the current depth in recursion.
  
!    Returns the maximum of all the values returned by HANDLE_FILE,
!    or 2 if a directory could not be opened.  */
  
  int
***************
*** 133,137 ****
--- 135,143 ----
    /* Get sorted contents of both dirs.  */
    data1 = dir_sort (name1);
+   if (data1.length == -1)
+     return 2;
    data2 = dir_sort (name2);
+   if (data2.length == -1)
+     return 2;
  
    i1 = 0;
***************
*** 176,180 ****
  	{
  	  /* Next filename in dir 1 is less; that is a file in dir 1 only.  */
! 	  v1 = handle_file (name1, data1.files[i1], name2, 0, depth + 1);
  	  i1++;
  	}
--- 182,186 ----
  	{
  	  /* Next filename in dir 1 is less; that is a file in dir 1 only.  */
! 	  v1 = handle_file (name1, data1.files[i1], name2, (char*)0, depth + 1);
  	  i1++;
  	}
***************
*** 182,186 ****
  	{
  	  /* Next filename in dir 2 is less; that is a file in dir 2 only.  */
! 	  v1 = handle_file (name1, 0, name2, data2.files[i2], depth + 1);
  	  i2++;
  	}
--- 188,192 ----
  	{
  	  /* Next filename in dir 2 is less; that is a file in dir 2 only.  */
! 	  v1 = handle_file (name1, (char*)0, name2, data2.files[i2], depth + 1);
  	  i2++;
  	}