[comp.lang.c] Self-replication

djones@megatest.UUCP (Dave Jones) (03/22/89)

From article <2179@fireball.cs.vu.nl>, by maart@cs.vu.nl (Maarten Litmaath):
> frisk@rhi.hi.is (Fridrik Skulason) writes:
> \	   "write a one line C program that produces the source code
> \	   to itself when run."
> 


This problem, without the one-liner-clause, is posed here and there from
time to time. It goes back at least to the early days of electronic
computers.

To me, the really interesting problem is, Write a _portable_
program which writes its source code exactly.

Why portable?  

1. Because it seems, for most people at least, it is much more difficult
to do that way; but, more importantly, because

2. introducing ASCII codes is an "escape clause" -- (yeah that's a pun) --
which allows the puzzler to avoid various theoretically instructive
problems about encoding and decoding. If you use ASCII (or EBCDIC or
whatever) encoding, the problem is practically solved before you start.

By the way, if you find this stuff to be fun, I recommend Douglas
Hofstadter's book _Goedel,_Escher,_Bach_.  You might even want to read
it before you attempt the problem.  Pay particular attention to the
part about "Quining".


SPOILER:



I'm including a shar-file of an illustrative program I wrote about
three or four years ago. It is set up to be (in my arguably weird view)
elegant and instructive, not short. You might not think it was elegant
if it were written by hand; it is quite unreadable, even though it
has comments (which it faithfully replicates). But it is not hand-written.
It's bootstrapped from four files which are very straight-forward.

   1,2) Two procedures. One that expresses a coded string as a file
        (express_string), the other which reads a file into a string
        (file_to_string),

   3)   A trivial program which uses the two procedures to express a
        coded file (express_file),

   4)   A very short coded file called selfrep_main.cs.

The procedure express_string is used both in the boot-strap process
and in the final program.

It's all done with Unix, using "make", "cat", "fstat", etc., but even
if you don't own a Unix, you'll get the idea.

The result is in selfrep.c.

-- snip -- snip -- snip -- snip -- snip -- snip -- snip -- snip -- snip
#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g.  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
#
# Contents:  
#               Makefile selfrep.c selfrep.cs selfrep_main.cs
#               express_string.c express_file.c file_to_string.c
#
# Wrapped by djones@goofy on Tue Mar 21 18:22:14 1989
# Use a fixed search path of system directories for the commands this
# archive runs below (test, echo, sed, wc).
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
# Makefile: bootstraps selfrep.c from the coded source and proves the result.
#   - "proof" pipes the program's output into diff against selfrep.c, so make
#     fails when the two differ (the old recipe ended in "; rm -f temp.c",
#     which discarded diff's exit status).
#   - Freshly built programs are run as ./name so the build does not depend
#     on "." being in PATH.
# The payload size below (398) must match the text between the sed line and
# END_OF_Makefile after the leading X is stripped.
if test -f Makefile -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"Makefile\"
else
echo shar: Extracting \"Makefile\" \(398 characters\)
sed "s/^X//" >Makefile <<'END_OF_Makefile'
X
Xproof:  selfrep
X	./selfrep | diff - selfrep.c
X
Xselfrep: selfrep.c
X	cc selfrep.c -o selfrep
X
Xselfrep.c: express_file selfrep.cs
X	./express_file selfrep.cs > selfrep.c
X
Xselfrep.cs: selfrep_main.cs express_string.c
X	cat selfrep_main.cs express_string.c > selfrep.cs
X
Xexpress_file: express_file.o express_string.o file_to_string.o
X	cc express_file.o express_string.o file_to_string.o -o express_file
X
END_OF_Makefile
if test 398 -ne `wc -c <Makefile`; then
    echo shar: \"Makefile\" unpacked with wrong size!
fi
# end of overwriting check
fi
# selfrep.c: the finished self-replicating program.  Per the Makefile it is
# the output of express_file run on selfrep.cs, and the "proof" target
# compares the compiled program's output against this very file.  The text
# between the sed line and END_OF_selfrep.c must therefore stay byte-for-byte
# as is (its size, 2075, is checked after extraction).
if test -f selfrep.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"selfrep.c\"
else
echo shar: Extracting \"selfrep.c\" \(2075 characters\)
sed "s/^X//" >selfrep.c <<'END_OF_selfrep.c'
X/* This program prints its source. */
X
Xmain(argc, argv)
X  char** argv;
X{
X  char * dna =
X
X"/* This program prints its source. */\n\nmain(argc, argv)\n\
X  char** argv;\n{\n  char * dna =\n\nZ;\n\n\n  express_stri\
Xng(dna);\n  exit(0);\n}\n\n\n/* Express the string, substit\
Xuting a quotation of the string \n * for the character 'Z'.\
X  Breaks the literal into lines of no\n * more than 60 char\
Xs.\n */\nexpress_string(str)\n  char* str;\n{\n  char* ptr \
X= str;\n  char ch;\n  int is_quoted = 0;\n\n  while(ch = *p\
Xtr++)\n    {\n\n      if(ch == 'Z' && !is_quoted)\n\t{\n\t \
X int count = 1;\n\t  char* ptr = str;\n\t  char ch;\n\t  pu\
Xtchar('\"');\n\t  while(ch = *ptr++)\n\t    {\n\t      swit\
Xch(ch)\n\t      {\n\t\tcase '\\n': printf(\"\\\\n\");  coun\
Xt +=2; break;\n\t\tcase '\\t': printf(\"\\\\t\");  count +=\
X2; break;\n\t\tcase '\\\\': printf(\"\\\\\\\\\"); count +=2\
X; break;\n\t\tcase '\"':  printf(\"\\\\\\\"\"); count +=2; \
Xbreak;\n\t\tdefault:   putchar(ch);    count +=1; break;\n\t\
X      }\n\t      if(count >= 59)\n\t\t{ printf(\"\\\\\\n\")\
X;\n\t\t  count = 0;\n\t\t}\n\t    }\n\t  putchar('\"');\n\t\
X}\n\n      else putchar(ch);\n      is_quoted = ( ch == '\\\
X'');\n    }\n}\n";
X
X
X  express_string(dna);
X  exit(0);
X}
X
X
X/* Express the string, substituting a quotation of the string 
X * for the character 'Z'.  Breaks the literal into lines of no
X * more than 60 chars.
X */
Xexpress_string(str)
X  char* str;
X{
X  char* ptr = str;
X  char ch;
X  int is_quoted = 0;
X
X  while(ch = *ptr++)
X    {
X
X      if(ch == 'Z' && !is_quoted)
X	{
X	  int count = 1;
X	  char* ptr = str;
X	  char ch;
X	  putchar('"');
X	  while(ch = *ptr++)
X	    {
X	      switch(ch)
X	      {
X		case '\n': printf("\\n");  count +=2; break;
X		case '\t': printf("\\t");  count +=2; break;
X		case '\\': printf("\\\\"); count +=2; break;
X		case '"':  printf("\\\""); count +=2; break;
X		default:   putchar(ch);    count +=1; break;
X	      }
X	      if(count >= 59)
X		{ printf("\\\n");
X		  count = 0;
X		}
X	    }
X	  putchar('"');
X	}
X
X      else putchar(ch);
X      is_quoted = ( ch == '\'');
X    }
X}
END_OF_selfrep.c
if test 2075 -ne `wc -c <selfrep.c`; then
    echo shar: \"selfrep.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# selfrep.cs: the complete "coded" source.  Per the Makefile it is
# selfrep_main.cs followed by express_string.c.  The Z on a line of its own
# is the placeholder that express_string replaces with a C string literal
# quoting this whole text; the 'Z' inside the comment is skipped because it
# directly follows a single quote.
if test -f selfrep.cs -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"selfrep.cs\"
else
echo shar: Extracting \"selfrep.cs\" \(959 characters\)
sed "s/^X//" >selfrep.cs <<'END_OF_selfrep.cs'
X/* This program prints its source. */
X
Xmain(argc, argv)
X  char** argv;
X{
X  char * dna =
X
XZ;
X
X
X  express_string(dna);
X  exit(0);
X}
X
X
X/* Express the string, substituting a quotation of the string 
X * for the character 'Z'.  Breaks the literal into lines of no
X * more than 60 chars.
X */
Xexpress_string(str)
X  char* str;
X{
X  char* ptr = str;
X  char ch;
X  int is_quoted = 0;
X
X  while(ch = *ptr++)
X    {
X
X      if(ch == 'Z' && !is_quoted)
X	{
X	  int count = 1;
X	  char* ptr = str;
X	  char ch;
X	  putchar('"');
X	  while(ch = *ptr++)
X	    {
X	      switch(ch)
X	      {
X		case '\n': printf("\\n");  count +=2; break;
X		case '\t': printf("\\t");  count +=2; break;
X		case '\\': printf("\\\\"); count +=2; break;
X		case '"':  printf("\\\""); count +=2; break;
X		default:   putchar(ch);    count +=1; break;
X	      }
X	      if(count >= 59)
X		{ printf("\\\n");
X		  count = 0;
X		}
X	    }
X	  putchar('"');
X	}
X
X      else putchar(ch);
X      is_quoted = ( ch == '\'');
X    }
X}
END_OF_selfrep.cs
if test 959 -ne `wc -c <selfrep.cs`; then
    echo shar: \"selfrep.cs\" unpacked with wrong size!
fi
# end of overwriting check
fi
# selfrep_main.cs: the main() half of the coded source.  The Makefile
# concatenates it with express_string.c to form selfrep.cs.  The lone Z
# marks where the quoted program text is substituted; the trailing blank
# lines are part of the payload and count toward the checked size (132).
if test -f selfrep_main.cs -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"selfrep_main.cs\"
else
echo shar: Extracting \"selfrep_main.cs\" \(132 characters\)
sed "s/^X//" >selfrep_main.cs <<'END_OF_selfrep_main.cs'
X/* This program prints its source. */
X
Xmain(argc, argv)
X  char** argv;
X{
X  char * dna =
X
XZ;
X
X
X  express_string(dna);
X  exit(0);
X}
X
X
END_OF_selfrep_main.cs
if test 132 -ne `wc -c <selfrep_main.cs`; then
    echo shar: \"selfrep_main.cs\" unpacked with wrong size!
fi
# end of overwriting check
fi
# express_string.c: copies a coded string to standard output, replacing each
# Z that does not directly follow a single quote with the entire string
# written as a C string literal (newline, tab, backslash and double quote
# escaped; a backslash-newline continuation emitted once the running column
# count reaches 59).
# The Makefile both links this file into express_file and appends it to
# selfrep_main.cs, and its text also appears inside selfrep.c, so the
# payload must stay byte-for-byte in step with those files.
if test -f express_string.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"express_string.c\"
else
echo shar: Extracting \"express_string.c\" \(827 characters\)
sed "s/^X//" >express_string.c <<'END_OF_express_string.c'
X/* Express the string, substituting a quotation of the string 
X * for the character 'Z'.  Breaks the literal into lines of no
X * more than 60 chars.
X */
Xexpress_string(str)
X  char* str;
X{
X  char* ptr = str;
X  char ch;
X  int is_quoted = 0;
X
X  while(ch = *ptr++)
X    {
X
X      if(ch == 'Z' && !is_quoted)
X	{
X	  int count = 1;
X	  char* ptr = str;
X	  char ch;
X	  putchar('"');
X	  while(ch = *ptr++)
X	    {
X	      switch(ch)
X	      {
X		case '\n': printf("\\n");  count +=2; break;
X		case '\t': printf("\\t");  count +=2; break;
X		case '\\': printf("\\\\"); count +=2; break;
X		case '"':  printf("\\\""); count +=2; break;
X		default:   putchar(ch);    count +=1; break;
X	      }
X	      if(count >= 59)
X		{ printf("\\\n");
X		  count = 0;
X		}
X	    }
X	  putchar('"');
X	}
X
X      else putchar(ch);
X      is_quoted = ( ch == '\'');
X    }
X}
END_OF_express_string.c
if test 827 -ne `wc -c <express_string.c`; then
    echo shar: \"express_string.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# express_file.c: bootstrap driver that reads the coded file named on the
# command line and expresses it on standard output.
# It now rejects a missing argument and reports an unreadable file instead
# of handing the NULL returned by file_to_string to express_string; a
# non-zero exit status also makes the Makefile's "selfrep.c" rule fail.
# The payload size below (433) must match the text between the sed line and
# END_OF_express_file.c after the leading X is stripped.
if test -f express_file.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"express_file.c\"
else
echo shar: Extracting \"express_file.c\" \(433 characters\)
sed "s/^X//" >express_file.c <<'END_OF_express_file.c'
X#include <stdio.h>
X#include <stdlib.h>
X
Xextern char* file_to_string();
X
X/* Express the coded file named by argv[1] on standard output. */
Xmain(argc, argv)
X  char** argv;
X{
X  char* coded;
X
X  if (argc != 2)
X    {
X      fprintf(stderr, "usage: %s coded-file\n", argv[0]);
X      exit(2);
X    }
X
X  coded = file_to_string(argv[1]);
X  if (coded == 0)
X    {
X      perror(argv[1]);
X      exit(1);
X    }
X
X  express_string(coded);
X  exit(0);
X}
END_OF_express_file.c
if test 433 -ne `wc -c <express_file.c`; then
    echo shar: \"express_file.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# file_to_string.c: reads a whole named file into a malloc'ed,
# NUL-terminated buffer; returns NULL with errno set on failure.
# Changes from the original payload:
#   - the descriptor is closed on every path (it used to leak when fstat or
#     malloc failed);
#   - <fcntl.h>, <stdlib.h> and <unistd.h> are included so open/malloc/free/
#     read/close are declared (an undeclared malloc is taken to return int,
#     which truncates the pointer where pointers are wider than int);
#   - a failed malloc reports ENOMEM and a short read reports EIO, so the
#     "errno will have been set" promise holds.
# The payload size below (1208) must match the text between the sed line and
# END_OF_file_to_string.c after the leading X is stripped.
if test -f file_to_string.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"file_to_string.c\"
else
echo shar: Extracting \"file_to_string.c\" \(1208 characters\)
sed "s/^X//" >file_to_string.c <<'END_OF_file_to_string.c'
X#include <sys/param.h>
X#include <sys/file.h>
X#include <sys/dir.h>
X#include <sys/stat.h>
X#include <errno.h>
X#include <fcntl.h>
X#include <stdlib.h>
X#include <unistd.h>
X
X/*
X * This procedure buffers up a named file, and returns a pointer to the
X * buffer.  If something goes wrong, it returns NULL, and errno will have
X * been set.  The buffer is malloc'ed and NUL-terminated; the caller owns
X * it.  The file descriptor is closed on every path.
X */
X
Xchar*
Xfile_to_string(file_name)
X     char* file_name;
X{
X  char* file_buffer = 0;
X  struct stat stat_buf;
X  long got;
X  int error = 0;
X  int fd = open(file_name, O_RDONLY, 0);
X
X  if (fd < 0)
X    return 0;
X
X  if (fstat(fd, &stat_buf) == -1)
X    error = errno;
X  else if ((file_buffer = (char*)malloc(stat_buf.st_size + 1)) == 0)
X    error = ENOMEM;
X  else
X    {
X      got = read(fd, file_buffer, stat_buf.st_size);
X      if (got < 0)
X        error = errno;
X      else if (got != stat_buf.st_size)
X        error = EIO;  /* short read: read() did not set errno */
X    }
X
X  close(fd);
X
X  if (error)
X    {
X      free(file_buffer);
X      errno = error;
X      return 0;
X    }
X
X  file_buffer[stat_buf.st_size] = '\0';
X  return file_buffer;
X} /* end.. file_to_string */
END_OF_file_to_string.c
if test 1208 -ne `wc -c <file_to_string.c`; then
    echo shar: \"file_to_string.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# Print the completion message announced in the archive header, then exit
# with success status.
echo shar: End of shell archive.
exit 0