[comp.mail.sendmail] fix for wideload headers

vixie@decwrl.dec.com (Paul A Vixie) (07/04/90)

Sendmail has a fixed buffer that it puts headers in; it's usually 2500
characters wide.  If you overflow it, various bad things happen, usually
starting with extra blank lines being inserted into your mail headers.

I boosted my MAXFIELD to 15000 (possible thanks to virtual memory), and the
nature of the problem changed: instead of me sending out broken headers, the
various next-hop sites started blowing up.  Clearly, unless everyone in the
world fixes their sendmail binaries at the same time, boosting the stupid
fixed buffer size isn't the right approach.

The approach I settled on was to take any H_RCPT header whose h_value was
more than 500 characters long and move it to the end of the message body,
leaving behind a "To: distribution:; (see end of body)" header to keep
sendmail from adding an Apparently-To: line and to keep users from wondering
what the hell was going on.

Diffs follow.  Comments welcome.  This will be part of King James Sendmail.

By the way, NULL "m" parameters to "commaize" don't work the way the 
comment says they do.  I've edited the comment in the diffs below.  I
also added one comment, but it looks silly now.

Paul Vixie
DEC WRL

RCS file: RCS/collect.c,v
retrieving revision 1.2
diff -c -r1.2 collect.c
*** /tmp/,RCSt1a00500	Wed Jul  4 01:03:22 1990
--- collect.c	Tue Jul  3 23:50:53 1990
***************
*** 248,258 ****
--- 248,267 ----
  		if (ferror(tf))
  			tferror(tf);
  	} while (sfgets(buf, MAXFIELD, InChannel) != NULL);
+ 	/* MUST flush to permanent storage and check completion status
+ 	 * before we say "OK" to the sending transport.
+ 	 */
  	if (fflush(tf) != 0)
  		tferror(tf);
  	if (!sayok)
  		(void) sfgetsfini();
+ # ifdef DECWRL
+ 	/* on DECWRL we need to keep the file open 'til eatheader
+ 	 * since eatheader might be appending oversized headers to it.
+ 	 */
+ # else
  	(void) fclose(tf);
+ # endif
  
  	/* An EOF when running SMTP is an error */
  	if ((feof(InChannel) || ferror(InChannel)) && OpMode == MD_SMTP)
***************
*** 269,274 ****
--- 278,287 ----
  		CurEnv->e_to = NULL;
  		CurEnv->e_flags &= ~EF_FATALERRS;
  
+ # ifdef DECWRL
+ 		(void) fclose(tf);
+ # endif
+ 
  		/* and don't try to deliver the partial message either */
  		finis();
  	}
***************
*** 278,284 ****
--- 291,302 ----
  	**	Examples are who is the from person & the date.
  	*/
  
+ # ifdef DECWRL
+ 	eatheader(CurEnv, tf);
+ 	(void) fclose(tf);
+ # else
  	eatheader(CurEnv);
+ # endif
  
  	/*
  	**  Add an Apparently-To: line if we have no recipient lines.
===================================================================
RCS file: RCS/headers.c,v
retrieving revision 1.1
diff -c -r1.1 headers.c
*** /tmp/,RCSt1a00500	Wed Jul  4 01:03:23 1990
--- headers.c	Wed Jul  4 00:27:59 1990
***************
*** 270,275 ****
--- 270,276 ----
  **
  **	Parameters:
  **		e -- the envelope to process.
+ **		tf -- (DECWRL only) file to append oversized headers to
  **
  **	Returns:
  **		none.
***************
*** 280,291 ****
--- 281,300 ----
  **		Aborts the message if the hop count is exceeded.
  */
  
+ # ifdef DECWRL
+ eatheader(e, tf)
+ 	register FILE *tf;
+ # else
  eatheader(e)
+ # endif
  	register ENVELOPE *e;
  {
  	register HDR *h;
  	register char *p;
  	int hopcnt = 0;
+ # ifdef DECWRL
+ 	bool ovfhdrs = 0;
+ # endif
  
  	if (tTd(32, 1))
  		printf("----- collected header -----\n");
***************
*** 307,312 ****
--- 316,344 ----
  			sendtolist(h->h_value, (ADDRESS *) NULL, &CurEnv->e_sendqueue);
  		}
  
+ # ifdef DECWRL
+ 		if ((tf != NULL)
+ 		 && bitset(H_RCPT, h->h_flags)
+ 		 && !bitset(H_DEFAULT, h->h_flags)
+ 		 && (strlen(h->h_value) > 500)	    /* should parameterize */
+ 		    ) {
+ 			bool oldstyle = bitset(EF_OLDSTYLE, e->e_flags);
+ 
+ 			if (!ovfhdrs) {
+ 				fputs("\n%%% overflow headers %%%\n", tf);
+ 				ovfhdrs++;
+ 			}
+ 
+ 			commaize(h, h->h_value, tf, oldstyle, LocalMailer);
+ 			/* two notes:
+ 			 * (1) this upsets e_msgsize, but we don't care;
+ 			 * (2) newstr() is needed since this is free()'d later
+ 			 */
+ 			h->h_value =
+ 				newstr("distribution:; (see end of body)");
+ 		}
+ # endif
+ 
  		/* log the message-id */
  #ifdef LOG
  		if (!QueueRun && LogLevel > 8 && h->h_value != NULL &&
***************
*** 327,332 ****
--- 359,370 ----
  	if (tTd(32, 1))
  		printf("----------------------------\n");
  
+ # ifdef DECWRL
+ 	if (ovfhdrs) {
+ 		fputs("%%% end overflow headers %%%\n", tf);
+ 	}
+ # endif
+ 
  	/* store hop count */
  	if (hopcnt > e->e_hopcount)
  		e->e_hopcount = hopcnt;
***************
*** 687,694 ****
  **		p -- the value to put in it.
  **		fp -- file to put it to.
  **		oldstyle -- TRUE if this is an old style header.
! **		m -- a pointer to the mailer descriptor.  If NULL,
! **			don't transform the name at all.
  **
  **	Returns:
  **		none.
--- 725,731 ----
  **		p -- the value to put in it.
  **		fp -- file to put it to.
  **		oldstyle -- TRUE if this is an old style header.
! **		m -- a pointer to the mailer descriptor.
  **
  **	Returns:
  **		none.
===================================================================
RCS file: RCS/queue.c,v
retrieving revision 1.10
diff -c -r1.10 queue.c
*** /tmp/,RCSt1a00500	Wed Jul  4 01:03:24 1990
--- queue.c	Tue Jul  3 23:32:55 1990
***************
*** 710,716 ****
--- 710,720 ----
  		/* read the queue control file */
  		readqf(CurEnv, TRUE);
  		CurEnv->e_flags |= EF_INQUEUE;
+ # ifdef DECWRL
+ 		eatheader(CurEnv, NULL);
+ # else
  		eatheader(CurEnv);
+ # endif
  
  		/* do the delivery */
  		if (!bitset(EF_FATALERRS, CurEnv->e_flags))
===================================================================
RCS file: RCS/savemail.c,v
retrieving revision 1.4
diff -c -r1.4 savemail.c
*** /tmp/,RCSt1a00500	Wed Jul  4 01:03:25 1990
--- savemail.c	Tue Jul  3 23:34:02 1990
***************
*** 456,462 ****
--- 456,466 ----
  	CurEnv = ee;
  	define('f', "\001n", ee);
  	define('x', "Mail Delivery Subsystem", ee);
+ # ifdef DECWRL
+ 	eatheader(ee, NULL);
+ # else
  	eatheader(ee);
+ # endif
  
  	/* actually deliver the error message */
  	sendall(ee, SM_DEFAULT);
--
Paul Vixie
DEC Western Research Lab	<vixie@wrl.dec.com>
Palo Alto, California		...!decwrl!vixie

forys@snake.utah.edu (Jeff Forys) (07/10/90)

In article <VIXIE.90Jul4011023@volition.pa.dec.com> Paul Vixie writes:
> Sendmail has a fixed buffer that it puts headers in; it's usually 2500
> characters wide.  If you overflow it, various bad things happen, usually
> starting with extra blank lines being inserted into your mail headers.

I just wanted to point out that the aforementioned problem has been
fixed in Sendmail 5.64.  Long headers are now correctly truncated [*]
to MAXFIELD; headers are collected using a double-buffering scheme.
Paul's modifications look like they are to sendmail 5.61 so they may
not patch right over the new collect().

[*] Technically speaking, the "To:" header should never be truncated!
---
Jeff Forys @ Unv of Utah/Salt Lake, Comp Sci Dept. (801-581-4280)
forys@cs.utah.edu  -or-  ..!{boulder,cs.utexas.edu}!utah-cs!forys

kjones@talos.pm.com (Kyle Jones) (07/10/90)

Paul Vixie writes:
 > Sendmail has a fixed buffer that it puts headers in; it's usually 2500
 > characters wide.  If you overflow it, various bad things happen, usually
 > starting with extra blank lines being inserted into your mail headers.

Jeff Forys writes:
 > I just wanted to point out that the aforementioned problem has been
 > fixed in Sendmail 5.64.  Long headers are now correctly truncated [*]
 > to MAXFIELD; headers are collected using a double-buffering scheme.
 > [...]
 > [*] Technically speaking, the "To:" header should never be
 > truncated!

I hope you meant long _lines_ are now correctly truncated.  SMTP
sets a limit on line length, but if a header is folded I don't
see why it need be truncated, barring virtual memory constraints.
Even inside RFC 821 (the SMTP specification) there is this
admonishment:

          ****************************************************
          *                                                  *
          *  TO THE MAXIMUM EXTENT POSSIBLE, IMPLEMENTATION  *
          *  TECHNIQUES WHICH IMPOSE NO LIMITS ON THE LENGTH *
          *  OF THESE OBJECTS SHOULD BE USED.                *
          *                                                  *
          ****************************************************

forys@snake.utah.edu (Jeff Forys) (07/11/90)

In article <1990Jul10.152320.6412@talos.pm.com> Kyle Jones writes:
> Jeff Forys writes:
> > I just wanted to point out that the aforementioned problem has been
> > fixed in Sendmail 5.64.  Long headers are now correctly truncated [*]
> > to MAXFIELD; headers are collected using a double-buffering scheme.
> > [...]
> > [*] Technically speaking, the "To:" header should never be truncated!
>
> I hope you meant long _lines_ are now correctly truncated.

No, I meant what I wrote.  With sendmail, each header (including
any continuation lines therein) is read into a static buffer of
size MAXFIELD.  If the length of a particular header is too long,
sendmail will truncate it.  Please, don't shoot the messenger! :-)
---
Jeff Forys @ Unv of Utah/Salt Lake, Comp Sci Dept. (801-581-4280)
forys@cs.utah.edu  -or-  ..!{boulder,cs.utexas.edu}!utah-cs!forys

vixie@decwrl.dec.com (Paul A Vixie) (07/19/90)

Jeff,

I like my fix better than 5.64's truncation method.

Paul
--
Paul Vixie
DEC Western Research Lab	<vixie@wrl.dec.com>
Palo Alto, California		...!decwrl!vixie