[comp.sys.sgi] SYSLOG

andrew@alice.UUCP (Andrew Hume) (01/09/90)

has anyone running 3.2 (or 3.2.1) persuaded the syslog system to
log disk errors? despite the obvious and default conf file,
nothing gets logged but some whining by some networking daemons.
i wrote a user program to log stuff; it worked.

paulm@kestrel.sgi.com (Paul Mielke) (01/10/90)

In article <10319@alice.UUCP>, andrew@alice.UUCP (Andrew Hume) writes:
> 
> 
> has anyone running 3.2 (or 3.2.1) persuaded the syslog system to
> log disk errors? despite the obvious and default conf file,
> nothing gets logged but some whining by some networking daemons.
> i wrote a user program to log stuff; it worked.

In 3.2 and earlier software, the only stuff that goes into the syslog is
from user level programs that call syslog(3).  In a subsequent system
release, all kernel printfs (which will include error messages from disk
drivers) will also go to the syslogd.

Paul Mielke                  paulm@sgi.com
Advanced Systems Division    (415) 962-3447
Silicon Graphics, Inc.

andrew@alice.UUCP (Andrew Hume) (01/12/90)

In article <10319@alice.UUCP>, andrew@alice.UUCP (Andrew Hume) writes:
> 
> 
> has anyone running 3.2 (or 3.2.1) persuaded the syslog system to
> log disk errors? despite the obvious and default conf file,
> nothing gets logged but some whining by some networking daemons.
> i wrote a user program to log stuff; it worked.


I have a solution for anyone who wants to log their kernel printfs
(such as disk errors etc). I am running it on a 4D/240 but it should apply
to other power series. Many thanks are due to dave olson who was kind
enough to help me understand some fine points.

basically, i have a program called syslogger (spawned from S20sysetup)
that constantly scans the kernel buffer and logs any messages found there.
(isn't this what syslogd should be doing?) i made the kernel buffer
larger than normal (the define is CONBUFSIZE in master.d/kernel: default
is 1024 - mine is 4096) and i scan the buffer every minute. (in less paranoid
circumstances, say on our vaxes, we do this less often, say every 15 mins.)
to compile the following source, i use
	$CC $CFLAGS -I/usr/include/bsd -o syslogger syslogger.c -lmld -lbsd
there is some crap at the start to try and do the right thing for a daemon
process (disconnect the controlling tty etc); i never ran into any trouble
before i put it in and have no idea why it is necessary but it is apparently
the right thing to do. if i bungled, tell me.

any complaints, comments or suggestions should be sent to andrew@research.att.com

the source is:

#include	<stdio.h>
#include	<sys/param.h>
#include	<nlist.h>
#include	<signal.h>
#include	<syslog.h>
#include	<sys/termio.h>

/*
	kernel symbols handed to nlist("/unix", nl) in main().
	the order matters: the rest of the file indexes this table
	by position rather than by name.
*/
struct	nlist nl[] = {
	{ "conbuf" },		/* nl[0]: read by dobuf() as the buffer contents */
	{ "conbufndx" },	/* nl[1]: read by dobuf() as an int index */
	{ "conbufsz" },		/* nl[2]: read by main() as an int size */
	{ 0 }			/* terminator: main()'s loop stops at a null n_name */
};

int mem;		/* descriptor for /dev/kmem (opened in main, used by getval) */
char *whoami;		/* argv[0]; done() prefixes its log messages with it */

main(argc, argv)
	char **argv;
{
	int i;
	int size, amt;
	int ndx;
	char *buf;
	extern errno;

	whoami = argv[0];
	/*
		fork off and find myself
	*/
	if(fork())
		exit(0);
	for(i = 0; i < 20; i++)
		close(i);		/* close a bunch */
	i = open("/dev/null", 0);	/* set std descriptors to something */
	dup2(i, 2);			/* hope i isn't 2 */
	close(i);
	dup2(2, 0);
	dup2(2, 1);
	chdir("/");
	i = open("/dev/tty", 2);
	if(i > 0){
		ioctl(i, TIOCNOTTY, (char *)0);
		close(i);
	}
	setpgrp();
	/*
		do something with my life
	*/
	signal(SIGHUP, SIG_IGN);
	nlist("/unix", nl);
	mem = 0;
	for(i = 0; nl[i].n_name; i++)
		if(nl[i].n_type == 0){
			mem = 1;
			syslog(LOG_ERR, "can't find %s\n", nl[1].n_name);
		}
		else
			nl[i].n_value &= ~0x80000000;
	if(mem)
		done("namelist problems", 1);
	if((mem = open("/dev/kmem", 0)) < 0)
		done("/dev/kmem\n", -1);
	if(getval(&nl[2], &size, sizeof(size)) != sizeof(size))
		done("reading size");
	syslog(LOG_INFO, "kernel monitor: buffer = %d bytes", size);
	if((buf = (char *)malloc(3*size)) == 0)
		done("malloc failure", 1);
	ndx = 0;		/* assume we are starting from start of the buffer */
	do {
		ndx = dobuf(ndx, size, buf);
	} while(sleep(60) >= 0);
	exit(0);
}

dobuf(ostart, size, buffer)
	char *buffer;
{
	int out;
	int ndx;

	if(getval(&nl[1], &ndx, sizeof(ndx)) != sizeof(ndx))
		done("reading ndx");
	ndx = ndx%size;		/* it should be anyway */
	if(ndx == ostart)	/* most common case: nothing happened */
		return(ostart);
	if(getval(&nl[0], buffer, size) != size)
		done("reading buf");
	/* make it a contiguous region */
	memcpy(buffer+size, buffer, size);
	if(ndx < ostart)
		ndx += size;
	out = 2*size;
	while(ostart < ndx)
		switch(buffer[ostart++])
		{
		case 0:		/* apparently not uncommon */
			break;
		case '\n':
			buffer[out] = 0;
			out = 2*size;
			syslog(LOG_ERR, "%s", buffer+out);
			break;
		default:
			buffer[out++] = buffer[ostart-1];
			break;
		}
	buffer[out] = 0;
	out = 2*size;
	if(buffer[out])
		syslog(LOG_ERR, "%s <partial line>", buffer+out);
	return(ndx%size);
}

/*
	log a final message at LOG_ERR and exit with status 1.
		s	text describing what went wrong
		ec	negative: also append the errno text (syslog's %m);
			anything else: log the text alone
	both arguments must be supplied by the caller; there is no default.
	does not return.
*/
done(s, ec)
	char *s;
{
	char *fmt;

	fmt = (ec < 0)? "%s: %s: %m" : "%s: %s";
	syslog(LOG_ERR, fmt, whoami, s);
	exit(1);
}

/*
	fetch cnt bytes from descriptor mem at the offset held in
	nl->n_value and store them in buf.
		nl	namelist entry whose n_value is the offset to read
		buf	destination; the caller must provide at least cnt bytes
		cnt	number of bytes wanted
	returns whatever read() returns, or -1 if the seek did not land
	on the requested offset. callers compare the result with cnt.
*/
getval(nl, buf, cnt)
	struct	nlist *nl;
	char *buf;
	int cnt;
{
	if(lseek(mem, (long)nl->n_value, 0) != (long)nl->n_value){
		/*
			report through syslog like the rest of the program:
			main() points stderr at /dev/null before we ever run
		*/
		syslog(LOG_ERR, "lseek to %lx for %s",
			(unsigned long)nl->n_value, nl->n_name);
		return(-1);
	}
	return read(mem, buf, cnt);
}