comparc@twwells.uucp (comp.archives) (11/15/88)
I have been working on a program to validate the DB: articles. I
added an option to display the site entries in something more human
readable and so I thought it was time to send it out. Right at the
moment, it does a limited amount of interpretation of site entries
and just passes on the rest of the article. The human-readable part
could also stand quite a bit of work. However, it is a start, and I
thought some of you would like to see it, primitive as it is.
---
Bill
{uunet|novavax}!proxftl!twwells!bill
send comp.archives postings to twwells!comp-archives
send comp.archives related mail to twwells!comp-archives-request
---
Warning: this is long!
-------------------------<CUT HERE>-------------------------------------------
#include <stdio.h>
#include <ctype.h>
/* This is a program to deal with the comp.archives database. This
version is the earliest; it only verifies the DB: input files. */
extern int exit();
extern char *malloc();
extern char *realloc();
extern char *strchr();
extern char *optarg;
extern int optind;
extern int opterr;
/* This is used when constructing the database in memory; it lets us
keep track of several lines of the same kind of information. It is a
singly linked list of separately allocated strings. */
typedef struct STRLIST {
struct STRLIST *s_next; /* Next entry, or 0 at the end of the list. */
char *s_string; /* The text of this entry. */
} STRLIST;
/* Here is a table of time zones. @@It is incomplete, but it is
better than nothing. The entries come in pairs: the abbreviation
that appears on a TM line, followed by the name that is stored for
the site. A pair of null pointers ends the table. */
char *Zones[] = {
"EST", "Eastern",
"PST", "Pacific",
0, 0,
};
/* The state stuff. Note that ST_DONE must be zero or the state loop
won't work as expected. The enum, the States[] function table and
the Sname[] name table are parallel: all three must list the states
in the same order, since the enum value is used as the index into
both tables. */
/* The state handlers. Each one returns the number of the next state. */
int st_newart(), st_dbart(), st_delcmd(), st_addcmd(),
st_snm(), st_sen(), st_stm(), st_stt(),
st_sad(), st_sma(), st_sco(), st_six(),
st_skw(), st_sde(), st_skip(), st_sxx(),
st_end();
/* The state numbers. */
enum {ST_DONE, ST_NEWART, ST_DBART, ST_DELCMD, ST_ADDCMD,
ST_SNM, ST_SEN, ST_STM, ST_STT,
ST_SAD, ST_SMA, ST_SCO, ST_SIX,
ST_SKW, ST_SDE, ST_SKIP, ST_SXX,
ST_END};
/* The handler for each state; the slot for ST_DONE holds exit. */
int (*States[])() = {exit, st_newart, st_dbart, st_delcmd, st_addcmd,
st_snm, st_sen, st_stm, st_stt,
st_sad, st_sma, st_sco, st_six,
st_skw, st_sde, st_skip, st_sxx,
st_end};
/* The printable name of each state, used by the debugging output. */
char *Sname[] = {"DONE", "ST_NEWART", "ST_DBART", "ST_DELCMD", "ST_ADDCMD",
"ST_SNM", "ST_SEN", "ST_STM", "ST_STT",
"ST_SAD", "ST_SMA", "ST_SCO", "ST_SIX",
"ST_SKW", "ST_SDE", "ST_SKIP", "ST_SXX",
"ST_END"};
/* Input buffer stuff. These are maintained by readline(). */
char *Inbuf; /* The line input buffer. */
int Inend; /* Flag: at end of input. */
int Inlen; /* Length of the input buffer. */
int Inline; /* Line number of the input. */
/* Lines for the site database. Each list collects the strings taken
from one kind of line in the site entry being read. */
STRLIST *S_nm; /* The name of the site. */
STRLIST *S_en; /* Who entered the entry. */
STRLIST *S_tm; /* Times to call. */
STRLIST *S_tt; /* Archive site title. */
STRLIST *S_ad; /* Archive administrator. */
STRLIST *S_ma; /* Mailing address. */
STRLIST *S_co; /* Communications method. */
STRLIST *S_ix; /* Index file. */
STRLIST *S_kw; /* Keyword file. */
STRLIST *S_de; /* Description. */
/* Miscellaneous declarations. */
void nzfree();
char *myalloc();
void readline();
int Debug; /* Set to print debugging output. */
int Error; /* Set if the current entry has an error. */
int Print; /* Print the data */
char Options[] = "px"; /* program options, in the form getopt() takes */
char *Program_name; /* The program name. */
/* Report a command line problem on stderr, followed by a one line
   usage summary, and terminate the program with exit status 1.
   This function does not return. */
void
usage(msg)
char *msg;	/* what was wrong with the command line */
{
	char *name;

	name = Program_name;
	fprintf(stderr, "%s: %s.\nusage: %s -%s file\n",
	    name, msg, name, Options);
	exit(1);
}
int
main(argc, argv)
int argc;
char **argv;
{
int state;
/* Parse the arguments. */
if (Program_name = strchr(argv[0], '/')) {
++Program_name;
} else {
Program_name = argv[0];
}
opterr = 0;
while (1) {
switch (getopt(argc, argv, Options)) {
case -1: break;
default: usage("illegal option");
case 'p': Print = 1; continue;
case 'x': Debug = 1; continue;
}
break;
}
argv += optind;
argc -= optind;
switch (argc) {
default: usage("too many arguments");
case 1:
if (!freopen(argv[0], "r", stdin)) {
fprintf(stderr, "%s: unable to open %s\n",
Program_name, argv[0]);
exit(3);
}
break;
case 0:
usage("missing file argument");
}
/* Read each line of the input. The current state determines
what is done with it. If a state returns a negative state
number, that means to go on to that state without reading
a new line. This loop exits because ST_DONE vectors to
exit(). */
for (state = ST_NEWART; ; state = (*States[state])()) {
if (state > 0) {
readline();
} else {
state = -state;
}
if (Debug) {
if (Inend) {
printf("%s:\n", Sname[state]);
} else {
printf("%s:\"%s\"\n", Sname[state], Inbuf);
}
}
}
}
/* Report a problem with the input on stderr and mark the current
   entry as bad by setting Error. The message is followed by the
   number and text of the current input line, or by a note that the
   end of the input has been reached. */
void
complain(msg)
char *msg;	/* description of the problem */
{
	Error = 1;
	fprintf(stderr, "%s: %s, ", Program_name, msg);
	if (!Inend) {
		fprintf(stderr, "line %d: \"%s\"\n", Inline, Inbuf);
		return;
	}
	fprintf(stderr, "at the end of the input\n");
}
/* Release a block of allocated memory. A null pointer is quietly
   ignored rather than being handed to free(). */
void
nzfree(ptr)
char *ptr;	/* block to release; may be null */
{
	if (ptr == 0) {
		return;
	}
	free(ptr);
}
/* Allocate, resize or release memory.
   A size of zero frees obuf (if there is one) and returns a null
   pointer. Otherwise obuf is resized when it is not null, and a new
   block is allocated when it is. If the memory can't be had, a
   message is printed and the program exits with status 2, so a
   caller never sees a failed allocation. */
char *
myalloc(obuf, size)
char *obuf;	/* old buffer, if reallocating */
int size;	/* allocation size */
{
	char *nbuf;

	if (size == 0) {
		nzfree(obuf);
		return (0);
	}
	if (obuf) {
		nbuf = realloc(obuf, (unsigned)size);
	} else {
		nbuf = malloc((unsigned)size);
	}
	if (!nbuf) {
		fprintf(stderr, "%s: out of memory\n", Program_name);
		exit(2);
	}
	return (nbuf);
}
/* Return a freshly allocated copy of a nul terminated string. The
   memory comes from myalloc(), so this does not return if the
   allocation fails. */
char *
stralloc(ptr)
char *ptr;	/* string to copy */
{
	char *copy;
	int size;

	size = strlen(ptr) + 1;		/* room for the nul too */
	copy = myalloc((char *)0, size);
	strcpy(copy, ptr);
	return (copy);
}
/* Read a line into the input buffer; extend the buffer if necessary.
It returns the number of characters in the line, counting the end
of the line. */
void
readline()
{
int cc;
int len;
if (Inend) {
return;
}
++Inline;
len = 0;
do {
switch (cc = getchar()) {
case 0:
fprintf(stderr, "%s: warning, nul on line %d\n",
Inline);
continue;
case EOF:
if (!len) {
Inend = 1;
if (Inbuf) {
Inbuf[0] = 0;
}
return;
}
/* no break */
case '\n':
cc = 0;
break;
}
if (len >= Inlen) {
Inlen += Inlen < 480 ? Inlen + 32 : 100;
Inbuf = myalloc(Inbuf, Inlen);
}
Inbuf[len++] = cc;
} while (cc);
}
/* Add a string to the end of a list of strings. Leading spaces are
   removed from the string, and the list gets its own copy of it.
   Returns the address of the new entry's s_next pointer, which can
   be passed back in as the list to append further entries without
   walking the list again. */
STRLIST **
add_string(list, buf)
STRLIST **list;	/* address of the list head, or of any s_next in it */
char *buf;	/* string to add */
{
	STRLIST *node;

	while (*list) {
		list = &(*list)->s_next;
	}
	/* isspace() is only defined for unsigned char values and EOF. */
	while (isspace((unsigned char)*buf)) {
		++buf;
	}
	/* myalloc() takes an int and has no prototype, so the size
	   must be converted here. */
	node = (STRLIST *)myalloc((char *)0, (int)sizeof(STRLIST));
	node->s_next = 0;
	node->s_string = stralloc(buf);
	*list = node;
	return (&node->s_next);
}
/* Add a part of a string to the end of a string list. The part runs
   from buf up to, but not including, end; leading spaces in it are
   removed. The list gets its own nul terminated copy. Returns the
   address of the new entry's s_next pointer, for use as the list
   argument of a following append. */
STRLIST **
add_segment(list, buf, end)
STRLIST **list;	/* address of the list head, or of any s_next in it */
char *buf;	/* first character of the segment */
char *end;	/* one past the last character of the segment */
{
	STRLIST *node;
	char *ptr;
	int len;

	while (*list) {
		list = &(*list)->s_next;
	}
	/* isspace() is only defined for unsigned char values and EOF. */
	while (buf < end && isspace((unsigned char)*buf)) {
		++buf;
	}
	len = end - buf;
	/* myalloc() takes an int and has no prototype, so the size
	   must be converted here. */
	node = (STRLIST *)myalloc((char *)0, (int)sizeof(STRLIST));
	node->s_next = 0;
	node->s_string = ptr = myalloc((char *)0, len + 1);
	strncpy(ptr, buf, len);
	ptr[len] = 0;
	*list = node;
	return (&node->s_next);
}
/* Split a line at its semicolons and append each field, in order, to
   a string list. Every field loses its leading spaces; the text
   after the last semicolon (or the whole line, if there is none) is
   the final field. */
void
add_fields(list, ptr)
STRLIST **list;	/* list to append to */
char *ptr;	/* semicolon separated fields */
{
	char *semi;

	while ((semi = strchr(ptr, ';')) != 0) {
		list = add_segment(list, ptr, semi);
		ptr = semi + 1;
	}
	(void)add_string(list, ptr);
}
/* Count the number of semicolon separated fields on a line. This is
   one more than the number of semicolons, so an empty line counts as
   one (empty) field. Every character is examined, including the
   first, so that the count always agrees with the number of strings
   add_fields() stores for the same line. */
int
count_fields(ptr)
char *ptr;	/* the fields, as a nul terminated string */
{
	int cnt;

	cnt = 1;
	for ( ; *ptr; ++ptr) {
		if (*ptr == ';') {
			++cnt;
		}
	}
	return (cnt);
}
/* Write every string of a list to stdout, one per line, each one
   preceded by the given prefix. An empty list prints nothing. */
void
prstrlist(msg, sp)
char *msg;	/* prefix written before each string */
STRLIST *sp;	/* first entry of the list; may be null */
{
	while (sp) {
		printf("%s%s\n", msg, sp->s_string);
		sp = sp->s_next;
	}
}
/* Release every entry of a string list, together with the strings
   the entries hold, and leave the list head null so that the list
   can be filled again. */
void
freestrlist(sp0)
STRLIST **sp0;	/* address of the list head */
{
	STRLIST *cur;
	STRLIST *next;

	cur = *sp0;
	while (cur) {
		/* Pick up the link before the entry goes away. */
		next = cur->s_next;
		nzfree(cur->s_string);
		nzfree((char *)cur);
		cur = next;
	}
	*sp0 = 0;
}
/* This takes text of the form "address (real name)" and stores it in
   a string list as two entries: first the address (everything up to
   the first space), then the real name (the text between the
   parentheses). The ')' must be the last character of the text.
   If the text is malformed a complaint is made and nothing is
   stored. */
void
add_name(list, buf)
STRLIST **list;	/* list to append to */
char *buf;	/* the text to take apart */
{
	char *p1;	/* the space that ends the address */
	char *p2;	/* start of the real name */
	char *p3;	/* the ')' that ends the real name */

	/* isspace() is only defined for unsigned char values and EOF,
	   hence the casts. */
	while (isspace((unsigned char)*buf)) {
		++buf;
	}
	if (!(p1 = strchr(buf, ' '))) {
		complain("missing real name");
		return;
	}
	p2 = p1;
	while (isspace((unsigned char)*p2)) {
		++p2;
	}
	if (*p2 != '(') {
		complain("missing the '('");
		return;
	}
	++p2;
	for (p3 = p2; *p3 && *p3 != ')'; ++p3)
		;
	if (*p3 != ')' || p3[1] != 0) {
		complain("the end of the real name is missing.");
		return;
	}
	list = add_segment(list, buf, p1);
	(void)add_segment(list, p2, p3);
}
/* This takes the text of an "EN" line, of the form
   "address (real name) rest", and creates three strings for it:
   the address (everything up to the first space), the real name
   (the text between the parentheses) and whatever follows the ')'
   with its leading spaces removed. If the text is malformed a
   complaint is made and nothing is stored. */
void
add_enterer(list, buf)
STRLIST **list;	/* list to append to */
char *buf;	/* the text to take apart */
{
	char *p1;	/* the space that ends the address */
	char *p2;	/* start of the real name */
	char *p3;	/* the ')' that ends the real name */
	char *p4;	/* start of the text after the real name */

	/* isspace() is only defined for unsigned char values and EOF,
	   hence the casts. */
	while (isspace((unsigned char)*buf)) {
		++buf;
	}
	if (!(p1 = strchr(buf, ' '))) {
		complain("missing real name");
		return;
	}
	p2 = p1;
	while (isspace((unsigned char)*p2)) {
		++p2;
	}
	if (*p2 != '(') {
		complain("missing the '('");
		return;
	}
	++p2;
	for (p3 = p2; *p3 && *p3 != ')'; ++p3)
		;
	if (*p3 != ')') {
		complain("missing the ')'");
		return;
	}
	p4 = p3 + 1;
	while (isspace((unsigned char)*p4)) {
		++p4;
	}
	list = add_segment(list, buf, p1);
	list = add_segment(list, p2, p3);
	(void)add_string(list, p4);
}
/* This is called when at the start of a new article. It looks for
   the Subject: line; when that line is for a DB: article the line is
   echoed and the next state is ST_DBART. A line starting with '@'
   means the Subject: line never showed up: that is complained about
   and the same line is handed to ST_DBART. Any other line, including
   a Subject: that isn't a DB: one (which is complained about), leaves
   us in ST_NEWART. Running out of input ends the program. */
int
st_newart()
{
	char *ptr;

	if (Inend) {
		complain("where's the article?");
		return (ST_DONE);
	}
	ptr = Inbuf;
	if (strncmp(ptr, "Subject:", 8) == 0) {
		ptr += 8;
		/* isspace() is only defined for unsigned char values
		   and EOF, hence the cast. */
		while (isspace((unsigned char)*ptr)) {
			++ptr;
		}
		if (strncmp(ptr, "DB:", 3) == 0) {
			printf("%s\n", Inbuf);
			return (ST_DBART);
		}
		complain("invalid Subject:");
	} else if (*ptr == '@') {
		complain("missing Subject:");
		return (-ST_DBART);
	}
	return (ST_NEWART);
}
/* This is called once we know that this is a DB: article. Lines are
   passed over until one starts with '@'; that line is handed to
   ST_DELCMD without reading another. An '@' anywhere else on a line
   draws a warning. */
int
st_dbart()
{
	int next;

	if (Inend) {
		complain("no database commands");
		return (ST_DONE);
	}
	if (*Inbuf == '@') {
		next = -ST_DELCMD;
	} else {
		if (strchr(Inbuf, '@') != 0) {
			complain("possible misplaced @");
		}
		next = ST_DBART;
	}
	return (next);
}
/* Process any delete commands. These always come first. Each
   recognized delete is reported on a line of its own on stdout.
   An "@ADD " line is handed to ST_ADDCMD without reading another
   line, "@END" moves to ST_END, and anything else that isn't a
   delete command is complained about. */
int
st_delcmd()
{
	char *arg;

	if (Inend) {
		complain("missing @END");
		return (ST_DONE);
	}
	if (strncmp(Inbuf, "@ADD ", 5) == 0) {
		return (-ST_ADDCMD);
	}
	if (strcmp(Inbuf, "@END") == 0) {
		return (ST_END);
	}
	if (strncmp(Inbuf, "@DEL ", 5) == 0) {
		arg = Inbuf + 5;	/* the database name and the key */
		if (strncmp(arg, "INFO ", 5) == 0) {
			/*@@validate the delete */
			printf("deleting %s from info database\n", arg + 5);
		} else if (strncmp(arg, "SITE ", 5) == 0) {
			/*@@validate the delete */
			printf("deleting %s from site database\n", arg + 5);
		} else if (strncmp(arg, "INDEX ", 6) == 0) {
			/*@@validate the delete */
			printf("deleting %s from index database\n", arg + 6);
		} else {
			complain("invalid delete command");
		}
	} else if (strncmp(Inbuf, "@DELALL ", 8) == 0) {
		/*@@validate the delete */
		printf("deleting all %s from index database\n", Inbuf + 8);
	} else {
		complain("command expected");
	}
	return (ST_DELCMD);
}
/* Have processed all delete commands, now do any add commands.
   "@END" moves to ST_END. "@ADD SITE" clears Error and starts
   reading a site entry at ST_SNM. "@ADD INFO" and "@ADD INDEX" are
   not handled yet, so their entries are skipped. Anything else is
   complained about and skipped. */
int
st_addcmd()
{
	if (Inend) {
		complain("missing @END");
		return (ST_DONE);
	}
	if (strcmp(Inbuf, "@END") == 0) {
		return (ST_END);
	}
	if (strncmp(Inbuf, "@ADD ", 5) != 0) {
		complain("command expected");
	} else if (strcmp(Inbuf + 5, "INFO") == 0) {
		/*@@return (ST_INM);*/
		return (ST_SKIP);
	} else if (strcmp(Inbuf + 5, "SITE") == 0) {
		Error = 0;
		return (ST_SNM);
	} else if (strcmp(Inbuf + 5, "INDEX") == 0) {
		/*@@return (ST_XADD);*/
		return (ST_SKIP);
	} else {
		complain("invalid add command");
	}
	/* The line was rejected. If it starts with '@' it has to be
	   consumed here: ST_SKIP hands any line with a leading '@'
	   straight back to this state, so passing it on unread would
	   bounce the same line between the two states forever. Other
	   lines are still given to ST_SKIP for its misplaced '@'
	   check. */
	return (Inbuf[0] == '@' ? ST_SKIP : -ST_SKIP);
}
/* We should have a site name ("NM ") line. If so, the name is saved
   and the next line goes to ST_SEN; if not, a complaint is made and
   this same line is given to ST_SEN. */
int
st_snm()
{
	if (strncmp(Inbuf, "NM ", 3) != 0) {
		complain("missing site name");
		return (-ST_SEN);
	}
	(void)add_string(&S_nm, Inbuf + 3);
	return (ST_SEN);
}
/* We should have the enterer ("EN ") line. If so, it is taken apart
   and saved and the next line goes to ST_STM; if not, a complaint is
   made and this same line is given to ST_STM. */
int
st_sen()
{
	if (strncmp(Inbuf, "EN ", 3) != 0) {
		complain("missing enterer");
		return (-ST_STM);
	}
	(void)add_enterer(&S_en, Inbuf + 3);
	return (ST_STM);
}
/* We should have the "TM " line: a time zone abbreviation that may
   be followed by a semicolon and further semicolon separated fields.
   The zone is looked up in Zones; the name found there is saved,
   followed by the remaining fields. If the line is missing, the
   same line is given to ST_STT; an unknown zone is complained about
   and the line is dropped. */
int
st_stm()
{
	char *semi;
	char **zone;

	if (strncmp(Inbuf, "TM ", 3) != 0) {
		complain("missing timezone");
		return (-ST_STT);
	}
	/* Cut the line at the first semicolon for the duration of the
	   lookup, so that only the abbreviation is compared. */
	semi = strchr(Inbuf + 3, ';');
	if (semi) {
		*semi = 0;
	}
	zone = Zones;
	while (*zone && strcmp(*zone, Inbuf + 3) != 0) {
		zone += 2;
	}
	if (semi) {
		*semi = ';';
	}
	if (!*zone) {
		complain("invalid time zone");
		return (ST_STT);
	}
	(void)add_string(&S_tm, zone[1]);
	if (semi) {
		add_fields(&S_tm, semi + 1);
	}
	return (ST_STT);
}
/* We should have the archive title ("TT ") line. If so, the title
   is saved and the next line goes to ST_SAD; if not, a complaint is
   made and this same line is given to ST_SAD. */
int
st_stt()
{
	if (strncmp(Inbuf, "TT ", 3) != 0) {
		complain("missing archive title");
		return (-ST_SAD);
	}
	(void)add_string(&S_tt, Inbuf + 3);
	return (ST_SAD);
}
/* We should have the administrator ("AD ") line. If so, it is taken
   apart and saved and the next line goes to ST_SMA; if not, a
   complaint is made and this same line is given to ST_SMA. */
int
st_sad()
{
	if (strncmp(Inbuf, "AD ", 3) != 0) {
		complain("missing administrator");
		return (-ST_SMA);
	}
	(void)add_name(&S_ad, Inbuf + 3);
	return (ST_SMA);
}
/* We should have the administrator's mailing address ("MA ") line,
   whose semicolon separated fields are saved. A bare "MA" is
   accepted as an entry with no address. Either way the next line
   goes to ST_SCO; if the line is missing, a complaint is made and
   this same line is given to ST_SCO. */
int
st_sma()
{
	if (strncmp(Inbuf, "MA ", 3) == 0) {
		add_fields(&S_ma, Inbuf + 3);
		return (ST_SCO);
	}
	if (strcmp(Inbuf, "MA") == 0) {
		return (ST_SCO);
	}
	complain("missing administrator's address");
	return (-ST_SCO);
}
/* We should have one or more communications method ("CO ") lines.
   A uucp line must have 4 fields and an ftp line 6; good lines have
   their fields saved, bad ones are complained about. The state
   stays the same for as long as CO lines keep coming. The first
   line that isn't one is given to ST_SIX, after a complaint if no
   communications data was saved. */
int
st_sco()
{
	char *fields;

	if (strncmp(Inbuf, "CO ", 3) != 0) {
		if (!S_co) {
			complain("missing communications data");
		}
		return (-ST_SIX);
	}
	fields = Inbuf + 3;
	if (strncmp(fields, "uucp;", 5) == 0) {
		if (count_fields(fields) == 4) {
			add_fields(&S_co, fields);
		} else {
			complain("wrong number of fields for uucp");
		}
	} else if (strncmp(fields, "ftp;", 4) == 0) {
		if (count_fields(fields) == 6) {
			add_fields(&S_co, fields);
		} else {
			complain("wrong number of fields for ftp");
		}
	} else {
		complain("unknown communications method");
	}
	return (ST_SCO);
}
/* We may have index ("IX ") lines. Each must have 6 fields; good
   lines have their fields saved, bad ones are complained about, and
   the state stays the same. A bare "IX" is consumed and the next
   line goes to ST_SKW; any other line is given to ST_SKW as it
   is. */
int
st_six()
{
	if (strncmp(Inbuf, "IX ", 3) != 0) {
		return (strcmp(Inbuf, "IX") == 0 ? ST_SKW : -ST_SKW);
	}
	if (count_fields(Inbuf + 3) == 6) {
		add_fields(&S_ix, Inbuf + 3);
	} else {
		complain("wrong number of fields in index line");
	}
	return (ST_SIX);
}
/* We may have keyword ("KW ") lines. Each one is saved and the
   state stays the same. A bare "KW" is consumed and the next line
   goes to ST_SDE; any other line is given to ST_SDE as it is. */
int
st_skw()
{
	if (strncmp(Inbuf, "KW ", 3) != 0) {
		return (strcmp(Inbuf, "KW") == 0 ? ST_SDE : -ST_SDE);
	}
	(void)add_string(&S_kw, Inbuf + 3);
	return (ST_SKW);
}
/* We may have description ("DE ") lines. Each one is saved and the
   state stays the same. A bare "DE" is consumed without saving
   anything, also staying in this state. Any other line is given to
   ST_SXX as it is. */
int
st_sde()
{
	if (strncmp(Inbuf, "DE ", 3) != 0) {
		return (strcmp(Inbuf, "DE") == 0 ? ST_SDE : -ST_SXX);
	}
	(void)add_string(&S_de, Inbuf + 3);
	return (ST_SDE);
}
/* Have not found an @ADD where expected. Skip lines until one is
   found with a leading '@'; that line is given to ST_ADDCMD without
   reading another. An '@' anywhere else on a skipped line draws a
   warning. */
int
st_skip()
{
	int next;

	if (Inend) {
		complain("missing @END");
		return (ST_DONE);
	}
	if (*Inbuf == '@') {
		next = -ST_ADDCMD;
	} else {
		if (strchr(Inbuf, '@') != 0) {
			complain("possible misplaced @");
		}
		next = ST_SKIP;
	}
	return (next);
}
/* Print an archive site entry on stdout in a human readable form,
using the strings collected in the S_* lists. The S_nm, S_tt, S_ad,
S_en and S_tm lists are used without checking that they hold the
expected number of strings, and the S_co and S_ix lists are walked a
whole record at a time, so this must only be called for an entry that
was read without errors. */
void
print_site()
{
char *sptr; /* the communications method of the current S_co record */
char *nptr; /* not used */
STRLIST *sp;
printf("\nArchive site %s, %s\n", S_nm->s_string, S_tt->s_string);
if (S_de) {
prstrlist("\t", S_de);
}
if (S_kw) {
printf("Here are some words that describe");
printf(" what's in the archive:\n");
prstrlist("\t", S_kw);
}
/* S_ad holds the administrator's address and then the real name;
the name is printed first. */
printf("This archive is administered by %s, %s",
S_ad->s_next->s_string, S_ad->s_string);
/* S_en holds the enterer's address, real name and the rest of the
EN line. When the enterer is the administrator, the name and address
are not printed a second time. */
if (strcmp(S_en->s_string, S_ad->s_string) == 0
&& strcmp(S_en->s_next->s_string, S_ad->s_next->s_string) == 0) {
printf(",\nwho submitted this entry on %s.\n",
S_en->s_next->s_next->s_string);
} else {
printf(";\nthe archive entry was submitted by %s, %s,\n",
S_en->s_next->s_string, S_en->s_string);
printf("\ton %s.\n", S_en->s_next->s_next->s_string);
}
if (S_ma) {
printf("The archive mailing address is:\n");
prstrlist("\t", S_ma);
}
/* The first S_tm string is the time zone name; any others are the
remaining fields of the TM line. */
printf("The archive is in the %s time zone.\n", S_tm->s_string);
if (S_tm->s_next) {
printf("Here are times the archive is less loaded:\n");
prstrlist("\t", S_tm->s_next);
}
/* Each pass of this loop prints one CO record. A record starts
with the method and the file pattern; a uucp record then has two more
strings and an ftp record four. sp is left on the record's last
string, so that the loop increment moves on to the next record. */
for (sp = S_co; sp; sp = sp->s_next) {
sptr = sp->s_string;
sp = sp->s_next;
printf("Files tagged with a pattern matching \"%s\"",
sp->s_string);
sp = sp->s_next;
if (strcmp(sptr, "uucp") == 0) {
printf(" can be obtained via uucp.\n");
printf("They can be found in directory \"%s\". ",
sp->s_string);
sp = sp->s_next;
printf("The L.sys entry to use is:\n\t%s\n",
sp->s_string);
} else if (strcmp(sptr, "ftp") == 0) {
printf(" can be obtained with ftp.\n");
printf("The domain name is \"%s\"", sp->s_string);
sp = sp->s_next;
printf(" and its internet address is %s.\n",
sp->s_string);
sp = sp->s_next;
printf("The files are in directory \"%s\".\n",
sp->s_string);
sp = sp->s_next;
/* The access times field may be empty. */
if (sp->s_string[0]) {
printf("These are the times the files");
printf(" may be accessed: %s.\n",
sp->s_string);
}
}
}
/* Each pass of this loop prints one IX record of six strings; the
fourth is skipped. Empty fields are not printed. */
for (sp = S_ix; sp; sp = sp->s_next) {
printf("There is an index file whose access tag is \"%s\" ",
sp->s_string);
sp = sp->s_next;
printf("in file \"%s\".", sp->s_string);
sp = sp->s_next;
if (sp->s_string[0]) {
printf(" The file is %sK bytes.",
sp->s_string);
}
printf("\n");
/* Step over the fourth string to get to the fifth. */
sp = sp->s_next;
sp = sp->s_next;
if (sp->s_string[0]) {
printf("You will need these programs to uncompress");
printf(" the file: %s.\n", sp->s_string);
}
sp = sp->s_next;
if (sp->s_string[0]) {
printf("\t%s\n", sp->s_string);
}
}
}
/* This is the end of the site entry. A site entry has to be followed
   by an empty line. When it is, the entry is displayed (if printing
   was asked for and the entry has no errors) and the next line goes
   to ST_ADDCMD. Otherwise a complaint is made: a line starting with
   '@' is given to ST_ADDCMD as it is, other lines lead to ST_SKIP,
   and the end of the input ends the program.

   The strings collected for the entry are freed whichever way the
   state is left. If they were kept after a bad ending, they would
   still be at the front of the lists when the next site entry was
   read and would be printed as part of it. */
int
st_sxx()
{
	int next;

	if (Inend) {
		complain("missing @END");
		next = ST_DONE;
	} else if (Inbuf[0] == '@') {
		complain("missing blank line after add");
		next = -ST_ADDCMD;
	} else if (strchr(Inbuf, '@')) {
		complain("possible misplaced @");
		next = ST_SKIP;
	} else if (Inbuf[0]) {
		complain("extraneous data");
		next = ST_SKIP;
	} else {
		if (Print && !Error) {
			print_site();
		}
		next = ST_ADDCMD;
	}
	freestrlist(&S_nm);
	freestrlist(&S_en);
	freestrlist(&S_tm);
	freestrlist(&S_tt);
	freestrlist(&S_ad);
	freestrlist(&S_ma);
	freestrlist(&S_co);
	freestrlist(&S_ix);
	freestrlist(&S_kw);
	freestrlist(&S_de);
	return (next);
}
/* Have found an @END; skip to the end of the file, checking for
   possible misplaced @'s. Returns ST_END until the input runs out,
   then ST_DONE. */
int
st_end()
{
	if (Inend) {
		return (ST_DONE);
	}
	if (strchr(Inbuf, '@')) {
		complain("possible misplaced @");
	}
	return (ST_END);
}
-------------------------<CUT HERE>-------------------------------------------