comparc@twwells.uucp (comp.archives) (11/15/88)
I have been working on a program to validate the DB: articles. I added an option to display the site entries in something more human readable and so I thought it was time to send it out. Right at the moment, it does a limited amount of interpretation of site entries and just passes on the rest of the article. The human-readable part could also stand quite a bit of work. However, it is a start, and I thought some of you would like to see it, primitive as it is. --- Bill {uunet|novavax}!proxftl!twwells!bill send comp.archives postings to twwells!comp-archives send comp.archives related mail to twwells!comp-archives-request --- Warning: this is long! -------------------------<CUT HERE>------------------------------------------- #include <stdio.h> #include <ctype.h> /* This is a program to deal with the comp.archives database. This version is the earliest; it only verifies the DB: input files. */ extern int exit(); extern char *malloc(); extern char *realloc(); extern char *strchr(); extern char *optarg; extern int optind; extern int opterr; /* This is used when constructing the database in memory; it lets us keep track of several lines of the same kind of information. */ typedef struct STRLIST { struct STRLIST *s_next; char *s_string; } STRLIST; /* Here is a table of time zones. @@It is incomplete, but it is better than nothing. */ char *Zones[] = { "EST", "Eastern", "PST", "Pacific", 0, 0, }; /* The state stuff. Note that ST_DONE must be zero or the state loop won't work as expected. 
*/ int st_newart(), st_dbart(), st_delcmd(), st_addcmd(), st_snm(), st_sen(), st_stm(), st_stt(), st_sad(), st_sma(), st_sco(), st_six(), st_skw(), st_sde(), st_skip(), st_sxx(), st_end(); enum {ST_DONE, ST_NEWART, ST_DBART, ST_DELCMD, ST_ADDCMD, ST_SNM, ST_SEN, ST_STM, ST_STT, ST_SAD, ST_SMA, ST_SCO, ST_SIX, ST_SKW, ST_SDE, ST_SKIP, ST_SXX, ST_END}; int (*States[])() = {exit, st_newart, st_dbart, st_delcmd, st_addcmd, st_snm, st_sen, st_stm, st_stt, st_sad, st_sma, st_sco, st_six, st_skw, st_sde, st_skip, st_sxx, st_end}; char *Sname[] = {"DONE", "ST_NEWART", "ST_DBART", "ST_DELCMD", "ST_ADDCMD", "ST_SNM", "ST_SEN", "ST_STM", "ST_STT", "ST_SAD", "ST_SMA", "ST_SCO", "ST_SIX", "ST_SKW", "ST_SDE", "ST_SKIP", "ST_SXX", "ST_END"}; /* Input buffer stuff. */ char *Inbuf; /* The line input buffer. */ int Inend; /* Flag: at end of input. */ int Inlen; /* Length of the input buffer. */ int Inline; /* Line number of the input. */ /* Lines for the site database. */ STRLIST *S_nm; /* The name of the site. */ STRLIST *S_en; /* Who entered the entry. */ STRLIST *S_tm; /* Times to call. */ STRLIST *S_tt; /* Archive site title. */ STRLIST *S_ad; /* Archive administrator. */ STRLIST *S_ma; /* Mailind address. */ STRLIST *S_co; /* Communications method. */ STRLIST *S_ix; /* Index file. */ STRLIST *S_kw; /* Keyword file. */ STRLIST *S_de; /* Description. */ /* Miscellaneous declarations. */ void nzfree(); char *myalloc(); void readline(); int Debug; /* Set to print debugging output. */ int Error; /* Set if the current entry has an error. */ int Print; /* Print the data */ char Options[] = "px"; /* program options */ char *Program_name; /* The program name. */ void usage(msg) char *msg; { fprintf(stderr, "%s: %s.\nusage: %s -%s file\n", Program_name, msg, Program_name, Options); exit(1); } int main(argc, argv) int argc; char **argv; { int state; /* Parse the arguments. 
*/ if (Program_name = strchr(argv[0], '/')) { ++Program_name; } else { Program_name = argv[0]; } opterr = 0; while (1) { switch (getopt(argc, argv, Options)) { case -1: break; default: usage("illegal option"); case 'p': Print = 1; continue; case 'x': Debug = 1; continue; } break; } argv += optind; argc -= optind; switch (argc) { default: usage("too many arguments"); case 1: if (!freopen(argv[0], "r", stdin)) { fprintf(stderr, "%s: unable to open %s\n", Program_name, argv[0]); exit(3); } break; case 0: usage("missing file argument"); } /* Read each line of the input. The current state determines what is done with it. If a state returns a negative state number, that means to go on to that state without reading a new line. This loop exits because ST_DONE vectors to exit(). */ for (state = ST_NEWART; ; state = (*States[state])()) { if (state > 0) { readline(); } else { state = -state; } if (Debug) { if (Inend) { printf("%s:\n", Sname[state]); } else { printf("%s:\"%s\"\n", Sname[state], Inbuf); } } } } /* Print an error message, with the current input line. */ void complain(msg) char *msg; { Error = 1; fprintf(stderr, "%s: %s, ", Program_name, msg); if (Inend) { fprintf(stderr, "at the end of the input\n"); } else { fprintf(stderr, "line %d: \"%s\"\n", Inline, Inbuf); } } /* free a pointer, unless the pointer is null. */ void nzfree(ptr) char *ptr; { if (ptr) { free(ptr); } } /* Interface to malloc/realloc; error exits if malloc fails. */ char * myalloc(obuf, size) char *obuf; /* old buffer, if reallocating */ int size; /* allocation size */ { char *ptr; if (!size) { nzfree(obuf); return (0); } if (!(ptr = obuf ? realloc(obuf, (unsigned)size) : malloc((unsigned)size))) { fprintf(stderr, "%s: out of memory\n", Program_name); exit(2); } return (ptr); } /* Allocate space for a string and copy the string into that space. 
*/ char * stralloc(ptr) char *ptr; { char *np; np = myalloc((char *)0, strlen(ptr) + 1); strcpy(np, ptr); return (np); } /* Read a line into the input buffer; extend the buffer if necessary. It returns the number of characters in the line, counting the end of the line. */ void readline() { int cc; int len; if (Inend) { return; } ++Inline; len = 0; do { switch (cc = getchar()) { case 0: fprintf(stderr, "%s: warning, nul on line %d\n", Inline); continue; case EOF: if (!len) { Inend = 1; if (Inbuf) { Inbuf[0] = 0; } return; } /* no break */ case '\n': cc = 0; break; } if (len >= Inlen) { Inlen += Inlen < 480 ? Inlen + 32 : 100; Inbuf = myalloc(Inbuf, Inlen); } Inbuf[len++] = cc; } while (cc); } /* Add a string to the end of list of strings. Leading spaces are removed from the string. */ STRLIST ** add_string(list, buf) STRLIST **list; char *buf; { while (*list) { list = &(*list)->s_next; } while (isspace(*buf)) { ++buf; } *list = (STRLIST *)myalloc((char *)0, sizeof(STRLIST)); (*list)->s_next = 0; (*list)->s_string = stralloc(buf); return (&(*list)->s_next); } /* Add a part of a string to the string list. */ STRLIST ** add_segment(list, buf, end) STRLIST **list; char *buf; char *end; { char *ptr; int len; while (*list) { list = &(*list)->s_next; } while (buf < end && isspace(*buf)) { ++buf; } len = end - buf; *list = (STRLIST *)myalloc((char *)0, sizeof(STRLIST)); (*list)->s_next = 0; (*list)->s_string = ptr = myalloc((char *)0, len + 1); strncpy(ptr, buf, len); ptr[len] = 0; return (&(*list)->s_next); } /* Add fields from a line containing semicolon separated fields. */ void add_fields(list, ptr) STRLIST **list; char *ptr; { char *ep; for ( ; ep = strchr(ptr, ';'); ptr = ep + 1) { list = add_segment(list, ptr, ep); } (void)add_string(list, ptr); } /* Count the number of fields on a line. 
*/ int count_fields(ptr) char *ptr; { char *ep; int cnt; cnt = 0; for (++ptr; ep = strchr(ptr, ';'); ptr = ep + 1) { ++cnt; } return (cnt + 1); } /* Print the strings in a string list. */ void prstrlist(msg, sp) char *msg; STRLIST *sp; { for ( ; sp; sp = sp->s_next) { printf("%s%s\n", msg, sp->s_string); } } /* Free a list of strings. */ void freestrlist(sp0) STRLIST **sp0; { STRLIST *sp; STRLIST *sp1; for (sp = *sp0; sp; sp = sp1) { sp1 = sp->s_next; nzfree(sp->s_string); nzfree((char *)sp); } *sp0 = 0; } /* This takes a user name and address and a name and stores it in the string table. */ void add_name(list, buf) STRLIST **list; char *buf; { char *p1; char *p2; char *p3; while (isspace(*buf)) { ++buf; } if (!(p1 = strchr(buf, ' '))) { complain("missing real name"); return; } p2 = p1; while (isspace(*p2)) { ++p2; } if (*p2 != '(') { complain("missing the '('"); return; } ++p2; for (p3 = p2; *p3 && *p3 != ')'; ++p3) ; if (*p3 != ')' || p3[1] != 0) { complain("the end of the real name is missing."); return; } list = add_segment(list, buf, p1); (void)add_segment(list, p2, p3); } /* This takes an "EN" line and creates the strings for it. */ void add_enterer(list, buf) STRLIST **list; char *buf; { char *p1; char *p2; char *p3; char *p4; while (isspace(*buf)) { ++buf; } if (!(p1 = strchr(buf, ' '))) { complain("missing real name"); return; } p2 = p1; while (isspace(*p2)) { ++p2; } if (*p2 != '(') { complain("missing the '('"); return; } ++p2; for (p3 = p2; *p3 && *p3 != ')'; ++p3) ; if (*p3 != ')') { complain("missing the ')'"); return; } p4 = p3 + 1; while (isspace(*p4)) { ++p4; } list = add_segment(list, buf, p1); list = add_segment(list, p2, p3); (void)add_string(list, p4); } /* This is called when at the start of a new article. It looks for the Subject: line and goes to state ST_DBART. 
*/ int st_newart() { char *ptr; if (Inend) { complain("where's the article?"); return (ST_DONE); } ptr = Inbuf; if (strncmp(ptr, "Subject:", 8) == 0) { ptr += 8; while (isspace(*ptr)) { ++ptr; } if (strncmp(ptr, "DB:", 3) != 0) { complain("invalid Subject:"); } else { printf("%s\n", Inbuf); return (ST_DBART); } } else if (*ptr == '@') { complain("missing Subject:"); return (-ST_DBART); } return (ST_NEWART); } /* This is called after we have determined that this is an article. It looks for the start of the database information. */ int st_dbart() { if (Inend) { complain("no database commands"); return (ST_DONE); } if (Inbuf[0] == '@') { return (-ST_DELCMD); } if (strchr(Inbuf, '@')) { complain("possible misplaced @"); } return (ST_DBART); } /* Processing any delete commands. These always come first. */ int st_delcmd() { if (Inend) { complain("missing @END"); return (ST_DONE); } if (strncmp(Inbuf, "@ADD ", 5) == 0) { return (-ST_ADDCMD); } if (strcmp(Inbuf, "@END") == 0) { return (ST_END); } if (strncmp(Inbuf, "@DEL ", 5) == 0) { if (strncmp(Inbuf + 5, "INFO ", 5) == 0) { /*@@validate the delete */ printf("deleting %s from info database", Inbuf + 10); } else if (strncmp(Inbuf + 5, "SITE ", 5) == 0) { /*@@validate the delete */ printf("deleting %s from site database", Inbuf + 10); } else if (strncmp(Inbuf + 5, "INDEX ", 6) == 0) { /*@@validate the delete */ printf("deleting %s from index database", Inbuf + 11); } else { complain("invalid delete command"); } } else if (strncmp(Inbuf, "@DELALL ", 8) == 0) { /*@@validate the delete */ printf("deleting all %s from index database", Inbuf + 8); } else { complain("command expected"); } return (ST_DELCMD); } /* Have process all delete commands, now do any add commands. 
*/ int st_addcmd() { if (Inend) { complain("missing @END"); return (ST_DONE); } if (strcmp(Inbuf, "@END") == 0) { return (ST_END); } if (strncmp(Inbuf, "@ADD ", 5) == 0) { if (strcmp(Inbuf + 5, "INFO") == 0) { /*@@return (ST_INM);*/ return (ST_SKIP); } else if (strcmp(Inbuf + 5, "SITE") == 0) { Error = 0; return (ST_SNM); } else if (strcmp(Inbuf + 5, "INDEX") == 0) { /*@@return (ST_XADD);*/ return (ST_SKIP); } else { complain("invalid add command"); } } else { complain("command expected"); } return (-ST_SKIP); } /* We should have a site name. */ int st_snm() { if (strncmp(Inbuf, "NM ", 3) == 0) { (void)add_string(&S_nm, Inbuf + 3); return (ST_SEN); } complain("missing site name"); return (-ST_SEN); } /* We should have the line for the enterer. */ int st_sen() { if (strncmp(Inbuf, "EN ", 3) == 0) { (void)add_enterer(&S_en, Inbuf + 3); return (ST_STM); } complain("missing enterer"); return (-ST_STM); } /* We should have the line for the timezone and best times to call. */ int st_stm() { char *ptr; char **zp; if (strncmp(Inbuf, "TM ", 3) != 0) { complain("missing timezone"); return (-ST_STT); } if (ptr = strchr(Inbuf + 3, ';')) { *ptr = 0; } for (zp = Zones; *zp && strcmp(*zp, Inbuf + 3) != 0; zp += 2) ; if (ptr) { *ptr = ';'; } if (!*zp) { complain("invalid time zone"); return (ST_STT); } (void)add_string(&S_tm, zp[1]); if (ptr) { add_fields(&S_tm, ptr + 1); } return (ST_STT); } /* We should have the archive title. */ int st_stt() { if (strncmp(Inbuf, "TT ", 3) == 0) { (void)add_string(&S_tt, Inbuf + 3); return (ST_SAD); } complain("missing archive title"); return (-ST_SAD); } /* We should have the administrator. */ int st_sad() { if (strncmp(Inbuf, "AD ", 3) == 0) { (void)add_name(&S_ad, Inbuf + 3); return (ST_SMA); } complain("missing administrator"); return (-ST_SMA); } /* We should have the administrator's mailing address. 
*/ int st_sma() { char *ptr; char *ep; if (strncmp(Inbuf, "MA ", 3) == 0) { add_fields(&S_ma, Inbuf + 3); return (ST_SCO); } if (strcmp(Inbuf, "MA") == 0) { return (ST_SCO); } complain("missing administrator's address"); return (-ST_SCO); } /* We should have a communications method line. */ int st_sco() { char *ptr; ptr = Inbuf + 3; if (strncmp(Inbuf, "CO ", 3) == 0) { if (strncmp(ptr, "uucp;", 5) == 0) { if (count_fields(ptr) != 4) { complain("wrong number of fields for uucp"); } else { add_fields(&S_co, ptr); } } else if (strncmp(ptr, "ftp;", 4) == 0) { if (count_fields(ptr) != 6) { complain("wrong number of fields for ftp"); } else { add_fields(&S_co, ptr); } } else { complain("unknown communications method"); } return (ST_SCO); } if (!S_co) { complain("missing communications data"); } return (-ST_SIX); } /* We may have an index line. */ int st_six() { if (strncmp(Inbuf, "IX ", 3) == 0) { if (count_fields(Inbuf + 3) != 6) { complain("wrong number of fields in index line"); } else { add_fields(&S_ix, Inbuf + 3); } return (ST_SIX); } if (strcmp(Inbuf, "IX") == 0) { return (ST_SKW); } return (-ST_SKW); } /* We may have a keyword line. */ int st_skw() { if (strncmp(Inbuf, "KW ", 3) == 0) { (void)add_string(&S_kw, Inbuf + 3); return (ST_SKW); } if (strcmp(Inbuf, "KW") == 0) { return (ST_SDE); } return (-ST_SDE); } /* We may have a description line. */ int st_sde() { if (strncmp(Inbuf, "DE ", 3) == 0) { (void)add_string(&S_de, Inbuf + 3); return (ST_SDE); } if (strcmp(Inbuf, "DE") == 0) { return (ST_SDE); } return (-ST_SXX); } /* Have not found an @ADD where expected. Skip till the next line found with a leading @. */ int st_skip() { if (Inend) { complain("missing @END"); return (ST_DONE); } if (Inbuf[0] == '@') { return (-ST_ADDCMD); } if (strchr(Inbuf, '@')) { complain("possible misplaced @"); } return (ST_SKIP); } /* Print an archive site entry. 
*/ void print_site() { char *sptr; char *nptr; STRLIST *sp; printf("\nArchive site %s, %s\n", S_nm->s_string, S_tt->s_string); if (S_de) { prstrlist("\t", S_de); } if (S_kw) { printf("Here are some words that describe"); printf(" what's in the archive:\n"); prstrlist("\t", S_kw); } printf("This archive is administered by %s, %s", S_ad->s_next->s_string, S_ad->s_string); if (strcmp(S_en->s_string, S_ad->s_string) == 0 && strcmp(S_en->s_next->s_string, S_ad->s_next->s_string) == 0) { printf(",\nwho submitted this entry on %s.\n", S_en->s_next->s_next->s_string); } else { printf(";\nthe archive entry was submitted by %s, %s,\n", S_en->s_next->s_string, S_en->s_string); printf("\ton %s.\n", S_en->s_next->s_next->s_string); } if (S_ma) { printf("The archive mailing address is:\n"); prstrlist("\t", S_ma); } printf("The archive is in the %s time zone.\n", S_tm->s_string); if (S_tm->s_next) { printf("Here are times the archive is less loaded:\n"); prstrlist("\t", S_tm->s_next); } for (sp = S_co; sp; sp = sp->s_next) { sptr = sp->s_string; sp = sp->s_next; printf("Files tagged with a pattern matching \"%s\"", sp->s_string); sp = sp->s_next; if (strcmp(sptr, "uucp") == 0) { printf(" can be obtained via uucp.\n"); printf("They can be found in directory \"%s\". 
", sp->s_string); sp = sp->s_next; printf("The L.sys entry to use is:\n\t%s\n", sp->s_string); } else if (strcmp(sptr, "ftp") == 0) { printf(" can be obtained with ftp.\n"); printf("The domain name is \"%s\"", sp->s_string); sp = sp->s_next; printf(" and its internet address is %s.\n", sp->s_string); sp = sp->s_next; printf("The files are in directory \"%s\".\n", sp->s_string); sp = sp->s_next; if (sp->s_string[0]) { printf("These are the times the files"); printf(" may be accessed: %s.\n", sp->s_string); } } } for (sp = S_ix; sp; sp = sp->s_next) { printf("There is an index file whose access tag is \"%s\" ", sp->s_string); sp = sp->s_next; printf("in file \"%s\".", sp->s_string); sp = sp->s_next; if (sp->s_string[0]) { printf(" The file is %sK bytes.", sp->s_string); } printf("\n"); sp = sp->s_next; sp = sp->s_next; if (sp->s_string[0]) { printf("You will need these programs to uncompress"); printf(" the file: %s.\n", sp->s_string); } sp = sp->s_next; if (sp->s_string[0]) { printf("\t%s\n", sp->s_string); } } } /* This is the end of the site entry. Display the site entry and free the data used by it. Then go to state ST_ADDCMD. */ int st_sxx() { if (Inend) { complain("missing @END"); return (ST_DONE); } if (Inbuf[0] == '@') { complain("missing blank line after add"); return (-ST_ADDCMD); } if (strchr(Inbuf, '@')) { complain("possible misplaced @"); return (ST_SKIP); } if (Inbuf[0]) { complain("extraneous data"); return (ST_SKIP); } if (Print && !Error) { print_site(); } freestrlist(&S_nm); freestrlist(&S_en); freestrlist(&S_tm); freestrlist(&S_tt); freestrlist(&S_ad); freestrlist(&S_ma); freestrlist(&S_co); freestrlist(&S_ix); freestrlist(&S_kw); freestrlist(&S_de); return (ST_ADDCMD); } /* Have found an @END, skip to the end of the file, checking for possible misplaced @'s. 
*/ st_end() { if (Inend) { return (ST_DONE); } if (strchr(Inbuf, '@')) { complain("possible misplaced @"); } return (ST_END); } -------------------------<CUT HERE>-------------------------------------------