jerry@olivey.olivetti.com (Jerry Aguirre) (08/17/90)
/*
 * Here is a tool I threw together when my news history got corrupted and
 * users started complaining about seeing duplicates of articles.
 *
 * ===BEGIN histdups.c===
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LINESIZ 1024    /* size of a line buffer, including the NUL */
#define MAXF 32         /* most file names collected for one message-id */

/* Expects the stdin to be the history file, sorted.  Stdout is a list
 * of file names which are duplicates of earlier articles.  Run after
 * expire -r and then "rm" the files listed in the output.
 *
 *	sort <history | histdups >dupfiles; xargs <dupfiles rm
 *
 * If the news history becomes corrupted then you can wind up with
 * duplicates.  These are both a waste of space and a pain for people
 * reading news.
 *
 * B news expire -r will find the dups and then enter all of them into
 * the history file.  (It doesn't even match up the cross postings
 * to each other correctly.)  This program will output the names of all
 * but the first duplicate in each news group.  (Where "first" is based
 * on article numbering which presumably represents arrival order.)
 *
 * 16Aug90 Jerry Aguirre <jerry@atc.olivetti.com>
 */

/* File names ("group.name/number") gathered for the current message-id. */
static char files[MAXF][LINESIZ];
static int nf;                  /* number of entries in files[] in use */

/*
 * Append the file names found in one history line to files[].
 *
 * The line is split on tabs; everything after the second tab is taken as
 * a space-separated list of file names.  Lines with fewer than two tabs
 * contribute nothing.  Once files[] holds MAXF names the rest of the list
 * is ignored and a warning is written to stderr.
 */
static void parsefiles(const char *line)
{
    const char *pf;
    size_t len;

    pf = strchr(line, '\t');
    if (pf == NULL)
        return;
    pf = strchr(pf + 1, '\t');
    if (pf == NULL)
        return;
    pf++;

    for (;;) {
        pf += strspn(pf, " ");
        if (*pf == '\0')
            return;
        if (nf >= MAXF) {
            fprintf(stderr,
                "histdups: more than %d files for %.*s; ignoring the rest\n",
                MAXF, (int)strcspn(line, "\t"), line);
            return;
        }
        len = strcspn(pf, " ");
        /* A name always fits when the line came from main()'s buffer;
         * the check keeps the copy bounded for any other caller. */
        if (len < LINESIZ) {
            memcpy(files[nf], pf, len);
            files[nf][len] = '\0';
            nf++;
        }
        pf += len;
    }
}

/*
 * Print the duplicate files among files[0..nf-1], one per line.
 *
 * Entries are compared pairwise.  When two entries have the same text
 * before their first '/', the one whose number after the '/' is higher is
 * flagged (lowest number stays); equal numbers flag neither.  Entries
 * without a '/' are never compared.  Flagged names are written to stdout
 * with every '.' replaced by '/'.
 */
static void printdups(void)
{
    int flags[MAXF] = {0};
    int i1, i2;
    const char *s1, *s2, *p;
    size_t glen;
    long n1, n2;

    for (i1 = 0; i1 < nf; i1++) {
        s1 = strchr(files[i1], '/');
        if (s1 == NULL)
            continue;
        glen = (size_t)(s1 - files[i1]);
        n1 = strtol(s1 + 1, (char **)NULL, 10);

        for (i2 = i1 + 1; i2 < nf; i2++) {
            s2 = strchr(files[i2], '/');
            if (s2 == NULL)
                continue;
            if ((size_t)(s2 - files[i2]) != glen
                || memcmp(files[i1], files[i2], glen) != 0)
                continue;       /* different group */
            n2 = strtol(s2 + 1, (char **)NULL, 10);
            if (n2 > n1)
                flags[i2] = 1;  /* lowest number stays */
            else if (n2 < n1)
                flags[i1] = 1;
        }
    }

    for (i1 = 0; i1 < nf; i1++) {
        if (!flags[i1])
            continue;
        for (p = files[i1]; *p != '\0'; p++)
            putchar(*p == '.' ? '/' : *p);
        putchar('\n');
    }
}

/*
 * Read the sorted history from stdin and group consecutive lines whose
 * first tab-delimited field (the message-id) is identical.  The first line
 * of each group is held back in lastline and only parsed once a second
 * line with the same id shows up, so ids that occur once cost nothing.
 * Lines without a tab are ignored.  Returns EXIT_FAILURE on an I/O error.
 */
int main(void)
{
    char line[LINESIZ];
    char id[LINESIZ];
    char lastline[LINESIZ];
    char *tab;
    size_t len;
    int ch, dropped, same;

    nf = 0;
    id[0] = '\0';
    lastline[0] = '\0';

    while (fgets(line, sizeof line, stdin) != NULL) {
        len = strlen(line);
        if (len > 0 && line[len - 1] == '\n') {
            line[len - 1] = '\0';
        } else {
            /* No newline: either the last line of the input or a line
             * longer than the buffer.  Drain whatever is left of it. */
            dropped = 0;
            while ((ch = getchar()) != EOF && ch != '\n')
                dropped = 1;
            if (dropped) {
                /* A truncated file name must never reach the rm list. */
                fprintf(stderr,
                    "histdups: line longer than %d bytes skipped\n",
                    LINESIZ - 1);
                continue;
            }
        }

        tab = strchr(line, '\t');
        if (tab == NULL)
            continue;

        *tab = '\0';            /* isolate the message-id for comparison */
        same = (strcmp(line, id) == 0);
        if (!same) {
            printdups();        /* flush the group that just ended */
            nf = 0;
            strcpy(id, line);
        }
        *tab = '\t';

        if (same) {
            /* we have a dup */
            if (lastline[0] != '\0') {
                parsefiles(lastline);
                lastline[0] = '\0';
            }
            parsefiles(line);
        } else {
            strcpy(lastline, line);
        }
    }

    /* The last group has no following id to trigger its flush. */
    printdups();

    if (ferror(stdin) || fflush(stdout) == EOF) {
        fprintf(stderr, "histdups: I/O error\n");
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
/* ===END histdups.c=== */