[comp.sources.misc] v03i084: .newsrc shrinking/sorting programs

paulsc@radio_flyer.gwd.tek.com (Paul Scherf) (07/15/88)

Posting-number: Volume 3, Issue 84
Submitted-by: "Paul Scherf" <paulsc@radio_flyer.gwd.tek.com>
Archive-name: sort-.newsrc

Did I post this correctly?
[Yup.  ++bsa]

#!/bin/sh
# run this as a Bourne shell script
# to extract the files archived here
echo x - README
sed -e 's/^!!/!/' > README <<'!Funky!Stuff!'
Once upon a time, I wrote a sed command to mark all the skipped
articles (in unsubscribed groups) as if I had read them.

Another day (about the time all the news group names changed), I
started telling rn just to add new groups at the end of my .newsrc.
The idea was that I would edit my .newsrc later.

Yet another day, I realized that I didn't want to edit my 463-line
.newsrc file to make sure a couple of new groups were in the "right"
place.  So I whipped together a program that would "sort" my .newsrc
the way I like it.

Today, I wondered if anyone else might be interested in having these
programs for their own use, hacking, lint testing, bird cage lining (-:
or other amusement.

You probably have a different set of regional "distributions"
(categories), or you just like them in a different order, so you will
probably want to edit the categories[] table to suit your tastes and
locale.  You probably have different preferences on how to order the
groups, so you probably want to edit the rest of the program.  I'm
posting this program mostly to publish the idea of sorting (or
shrinking) your .newsrc file as a convenience (or a way to waste more
disk space by storing this program, than is saved by using this program
(-: ).

Anyone who wants to can copy this program and change it any way they
wish. (How am I going to stop you?)  I don't think anyone would buy a
program as trivial as this, so I don't care if you try to sell it.  If
you manage to get someone to buy this from you, I would find it
amusing.

If you come up with an interesting variation, I am interested in
hearing about it.

I have run this only on 4.2BSD-based machines, so there is probably
some machine somewhere out there where it won't run unchanged.  There
may well be machines where it doesn't work right.  I haven't run it
through lint for a long time, so you might get a bunch of complaints
from lint.  This program works fine for me, but your mileage may vary.
Use this program at your own peril.

Paul Scherf, Tektronix, Box 1000, MS 61-033, Wilsonville, OR, USA
paulsc@orca.GWD.Tek.COM			tektronix!orca!paulsc
!Funky!Stuff!
echo x - shrink.newsrc
sed -e 's/^!!/!/' > shrink.newsrc <<'!Funky!Stuff!'
#!/bin/sh
# Usage: shrink.newsrc
#
# Collapse the article list of every unsubscribed group into one range.
#	(i.e. mark skipped articles as read)
# Sort the groups into the desired order.
#
# It would have been nice to use a pipe from the sed part to the
# sort.newsrc part, but sh, on the machines I run this on at least, only
# yields the exit status of the last program on the pipeline.  I want to
# make sure every stage of the pipeline succeeds.

# The work file lives next to .newsrc instead of in /tmp: its name cannot
# be pre-created by another user, and each mv below is a rename within
# one filesystem, so .newsrc is never left half written.
tmp="$HOME/.newsrc.$$"

# Remove the work file however the script ends; turn signals into a
# failing exit so the cleanup above still runs.
trap 'rm -f "$tmp"' 0
trap 'exit 1' 1 2 15

# Step 1: in "group! ranges" lines keep only first-last article numbers.
# Step 2: sort the shrunken file.  Each step replaces .newsrc only if the
# command that produced the new contents succeeded.
sed 's/^\([^!]*![^,-]*\).*[,-]\(.*\)/\1-\2/' < "$HOME/.newsrc" > "$tmp" &&
mv "$tmp" "$HOME/.newsrc" &&
sort.newsrc < "$HOME/.newsrc" > "$tmp" &&
mv "$tmp" "$HOME/.newsrc"
!Funky!Stuff!
echo x - sort.newsrc.c
sed -e 's/^!!/!/' > sort.newsrc.c <<'!Funky!Stuff!'
/* re-order the .newsrc according to my likes */

#include <stdio.h>
#include <strings.h>

/*
 * misc utility routines
 */

/* like malloc, except emalloc() will abort instead of returning NULL */
/* on failure it prints "Out of memory" on stderr and exits with status 1 */
static char *emalloc(len)
int len;	/* number of bytes to allocate */
{
	char *malloc();
	void exit();	/* declared here: newer compilers reject implicit declarations */
	char *new = malloc(len);

	if (!new) {
		fprintf(stderr, "Out of memory\n");
		exit(1);
	}

	return new;
}

/* return a freshly allocated copy of string str (never returns NULL) */
static char *stralloc(str)
char *str;	/* string to copy */
{
	/*
	 * strcpy(), not strcat(): the memory from emalloc() is uninitialized,
	 * so it is not yet a string that something can be appended to.
	 * The cast matches emalloc()'s int parameter.
	 */
	return strcpy(emalloc((int)(strlen(str) + 1)), str);
}

/*
 * compare two counted strings, strncmp() style
 * When one string is a proper prefix of the other, the shorter one sorts
 * first (-1 or 1 is returned); otherwise the result is that of strncmp()
 * over the common length.
 */
static int mystrncmp(str1, str1len, str2, str2len)
char *str1;	/* first string */
int str1len;	/* number of characters of str1 to consider */
char *str2;	/* second string */
int str2len;	/* number of characters of str2 to consider */
{
	int common = str1len < str2len ? str1len : str2len;
	int diff = strncmp(str1, str2, common);

	/* a difference inside the common part, or equal lengths, decides it */
	if (diff != 0 || str1len == str2len)
		return diff;

	/* common part is identical: the shorter string goes first */
	return str1len < str2len ? -1 : 1;
}

/*
 * low level group list routines/data
 */

/*
 * look up the category name made of the first catlen characters of grp
 * returns its position in the categories[] table below, or -1 when the
 * name is not in the table
 */
static int catindex(grp, catlen)
char *grp;	/* group name string */
int catlen;	/* category name length */
{
	int i;					/* table position being tried */
	static char *categories[] = {		/* the category names */
		/* edit to taste and locale */

		/* regional hierarchies, most local first */
		"gwd",
		"tekwv",
		"tek",
		"pdx",
		"uwcsa",
		"or",
		"pnw",

		/* world-wide hierarchies, */
		/* interesting/important to boring/worthless */
		"news",
		"comp",
		"sci",
		"misc",
		"alt",
		"gnu",
		"rec",
		"soc",
		"talk",
		"control",
		"junk",
		"test",
	};
/* number of entries in categories[] */
#define NCATEGORIES	(sizeof categories / sizeof categories[0])

	for (i = 0; i < (int)NCATEGORIES; ++i) {
		/* the whole table entry must match, not just a prefix of it */
		if (strlen(categories[i]) != catlen)
			continue;
		if (strncmp(grp, categories[i], catlen) == 0)
			return i;
	}
	return -1;
}

/* a group list element: one .newsrc line plus what groupalloc() derives from it */
struct group {	/* a doubly-linked list for easy insertion */
	struct	group *next;	/* next struct group in list, NULL at the tail */
	struct	group *prev;	/* previous struct group in list, NULL at the head */
	char	*group;		/* private copy of the original input line */
	int	catindex;	/* catindex() result for the line; -1 if unknown */
	int	categorylen;	/* chars before the first '.', ':', '!' or newline */
	int	grouplen;	/* chars before the first ':', '!' or newline */
};

/* both pointers are NULL while the list is empty; groupinsert() maintains them */
struct group *groups = NULL;	/* pointer to the head of the group list */
struct group *end_groups = NULL; /* pointer to the tail of the group list */

/* loop to iterate through the groups, indexed by cur */
/* cur walks from the head along the next pointers and is NULL after the loop */
#define FOREACH_GROUP(cur) for (cur = groups; cur; cur = cur->next)

#ifdef	DEBUG	/* for debugging */
/* print the data in a struct group (debugging aid) */
/* the addresses are printed in hex; grp->group supplies the end of line */
static void groupprint(grp)
struct group *grp;		/* pointer to the group to print */
{
	/*
	 * Pointers are converted to unsigned long and printed with %lx:
	 * handing a pointer to %x breaks where pointers are wider than int.
	 */
	printf("%lx next %lx prev %lx cat %d catlen %d grplen %d %s",
		(unsigned long)grp, (unsigned long)grp->next,
		(unsigned long)grp->prev, grp->catindex, grp->categorylen,
		grp->grouplen, grp->group);
}

/* print all groups */
static void groupallprint()
{
	struct group *grp;	/* loop index */

	FOREACH_GROUP(grp) groupprint(grp);
}
#endif	/* DEBUG */

/*
 * build an unlinked list element from one .newsrc line
 * The element keeps its own copy of line.  An unknown category is only
 * reported on stderr; the element is still returned (with catindex -1).
 * Aborts via emalloc() if memory runs out.
 */
static struct group *groupalloc(line)
char *line;	/* the .newsrc line to "parse" */
{
	register struct group *grp;	/* the element being built */

	grp = (struct group *)emalloc(sizeof(struct group));

	/* not on any list yet */
	grp->next = NULL;
	grp->prev = NULL;

	grp->group = stralloc(line);

	/* the group name ends at the first [:!\n] */
	grp->grouplen = strcspn(line, ":!\n");

	/* the category ends at the first [.:!\n] */
	grp->categorylen = strcspn(line, ".:!\n");

	grp->catindex = catindex(line, grp->categorylen);
	if (grp->catindex < 0) {
		/* If this message comes out, edit the categories[] array */
		fprintf(stderr, "Unknown category: %.*s\n",
			grp->categorylen, line);
	}

	return grp;
}

/*
 * link grp into the group list immediately in front of cur
 * A NULL cur appends grp at the tail.  The list head (groups) and tail
 * (end_groups) are updated when grp becomes the first or last element.
 */
static void groupinsert(grp, cur)
register struct group *grp; /* group to insert */
register struct group *cur; /* group to be just after grp, ow/ NULL */
{
	register struct group *before;	/* element that will precede grp */

	before = cur ? cur->prev : end_groups;

	grp->next = cur;
	grp->prev = before;

	/* fix the forward link that leads to grp */
	if (before)
		before->next = grp;
	else			/* nothing in front: grp is the new head */
		groups = grp;

	/* fix the backward link that leads to grp */
	if (cur)
		cur->prev = grp;
	else			/* nothing behind: grp is the new tail */
		end_groups = grp;
}

/* return logical: "Is grp a *.tail group (or the group 'tail')?" */
/* i.e. non-zero when the last '.'-separated component of grp is tail */
static int grptailcmp(grp, tail)
struct group *grp;	/* the group to test */
char *tail;		/* the tail to test for (e.g. "general", "misc") */
{
	char *dot = rindex(grp->group, '.');	/* last '.' in the line, if any */
	int taillen = strlen(tail);		/* length of tail */
	char *last;				/* start of grp's last component */

	/* without a '.', the whole group name is its own last component */
	last = dot ? dot + 1 : grp->group;

	/*
	 * Compare lengths as integers.  Adding taillen to a pointer into
	 * the line could step past the end of the line's storage.
	 */
	return (grp->group + grp->grouplen) - last == taillen
		&& strncmp(last, tail, taillen) == 0;
}

/* like strcmp(), except for checking ordering of group heads (X.*) */
/* The head is everything before the last '.' of the line. */
/* Does not know about *.general, *.misc, ...; it only looks at */
/* categories when compiled with KNOW_CATEGORIES. */
static int grpheadcmp(grp1, grp2)
struct group *grp1;	/* first group to compare */
struct group *grp2;	/* second group to compare */
{
	char *head1tail;	/* pointer to "tail" of grp1 */
	char *head2tail;	/* pointer to "tail" of grp2 */

#ifdef	KNOW_CATEGORIES
	if (grp1->catindex > grp2->catindex) {
		return 1;
	}
	if (grp1->catindex < grp2->catindex) {
		return -1;
	}
	/* hereafter: categories are equal */
#else	/* KNOW_CATEGORIES */
	/* assume categories are equal or don't matter */
#endif	/* KNOW_CATEGORIES */

	head1tail = rindex(grp1->group, '.');
	head2tail = rindex(grp2->group, '.');

	/* equal pointers: neither line has a '.' (or both are the same line) */
	if (head1tail == head2tail)
		return 0;
	if (!head1tail)			/* only grp1 has no head */
		return -1;
	if (!head2tail)			/* only grp2 has no head */
		return 1;

	/*
	 * finally, the implied comparison
	 * mystrncmp() has no prototype and takes int lengths, so the
	 * pointer differences are converted to int explicitly.
	 */
	return mystrncmp(grp1->group, (int)(head1tail - grp1->group),
		grp2->group, (int)(head2tail - grp2->group));
}

/*
 * high level group routines
 */

/*
 * three-way comparison of two groups in the style of strcmp():
 * negative when grp1 belongs before grp2, positive when it belongs after
 * The rules are tried in the order they appear below.
 */
static int grpcmp(grp1, grp2)
struct group *grp1;	/* first group to compare */
struct group *grp2;	/* second group to compare */
{
	int len1 = grp1->grouplen;	/* length of grp1's name */
	int len2 = grp2->grouplen;	/* length of grp2's name */

	/* rule: categories keep the order of the categories[] table */
	if (grp1->catindex != grp2->catindex)
		return grp1->catindex < grp2->catindex ? -1 : 1;

	/* from here on both groups are in the same category */

#ifdef	PREFIX_GOES_AFTER	/* e.g. comp.sources after comp.sources.bugs */
	/* rule: X is after all other X.* */
	if (len1 < len2 && grp2->group[len1] == '.' &&
		strncmp(grp1->group, grp2->group, len1) == 0)
		return 1;	/* grp1 is the prefix X of grp2 */
	if (len1 > len2 && grp1->group[len2] == '.' &&
		strncmp(grp1->group, grp2->group, len2) == 0)
		return -1;	/* grp2 is the prefix X of grp1 */
#endif	/* PREFIX_GOES_AFTER */

	/* rule: among groups with the same head, X.general is first ... */
	if (grptailcmp(grp1, "general") && grpheadcmp(grp1, grp2) == 0)
		return -1;
	/* ... and X.misc is last */
	if (grptailcmp(grp1, "misc") && grpheadcmp(grp1, grp2) == 0)
		return 1;

	/* the same two rules with grp2 in the special role */
	if (grptailcmp(grp2, "general") && grpheadcmp(grp1, grp2) == 0)
		return 1;
	if (grptailcmp(grp2, "misc") && grpheadcmp(grp1, grp2) == 0)
		return -1;

	/* rule: everything else is ordered by group name, as "sort" would */
	return mystrncmp(grp1->group, len1, grp2->group, len2);
}

/* add line from .newsrc to group list at the proper place */
static void addgroup(line)
char *line;	/* .newsrc line to insert */
{
	struct group *new = groupalloc(line);	/* "parsed" version of line */
	struct group *cur;			/* search loop index */

	/* optimization, the list is usually "almost" already sorted */
	if (!end_groups || grpcmp(new, end_groups) > 0) {
		groupinsert(new, (struct group *)NULL);
		return;
	}

	/* find insertion point */
	FOREACH_GROUP(cur)
		if (grpcmp(new, cur) < 0) break;

	/* insert new just before cur (at tail, if cur == NULL) */
	groupinsert(new, cur);
}

/* initial size of the input line buffer; it doubles when a line overflows it */
#define LINECHUNK	512

/*
 * Read one complete line from fp, however long it is.
 * Returns a pointer to a buffer that is reused by the next call, or NULL
 * when nothing more can be read.  A last line without a newline is
 * returned as it is.
 */
static char *readgroupline(fp)
FILE *fp;	/* stream to read from */
{
	void free();
	static char *buf = NULL;	/* line buffer, reused by every call */
	static int size = 0;		/* number of bytes allocated for buf */
	int len = 0;			/* length of the line read so far */

	if (!buf) {
		size = LINECHUNK;
		buf = emalloc(size);
	}

	while (fgets(buf + len, size - len, fp)) {
		len += strlen(buf + len);

		if (len > 0 && buf[len - 1] == '\n')
			return buf;	/* a complete line */
		if (len < size - 1)
			return buf;	/* fgets() stopped early: last line */

		/* the buffer is full and the line goes on: double the buffer */
		{
			char *grown = emalloc(size * 2);

			strcpy(grown, buf);
			free(buf);
			buf = grown;
			size *= 2;
		}
	}

	/* end of file (or an error) in the middle of a line, or before one */
	return len > 0 ? buf : NULL;
}

/* Read a .newsrc from stdin.  Write a sorted .newsrc to stdout. */
/* Exits 0 on success and 1 on any failure, so that a caller can rely */
/* on the exit status before it replaces the real .newsrc. */
int main()
{
	void exit();
	struct group *cur;	/* loop index */
	char *line;		/* current input line */

	/* read in .newsrc, (insertion) sorting on the fly */
	while ((line = readgroupline(stdin)) != NULL)
		addgroup(line);

	/* never write a sorted file built from incomplete input */
	if (ferror(stdin)) {
		fprintf(stderr, "sort.newsrc: read error\n");
		exit(1);
	}

	/* write .newsrc */
	FOREACH_GROUP(cur) printf("%s", cur->group);

	/* output is buffered: flush before deciding that it worked */
	if (fflush(stdout) == EOF || ferror(stdout)) {
		fprintf(stderr, "sort.newsrc: write error\n");
		exit(1);
	}

	exit(0);
}
!Funky!Stuff!
exit 0

Paul Scherf, Tektronix, Box 1000, MS 61-033, Wilsonville, OR, USA
paulsc@orca.GWD.Tek.COM			tektronix!orca!paulsc