ctl@OCF.Berkeley.EDU (Case Larsen) (08/28/90)
Archive-name: ctl-mh-cache-patch/27-Aug-90 Original-posting-by: ctl@OCF.Berkeley.EDU (Case Larsen) Original-subject: Re: XMH cache Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti) [Reposted from comp.mail.mh. Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).] In article <9008230524.AA07100@PROMETHEUS.MIT.EDU> Raeburn@MIT.EDU (Ken Raeburn) writes: Does anyone have patches (hacks, suggestions) that would allow for maintenance of such a cache? Does anyone else think it would be a good idea for an addition to MH proper, as an extra option (preferably runtime-selectable, I'd say)? I've made a few hacks to MH 6.7 to allow caching of both the output of scan, and the initial directory scan of the folder which gathers various statistics like highest message, lowest message, deleted messages, etc. The caching code uses ndbm to 1) speed access to any given scan output of a particular message, and 2) make insertions and deletions of new messages easy. The only affected commands are 'folders', 'rmm', and 'scan'. The only command with a runtime selectable option is 'scan'. Use 'scan -fast' to cache the output and make use of cached output. Caching on the other two commands is automatic, and doesn't consume very much disk space (a few Kbytes). The limit of the scan line length is currently 80 characters. The limit can be changed, but the longer your scan line length is, the more space the cache file takes. For emacs users, if you use 'mh-e', you will want to change the 'scan' command to 'scan -fast' in mh-e.el. Here is an example of the speedup you can expect. This was on a folder of about 1000 messages. + /* + * The 'fast' option uses ndbm to store the output of scan. Speedup is + * typically around 10 to 20 times on large folders. As you see, the + * first time takes a while to build the cache. The subsequent times are + * much quicker. 
+ * + * dingo uip [887] time xscan -fast > & /dev/null + * 18.380u 15.060s 1:02.58 53.4% 0+26k 1408+316io 1413pf+0w + * dingo uip [888] time xscan -fast > & /dev/null + * 1.120u 1.420s 0:03.58 70.9% 0+22k 53+0io 63pf+0w + */ -- Case Larsen ctl@OCF.Berkeley.EDU Open Computing Facility -------------cut here-------cut here-------cut here-------------------- diff -cr mh-6.7/sbr/m_gmsg.c mh-6.7.new/sbr/m_gmsg.c *** mh-6.7/sbr/m_gmsg.c Thu Apr 12 13:29:01 1990 --- mh-6.7.new/sbr/m_gmsg.c Tue Jul 31 10:28:01 1990 *************** *** 3,11 **** #include "../h/mh.h" #include "../h/local.h" #include <stdio.h> - #define NINFO (MAXFOLDER / 5) /* PLEASE be non-trivial... */ struct info { int msgno; short stats; --- 3,13 ---- #include "../h/mh.h" #include "../h/local.h" #include <stdio.h> + #include <fcntl.h> #define NINFO (MAXFOLDER / 5) /* PLEASE be non-trivial... */ + #define GMSG_CACHE ".m_gmsgcache" + struct info { int msgno; short stats; *************** *** 43,49 **** --- 45,67 ---- register DIR * dd; #endif SYS5DIR struct stat st; + int cache; + if ((cache = open(GMSG_CACHE,O_RDONLY)) >= 0) { + int len; + + read(cache,&len,sizeof(len)); + mp = (struct msgs *) malloc (len); + if (mp == NULL) + adios (NULLCP, "unable to allocate folder storage"); + read(cache,mp,len); + mp->foldpath = name; + m_getatr (mp); + close(cache); + return mp; + } + + /* else do scan and save */ if ((dd = opendir (name = m_mailpath (name))) == NULL) { free (name); return NULL; *************** *** 191,196 **** --- 209,223 ---- #endif MTR for (tail = head; tail < rover; tail++) mp -> msgstats[tail -> msgno] = tail -> stats; + + if ((cache = open(GMSG_CACHE,O_RDWR | O_CREAT, 0600)) >= 0) { + int len; + len = MSIZE(mp,mp->lowoff,mp->hghoff); + + write(cache,&len,sizeof(len)); + write(cache,mp,len); + close(cache); + } m_getatr (mp); return mp; diff -cr mh-6.7/sbr/m_sync.c mh-6.7.new/sbr/m_sync.c *** mh-6.7/sbr/m_sync.c Thu Apr 12 13:29:03 1990 --- mh-6.7.new/sbr/m_sync.c Tue Jul 31 13:02:44 1990 
*************** *** 1,4 **** --- 1,5 ---- /* m_sync.c - synchronize message sequences */ + static char *RCSid="$Id: m_sync.c,v 1.2 90/07/31 10:27:25 case Exp Locker: case $"; #include "../h/mh.h" #include <stdio.h> *************** *** 7,13 **** --- 8,17 ---- #define sigmask(s) (1 << ((s) - 1)) #endif not sigmask + #include <fcntl.h> + #define GMSG_CACHE ".m_gmsgcache" + /* decision logic 1. public and folder readonly: make it private 2a. public: add it to the sequences file *************** *** 14,19 **** --- 18,31 ---- 2b. private: add it to the profile */ + /* + *$Log: m_sync.c,v $ + * Revision 1.2 90/07/31 10:27:25 case + * Save incremental changes to msgs structure to GMSG_CACHE. + * + * ctl: Added logic for caching of message stats to speedup m_gmsg(). + */ + void m_sync_cache(); void m_sync (mp) register struct msgs *mp; *************** *** 32,39 **** #endif BSD42 if (!(mp -> msgflags & SEQMOD)) ! return; mp -> msgflags &= ~SEQMOD; m_getdefs (); (void) sprintf (seq, "%s/%s", mp -> foldpath, mh_seq); --- 44,52 ---- #endif BSD42 if (!(mp -> msgflags & SEQMOD)) ! return; mp -> msgflags &= ~SEQMOD; + m_sync_cache(mp); m_getdefs (); (void) sprintf (seq, "%s/%s", mp -> foldpath, mh_seq); *************** *** 59,69 **** if ((cp = m_seq (mp, mp -> msgattrs[i])) == NULL) continue; if (fp == NULL) { ! if ((fp = fopen (seq, "w")) == NULL ! && unlink (seq) != NOTOK ! && (fp = fopen (seq, "w")) == NULL) { ! admonish (attr, "unable to write"); ! goto priv; } #ifndef BSD42 hstat = signal (SIGHUP, SIG_IGN); --- 72,88 ---- if ((cp = m_seq (mp, mp -> msgattrs[i])) == NULL) continue; if (fp == NULL) { ! fp = fopen (seq, "w"); ! if (fp == NULL) { ! if (unlink (seq) != NOTOK ) { ! fp = fopen (seq, "w"); ! } else { ! fp = fopen(mh_seq,"w"); ! } ! if (fp == NULL) { ! admonish (attr, "unable to write"); ! goto priv; ! 
} } #ifndef BSD42 hstat = signal (SIGHUP, SIG_IGN); *************** *** 95,98 **** --- 114,131 ---- (void) unlink (seq); mp -> msgflags = flags; + } + + void m_sync_cache(mp) + register struct msgs* mp; + { + int cache; + if ((cache = open(GMSG_CACHE,O_RDWR)) >= 0) { + int len; + len = MSIZE(mp,mp->lowoff,mp->hghoff); + + write(cache,&len,sizeof(len)); + write(cache,mp,len); + close(cache); + } } diff -cr mh-6.7/uip/annosbr.c mh-6.7.new/uip/annosbr.c *** mh-6.7/uip/annosbr.c Thu Apr 12 13:29:25 1990 --- mh-6.7.new/uip/annosbr.c Tue Jul 31 13:38:13 1990 *************** *** 1,6 **** /* annosbr.c - prepend annotation to messages */ #ifndef lint ! static char ident[] = "@(#)$Id: annosbr.c,v 2.4 90/04/05 15:35:09 sources Exp $"; #endif lint #include "../h/mh.h" --- 1,6 ---- /* annosbr.c - prepend annotation to messages */ #ifndef lint ! static char ident[] = "@(#)$Id: annosbr.c,v 1.1 90/07/31 13:29:50 case Exp Locker: case $"; #endif lint #include "../h/mh.h" *************** *** 9,15 **** --- 9,18 ---- #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> + #include <ndbm.h> + #include <fcntl.h> + #define DBMDIR ".mhcache" extern int errno; long lseek (); *************** *** 121,126 **** return 1; } } ! return 0; } --- 124,141 ---- return 1; } } ! { ! /* annotate could change fmtsbr()'s output */ ! int msgnum = atoi(file); ! DBM *scache = dbm_open(DBMDIR,O_RDWR, 0600); ! datum key; ! printf ("file: %s",file); ! if (scache != NULL) { ! key.dptr = (char *) &msgnum; ! key.dsize = sizeof(msgnum); ! dbm_delete(scache,key); ! dbm_close(scache); ! } ! } return 0; } diff -cr mh-6.7/uip/folder.c mh-6.7.new/uip/folder.c *** mh-6.7/uip/folder.c Thu Apr 12 13:29:27 1990 --- mh-6.7.new/uip/folder.c Tue Jul 31 10:25:42 1990 *************** *** 1,6 **** /* folder(s).c - report on folders */ #ifndef lint ! 
static char ident[] = "@(#)$Id: folder.c,v 2.4 90/04/05 14:56:54 sources Exp $"; #endif lint #include "../h/mh.h" --- 1,6 ---- /* folder(s).c - report on folders */ #ifndef lint ! static char ident[] = "@(#)$Id: folder.c,v 1.1 90/07/30 16:53:36 case Exp $"; #endif lint #include "../h/mh.h" diff -cr mh-6.7/uip/inc.c mh-6.7.new/uip/inc.c *** mh-6.7/uip/inc.c Thu Apr 12 13:29:28 1990 --- mh-6.7.new/uip/inc.c Tue Jul 31 14:16:30 1990 *************** *** 1,6 **** /* inc.c - incorporate messages from a maildrop into a folder */ #ifndef lint ! static char ident[] = "@(#)$Id: inc.c,v 1.4 90/04/05 14:57:51 sources Exp $"; #endif lint #include "../h/mh.h" --- 1,6 ---- /* inc.c - incorporate messages from a maildrop into a folder */ #ifndef lint ! static char ident[] = "@(#)$Id: inc.c,v 1.1 90/07/31 14:15:49 case Exp Locker: case $"; #endif lint #include "../h/mh.h" *************** *** 625,630 **** --- 625,631 ---- (void) fflush (stdout); msgnum++, mp -> hghmsg++; + mp->nummsg++; mp -> msgstats[msgnum] = EXISTS; #ifdef TMA if (i == SCNENC) { diff -cr mh-6.7/uip/rmm.c mh-6.7.new/uip/rmm.c *** mh-6.7/uip/rmm.c Thu Apr 12 13:29:37 1990 --- mh-6.7.new/uip/rmm.c Mon Jul 30 16:54:30 1990 *************** *** 2,7 **** --- 2,9 ---- #include "../h/mh.h" #include <stdio.h> + #include <ndbm.h> + #include <fcntl.h> /* */ *************** *** 11,16 **** --- 13,19 ---- NULL, NULL }; + #define DBMDIR ".mhcache" /* */ *************** *** 34,40 **** *arguments[MAXARGS], *msgs[MAXARGS]; struct msgs *mp; ! invo_name = r1bindex (argv[0], '/'); if ((cp = m_find (invo_name)) != NULL) { ap = brkstring (cp = getcpy (cp), " ", "\n"); --- 37,44 ---- *arguments[MAXARGS], *msgs[MAXARGS]; struct msgs *mp; ! DBM *scache; ! 
datum key; invo_name = r1bindex (argv[0], '/'); if ((cp = m_find (invo_name)) != NULL) { ap = brkstring (cp = getcpy (cp), " ", "\n"); *************** *** 124,129 **** --- 128,134 ---- execvp (rmmproc, vec); adios (rmmproc, "unable to exec"); } + scache = dbm_open(DBMDIR,O_RDWR, 0600); for (msgnum = mp -> lowsel; msgnum <= mp -> hghsel; msgnum++) if (mp -> msgstats[msgnum] & SELECTED) { *************** *** 130,136 **** (void) strcpy (buf, m_backup (dp = m_name (msgnum))); if (rename (dp, buf) == NOTOK) admonish (buf, "unable to rename %s to", dp); } ! done (0); } --- 135,146 ---- (void) strcpy (buf, m_backup (dp = m_name (msgnum))); if (rename (dp, buf) == NOTOK) admonish (buf, "unable to rename %s to", dp); + if (scache != NULL) { + key.dptr = (char *)&msgnum; + key.dsize = sizeof(msgnum); + dbm_delete(scache,key); + } } ! if (scache != NULL) dbm_close(scache); done (0); } diff -cr mh-6.7/uip/scan.c mh-6.7.new/uip/scan.c *** mh-6.7/uip/scan.c Thu Apr 12 13:29:37 1990 --- mh-6.7.new/uip/scan.c Wed Aug 8 11:06:10 1990 *************** *** 1,6 **** /* scan.c - display a one-line "scan" listing */ #ifndef lint ! static char ident[] = "@(#)$Id: scan.c,v 1.8 90/04/05 14:59:58 sources Exp $"; #endif lint #include "../h/mh.h" --- 1,6 ---- /* scan.c - display a one-line "scan" listing */ #ifndef lint ! static char ident[] = "@(#)$Id: scan.c,v 1.3 90/07/31 10:24:27 case Exp Locker: case $"; #endif lint #include "../h/mh.h" *************** *** 9,14 **** --- 9,16 ---- #include "../zotnet/tws.h" #include <errno.h> #include <stdio.h> + #include <ndbm.h> + #include <fcntl.h> /* */ *************** *** 42,47 **** --- 44,62 ---- #define HELPSW 10 "help", 4, + #define FASTSW 11 + "fast", 4, + /* + * The 'fast' option uses ndbm to store the output of scan. Speedup is + * typically around 10 to 20 times on large folders. As you see, the + * first time takes a while to build the cache. The subsequent times are + * much quicker. 
+ * + * dingo uip [887] time xscan -fast > & /dev/null + * 18.380u 15.060s 1:02.58 53.4% 0+26k 1408+316io 1413pf+0w + * dingo uip [888] time xscan -fast > & /dev/null + * 1.120u 1.420s 0:03.58 70.9% 0+22k 53+0io 63pf+0w + */ NULL, NULL }; *************** *** 52,58 **** --- 67,81 ---- extern struct msgs *fmt_current_folder; #endif + typedef struct record { + char scanout[80]; + } Record; + #define SBUFSIZ 256 + #define DBMDIR ".mhcache" + #define MAXPATHLEN 2048+1 + static struct format *fmt; + void clear_screen (); /* */ *************** *** 68,73 **** --- 91,97 ---- revflag = 0, /* used to be #ifdef BERK */ width = 0, msgp = 0, + fast = 0, ontty, state, msgnum; *************** *** 86,92 **** *msgs[MAXARGS]; struct msgs *mp; FILE * in; ! invo_name = r1bindex (argv[0], '/'); mts_init (invo_name); if ((cp = m_find (invo_name)) != NULL) { --- 110,118 ---- *msgs[MAXARGS]; struct msgs *mp; FILE * in; ! DBM *scache; ! datum key, rec; ! invo_name = r1bindex (argv[0], '/'); mts_init (invo_name); if ((cp = m_find (invo_name)) != NULL) { *************** *** 150,155 **** --- 176,184 ---- case NREVSW: revflag = 0; continue; + case FASTSW: + fast = 1; + continue; case FILESW: if (!(cp = *argp++) || *cp == '-') *************** *** 217,256 **** #endif /* */ ! for (msgnum = revflag ? mp -> hghsel : mp -> lowsel; (revflag ? msgnum >= mp -> lowsel : msgnum <= mp -> hghsel); msgnum += revflag ? (-1) : 1) ! if (mp -> msgstats[msgnum] & SELECTED) { ! if ((in = fopen (cp = m_name (msgnum), "r")) == NULL) { #ifdef notdef ! if (errno != EACCES) #endif ! admonish (cp, "unable to open message"); #ifdef notdef ! else ! printf ("%*d unreadable\n", DMAXFOLDER, msgnum); #endif ! continue; ! } ! if (hdrflag) { ! (void) time (&clock); ! printf ("Folder %-32s%s\n\n", folder, ! dasctime (dlocaltime (&clock), TW_NULL)); ! } ! switch (state = scan (in, msgnum, 0, nfs, width, ! msgnum == mp -> curmsg, ! hdrflag, 0L, 1)) { ! case SCNMSG: ! case SCNENC: ! case SCNERR: break; ! 
default: adios (NULLCP, "scan() botch (%d)", state); ! case SCNEOF: #ifdef notdef printf ("%*d empty\n", DMAXFOLDER, msgnum); #else --- 246,350 ---- #endif /* */ ! if (fast) { ! scache = dbm_open(DBMDIR,O_RDWR | O_CREAT, 0600); ! if (scache == NULL) { ! perror("dbm"); ! exit(1); ! } ! } for (msgnum = revflag ? mp -> hghsel : mp -> lowsel; (revflag ? msgnum >= mp -> lowsel : msgnum <= mp -> hghsel); msgnum += revflag ? (-1) : 1) ! if ((mp -> msgstats[msgnum] & (SELECTED | EXISTS)) == (SELECTED | EXISTS)) { ! if (fast) { ! /* caching stuff here using (n)dbm ! key on message number ! just keep a few of the fields around */ ! key.dptr = (char *)&msgnum; ! key.dsize = sizeof(msgnum); ! rec = dbm_fetch(scache,key); ! if (hdrflag) { ! (void) time (&clock); ! printf ("Folder %-32s%s\n\n", folder, ! dasctime (dlocaltime (&clock), TW_NULL)); ! } ! if (rec.dptr == NULL) { ! int compnum; ! char name[NAMESZ]; ! char tmpbuf[NAMESZ]; ! Record rectmp; ! char *scanout; ! ! rec.dptr = (char *)&rectmp; ! rec.dsize = sizeof(rectmp); ! rectmp.scanout[0] = '\0'; ! ! if ((in = fopen (cp = m_name (msgnum), "r")) == NULL) { ! admonish (cp, "unable to open message"); ! continue; ! } ! switch (state = scan (in, msgnum, 0, nfs, width, ! msgnum == mp -> curmsg, ! hdrflag, 0L, 0, 1, &scanout)) { ! case SCNMSG: ! case SCNENC: ! case SCNERR: ! break; ! ! default: ! adios (NULLCP, "scan() botch (%d)", state); ! ! case SCNEOF: #ifdef notdef ! printf ("%*d empty\n", DMAXFOLDER, msgnum); ! #else ! advise (NULLCP, "message %d: empty", msgnum); #endif ! break; ! } ! hdrflag = 0; ! (void) fclose (in); ! strncpy(rectmp.scanout,scanout,sizeof(rectmp.scanout)); ! if (strlen(scanout) >= sizeof(rectmp.scanout)) ! rectmp.scanout[sizeof(rectmp.scanout)-1] = '\n'; ! dbm_store(scache,key,rec,DBM_INSERT); ! } ! ((Record *)rec.dptr)->scanout[4] = ! (mp->curmsg == msgnum) ? '+' : ' '; ! fputs(((Record *)rec.dptr)->scanout,stdout); ! if (ontty) ! (void) fflush (stdout); ! } else { ! 
if ((in = fopen (cp = m_name (msgnum), "r")) == NULL) { #ifdef notdef ! if (errno != EACCES) #endif ! admonish (cp, "unable to open message"); ! #ifdef notdef ! else ! printf ("%*d unreadable\n", DMAXFOLDER, msgnum); ! #endif ! continue; ! } ! if (hdrflag) { ! (void) time (&clock); ! printf ("Folder %-32s%s\n\n", folder, ! dasctime (dlocaltime (&clock), TW_NULL)); ! } ! switch (state = scan (in, msgnum, 0, nfs, width, ! msgnum == mp -> curmsg, ! hdrflag, 0L, 1, 0, NULL)) { ! case SCNMSG: ! case SCNENC: ! case SCNERR: break; ! default: adios (NULLCP, "scan() botch (%d)", state); ! case SCNEOF: #ifdef notdef printf ("%*d empty\n", DMAXFOLDER, msgnum); #else *************** *** 257,268 **** advise (NULLCP, "message %d: empty", msgnum); #endif break; } - hdrflag = 0; - (void) fclose (in); - if (ontty) - (void) fflush (stdout); } #ifdef VAN m_sync (mp); /* because formatsbr might have made changes */ #endif --- 351,365 ---- advise (NULLCP, "message %d: empty", msgnum); #endif break; + } + hdrflag = 0; + (void) fclose (in); + if (ontty) + (void) fflush (stdout); } } + if (fast && scache != NULL) dbm_close(scache); + #ifdef VAN m_sync (mp); /* because formatsbr might have made changes */ #endif *************** *** 274,276 **** --- 371,375 ---- done (0); } + + diff -cr mh-6.7/uip/scansbr.c mh-6.7.new/uip/scansbr.c *** mh-6.7/uip/scansbr.c Thu Apr 12 13:29:37 1990 --- mh-6.7.new/uip/scansbr.c Mon Jul 30 16:54:30 1990 *************** *** 1,6 **** /* scansbr.c - routines to help scan along... */ #ifndef lint ! static char ident[] = "@(#)$Id: scansbr.c,v 1.5 90/04/05 14:57:59 sources Exp $"; #endif lint #include "../h/mh.h" --- 1,6 ---- /* scansbr.c - routines to help scan along... */ #ifndef lint ! static char ident[] = "@(#)$Id: scansbr.c,v 1.2 90/07/30 16:54:18 case Exp $"; #endif lint #include "../h/mh.h" *************** *** 53,59 **** /* ARGSUSED */ ! 
int scan (inb, innum, outnum, nfs, width, curflg, header, size, noisy) char *nfs; int innum, outnum, --- 53,59 ---- /* ARGSUSED */ ! int scan (inb, innum, outnum, nfs, width, curflg, header, size, noisy,fast,scanout) char *nfs; int innum, outnum, *************** *** 60,66 **** width, curflg, header, ! noisy; long size; register FILE *inb; { --- 60,68 ---- width, curflg, header, ! noisy, ! fast; ! char **scanout; long size; register FILE *inb; { *************** *** 292,297 **** --- 294,301 ---- } if (noisy) (void) fputs (scanl, stdout); + if (fast) + *scanout = scanl; /* return formatted buffer */ FINDCOMP (cptr, "encrypted"); encrypted = cptr && cptr -> c_text; diff -cr mh-6.7/uip/sendsbr.c mh-6.7.new/uip/sendsbr.c *** mh-6.7/uip/sendsbr.c Thu Apr 12 13:29:37 1990 --- mh-6.7.new/uip/sendsbr.c Tue Jul 31 13:28:55 1990 *************** *** 1,6 **** /* sendsbr.c - routines to help WhatNow/Send along */ #ifndef lint ! static char ident[] = "@(#)$Id: sendsbr.c,v 2.3 90/04/05 14:57:18 sources Exp $"; #endif lint #include "../h/mh.h" --- 1,6 ---- /* sendsbr.c - routines to help WhatNow/Send along */ #ifndef lint ! static char ident[] = "@(#)$Id: sendsbr.c,v 1.1 90/07/31 13:28:44 case Exp $"; #endif lint #include "../h/mh.h" -- Case Larsen ctl@OCF.berkeley.edu
ctl@OCF.Berkeley.EDU (Case Larsen) (08/28/90)
Archive-name: ctl-mh-patch-bugs/28-Aug-90 Original-posting-by: ctl@OCF.Berkeley.EDU (Case Larsen) Original-subject: Re: XMH cache Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti) [Reposted from comp.mail.mh. Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).] The MH caching patches I posted earlier (archived as ctl-mh-cache-patch/27-Aug-90) had a few unresolved bugs. I'll be posting replacement patches in a few days. -- Case Larsen ctl@OCF.berkeley.edu