[comp.unix.wizards] LC MARC FORMAT-UNLABELLED

bldrnr@apple.com (Brian Hurley) (03/11/89)

A co-worker in our group recently received a set of 9-track tapes 
containing a selection database of a public library.  The shipping 
information describes the tapes as follows:

ASCII  1600-BPI
LC MARC FORMAT-UNLABELLED

Does anyone out there recognize this format?  I can read the tapes, but 
the data are either compressed or encoded in a format that standard UNIX 
tools (compress, pack, etc.) do not recognize.  

Is anyone out there familiar with this format?   Are sources available?  The 
company that made the tapes has not been helpful thus far;  anyone who 
might know has been, "Unavailable."


   Thanx in advance,

    Brian

Brian Hurley   bldrnr@apple.com
"There is no dark side of the moon, really.  As a matter of fact, its all 
dark."
                - Pink Floyd [Dark Side of the Moon]"
-----------------------------------------
Standard Disclaimer: "Did I say that?  Oops!"

budd@bu-cs.BU.EDU (Philip Budne) (03/13/89)

Enclosed is code I wrote to prove the viability of producing fiche for
emergency use if our online catalog was not available.  The reference
provided to me by our library systems manager was "OCLC-MARK Tape
Format" ISBN: 0-933418-62-0; 1984, OCLC

The encoding format is interesting, with variable length strings.  As
someone who had never looked into the world of cataloging, I was blown
away by the number of fields used to fully describe a work!

	-Phil

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  Makefile disk.c mdisplay.c ndisplay.c oclc.c oclc.h
# Wrapped by budd@buit2 on Mon Mar 13 02:45:24 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
# Makefile: builds "oclc" (oclc.o disk.o mdisplay.o) and "noclc"
# (oclc.o disk.o ndisplay.o).  disk.c includes oclc.h just like the other
# three sources, so disk.o is listed in the header dependency rule too;
# the expected size below accounts for the 7 added bytes.
if test -f Makefile -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"Makefile\"
else
echo shar: Extracting \"Makefile\" \(233 characters\)
sed "s/^X//" >Makefile <<'END_OF_Makefile'
XCFLAGS=-g
X
Xall:	oclc noclc
X
XOCLC=oclc.o disk.o mdisplay.o
Xoclc:	$(OCLC)
X	$(CC) $(CFLAGS) -o oclc $(OCLC)
X
XNOCLC=oclc.o disk.o ndisplay.o
Xnoclc:	$(NOCLC)
X	$(CC) $(CFLAGS) -o noclc $(NOCLC)
X
Xdisk.o mdisplay.o ndisplay.o oclc.o: oclc.h
END_OF_Makefile
if test 233 -ne `wc -c <Makefile`; then
    echo shar: \"Makefile\" unpacked with wrong size!
fi
# end of overwriting check
fi
# disk.c: getrec() reads one logical record (5-digit length prefix, then
# the rest) from an unblocked disk copy of the tape.  A failed read() of
# the length prefix (cc == -1) is now reported through lose() instead of
# being returned as a normal end-of-file; only cc == 0 means EOF.  The
# expected size below accounts for the 1 added byte.
if test -f disk.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"disk.c\"
else
echo shar: Extracting \"disk.c\" \(1744 characters\)
sed "s/^X//" >disk.c <<'END_OF_disk.c'
X/*
X *	disk.c -- read oclc mark tape records from disk
X *
X *	Philip L. Budne, Boston University, Distributed Systems Group
X *	Copyright 1988, 1989, Philip L. Budne
X *	May be used in not-for-profit applications provided this
X *	notice and the above copyright are not removed.  No warranty
X *	is expressed or implied.
X */
X
X# include "oclc.h"
X
X/*
X * assumes we are reading from a file unblocked to disk.
X * ie; dd if=/dev/rmt0 of=datafile ibs=2048
X *
X * reading from a real MARC tape the first file will be the
X * ANSI label.
X *
X * To read from tape all reads must be of size 2048.
X */
X
Xint
Xgetrec( fd, record )
X    int fd;
X    char *record;
X{
X    int cc, llen;
X
X    /*
X     * would like to read 2048 here, but if any records are less
X     * than 2048, we might read past the end, into the next
X     * record.
X     *
X     * It might be better to have the tape reader always write
X     * 2048 bytes to disk??
X     *
X     * reading from tape this is not a problem, as the read would
X     * just return "short".
X     */
X
X    /* read just lrec len (this only works on disks) */
X    if( (cc = read( fd, record, 5 )) != 5 ) /* not what we expected? */
X	if( cc != 0 )			/* error or short? */
X	    lose("premature eof (read %d)\n", cc ); /* premature eof */
X	else				/* got nothing */
X	    return( -1 );		/* EOF on time! */
X
X    llen = getint( record, 5 );		/* get integer value */
X					/* of logical record length */
X
X    if( llen < 5 || llen > MAXLREC )	/* is it sane? */
X	lose("bad llen %d", llen );	/* no. give up. */
X
X    cc = read( fd, record+5, llen - 5 ); /* read rest of logical record */
X    if( cc != llen - 5 )
X	lose("read %d, expected %d", cc, llen-5 );
X
X    record[ llen ] = EOS;		/* blast char past end */
X    return( llen );
X} /* getrec */
END_OF_disk.c
if test 1744 -ne `wc -c <disk.c`; then
    echo shar: \"disk.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# mdisplay.c: prints each variable field as "<seq> <tag> <data>", with
# fields 001-009 routed to process().  Changes to the payload:
#   - dump_header_info() and process() are declared static before their
#     first use in display(), so the later static definitions no longer
#     conflict with an implicit extern declaration;
#   - oclc_id is a long and is now printed with %8ld instead of %8d;
#   - comment typo "dont'" corrected.
# The expected size below accounts for the 63 added bytes.
if test -f mdisplay.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"mdisplay.c\"
else
echo shar: Extracting \"mdisplay.c\" \(2684 characters\)
sed "s/^X//" >mdisplay.c <<'END_OF_mdisplay.c'
X/*
X *	mdisplay.c -- display records in TOMAS "marc" format
X *
X *	Philip L. Budne, Boston University, Distributed Systems Group
X *	Copyright 1988, 1989, Philip L. Budne
X *	May be used in not-for-profit applications provided this
X *	notice and the above copyright are not removed.  No warranty
X *	is expressed or implied.
X */
X
X# include <stdio.h>			/* get standard i/o library defns */
X# include "oclc.h"			/* get our defns */
X
Xstatic int record_count;
Xstatic int dump_header_info(), process(); /* forward defn. */
X
Xvoid
Xpdata( dp, len )			/* print data */
X    char *dp;
X    int len;
X{
X    while( len-- > 0 ) {
X	char c;
X
X	c = *dp++;			/* get next character */
X
X	if( c == RS )			/* field terminator? (RS) */
X	    break;			/* break loop */
X
X	if( c == US ) {			/* subfield delim (doub dag) (US) */
X	    len--;			/* account for subsection letter */
X	    if( len < 0 )		/* no more data? */
X		lose("bad subfield");	/* quit. */
X
X	    /* print next char too (sub section letter) */
X	    printf(" %c%c ", SCHAR, *dp++); /* don't use putchar(*dp++); */
X					/* as putchar is a macro!! */
X	    continue;			/* continue printing data */
X	} /* found US */
X
X	if( c >= ' ' && c <= '~' )	/* printing ascii? */
X	    putchar( c );		/* just copy to stdout */
X# ifdef DISPLAY_DIACRITICS
X	else
X	    printf("<%02x>", c & 0xff);	/* print hex code. */
X					/* interpret? (output troff?) */
X# endif /* DISPLAY_DIACRITICS defined */
X    } /* while */
X} /* pdata */
X
Xdisplay( code, record, offset, length )
X    int code, offset, length;
X    char *record;
X{
X    if( code < 10 ) {			/* process 001...009 */
X	process( code, record, offset, length );
X	return;
X    }
X    else if( record_count == 0 )
X	dump_header_info();
X
X    record_count++;
X    printf("%3d %03d ", record_count, code );
X    pdata( record+offset, length );	/* give pointer to data */
X    putchar('\n');			/* output newline char */
X
X} /* display */
X
Xend_record() {
X    putchar('\n');
X} /* end */
X
X/****************************************************************/
X
Xstatic long oclc_id;
X
Xstart_record() {
X    record_count = 0;
X    oclc_id = -1;
X} /* start */
X
Xstatic
Xdump_header_info() {
X    if( oclc_id != -1 )
X	printf("OCLC: %8ld\n", oclc_id );
X}
X
Xstatic
Xprocess( code, record, offset, length )
X    int code, offset, length;
X    char *record;
X{
X    char bib_lvl;
X
X    switch( code ) {
X    case 1:				/* careful!! 001 is octal! */
X	if( record[offset] != 'o' || record[offset+1] != 'c' ||
X	   record[offset+2] != 'm' )
X	    lose( "Bad info in cols 0-2 of 001 record" );
X	oclc_id = getint( record+offset+3, 8 );
X	break;
X
X    case 8:
X	/* check record[7] (bib level) for interpretation!? */
X	bib_lvl = record[7];
X
X    } /* switch on code */
X} /* process */
END_OF_mdisplay.c
if test 2684 -ne `wc -c <mdisplay.c`; then
    echo shar: \"mdisplay.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# ndisplay.c: collects selected fields into fixed 1024-byte "str" buffers
# and prints them as labelled lines at end of record.  Changes to the
# payload, all in display():
#   - after stepping the pointer past the first two bytes of the field,
#     length is reduced by two so copy() stays inside the field;
#   - length is clamped to sizeof(str)-1 (copy() emits at most one byte
#     per input byte, plus the EOS), so an over-long field is truncated
#     rather than written past the end of the destination buffer;
#   - the closing comment now names display instead of process.
# The expected size below accounts for the 152 added bytes.
if test -f ndisplay.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"ndisplay.c\"
else
echo shar: Extracting \"ndisplay.c\" \(3487 characters\)
sed "s/^X//" >ndisplay.c <<'END_OF_ndisplay.c'
X/*
X *	ndisplay.c -- display records in "normal" format (TOMAS D/DN)
X *
X *	Philip L. Budne, Boston University, Distributed Systems Group
X *	Copyright 1988, 1989, Philip L. Budne
X *	May be used in not-for-profit applications provided this
X *	notice and the above copyright are not removed.  No warranty
X *	is expressed or implied.
X *
X */
X
X# include <stdio.h>			/* get standard i/o library defns */
X# include "oclc.h"			/* get our defns */
X
Xvoid
Xcopy( dp, sp, len )
X    register char *dp, *sp;
X    register len;
X{
X    while( len-- > 0 ) {
X	register c;
X
X	c = *sp++;			/* get next character */
X	if( c == RS )			/* field terminator? (RS) */
X	    break;			/* break loop */
X	else if( c == US ) {		/* subfield delim (doub dag) (US) */
X	    len--;			/* account for subsection letter */
X	    if( len < 0 )		/* no more data? */
X		lose("bad subfield");	/* quit. */
X
X	    sp++;			/* toss section letter */
X	    *dp++ = ' ';		/* insert space */
X
X	} /* found US */
X	else if( c >= ' ' && c <= '~' )	/* printing ascii? */
X	    *dp++ = c;
X    } /* while */
X    *dp = EOS;
X} /* copy */
X
X/****************************************************************/
X
Xtypedef char str[ 1024 ];
Xstr auth, title, publ, descr, notes, subj, other, loc, call;
Xint authtype, titletype, publtype, descrtype, notestype,
X    subjtype, othertype, loctype,  calltype;
X
Xstart_record() {
X    authtype = titletype = publtype = descrtype = notestype =
X	subjtype = othertype = loctype = calltype = -1;
X} /* start */
X
Xend_record() {
X    int n;
X
X# define MUMBLE(data,type,name) \
X    if( type != -1 ) { printf("%-16s %s\n", name, data ); n++; }
X
X    n = 0;
X    MUMBLE(auth, authtype,  "AUTHOR" );
X    MUMBLE(title,titletype, "TITLE" );
X    MUMBLE(publ, publtype,  "PUBLICATION" );
X    MUMBLE(descr,descrtype, "DESCRIPTION" );
X    MUMBLE(notes,notestype, "NOTES" );		/* NEVER SET! */
X    MUMBLE(subj, subjtype,  "SUBJECTS" );	/* NEVER SET! */
X    MUMBLE(other,othertype, "OTHER" );		/* NEVER SET! */
X    MUMBLE(loc,  loctype,   "LOCATION" );	/* need to expand! */
X    MUMBLE(call, calltype,  "CALL NUMBER" );	/* clean up? */
X
X    if( n > 0 )
X	putchar('\n');			/* blank line */
X} /* end */
X
Xdisplay( code, record, offset, length )
X    int code, offset, length;
X    char *record;
X{
X    char bib_lvl;
X
X    record += offset+2;				/* skip 2 indicators */
X    length -= 2;				/* they are counted in length */
X    if( length > (int) sizeof(str) - 1 )	/* keep copy() inside a str */
X	length = sizeof(str) - 1;
X
X    /* MUST PRIORITIZE MULTIPLE RECORD TYPES */
X
X    switch( code ) {
X    case 49:
X	if( loctype != -1 )
X	    puts("second location");
X	copy( loc, record, length );
X	loctype = code;
X	break;
X
X    case 50:
X    case 90:
X    case 99:
X	if( calltype != -1 )
X	    printf("second call %03d (was %03d)\n", code, calltype );
X	copy( call, record, length );
X	calltype = code;
X	break;
X
X
X    case 100:
X    case 110:
X    case 130:
X	if( authtype != -1 )
X	    printf("second author %03d (was %03d)\n", code, authtype );
X	copy( auth, record, length );
X	authtype = code;
X	break;
X
X    case 240:
X    case 245:
X	if( titletype != -1 )
X	    printf("second title %03d (was %03d)\n", code, titletype );
X	copy( title, record, length );
X	titletype = code;
X	break;
X
X    case 260:
X	if( publtype != -1 )
X	    puts("second publication");
X	copy( publ, record, length );
X	publtype = code;
X	break;
X
X    case 300:
X	if( descrtype != -1 )
X	    puts("second description");
X	copy( descr, record, length );
X	descrtype = code;
X	break;
X
X    } /* switch on code */
X} /* display */
END_OF_ndisplay.c
if test 3487 -ne `wc -c <ndisplay.c`; then
    echo shar: \"ndisplay.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# oclc.c: main loop, error exit (lose), directory walk (bibliographic),
# fixed-width integer parser (getint) and optional leader dump
# (pr_bib_fixed).  Changes to the payload:
#   - main() returns 0 after end of input instead of falling off the end;
#   - the field bounds check rejects a last-byte index equal to llen
#     (valid indices are 0..llen-1), fixing an off-by-one;
#   - pr_bib_fixed() masks record[22] to 8 bits before the switch and in
#     the error message, so transaction codes 0x90-0x94 can match where
#     plain char is signed;
#   - comment typos "numer" and "dependant" corrected.
# The expected size below accounts for the 120 added bytes.
if test -f oclc.c -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"oclc.c\"
else
echo shar: Extracting \"oclc.c\" \(5151 characters\)
sed "s/^X//" >oclc.c <<'END_OF_oclc.c'
X/*
X *	oclc.c -- read oclc mark tape records
X *
X *	Philip L. Budne, Boston University, Distributed Systems Group
X *	Copyright 1988, 1989, Philip L. Budne
X *	May be used in not-for-profit applications provided this
X *	notice and the above copyright are not removed.  No warranty
X *	is expressed or implied.
X *
X *	Coded as per "OCLC-MARK Tape Format" ISBN: 0-933418-62-0
X *	1984, OCLC
X */
X
X# include <stdio.h>			/* get standard i/o library defns */
X# include "oclc.h"			/* get our defns */
X
Xvoid lose(), holdings(), bibliographic(), pdata(); /* forward defn. */
X
Xint
Xmain() {
X    char record[ MAXLREC+1 ];		/* entire logical record */
X    int llen;				/* 0-4 logical record length */
X    int fd = 0;				/* input file descr. */
X					/* 0 is "standard input" opened */
X					/* for us by the shell */
X
X    while( (llen = getrec( fd, record )) > 0 ) {
X	if( record[6] == 'y' )		/* check if a holdings record */
X	    holdings( record, llen );	/* it is???? */
X	else
X	    bibliographic( record, llen );
X    } /* while getrec */
X    return( 0 );			/* hit EOF: success */
X} /* main */
X
Xvoid
Xlose(f, a, b, c )			/* dangerous... should use varargs */
X    char *f, *a, *b, *c;
X{
X    printf(f, a, b, c );
X    putchar('\n');
X    exit( 1 );
X} /* lose */
X
Xvoid
Xholdings( record, llen )		/* print a holdings record */
X    char *record;
X    int llen;
X{
X    if( llen < MINH || llen > MAXH )
X	lose("invalid llen for holdings rec %d", llen );
X
X    puts("holdings records nyi");	/* never seen one!! */
X					/* (not fatal) */
X} /* holdings */
X
Xvoid
Xbibliographic( record, llen )
X    char *record;
X    int llen;
X{
X    int baseaddr, p;
X
X    if( llen < MINB || llen > MAXB )
X	lose("invalid llen for bib rec %d", llen );
X
X    baseaddr = getint( record+12, 5 );	/* 12-16 base address */
X    if( baseaddr < 24 || baseaddr > llen )
X	lose("bad baseaddr %d (llen %d)", baseaddr, llen );
X
X# ifdef DISPLAY_LEADER
X    pr_bib_fixed( record );		/* print fixed leader */
X# endif /* DISPLAY_LEADER defined */
X
X    start_record();
X    p = 24;				/* data starts in column 24 */
X    while( p < baseaddr && record[p] != RS ) {
X	int code, offset, length, lensiz, offsiz;
X
X# define CODELEN 3
X	code = getint( record+p, CODELEN );
X	p += CODELEN;
X
X	lensiz = record[20] - '0';	/* get number of columns in length */
X	if( lensiz != 4 )
X	    lose("%03d: unusual size of length field (column 20): %d",
X		 code, lensiz );
X	length = getint( record+p, lensiz ); /* get length */
X	p += lensiz;			/* advance pointer */
X
X	offsiz = record[21] - '0';	/* get number of columns for offset */
X	if( offsiz != 5 )
X	    lose("%03d: unusual size of offset field (column 21): %d",
X		 code, offsiz );
X	offset = getint( record+p, offsiz ); /* get offset */
X	p += offsiz;			/* advance pointer */
X
X	/* baseaddr+offset is first char of data */
X	/* baseaddr+offset+length-1 is last char of data */
X	if( offset+baseaddr+length-1 >= llen ) /* end of data */
X	    lose("data out of bounds");	/* past end of logical record?? */
X
X	/****************************************************************
X	 * perform record type dependent processing here!!
X	 */
X	display( code, record, offset+baseaddr, length );
X	/*
X	 ****************************************************************/
X    } /* while */
X    end_record();
X} /* bib record */
X
Xint
Xgetint( cp, cc )			/* get a fixed length integer */
X    register char *cp;			/* pointer */
X    register int cc;			/* length */
X{
X    register c, i;
X
X    i = 0;
X    while( cc-- ) {
X	c = *cp++;
X	if( c < '0' || c > '9' )
X	    lose("bad digit '%c'", c );
X	i = (i * 10) + c - '0';
X    } /* while */
X    return( i );
X} /* getint */
X
Xpr_bib_fixed( record )			/* print bib record fixed leader */
X    char *record;
X{
X    switch( record[5] ) {		/* record status */
X    case 'n':
X	puts("new record");
X	break;
X    case 'c':
X	puts("corrected record");
X	break;
X    case 'p':
X	puts("previously prepublication record");
X	break;
X    case 'a':
X	puts("increase in coding level");
X	break;
X    default:
X	printf("** unknown bib rec stat '%c'\n", record[5] );
X	break;
X    } /* rec stat */
X
X    printf("record type '%c'\n", record[6] );
X    printf("bib level '%c'\n", record[7] );
X
X    if( record[8] != ' ' || record[9] != ' ' ||
X       record[10] != '2' || record[11] != '2' )
X	lose("invalid contents in 8, 9, 10, or 11 of bib record");
X
X    /* 12-16 base address */
X    printf("encoding level '%c'\n", record[17] );
X    printf("desc catalogging form '%c'\n", record[18] );
X    /* 19 blank */
X    /* 20, 21 size of length and offset */
X
X    /* mask to 8 bits; a signed char never equals 0x90 etc. */
X    switch( record[22] & 0xff ) {	/* transaction type */
X    case 0x01:
X	puts("produce");
X	break;
X    case 0x02:
X	puts("update");
X	break;
X    case 0x03:
X	puts("cancel update");
X	break;
X    case 0x11:
X	puts("replace");
X	break;
X    case 0x50:
X	puts("all produce");
X	break;
X    case 0x90:
X	puts("offline retrieve");
X	break;
X    case 0x92:
X	puts("offline update");
X	break;
X    case 0x93:
X	puts("offline cancel update");
X	break;
X    case 0x94:
X	puts("microcon update");
X	break;
X    default:
X	lose("bad transaction code %#02x\n", record[22] & 0xff );
X	break;
X    } /* transaction code */
X
X    /* 23 '0' */
X} /* pr_bib_fixed */
END_OF_oclc.c
if test 5151 -ne `wc -c <oclc.c`; then
    echo shar: \"oclc.c\" unpacked with wrong size!
fi
# end of overwriting check
fi
# oclc.h: header included by disk.c, mdisplay.c, ndisplay.c and oclc.c.
# It defines EOS, the min/max logical record lengths (MINB/MAXB,
# MINH/MAXH, MAXLREC), the RS and US delimiter characters, and SCHAR,
# the character printed in place of a subfield delimiter.
# Skipped when oclc.h already exists unless "-c" is given as $1; after
# extraction the byte count is compared against the expected 817.
if test -f oclc.h -a "${1}" != "-c" ; then 
  echo shar: Will not over-write existing file \"oclc.h\"
else
echo shar: Extracting \"oclc.h\" \(817 characters\)
sed "s/^X//" >oclc.h <<'END_OF_oclc.h'
X/*
X *	oclc.h -- defns oclc mark tape records
X *
X *	Philip L. Budne, Boston University, Distributed Systems Group
X *	Copyright 1988, 1989, Philip L. Budne
X *	May be used in not-for-profit applications provided this
X *	notice and the above copyright are not removed.  No warranty
X *	is expressed or implied.
X */
X
X# define EOS '\0'			/* End of String Char */
X
X# define MAXB 6114			/* Max length for bib record */
X# define MINB 124			/* Min length for bib record */
X
X# define MAXH 6114			/* Max length for holdings record */
X# define MINH 112			/* Min length for holdings record */
X
X# define MAXLREC MAXB			/* Max length for any logical record */
X
X# define RS '\036'			/* record sep */
X# define US '\037'			/* start of subsection */
X
X# define SCHAR '|'			/* char to output for subsection */
X					/* could also use '$' */
END_OF_oclc.h
if test 817 -ne `wc -c <oclc.h`; then
    echo shar: \"oclc.h\" unpacked with wrong size!
fi
# end of overwriting check
fi
echo shar: End of shell archive.
exit 0