notes@hpislx.HP.COM (Notesfiles System) (05/29/90)
Is there anyone who would be willing to give insight on B-tree data structures? I have a B-tree program that works up to the first split but when it needs to split again it goes into a recursive loop. I've looked and looked at the code and it seems ok logically???Stumped??? I am taking data 4444 with index 4 as input up to 1000 records. Here is the code: have fun ;-) and thanks for any inputs on this- btw I am running this on an HP9000 7.0 unix without the ansi compiler- Laurie Schaaf Hewlett Packard 303-679-3225 --------------------------8< cut here 8<---------------------------------- /* driver.c... Driver for btree tests: Opens or creates btree file. Gets next key and calls insert to insert key in tree. If necessary, creates a new root. */ #include <stdio.h> #include "bt.h" #include "fileio.h" short getroot(); short create_tree(); main() { int promoted; /* boolean: tells of promotion from below */ short root; /* RRN of root page */ short promo_rrn; /* RRN promoted from below */ int promo_key; /* key promoted from below */ int promo_recnum; /* record number promoted from below */ int key; /* next key to insert in tree */ int recnum; /* record number of actual data record */ if (btopen()) /* try to open btree.dat and get root */ root = getroot(); else /* if btree.dat not there, create it */ root = create_tree(); while (scanf("%d",&key)) { scanf("%d",&recnum); promoted = insert(root, key, recnum, &promo_rrn, &promo_key, &promo_recnum); if (promoted) { root = create_root(promo_key, promo_recnum, root, promo_rrn); printf("promo_key=%d\n",promo_key); printf("promo_recnum=%d\n",promo_recnum); } } /* end while */ btclose(); } /* end driver.c */ /* insert.c... Contains insert() -- function to insert a key into a B-tree Calls itself recursively until bottom of tree is reached. Then inserts key in node. If node is out of room, -calls split() to split node -promotes middle key and RRN of new node */ insert(rrn, key, recnum, promo_r_child, promo_key, promo_recnum) short rrn; /* RRN of page to make insertion in */ short *promo_r_child; /* child promoted up to next level */ int key; /* key inserted here or lower */ int recnum; /* recnum inserted here or lower */ int *promo_key; /* key promoted to next level */ int *promo_recnum; /* recnum promoted to next level */ { BTPAGE page; /* current page */ BTPAGE newpage; /* new page if split occurs */ int found, promoted; /* boolean values */ short pos; short p_b_rrn; /* RRN promoted from below */ int p_b_key; /* key promoted from below */ int p_b_recnum; /* recnum promoted from below */ printf("Key is %d\n",key); printf("Recnum is %d\n",recnum); if (rrn == NIL) { /* past bottom of tree...promote */ *promo_key = key; /* original key so that it will be */ *promo_r_child = NIL; /* inserted at leaf level */ *promo_recnum = recnum; return(YES); } btread(rrn, &page); found = search_node(key, &page, &pos); if (found) { printf("Error - attempt to insert duplicate key: %c \n",key); return(0); } promoted = insert(page.child[pos], key, recnum, &p_b_rrn, &p_b_key, &p_b_recnum); if (!promoted) return(NO); /* no promotion */ if (page.keycount < MAXKEYS) { ins_in_page(p_b_key, p_b_recnum, p_b_rrn, &page); /* insert key and recnum */ btwrite(rrn, &page); /* and pointer */ return(NO); /* no promotion */ } else { split(p_b_key, p_b_recnum, p_b_rrn, &page, promo_key, promo_recnum, promo_r_child, &newpage); btwrite(rrn, &page); btwrite(*promo_r_child, &newpage); return(YES); /* promotion */ } } /* btio.c... Contains B-tree functions that directly involve file i/o: btopen() -- open file "btree.dat" to hold the btree btclose() -- close "btree.dat" getroot() -- get rrn of rot node from first two bytes of btree.dat putroot() -- put rrn of root node in first two bytes of btree.dat create_tree() -- create "btree.dat" and root node getpage() -- get next available block in "btree.dat" for new page btread() -- read page number RRN from "btree.dat" btwrite() -- write page number RRN to "btree.dat" */ int btfd; /* global file descriptor for "btree.dat" */ btopen() { btfd = open("btree.dat", READWRITE); return(btfd > 0); } btclose() { close(btfd); } short getroot() { short root; long lseek(); lseek(btfd, 0L, 0); if (read(btfd, &root, 2) == 0) { printf("Error: Unable to get root.\n"); exit(1); } return(root); } putroot(root) short root; { long lseek(); lseek(btfd, 0L, 0); write(btfd, &root, 2); } short create_tree() { int key; int recnum; btfd = creat("btree.dat",PMODE); close(btfd); /* Have to close and reopen to ensure */ btopen(); /* r/w access on many systems */ key = NIL; /* insert bogus key to initialize index */ recnum = NIL; /* insert bogus key to initialize index */ return(create_root(key, recnum, NIL, NIL)); } short getpage() { long lseek(), addr; addr = lseek(btfd, 0L, 2) - 2L; return((short) addr / PAGESIZE); } btread(rrn, page_ptr) short rrn; BTPAGE *page_ptr; { long lseek(), addr; addr = (long)rrn * (long)PAGESIZE + 2L; lseek(btfd, addr, 0); return(read(btfd, page_ptr, PAGESIZE)); } btwrite(rrn, page_ptr) short rrn; BTPAGE *page_ptr; { long lseek(), addr; addr = (long) rrn * (long) PAGESIZE + 2L; lseek(btfd, addr, 0); return(write(btfd, page_ptr, PAGESIZE)); } /* btutil.c... Contains utility functions for btree program: create_root() -- get and initialize root node and insert one key pageinit() -- put NOKEY in all "key" slots and NIL in "child" slots search_node() -- return YES if key in node, else NO, In either case put key's correct position in pos. ins_in_page() -- insert key and right child in page split() -- split node by creating new node and moving half of keys to new node. Promote middle key and RRN of new node. */ create_root(key, recnum, left, right) int key; int recnum; short left, right; { BTPAGE page; short rrn; rrn = getpage(); pageinit(&page); page.key[0] = key; page.recnum[0] = recnum; page.child[0] = left; page.child[1] = right; page.keycount = 1; btwrite(rrn, &page); putroot(rrn); return(rrn); } pageinit(p_page) BTPAGE *p_page; /* p_page: pointer to a page */ { int j; for (j = 0; j < MAXKEYS; j++) { p_page->key[j] = NOKEY; p_page->recnum[j] = NOKEY; p_page->child[j] = NIL; } p_page->child[MAXKEYS] = NIL; } search_node(key, p_page, pos) int key; BTPAGE *p_page; short *pos; /* position where key is or should be inserted */ { int i; for (i = 0; i < p_page->keycount && key > p_page->key[i]; i++); *pos = i; if (*pos < p_page->keycount && key == p_page->key[*pos]) return(YES); /* key is in page */ else return(NO); /* key is not in page */ } ins_in_page(key, recnum, r_child, p_page) int key; int recnum; short r_child; BTPAGE *p_page; { int i; for (i = p_page->keycount; key < p_page->key[i-1] && i > 0; i--) { p_page->key[i] = p_page->key[i-1]; p_page->recnum[i] = p_page->recnum[i-1]; p_page->child[i+1] = p_page->child[i]; } p_page->keycount++; p_page->key[i] = key; p_page->recnum[i] = recnum; p_page->child[i+1] = r_child; } split(key, recnum, r_child, p_oldpage, promo_key, promo_recnum, promo_r_child, p_newpage) int key; /* key to be inserted */ int recnum; /* recnum to be inserted */ int *promo_key; /* key to be promoted up from here */ int *promo_recnum; /* recnum to be promoted up from here */ short r_child; /* child RRN to be inserted */ short *promo_r_child; /* RRN to be promoted up from here */ BTPAGE *p_oldpage; /* pointer to old page */ BTPAGE *p_newpage; /* pointer to new page */ { int i; short mid; /* tells where split is to occur */ int workkeys[MAXKEYS+1]; /* temp. holds keys before split */ int workrecnum[MAXKEYS+1]; /* temp. holds recnums before split */ short workch[MAXKEYS+2]; /* temp. holds childs before split */ for (i = 0; i < MAXKEYS; i++) { /* move keys and children */ workkeys[i] = p_oldpage->key[i]; /* old page into work array*/ workrecnum[i] = p_oldpage->recnum[i];/* " " " " */ workch[i] = p_oldpage->child[i]; } workch[i] = p_oldpage->child[i]; for (i = MAXKEYS; key < workkeys[i-1] && i > 0; i--) { /* insert new key */ workkeys[i] = workkeys[i-1]; /* insert new recnum */ workrecnum[i] = workrecnum[i-1]; workch[i+1] = workch[i]; } workkeys[i] = key; workrecnum[i] = recnum; workch[i+1] = r_child; *promo_r_child = getpage(); /* create new page for split */ pageinit(p_newpage); /* promote RRN of new page */ for (i = 0; i < MINKEYS; i++) { /* move first half of keys */ p_oldpage->key[i] = workkeys[i]; /* childs to old page */ p_oldpage->recnum[i] = workrecnum[i]; /* childs to old page */ p_oldpage->child[i] = workch[i]; /* half to new page */ p_newpage->key[i] = workkeys[i+1+MINKEYS]; p_newpage->recnum[i] = workrecnum[i+1+MINKEYS]; p_newpage->child[i] = workch[i+1+MINKEYS]; p_oldpage->key[i+MINKEYS] = NOKEY; /* mark second half of old*/ p_oldpage->recnum[i+MINKEYS] = NOKEY;/*mark second half of old*/ p_oldpage->child[i+1+MINKEYS] = NIL; /*mark second half of old*/ } p_oldpage->child[MINKEYS] = workch[MINKEYS]; p_newpage->child[MINKEYS] = workch[i+1+MINKEYS]; p_newpage->keycount = MAXKEYS - MINKEYS; p_oldpage->keycount = MINKEYS; *promo_key = workkeys[MINKEYS]; /* promote middle key */ *promo_recnum = workrecnum[MINKEYS]; /* promote middle key */ } --------------------------8< cut here 8<---------------------------------- /* header file for B-tree programs */ #define MAXKEYS 4 #define MINKEYS MAXKEYS/2 #define NIL (-1) #define NOKEY (-1) #define YES 1 #define NO 0 typedef struct { short keycount; int key[MAXKEYS]; short child[MAXKEYS+1]; long recnum[MAXKEYS]; } BTPAGE; #define PAGESIZE sizeof(BTPAGE) --------------------------8< cut here 8<---------------------------------- /* fileio.h --- header file containing file I/O definitions */ #define PMODE 0755 #define READONLY 0 #define WRITEONLY 1 #define READWRITE 2 #define MAX_REC_SIZE 512 #define DELIM_CHR '|' #define DELIM_STR "|" #define REC_LGTH 57 --------------------------8< cut here 8<----------------------------------
ronald@atcmp.nl (Ronald Pikkert) (06/03/90)
From article <8790001@hpislx.HP.COM>, by notes@hpislx.HP.COM (Notesfiles System): > I have a B-tree program that works up to the first split but when it needs > to split again it goes into a recursive loop. I've looked and looked at the > code and it seems ok logically???Stumped??? > > > short getpage() > { > long lseek(), addr; > addr = lseek(btfd, 0L, 2) - 2L; > return((short) addr / PAGESIZE); > } > Your problem is in the page calculation routine. The addr is first converted into a short and then divided by PAGESIZE. Change this line into: return((short) (addr / PAGESIZE)); Don't worry, it's only a pair of brackets on a lot of code :-) - Ronald Pikkert E-mail: ronald@atcmp.nl @ AT Computing b.v. Tel: 080 - 566880 Toernooiveld 6525 ED Nijmegen