andrew@megadata.mega.oz.au (Andrew McRae) (10/11/90)
Does anyone have a 68000 specific in_cksum routine for doing IP checksums? I have been using the machine independent version, but I have looked at the VAX and CCI routines that came with the 4.3 BSD network source, and it seems it would be a big win to have a 68k version. I am actually running a 68000 rather than a 68020, but a 68020 version would be useful as a starting point. Thanks! Andrew McRae inet: andrew@megadata.mega.oz.au Megadata Pty Ltd, uucp: ..!uunet!megadata.mega.oz.au!andrew North Ryde 2113 Phone: +61 2 805 0899 NSW AUSTRALIA Fax: +61 2 887 4847
louie@sayshell.umd.edu (Louis A. Mamakos) (10/13/90)
In article <347@megadata.mega.oz.au> andrew@megadata.mega.oz.au (Andrew McRae) writes:
>Does anyone have a 68000 specific in_cksum routine for
>doing IP checksums?

This is the checksum routine that I use in the Amiga port of the KA9Q TCP/IP
package. It seems to work.

louie

;
; Compute the 1's complement sum of data buffer.  Called from C as
;
;       unsigned short
;       lcsum(buf, cnt)
;               unsigned short *buf;
;               unsigned short cnt;
;
; Both arguments are fetched from the stack as longwords (4(A7) and 8(A7)).
; The folded 16-bit sum is returned in D0; it is NOT complemented here.
; D0, D1, A0 and A1 are used as scratch.  D2 is used as well, but its full
; 32-bit value is parked in A1 on entry and restored before returning.
;
_lcsum:
        MOVE.L  4(A7),A0        ; get pointer to data block
        MOVE.L  8(A7),D1        ; get number of 16bit words to sum
        MOVE.L  D2,A1           ; save D2 in a volatile register; must be a
                                ; long move: a word move to An sign-extends
                                ; and would lose the upper half of D2
        MOVE.W  D1,D2           ; save a copy of the count (only bit 0 is
                                ; examined later)
        LSR.L   #1,D1           ; convert from words to longs
        MOVEQ.L #0,D0           ; D0 used to accumulate the sum, clear CC
        BRA.S   endl            ; jump to end of loop to start things off
;
; Take advantage of 68010 loop mode cache and add 2 words at a time until
; a carry propagates out.  68020 users win 'cause of instruction cache.
;
; DBCS leaves the loop without decrementing D1 when the carry is set; the
; carry is then added back in and the loop is re-entered at endl, where the
; decrement for that longword finally happens.
;
loop:   ADD.L   (A0)+,D0        ; add two words in
endl:   DBCS    D1,loop
        BCC.S   done            ; carry clear: the count ran out, jump if done
        ADDQ.L  #1,D0           ; add in carry (this also clears the carry)
        BRA.S   endl            ; resume loop

done:   BTST    #0,D2           ; was word count odd?
        BEQ.S   done2
        MOVEQ.L #0,D2
        MOVE.W  (A0),D2         ; get the last word, zero-extended
        ADD.L   D2,D0           ; add it in
        BCC.S   done2           ; did that cause a carry?
        ADDQ.L  #1,D0           ; yes

done2:  MOVE.L  A1,D2           ; restore register
        MOVE.L  D0,D1           ; get copy of sum               D0=ABCD D1=ABCD
        SWAP.W  D1              ; into low order part of D1     D0=ABCD D1=CDAB
                                ; (no need to zap the upper word of D1: only
                                ; word-sized operations follow and D0 is
                                ; cleared before the final MOVE.W)
        ADD.W   D0,D1           ; two halves of sum together
        MOVEQ.L #0,D0           ; MOVEQ leaves the X bit alone
        ADDX.W  D0,D1           ; get last carry
        MOVE.W  D1,D0           ; D0 = 0000:sum
        RTS
mb@ttidca.TTI.COM (Michael Bloom) (10/13/90)
In article <347@megadata.mega.oz.au> andrew@megadata.mega.oz.au (Andrew McRae) writes: >Does anyone have a 68000 specific in_cksum routine for >doing IP checksums? > >I have been using the machine independent version, >but I have looked at the VAX and CCI routines that came >with the 4.3 BSD network source, and it seems it would be >a big win to have a 68k version. I am actually running a >68000 rather than a 68020, but a 68020 version would be >useful as a starting point. A number of years back, I posted a request similar to yours and got not a single response. So I went ahead and hand optimized the assembly output from compiling the machine independent version, taking a few hints from RFC 1071. I measured about 35 % improvement over the straight C file compiled by PCC. It might be the case that compiling the straight C source with GNU C is nearly as good as (or perhaps better than) a hand optimized version. I don't know. We didn't have gcc when I did this. There's still almost certainly room for some more optimization. I'd like to see your improvements. I've been using this for a couple of years on our machines. If you use it, please do not remove the notice at the start of the file. By the way, although the routine won't be re-entered if you are using the bsd networking code, if you are using some other networking code, you might want to move s_util to the stack. #! /bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #! /bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create: # in_cksum.s # This archive created: Sat Oct 13 05:34:53 1990 export PATH; PATH=/bin:/usr/bin:$PATH echo shar: "extracting 'in_cksum.s'" '(6279 characters)' if test -f 'in_cksum.s' then echo shar: "will not over-write existing file 'in_cksum.s'" else sed 's/^ X//' << \SHAR_EOF > 'in_cksum.s' X# X# Please do not remove this comment. 
X#
X#       This file was created by Michael Bloom (mb@ttidca.tti.com) by hand
X#       optimizing the assembly output from compiling the source file "in_cksum.c"
X#       which is covered by the following notice allowing redistribution:
X#
X# /*
X#  * Copyright (c) 1988 Regents of the University of California.
X#  * All rights reserved.
X#  *
X#  * Redistribution and use in source and binary forms are permitted
X#  * provided that this notice is preserved and that due credit is given
X#  * to the University of California at Berkeley. The name of the University
X#  * may not be used to endorse or promote products derived from this
X#  * software without specific prior written permission. This software
X#  * is provided ``as is'' without express or implied warranty.
X#  *
X#  *      @(#)in_cksum.c  7.1 (Berkeley) 3/29/88
X#  */
X#
X       file    "in_cksum.c"
X       data    1
X       lcomm   s_util,2
X       text
X       global  nin_cksum,in_cksum
X#
X# in_cksum(m,len)
X#      m   -> %a0
X#      len -> %d2
X#
X# locals:
X#      scratch: %a1,%d0,%d1
X#      sum:     %d3
X#      mlen:    %d4
X#      byte_swapped: long at ((S%1-4).w,%fp)
X#
X# Walks the mbuf chain starting at m (next pointer at offset 0, long
X# m_off at offset 4, word m_len at offset 8) and returns ~sum & 0xffff
X# in %d0.
X#
X# The bulk of each mbuf is summed a longword at a time with addx.l, so
X# between mbufs %d3 can hold a full, unreduced 32-bit one's complement
X# total.  Any plain add.l into %d3 that is not preceded by a REDUCE must
X# therefore fold its carry back in.
X#
X# s_util is a static 2-byte scratch area used to assemble a 16-bit word
X# whose two bytes come from different places (odd alignment or a word
X# spanning two mbufs).
X#
Xin_cksum:
Xnin_cksum:
X       link.l  %fp,&F%1
X       movm.l  &M%1,(4,%sp)
X       mov.l   (8,%fp),%a0             # m
X       mov.l   (12,%fp),%d2            # len
X
X       clr.l   ((S%1-4).w,%fp)         # 59 int byte_swapped = 0;
X
X       mov.l   &0,%d3                  # 60 register sum = 0;
X       mov.l   &0,%d4                  # register mlen = 0;
XL%cksm50:                              # 63 for (;m && len; m = m->m_next) {
X       mov.l   %a0,%d0
X       beq     L%cksm49
X       tst.l   %d2
X       beq     L%cksm49
X       tst.w   (%a0,8.w)               # if (m->m_len == 0)
X       beq     L%cksm48                #       continue;
XL%cksm51:
X       mov.l   %a0,%d0                 # w = (( u_short *)((int)(m) + (m)->m_off));
X       add.l   (%a0,4.w),%d0           #
X       mov.l   %d0,%a1                 #
X       mov.l   &-1,%d0                 #
X       cmp.l   %d4,%d0                 # if (mlen == -1) {
X       bne.b   L%cksm52
X# The first byte of this mbuf is the continuation of a word spanning
X# between this mbuf and the last mbuf. s_util.c[0] was already saved
X# when scanning previous mbuf.
X
X       mov.b   (%a1),s_util+1          # s_util.c[1] = *(char *)w;
X       mov.l   &0,%d0                  #
X       mov.w   s_util,%d0              #
X       add.l   %d0,%d3                 # sum += s_util.s;
X       lea.l   (%a1,1.w),%a1           # w = (u_short *)((char *)w + 1);
X       mov.w   (%a0,8.w),%d0           # mlen = m->m_len - 1;
X       ext.l   %d0                     #
X       sub.l   &1,%d0                  #
X       mov.l   %d0,%d4                 #       ""
X       sub.l   &1,%d2                  # len--;
X       bra.b   L%cksm53                # } else {
XL%cksm52:                              # not a cont. of word spanning 2 mbufs
X       mov.w   (%a0,8.w),%d0           #
X       ext.l   %d0                     #
X       mov.l   %d0,%d4                 # mlen = m->m_len;
XL%cksm53:                              # }
X       cmp.l   %d2,%d4                 # if (len < mlen)
X       bge.b   L%cksm54                #
X       mov.l   %d2,%d4                 #       mlen = len;
XL%cksm54:                              #
X       sub.l   %d4,%d2                 # len -= mlen;
X#                                      #
X# Force to even boundary               #
X#                                      #
X       mov.l   %a1,%d0                 # if ((1 & (int) w) && (mlen > 0)) {
X       and.l   &1,%d0                  #
X       beq.b   L%cksm55                #
X       tst.l   %d4                     #
X       ble.b   L%cksm55                #
X       mov.l   %d3,%d0                 # REDUCE
X       swap    %d0                     #
X       add.w   %d0,%d3                 #
X       mov.l   &0,%d0                  #
X       addx.w  %d0,%d3                 #
X       and.l   &0xffff,%d3             #       ""
X
X       lsl.l   &8,%d3                  # sum <<= 8;
X       mov.b   (%a1),s_util            # s_util.c[0] = *(u_char *)w;
X       lea.l   (%a1,1.w),%a1           # w = (u_short *)((char *)w + 1);
X       sub.l   &1,%d4                  # mlen--;
X       mov.l   &1,((S%1-4).w,%fp)# byte_swapped = 1;
X                                       # }
XL%cksm55:                              # if ((2 & (int) w) && (mlen > 0)) {
X       mov.l   %a1,%d0                 # if >= 2 bytes left and now
X       and.l   &2,%d0                  # short aligned, add first
X       beq.b   L%cksm56                # short so rest is long
X       mov.l   &2,%d0                  # aligned.
X       cmp.l   %d4,%d0                 #
X       blt.b   L%cksm56                #
X       mov.l   &0,%d0                  # sum += *w++;
X       mov.w   (%a1)+,%d0              #
X       add.l   %d0,%d3                 #
X       mov.l   &0,%d0                  # sum may still be an unreduced 32-bit
X       addx.l  %d0,%d3                 # total from the previous mbuf, so fold
X                                       # the carry of the add back in (mov
X                                       # does not affect the X bit)
X       sub.l   &2,%d4                  # mlen-=2;
X                                       # }
XL%cksm56:
X       mov.l   %d4,%d1                 #
X       mov.l   %d1,%d0                 #
X       lsr.l   &6,%d1                  # number of times in loop = mlen/64
X       and.l   &0x3c,%d0               # bytes in the partial first pass
X       neg.l   %d0                     #
X       add.l   %d0,%d4                 # mlen -= bytes of the partial pass
X       and.b   &0xf,%cc                # clear the X bit before the addx chain
X       jmp     66(%pc,%d0.b)           # jump into middle of table for first iter
X# Each mov.l/addx.l pair below is 4 bytes of code and sums 4 bytes of data,
X# so the negated byte count in %d0 is also the code offset back from endloop.
X# Do not insert anything between the jmp and endloop.
Xnextloop:
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
X       mov.l   (%a1)+,%d0
X       addx.l  %d0,%d3
Xendloop:
X       mov.l   &0,%d0                  # (move does not affect X bit)
X       addx.l  %d0,%d3                 # add in carry from addx last operation
X       dbra    %d1,nextloop            # (dbra does not affect X bit)
X       and.l   &0x3,%d4                # above loop got all but possibly last 4 bytes
X                                       # if (mlen == 0 && byte_swapped == 0)
X                                       #       continue
X       bne.b   L%cksm57
X       tst.l   ((S%1-4).w,%fp)
X       beq.b   L%cksm48
X# REDUCE
XL%cksm57:
X       mov.l   %d3,%d1
X       swap    %d1
X       add.w   %d1,%d3
X       mov.l   &0,%d0
X       addx.w  %d0,%d3
X       and.l   &0xffff,%d3
XL%cksm_11:
XL%cksm58:                              # while ((mlen -= 2) >= 0) {
X       sub.l   &2,%d4
X       blt.b   L%cksm59
X       mov.l   &0,%d0                  #       sum += *w++;
X       mov.w   (%a1)+,%d0
X       add.l   %d0,%d3
X       bra.b   L%cksm58                # }
XL%cksm59:
X       tst.l   ((S%1-4).w,%fp)         # if (byte_swapped) {
X       beq.b   L%cksm60
X                                       # REDUCE
X       mov.l   %d3,%d1
X       swap    %d1
X       add.w   %d1,%d3
X       mov.l   &0,%d0
X       addx.w  %d0,%d3
X       and.l   &0xffff,%d3
XL%cksm_13:
X       lsl.l   &8,%d3                  # sum <<= 8;
X       clr.l   ((S%1-4).w,%fp)         # byte_swapped = 0;
X       mov.l   &-1,%d0                 # if (mlen == -1) {
X       cmp.l   %d4,%d0
X       bne.b   L%cksm61
X       mov.b   (%a1),s_util+1          # s_util.c[1] = *(char *)w;
X       mov.l   &0,%d0                  # sum += s_util.s;
X       mov.w   s_util,%d0
X       add.l   %d0,%d3
X       mov.l   &0,%d4                  # mlen = 0;
X                                       # } else
X       bra.b   L%cksm62
XL%cksm61:
X       mov.l   &-1,%d4                 #       mlen = -1;
XL%cksm62:
X       bra.b   L%cksm63                # } else if (mlen == -1)
XL%cksm60:
X       mov.l   &-1,%d0
X       cmp.l   %d4,%d0
X       bne.b   L%cksm64
X       mov.b   (%a1),s_util            # s_util.c[0] = *(char *)w;
X                                       # }
XL%cksm64:
XL%cksm63:
XL%cksm48:
X       mov.l   (%a0),%a0               # m = m->m_next
X       bra     L%cksm50
XL%cksm49:
X       tst.l   %d2                     # if (len)
X       beq.b   L%cksm65
X       data    2                       # printf("cksum: out of data\n");
XL%cksm67:
X       byte    'c,'k,'s,'u,'m,':,0x20,'o
X       byte    'u,'t,0x20,'o,'f,0x20,'d,'a
X       byte    't,'a,'\n,0x00
X       text
X       mov.l   &L%cksm67,(%sp)
X       jsr     printf
XL%cksm65:                              # if (mlen == -1) {
X       mov.l   &-1,%d0
X       cmp.l   %d4,%d0
X       bne.b   L%cksm68
X       clr.b   s_util+1                # s_util.c[1] = 0;
X       mov.l   &0,%d0                  # sum += s_util.s;
X       mov.w   s_util,%d0
X       add.l   %d0,%d3
X       mov.l   &0,%d0
X       addx.l  %d0,%d3                 # handle carry
X                                       # 183 }
X                                       # REDUCE
XL%cksm68:
X       mov.l   %d3,%d1
X       swap    %d1
X       add.w   %d1,%d3
X       mov.l   &0,%d0
X       addx.w  %d3,%d0                 # %d0 = folded sum + last carry
X       and.l   &0xffff,%d0
X       not.w   %d0                     # return (~sum & 0xffff);
X                                       # 186 }
X       movm.l  (4,%sp),&M%1
X       unlk    %fp
X       rts
X       set     S%1,0
X       set     T%1,0
X       set     F%1,-28
X       set     M%1,0x001c
X       data    1
SHAR_EOF
if test 6279 -ne "`wc -c < 'in_cksum.s'`"
then
        echo shar: "error transmitting 'in_cksum.s'" '(should have been 6279 characters)'
fi
fi
exit 0
#       End of shell archive