[alt.sources] [tcp-ip...] Re: Looking for in_cksum for 68k.

mb@ttidca.TTI.COM (Michael Bloom) (10/15/90)

Archive-name: in_cksum/13-Oct-90
Original-posting-by: mb@ttidca.TTI.COM (Michael Bloom)
Original-subject: Re: Looking for in_cksum for 68k.
Reposted-by: emv@math.lsa.umich.edu (Edward Vielmetti)

[Reposted from comp.protocols.tcp-ip,comp.sources.wanted.
Comments on this service to emv@math.lsa.umich.edu (Edward Vielmetti).]

In article <347@megadata.mega.oz.au> andrew@megadata.mega.oz.au (Andrew McRae) writes:
>Does anyone have a 68000 specific in_cksum routine for
>doing IP checksums?
>
>I have been using the machine independent version,
>but I have looked at the VAX and CCI routines that came
>with the 4.3 BSD network source, and it seems it would be
>a big win to have a 68k version. I am actually running a
>68000 rather than a 68020, but a 68020 version would be
>useful as a starting point.

A number of years back, I posted a request similar to yours and got
not a single response.  So I went ahead and hand optimized the
assembly output from compiling the machine independent version,
taking a few hints from RFC 1071.

I measured about a 35% improvement over the straight C file compiled by
PCC.  It might be the case that compiling the straight C source with
GNU C is nearly as good as (or perhaps better than) a hand-optimized
version.  I don't know.  We didn't have gcc when I did this.

There's still almost certainly room for some more optimization. I'd like
to see your improvements.

I've been using this for a couple of years on our machines.  If you
use it, please do not remove the notice at the start of the file.

By the way, the routine keeps its scratch word s_util in static storage,
so it is not reentrant.  It won't be re-entered if you are using the BSD
networking code, but if you are using some other networking code you
might want to move s_util to the stack.

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create:
#	in_cksum.s
# This archive created: Sat Oct 13 05:34:53 1990
export PATH; PATH=/bin:/usr/bin:$PATH
echo shar: "extracting 'in_cksum.s'" '(6279 characters)'
if test -f 'in_cksum.s'
then
	echo shar: "will not over-write existing file 'in_cksum.s'"
else
sed 's/^	X//' << \SHAR_EOF > 'in_cksum.s'
	X#
	X# Please do not remove this comment.
	X#
	X# This file was created by Michael Bloom (mb@ttidca.tti.com) by hand
	X# optimizing the assembly output from compiling the source file "in_cksum.c"
	X# which is covered by the following notice allowing redistribution:
	X#
	X# /*
	X#  * Copyright (c) 1988 Regents of the University of California.
	X#  * All rights reserved.
	X#  *
	X#  * Redistribution and use in source and binary forms are permitted
	X#  * provided that this notice is preserved and that due credit is given
	X#  * to the University of California at Berkeley. The name of the University
	X#  * may not be used to endorse or promote products derived from this
	X#  * software without specific prior written permission. This software
	X#  * is provided ``as is'' without express or implied warranty.
	X#  *
	X#  *      @(#)in_cksum.c  7.1 (Berkeley) 3/29/88
	X#  */
	X# 
	X	file	"in_cksum.c"
	X	data	1
	X	lcomm	s_util,2	# 2 bytes of static scratch: bytes 0 and 1 are stored singly, then read back as one word
	X	text
	X	global	nin_cksum,in_cksum	# both names label the same entry point
	X#
	X# in_cksum(m, len) -- 16-bit one's complement Internet checksum over the
	X# first len bytes of an mbuf chain.  Also exported as nin_cksum.
	X#
	X# arguments (read from the caller's stack frame):
	X#	(8,%fp)		m	first mbuf of the chain	-> %a0
	X#	(12,%fp)	len	number of bytes to sum	-> %d2
	X# mbuf fields used:
	X#	(%a0)		m_next	(long)
	X#	(%a0,4.w)	m_off	(long, added to m to find the data)
	X#	(%a0,8.w)	m_len	(word)
	X# returns:
	X#	%d0 = ~sum & 0xffff
	X#
	X# registers:
	X#	scratch: %a1 (w, the current data pointer), %d0, %d1
	X#	sum: %d3	(32-bit accumulator, folded to 16 bits by REDUCE)
	X#	mlen: %d4	(-1 means a word is split across two mbufs)
	X#	%d2-%d4 are saved on entry and restored on exit (mask M%1).
	X# locals:
	X#	((S%1-4).w,%fp)	byte_swapped
	X#	s_util		static 2-byte scratch used to build one word out of
	X#			two single bytes; because it is static the routine
	X#			is not reentrant.
	X#
	X# If the chain runs out before len bytes were summed, the message
	X# "cksum: out of data" is printed with printf and the checksum of the
	X# data that was present is returned.
	X#
	Xin_cksum:
	Xnin_cksum:
	X	link.l	%fp,&F%1
	X	movm.l	&M%1,(4,%sp)	# save %d2-%d4; (%sp) stays free for printf arg
	X	mov.l	(8,%fp),%a0	# m
	X	mov.l	(12,%fp),%d2	# len
	X
	X	clr.l	((S%1-4).w,%fp)	#   59 int byte_swapped = 0;
	X
	X	mov.l	&0,%d3		#   60 register sum = 0;
	X	mov.l	&0,%d4		# register mlen = 0;
	XL%cksm50:				#   63 for (;m && len; m = m->m_next) {
	X	mov.l	%a0,%d0		# (move to a data register sets Z if m == 0)
	X	beq	L%cksm49
	X	tst.l	%d2
	X	beq	L%cksm49
	X	tst.w	(%a0,8.w)	# if (m->m_len == 0)
	X	beq	L%cksm48			# continue;
	XL%cksm51:
	X	mov.l	%a0,%d0		# w = (( u_short *)((int)(m) + (m)->m_off));
	X	add.l	(%a0,4.w),%d0	#
	X	mov.l	%d0,%a1		#
	X	mov.l	&-1,%d0		#
	X	cmp.l	%d4,%d0		# if (mlen == -1) {
	X	bne.b	L%cksm52
	X# The first byte of this mbuf is the continuation of a word spanning
	X# between this mbuf and the last mbuf.  s_util.c[0] was already saved
	X# when scanning previous mbuf.  sum is at most a little over 16 bits
	X# here (every path that sets mlen to -1 has just done a REDUCE), so the
	X# plain add below cannot carry out of 32 bits.
	X
	X	mov.b	(%a1),s_util+1	#	 s_util.c[1] = *(char *)w;
	X	mov.l	&0,%d0		#
	X	mov.w	s_util,%d0	#
	X	add.l	%d0,%d3		#	 sum += s_util.s;
	X	lea.l	(%a1,1.w),%a1	#	 w = (u_short *)((char *)w + 1);
	X	mov.w	(%a0,8.w),%d0	#	 mlen = m->m_len - 1;
	X	ext.l	%d0		#
	X	sub.l	&1,%d0		#
	X	mov.l	%d0,%d4		#	""
	X	sub.l	&1,%d2		#	 len--;
	X	bra.b	L%cksm53		# } else {
	XL%cksm52:				#	 not a cont. of word spanning 2 mbufs
	X	mov.w	(%a0,8.w),%d0	#
	X	ext.l	%d0		#
	X	mov.l	%d0,%d4		#	 mlen = m->m_len;
	XL%cksm53:				# }
	X	cmp.l	%d2,%d4		# if (len < mlen)
	X	bge.b	L%cksm54		#
	X	mov.l	%d2,%d4		#	 mlen = len;
	XL%cksm54:				#
	X	sub.l	%d4,%d2		# len -= mlen;
	X#				#
	X# Force to even boundary	#
	X#				#
	X	mov.l	%a1,%d0		# if ((1 & (int) w) && (mlen > 0)) {
	X	and.l	&1,%d0		#
	X	beq.b	L%cksm55		#
	X	tst.l	%d4		#
	X	ble.b	L%cksm55		#
	X	mov.l	%d3,%d0		# REDUCE: add high word into low word,
	X	swap	%d0		#
	X	add.w	%d0,%d3		#
	X	mov.l	&0,%d0		#	 (move does not affect X bit)
	X	addx.w	%d0,%d3		#	 add the carry back in,
	X	and.l	&0xffff,%d3 	#	 and keep only 16 bits
	X
	X	lsl.l	&8,%d3		#	 sum <<= 8;
	X	mov.b	(%a1),s_util	#	 s_util.c[0] = *(u_char *)w;
	X	lea.l	(%a1,1.w),%a1	#	 w = (u_short *)((char *)w + 1);
	X	sub.l	&1,%d4		#	 mlen--;
	X	mov.l	&1,((S%1-4).w,%fp)	#	 byte_swapped = 1;
	X				#  }
	XL%cksm55:				# if ((2 & (int) w) && (mlen >= 2)) {
	X	mov.l	%a1,%d0		#	 if >= 2 bytes left and now
	X	and.l	&2,%d0		#	 short aligned, add first
	X	beq.b	L%cksm56		#	 short so rest is long
	X	mov.l	&2,%d0		#	 aligned.
	X	cmp.l	%d4,%d0		#
	X	blt.b	L%cksm56		#
	X	mov.l	&0,%d0		#	 sum += *w++;
	X	mov.w	(%a1)+,%d0	#
	X	add.l	%d0,%d3		#
	X# sum may be a full 32-bit value here: an earlier mbuf can leave the long
	X# loop below without a REDUCE.  Fold the carry of the add back in, as is
	X# done for the trailing byte at the end of the routine; otherwise a carry
	X# out of bit 31 would be lost and the checksum would come out one short.
	X	mov.l	&0,%d0		#	 (move does not affect X bit)
	X	addx.l	%d0,%d3		#	 end-around carry
	X	sub.l	&2,%d4		#	 mlen-=2;
	X				#  }
	XL%cksm56:
	X# Sum the bulk of the mbuf a long at a time with end-around carry (addx).
	X# The table below is 16 mov.l/addx.l pairs, i.e. 64 data bytes per pass.
	X#	%d1 = mlen / 64		full passes still to run (dbra counter)
	X#	%d0 = -(mlen & 0x3c)	bytes of whole longs left over, negated
	X# The first pass is a partial one: the jmp enters the table part way in,
	X# %d0 bytes before endloop.  This relies on every pair assembling to 4
	X# bytes of code for each 4 bytes of data.  Only the low two bits of %d4
	X# are used after the loop.
	X# NOTE(review): the displacement 66 and the .b index size are kept exactly
	X# as posted; confirm them against your assembler.
	X	mov.l	%d4,%d1		#
	X	mov.l    %d1,%d0 	#
	X	lsr.l    &6,%d1 	# number of times in loop = mlen/64
	X	and.l    &0x3c,%d0 	#
	X	neg.l    %d0 		#
	X	add.l	%d0,%d4 	# mlen -= (mlen & 0x3c)
	X	and.b    &0xf,%cc 	# clear X so the first addx adds no stale carry
	X	jmp     66(%pc,%d0.b) 	# jump into middle of table for first iter
	Xnextloop:
	X	mov.l    (%a1)+,%d0 	# sum += *(long *)w with carry in; w += 4
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	X	mov.l    (%a1)+,%d0
	X	addx.l   %d0,%d3
	Xendloop:
	X	mov.l	&0,%d0		# (move does not affect X bit)
	X	addx.l	%d0,%d3		# add in carry from addx last operation
	X	dbra    %d1,nextloop 	# (dbra does not affect X bit)
	X	and.l	&0x3,%d4	# above loop got all but possibly last 4 bytes
	X				# if (mlen == 0 && byte_swapped == 0)
	X				#	   continue
	X	bne.b	L%cksm57
	X	tst.l	((S%1-4).w,%fp)
	X	beq.b	L%cksm48
	X# REDUCE (sum is 16 bits afterwards, so the word adds below cannot
	X# carry out of 32 bits)
	XL%cksm57:
	X	mov.l	%d3,%d1
	X	swap	%d1
	X	add.w	%d1,%d3
	X	mov.l	&0,%d0
	X	addx.w	%d0,%d3
	X	and.l	&0xffff,%d3
	XL%cksm_11:
	XL%cksm58:				# while ((mlen -= 2) >= 0) {
	X	sub.l	&2,%d4
	X	blt.b	L%cksm59
	X	mov.l	&0,%d0			# sum += *w++;
	X	mov.w	(%a1)+,%d0
	X	add.l	%d0,%d3
	X	bra.b	L%cksm58		# }
	XL%cksm59:				# mlen is now -1 (one byte left) or -2
	X	tst.l	((S%1-4).w,%fp)	# if (byte_swapped) {
	X	beq.b	L%cksm60
	X	# REDUCE
	X	mov.l	%d3,%d1
	X	swap	%d1
	X	add.w	%d1,%d3
	X	mov.l	&0,%d0
	X	addx.w	%d0,%d3
	X	and.l	&0xffff,%d3
	XL%cksm_13:
	X	lsl.l	&8,%d3		# sum <<= 8;
	X	clr.l	((S%1-4).w,%fp)		# byte_swapped = 0;
	X	mov.l	&-1,%d0		# if (mlen == -1) {
	X	cmp.l	%d4,%d0
	X	bne.b	L%cksm61
	X	mov.b	(%a1),s_util+1		# s_util.c[1] = *(char *)w;
	X	mov.l	&0,%d0			# sum += s_util.s;
	X	mov.w	s_util,%d0
	X	add.l	%d0,%d3
	X	mov.l	&0,%d4			# mlen = 0;
	X				# } else
	X	bra.b	L%cksm62
	XL%cksm61:
	X	mov.l	&-1,%d4		#	 mlen = -1;
	XL%cksm62:
	X	bra.b	L%cksm63		# } else if (mlen == -1)
	XL%cksm60:
	X	mov.l	&-1,%d0
	X	cmp.l	%d4,%d0
	X	bne.b	L%cksm64
	X	mov.b	(%a1),s_util	#	 s_util.c[0] = *(char *)w;
	X				# }
	XL%cksm64:
	XL%cksm63:
	XL%cksm48:
	X	mov.l	(%a0),%a0	# m = m->m_next
	X	bra	L%cksm50
	XL%cksm49:
	X	tst.l	%d2		# if (len)
	X	beq.b	L%cksm65
	X	data	2		# printf("cksum: out of data\n");
	XL%cksm67:
	X	byte	'c,'k,'s,'u,'m,':,0x20,'o
	X	byte	'u,'t,0x20,'o,'f,0x20,'d,'a
	X	byte	't,'a,'\n,0x00
	X	text
	X	mov.l	&L%cksm67,(%sp)	# format string goes in the free slot at (%sp)
	X	jsr	printf
	XL%cksm65:		# if (mlen == -1) {
	X	mov.l	&-1,%d0
	X	cmp.l	%d4,%d0
	X	bne.b	L%cksm68
	X	clr.b	s_util+1		# s_util.c[1] = 0;
	X	mov.l	&0,%d0		# sum += s_util.s;
	X	mov.w	s_util,%d0
	X	add.l	%d0,%d3
	X	mov.l	&0,%d0
	X	addx.l	%d0,%d3		# handle carry
	X	#  183 }
	X	# REDUCE, leaving the folded 16-bit result in %d0
	XL%cksm68:
	X	mov.l	%d3,%d1
	X	swap	%d1
	X	add.w	%d1,%d3
	X	mov.l	&0,%d0
	X	addx.w	%d3,%d0		# %d0 = low word of sum + carry
	X	and.l	&0xffff,%d0
	X	not.w	%d0		# return (~sum & 0xffff);
	X				#  186 }
	X	movm.l	(4,%sp),&M%1	# restore %d2-%d4
	X	unlk	%fp
	X	rts
	X# Frame constants used above.
	X	set	S%1,0
	X	set	T%1,0
	X	set	F%1,-28
	X	set	M%1,0x001c 
	X	data	1
SHAR_EOF
if test 6279 -ne "`wc -c < 'in_cksum.s'`"
then
	echo shar: "error transmitting 'in_cksum.s'" '(should have been 6279 characters)'
fi
fi
exit 0
#	End of shell archive