[comp.sys.isis] A bug and fix in "isis_remote"

ken@gvax.cs.cornell.edu (Ken Birman) (01/09/91)

We have fixed a problem that affects multicasts with replies to
groups containing members that joined using "isis_remote".  Such
multicasts sometimes returned prematurely, as if some of the non-remote
processes had sent null replies.

The problem is in clib/cl_inter.c:net_send(...).  At the end of this
procedure you will find a section of code that starts:
        while(!aptr_isnull(to))
        {
            if(!addr_ismine(to))
            {
                register qnode *qp = pg_find(ISIS_IO, to);
                register ioq *ioqp;
                if(qp == 0)
                {
                    register msgid = msg_getid(mp);
                    if(exmode == -1 && (msgid&1)) /* ISIS -> remote_xxx */
                    {
                        register message *rmsg = msg_newmsg();
                        msg_addfield(rmsg, FLD_ISNULLREP, 0, 0, 0);
                        msg_setid(rmsg, msgid);
                        msg_setsender(rmsg, to);
                        fbcast_l("mR", msg_getreplyto(mp), GENERIC_RCV_REPLY, rmsg, 0);
                    }
                    if(callback)
                        (*callback)(to, arg0, arg1);
                }
		else ... etc ...

Change this to the following:

        while(!aptr_isnull(to))
        {
            if(!addr_ismine(to) && addr_islocal(to) && addr_isrclient(to))
            {
                register qnode *qp = pg_find(ISIS_IO, to);
                register ioq *ioqp;
                if(qp == 0)
                {
                    register msgid = msg_getid(mp);
                    address save;
                    save = my_address;
                    if(exmode == -1 && (msgid&1)) /* ISIS -> remote_xxx */
                    {
                        register message *rmsg = msg_newmsg();
                        msg_addfield(rmsg, FLD_ISNULLREP, 0, 0, 0);
                        msg_setid(rmsg, msgid);
                        my_address = *to;
			/* nullreply() on behalf of failed remote process */
                        fbcast_l("mR", msg_getreplyto(mp), GENERIC_RCV_REPLY, rmsg, 0);
                    }
                    my_address = save;
                    if(callback)
                        (*callback)(to, arg0, arg1);
                }
		else ... etc ...

You will also need to fix a macro: addr_islocal in mlib/msg.h.
Change it to the following:

/*
 * addr_islocal(addr): true (nonzero) when the address that addr points to has
 * addr_site equal to my_site_no AND addr_incarn equal to my_site_incarn.
 * The argument and the whole expansion are parenthesized so the macro is safe
 * with pointer expressions and inside larger conditions.  Note that addr is
 * evaluated twice, so do not pass an expression with side effects.
 */
#define     addr_islocal(addr)  ((addr)->addr_site == my_site_no && (addr)->addr_incarn == my_site_incarn)