ken@gvax.cs.cornell.edu (Ken Birman) (01/09/91)
We have fixed a problem that affects multicasts with replies to
groups containing members that joined using "isis_remote". Such
multicasts sometimes returned prematurely, as if some of the non-remote
processes had sent null replies.
The problem is in clib/cl_inter.c:net_send(...). At the end of this
procedure you will find a section of code that starts:
while(!aptr_isnull(to))
{
if(!addr_ismine(to))
{
register qnode *qp = pg_find(ISIS_IO, to);
register ioq *ioqp;
if(qp == 0)
{
register msgid = msg_getid(mp);
if(exmode == -1 && (msgid&1)) /* ISIS -> remote_xxx */
{
register message *rmsg = msg_newmsg();
msg_addfield(rmsg, FLD_ISNULLREP, 0, 0, 0);
msg_setid(rmsg, msgid);
msg_setsender(rmsg, to);
fbcast_l("mR", msg_getreplyto(mp), GENERIC_RCV_REPLY, rmsg, 0);
}
if(callback)
(*callback)(to, arg0, arg1);
}
else ... etc ...
Change this to the following:
while(!aptr_isnull(to))
{
if(!addr_ismine(to) && addr_islocal(to) && addr_isrclient(to))
{
register qnode *qp = pg_find(ISIS_IO, to);
register ioq *ioqp;
if(qp == 0)
{
register msgid = msg_getid(mp);
address save;
save = my_address;
if(exmode == -1 && (msgid&1)) /* ISIS -> remote_xxx */
{
register message *rmsg = msg_newmsg();
msg_addfield(rmsg, FLD_ISNULLREP, 0, 0, 0);
msg_setid(rmsg, msgid);
my_address = *to;
/* nullreply() on behalf of failed remote process */
fbcast_l("mR", msg_getreplyto(mp), GENERIC_RCV_REPLY, rmsg, 0);
}
my_address = save;
if(callback)
(*callback)(to, arg0, arg1);
}
else ... etc ...
You will also need to fix a macro: addr_islocal in mlib/msg.h.
Change it to the following:
#define addr_islocal(addr) (addr->addr_site == my_site_no && addr->addr_incarn == my_site_incarn)