[comp.sys.sun] Need help in RPC/Light Weight Process combo.

andy@uunet.uu.net (andrew sulistyo) (09/13/89)

I need help from you guys, the programming whizzes or anyone, to find the
'bugs' in my server program.  I wrote a program under SunOS 4.0 that is
supposed to function as a multi-threaded file server, i.e. capable of
processing many requests from remote clients at once.  The method that I
use is a combination of Remote Procedure Call (RPC) and the Light Weight
Process (lwp) library that is available on my SUN workstation.  I use lwp
because I don't want to fork UNIX heavy weight processes (--> expensive).

This is how I want the file server to work (I also list a summary of my
program below).  Run the server in the background.  It will do the usual
RPC stuff.  Then it will spawn lwp's called scheduler() and svc_run_lwp().
This svc_run_lwp() is similar to the one explained in the SUN Network
Programming Manual, except that it is an lwp now.  The svc_run_lwp()
thread is to wait for client requests (for connection or service) and
create lwp client threads (the svc_run_lwp() thread and the client threads,
if any, are created with priority MINPRIO).  The scheduler(), which is
created with the highest priority, is to sleep and reschedule the MINPRIO
threads.  If a remote client creates a connection with the server and
requests a service, the svc_run_lwp() thread will spawn a client thread to
do the job.  This client thread is destroyed after fulfilling the request.

#include <stdio.h>
#include <fcntl.h>
#include <signal.h>
#include <rpc/rpc.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/types.h>

#include <lwp/lwp.h>
#include <lwp/stackdep.h>
#include <lwp/lwperror.h>

/* File Server include file */
#include "Server.h"

#define MAXPRIO 10		/* Priority passed to lwp_create() for the scheduler() thread in main(). */

/*
 * Thread id's for the LWP.
 * MAXREQ is not defined in the code shown; presumably it comes from
 * "Server.h" -- confirm.
 */
thread_t clnt[MAXREQ];		/* Client requests' thread id's; filled in by svc_run_lwp() */
thread_t sch;			/* Scheduler's thread id (set by lwp_create() in main()) */
thread_t conn;			/* Connection listener thread id (set by lwp_create() in main()) */

int ind = -1;    		/* Index of the most recently used clnt[] slot; -1 until the
				 * first request.  Incremented in svc_run_lwp(), decremented
				 * in detectfinish(). */
int numfinish = 0;		/* The number of LWP's that are done.  Not referenced by any
				 * of the code shown here. */
extern int errno;		/* Checked after select() fails in svc_run_lwp(). */

/* RPC dispatch routine registered with svc_register() in main(). */
static void archserverprog_1();
/* Declared here but neither defined nor called in the code shown. */
int sig_catch();

/*
 * main - register the ARCHSERVERPROG RPC service on UDP and TCP, then
 * start the two LWP threads that drive the server.
 *
 * Steps:
 *   1. Install if_pod_exits() (defined elsewhere) as an exit handler.
 *   2. Drop any stale portmapper registration and register
 *      archserverprog_1() for both transports; exit(1) on any failure.
 *   3. Make priorities up to MAXPRIO available, set up the stack cache,
 *      and create scheduler() at MAXPRIO and svc_run_lwp() at MINPRIO.
 *   4. Lower main's own priority to MINPRIO.
 *
 * argc/argv are unused.
 */
main(argc, argv)
     int argc;
     char *argv[];
{
     SVCXPRT *transp;
     int svc_run_lwp(), scheduler();
     extern void if_pod_exits();

     on_exit(if_pod_exits, (caddr_t) 0);

     /* Remove any registration left behind by a previous server instance. */
     (void)pmap_unset(ARCHSERVERPROG, ARCHSERVERVERS);

     transp = svcudp_create(RPC_ANYSOCK);
     if (transp == NULL) {
	  (void)fprintf(stderr, "cannot create udp service.\n");
	  exit(1);
     }
     if (!svc_register(transp, ARCHSERVERPROG, ARCHSERVERVERS, archserverprog_1, IPPROTO_UDP)) {
	  (void)fprintf(stderr, "unable to register (ARCHSERVERPROG, ARCHSERVERVERS, udp).\n");
	  exit(1);
     }

     transp = svctcp_create(RPC_ANYSOCK, 0, 0);
     if (transp == NULL) {
	  (void)fprintf(stderr, "cannot create tcp service.\n");
	  exit(1);
     }
     if (!svc_register(transp, ARCHSERVERPROG, ARCHSERVERVERS, archserverprog_1, IPPROTO_TCP)) {
	  (void)fprintf(stderr, "unable to register (ARCHSERVERPROG, ARCHSERVERVERS, tcp).\n");
	  exit(1);
     }

     /*
      * The scheduler thread below is created at MAXPRIO, so that priority
      * must be made available first.  The original call pod_setmaxpri(1)
      * left only priority 1 usable, which makes the MAXPRIO lwp_create()
      * fail (and the failure was hidden by the (void) cast).
      */
     (void) pod_setmaxpri(MAXPRIO);

     (void) lwp_setstkcache(10000, 20);

     /* Create the scheduler for light weight process. */
     if (lwp_create(&sch, scheduler, MAXPRIO, 0, lwp_newstk(), 0) < 0) {
	  (void)fprintf(stderr, "cannot create scheduler thread.\n");
	  exit(1);
     }

     /* Create a thread that waits for (listen to) the client request at the "readfds". */
     if (lwp_create(&conn, svc_run_lwp, MINPRIO, 0, lwp_newstk(), 0) < 0) {
	  (void)fprintf(stderr, "cannot create svc_run_lwp thread.\n");
	  exit(1);
     }

     /*
      * Drop main to the same priority as the worker threads.
      * NOTE(review): main then falls off its end; what that does to the
      * other threads depends on the LWP library -- confirm.
      */
     lwp_setpri(SELF, MINPRIO);
}

/*
 * archserverprog_1 - RPC dispatch routine registered in main() for
 * (ARCHSERVERPROG, ARCHSERVERVERS) on both UDP and TCP.
 *
 *   rqstp  - the incoming request; rq_proc selects the procedure.
 *   transp - transport handle used to decode arguments and send the reply.
 *
 * Decodes the arguments, calls the selected handler, sends its result
 * back, and frees the decoded arguments.
 *
 * NOTE: the body of the switch is elided in this listing, and `local',
 * `xdr_argument' and `xdr_result' are not declared anywhere in the code
 * shown; presumably they are declared in the full source and assigned
 * inside the switch -- confirm against the real file.
 */
static void archserverprog_1(rqstp, transp)
struct svc_req *rqstp;
SVCXPRT *transp;
{
     /* Storage large enough for the arguments of either procedure. */
     union {
	  ReadFile read_file_1_arg;
	  WriteFile write_file_1_arg;
     } argument;
     char *result;

     /* Select the handler for the requested procedure (cases elided in the post). */
     switch (rqstp->rq_proc) {
	case:
	   .
	   .
     }
     /* Start from a zeroed argument area before decoding into it. */
     bzero((char *)&argument, sizeof(argument));
     if (!svc_getargs(transp, xdr_argument, &argument)) {
	  /* Arguments could not be decoded: report it to the client and give up. */
	  svcerr_decode(transp);
	  return;
     }
     result = (*local)(&argument, rqstp);
     /* A NULL result sends no reply at all; only a failed send is reported. */
     if (result != NULL && !svc_sendreply(transp, xdr_result, result)) {
	  fprintf(stderr, "ArServ_svc.c: archserverprog_1(), sendreply fails.\n");
	  svcerr_systemerr(transp);
     }
     /* Failure to free the decoded arguments terminates the whole server. */
     if (!svc_freeargs(transp, xdr_argument, &argument)) {
	  (void)fprintf(stderr, "unable to free arguments\n");
	  exit(1);
     }
}


svc_run_lwp()
{
     fd_set readfds;
     int client_req_lwp();
     extern void detectfinish();

     for (;;)
     {
	  readfds = svc_fdset;

	  fprintf(stderr, "\tsvc_run thread is running.\n"); /* for debugging */

	  switch (select(_rpc_dtablesize(), &readfds, NULL, NULL, &timeout))
	  {
	     case -1:
	       if (errno == EINTR)   /* <--- get EINTR all the time, why */
	       {
		    fprintf(stderr, "errno = %d\n", errno);
		    continue;
	       }

	       perror("ArServ_svc.c: svc_run(): select");
	       return;

	     case 0:
	       break;

	     default:
	       fprintf(stderr, "\nsvc_run_lwp(): DEFAULT: CLIENT REQUEST COMES IN.\n");

	       /* When the first request comes, 'ind' should start from 0 (not -1). */
	       ind++;
	       (void) lwp_create(&clnt[ind], client_req_lwp, MINPRIO, 0, lwp_newstk(), 1, &readfds);
	       lwp_yield(clnt[ind]);
	  }
     }
}

client_req_lwp(readfd)
     fd_set *readfd;
{
     extern void detectfinish();

     (void) exc_on_exit(detectfinish, (caddr_t) &clnt[ind]);

     /* Process the client request with the corresponding procedure. */
     svc_getreqset(readfd);
}

/*
 * detectfinish - exit handler installed by client_req_lwp().
 *
 *   cl - address of the clnt[] entry holding the id of the thread that
 *        is finishing (passed as a caddr_t by exc_on_exit()).
 *
 * Gives back one clnt[] slot by decrementing `ind' and destroys the
 * thread named by *cl.
 *
 * Differences from the original listing: the unused local `n' is gone,
 * and `ind' is never pushed below its "no slot in use" value of -1, so a
 * spurious extra call cannot make svc_run_lwp() index clnt[] with a
 * negative number.
 *
 * NOTE(review): decrementing `ind' treats clnt[] as a stack, which is
 * only right when threads finish in the reverse of the order they were
 * created.
 * NOTE(review): this runs as the exit handler of the very thread it then
 * passes to lwp_destroy(); whether destroying an already-exiting thread
 * is safe (the post reports a crash in the library's stkreaper thread)
 * should be checked against the LWP documentation.
 */
void detectfinish(cl)
     caddr_t cl;		/* thread id of a lwp that will be destroyed. */
{
     thread_t exit_thread;

     exit_thread = *(thread_t *)(cl);

     if (ind >= 0)
	  ind--;		/* decrement the array index. */
     lwp_destroy(exit_thread);
}

/*
 * scheduler - body of the thread main() creates at MAXPRIO.
 *
 * Never returns.  Each pass sleeps for half a second, prints a debug
 * line, and then calls lwp_resched() on the MINPRIO threads (the
 * listener and any client threads) so that they take turns.
 */
scheduler()
{
     struct timeval nap;	/* length of one sleep: 0.5 s */

     nap.tv_usec = 500000;
     nap.tv_sec = 0;

     while (1)
     {
	  lwp_sleep(&nap);
	  fprintf(stderr, "scheduler thread running.\n");   /* for debugging*/
	  lwp_resched(MINPRIO);
     }
}

I had tested the RPC program without the lwp and it worked perfectly.

	My problems are:

1. Why does 'select()' in svc_run_lwp() return -1 with errno == EINTR (it
   doesn't happen without lwp)?  Why is this EINTR generated in this RPC/lwp
   combo?

2. I tested the RPC/lwp server with 2 clients: clnt #1 and clnt #2.  Clnt #1
   and #2 get their socket connections.  Clnt #1 requests a service, and this
   request takes 1 minute to be completed by the server.  While the server is
   processing this (client thread #1), clnt #2 sends a request that takes no
   time for the server to complete.  Once clnt #2 gets the reply, it sends
   another request and gets the reply back.  When the minute is up, the
   server returns the reply to clnt #1 and clnt #1 gets it.  As soon as
   thread #1 dies, the scheduler() and svc_run_lwp() threads stop (this is
   not the case when clnt #2 only sends 1 request).  Why do they stop (stop
   printing the debug messages)?  Are they blocked by something that I am
   not aware of?  I also don't understand why at this point the server can
   still accept a connection request (say, from clnt #3) but won't process
   any request for service from any client (clients will time out since they
   don't get a reply from the server).  WHY?  Is it because of an lwp stack
   problem?  (When I force the server to core dump, it dies in a special lwp
   thread, stkreaper(), which is created by the lwp library.)

   I would really be thankful if you can send your ideas/solutions/suggestions 
   to solve these problems.  

   My email address is:   andy@jtsv16.jts.com          ---> try to use this.
                       or {suncan|geac|uunet|}!jtsv16!andy

	Thank you.   				
        (Andrew Sulistyo)