[comp.sys.apollo] Getting load statistics from remote nodes

i91@np1.hep.nl (Fons Rademakers) (03/05/89)

Some people were asking for the routines to get load statistics from
remote nodes. You are lucky, I still had some lying around that do
exactly that. Even better, there is a simple demo program included that
uses these routines. To make it work on your system, replace the node
numbers in the tgl.c file with the ones appropriate for your network. The program
works on SR9.7 and SR10. For SR10, however, have a look at the lines
containing the string "rdm". Since I do not yet have SR10 running I had
to make these changes.

   Concerning my own program "load_stat", there was a problem compiling and
running it on SR10 systems. When compiled on SR9.7 it worked on SR10, but not
when compiled on SR10. I finally had the chance to look at it on an SR10
system and found the problem pretty quickly. It seems that the SR10 Fortran
compiler is not handling the initialization of the variable
bitmap_size well. So to make it compile and run on SR10, replace the line
      data bitmap_size /1280, 1280/
with the lines
      bitmap_size(1) = 1280
      bitmap_size(2) = 1280
and everything works fine. Could somebody (at Apollo) maybe look into this
problem? Why is bitmap_size(1)=0, bitmap_size(2)=0 when the program starts?
The ts of the ftn compiler used is: 3 FTN    1988/06/28 15:12:32  /com/ftn.

BTW: with the remote load statistics routines it is trivial to change
     "load_stat" to a "remote_load_stat".

Cheers, Fons Rademakers.

======================== remote load statistics routines =============

# This is a shell archive.  Remove anything before this line, then
# unpack it by saving it in a file and typing "sh file".  (Files
# unpacked will be owned by you and have default permissions.)
#
# This archive contains:
# asknode.h proc2.h tgl.c getloads.c

echo x - asknode.h
cat > "asknode.h" << '//E*O*F asknode.h//'
/*  ASKNODE.INS.PAS, /us/ins, phl, 01/09/84
    Inserts for node queries. */

/*"
     --------------------------------------------------------------------
    |THE FOLLOWING PROGRAMS ARE THE SOLE PROPERTY OF APOLLO COMPUTER INC.|
    |      AND CONTAIN ITS PROPRIETARY AND CONFIDENTIAL INFORMATION.     |
     --------------------------------------------------------------------
"*/
/* CHANGES:
  01/09/84 phl  added CONFIG variant -- add more stuff as needed.
  10/14/83 gms  added lword variant to asknode_$rqst_data_t.
  08/30/83 spf  added ask_upids function
  08/29/83 spf  upgraded proc1info data to proc2_$info_t and added
                proc2info to asknode_$reply_t.
  08/25/83 spf  added us_asknode_ins_pas declaration and conditional insert
                of network.ins.pas and proc1.ins.pas
  08/24/83 spf  added proc1info to asknode_$get_info
  08/24/83 spf  added asknode_$get_info
  06/10/83 tvc  added ask_ring_info
  03/28/83 rod  added  ask_clear_net to clear statistics
  11/15/82 phl  added  asknode key:  sys_uids, to reply into uid_list.      
  09/22/82 phl  added  asknode keys: ask_bldt, ask_net_root
  07/09/82 phl  added proc2list, proc2fault, proc2info
  07/09/82 phl  changed info to take a univ rec instead of an integer 
  03/24/82 GMS  reply.sm_cnt now array of sm_$cnt_t's.
  03/15/82 GMS  asknode_$cal, etc. all incorporated into asknode_$info.
                rqst, reply.version now = 2.
  09/15/81 PHL  added asknode_$cal.
  03/12/81 PHL  added asknode_$net_stats.
*/

/* ---------------------------------------------------------------------- */
/*
%IFDEF us_asknode_ins_pas %THEN
    %EXIT
%ELSE
    %VAR us_asknode_ins_pas
%ENDIF

%IFDEF NOT us_network_ins_pas %THEN
    %include '/us/ins/network.ins.pas';
%ENDIF

%IFDEF NOT us_proc1_ins_pas %THEN
    %include '/us/ins/proc2.ins.pas';
%ENDIF
*/
/* ---------------------------------------------------------------------- */

/*
CONST asknode_$version     = 2;
      asknode_$who_max     = 1000;
      asknode_$uidlist_max = 25;
      asknode_$infomax     = sizeof(network_$stats_rec_t);
*/
/* C equivalents of the Pascal CONST section quoted in the comment above. */
/* Value of the version field in request and reply records. */
#define asknode_$version 2
/* Upper bound of the node list returned by asknode_$who. */
#define asknode_$who_max 1000
/* Upper bound of the uid list in an ask_proc2list reply. */
#define asknode_$uidlist_max 25
/* NOTE(review): the Pascal original computes this as
   sizeof(network_$stats_rec_t); 256 is hard-coded here -- confirm. */
#define asknode_$infomax 256

/*TYPE asknode_$kind_t =                        */ /* ADD NEW REQUEST KINDS TO END ONLY! */
/*     (ask_who, who_r,                         */ /* asknode_$who */
/*      ask_time, time_r,                       */ /* asknode_$[get_]info */
/*      ask_node_root, node_root_r,             */ /* asknode_$[get_]info */
/*      ask_network_stats, network_stats_r,     */ /* asknode_$[get_]info */
/*      ask_cal, cal_r,                         */ /* asknode_$[get_]info */
/*      ask_volx, volx_r,                       */ /* asknode_$[get_]info */
/*      ask_diskless, diskless_r,               */ /* asknode_$[get_]info */
/*      ask_failure_report, failure_report_r,   */ /* asknode_$read_failure_rec */
/*      ask_sm, sm_r,                           */ /* asknode_$[get_]info */  
/*      ask_proc2list, proc2list_sr,            */ /* asknode_$[get_]info */  
/*      ask_proc2info, proc2info_sr,            */ /* asknode_$[get_]info */
/*      ask_proc2fault,proc2fault_sr,           */ /* asknode_$[get_]info */
/*      ask_net_root,  net_root_r,              */ /* asknode_$[get_]info */
/*      ask_bldt, bldt_r,                       */ /* asknode_$[get_]info */
/*      ask_sysuids, sysuids_r,                 */ /* asknode_$[get_]info */   /* //, /, `node_data */
/*      ask_clear_net,                          */ /* asknode_$[get_]info - clear network stats */
/*      ask_ring_info, ask_ring_info_r,         */ /* asknode_$[get_]info - produces results even if target can't page */
/*      ask_proc1info, proc1info_sr,            */ /* asknode_$[get_]info */
/*      ask_upids, upids_sr,                    */ /* asknode_$[get_]info */
/*      ask_log, log_r,                         */ /* ??? */
/*     ask_config,  config_r);                  */ /* asknode_$[get_]info - return system configuration info */
/*
 * Request/reply kind codes, in the same order as the Pascal enumeration
 * quoted above.  Each ask_* request kind is immediately followed by its
 * reply kind, except ask_clear_net which has no reply kind.  The numeric
 * values follow from the position in the list (ask_who is 0), so new
 * kinds may only be appended at the end.
 * NOTE(review): "short enum" is presumably a compiler extension that
 * gives the enum a 16-bit representation -- confirm.
 */
typedef short enum
{
	ask_who = 0, who_r,
	ask_time, time_r,
	ask_node_root, node_root_r,
	ask_network_stats, network_stats_r,
	ask_cal, cal_r,
	ask_volx, volx_r,
	ask_diskless, diskless_r,
	ask_failure_report, failure_report_r,
	ask_sm, sm_r,
	ask_proc2list, proc2list_sr,
	ask_proc2info, proc2info_sr,
	ask_proc2fault,proc2fault_sr,
	ask_net_root,  net_root_r,
	ask_bldt, bldt_r,
	ask_sysuids, sysuids_r,
	ask_clear_net,
	ask_ring_info, ask_ring_info_r,
	ask_proc1info, proc1info_sr,
	ask_upids, upids_sr,
	ask_log, log_r,
	ask_config,  config_r
} asknode_$kind_t;

/*
TYPE ask_log_t = record */
/*   ring_cmd : integer;   /* ringlog command */
/*   netl_cmd : integer;   /* netlog command */
/*   node     : node_t;    /* to node for netlog */
/*   sock     : integer;   /* to sock for netlog */
/*   kind     : integer;   /* kinds to log */
/*   end;
*/
/*
TYPE asknode_$rqst_t = RECORD
        version : pinteger;
        kind    : asknode_$kind_t;
        CASE integer OF
            1 : (node   : node_t;       /* for asknode_$who request */
/*                 diecnt : integer);    
/*            2 : (volx   : volx_t);      /* for asknode_$info volx request */
/*            3 : (unit   : integer);     /* for asknode_$info sm request */
/*            4 : (procuid: uid_t;        /* for asknode_$info proc2 request */
/*                 procdat: linteger);
/*            5 : (fail_type : integer32);/* for ask_failure_report */
/*            6 : (pid    : pid_t);       /* for asknode_$info proc1 request */
/*            7 : (ask_log_stuff : ask_log_t);  /* for asknode_$log request */
/*            8 : (lword  : linteger);    /* just for copying for rmt request */
/*            END;
*/
/*
 * C rendering of the Pascal asknode_$rqst_t record quoted above: a
 * version, a request kind, and a variant part selected by the kind.
 * The union members are named after the Pascal case labels (1..8).
 */
typedef struct
{
	unsigned short version;
	asknode_$kind_t kind;
	union
	{
		struct
		{
			unsigned long node;
			short diecnt;
		} one;		/* case 1: asknode_$who request */
		struct
		{
			/* stands in for the Pascal volx_t; the 256-byte size is an
			   assumption of this translation -- TODO confirm */
			char volx[256];
		} two;		/* case 2: asknode_$info volx request */
		struct
		{
			short unit;
		} three;	/* case 3: asknode_$info sm request */
		struct
		{
			uid_$t procuid;
			unsigned long procdat;
		} four;		/* case 4: asknode_$info proc2 request */
		struct
		{
			long fail_type;
		} five;		/* case 5: ask_failure_report */
		struct
		{
			/* declared unsigned short here, although proc2.h typedefs
			   pid_t as unsigned char */
			unsigned short pid;
		} six;		/* case 6: asknode_$info proc1 request */
		struct
		{
			/* stands in for the Pascal ask_log_t record; size is an
			   assumption of this translation -- TODO confirm */
			char ask_log_stuff[256];
		} seven;	/* case 7: asknode_$log request */
		struct
		{
			unsigned long lword;
		} height;	/* case 8: copy helper for remote requests;
				   the name is presumably a typo for "eight" */
	} data;
} asknode_$rqst_t;

/*
TYPE
/*     asknode_$rqst_data_t = RECORD  CASE integer OF
/*            1 : (node   : node_t;       /* for asknode_$who request */
/*                 diecnt : integer);    
/*            2 : (volx   : volx_t);      /* for asknode_$info volx request */
/*            3 : (unit   : integer);     /* for asknode_$info sm request */
/*            4 : (procuid: uid_t;        /* for asknode_$info proc2 request */
/*                 procdat: linteger);    
/*            5 : (fail_type : integer32);/* for ask_failure_report */
/*            6 : (pid    : pid_t);       /* for asknode_$info proc1 request */
/*            7 : (ask_log_stuff : ask_log_t);  /* for asknode_$log request */
/*            8 : (lword  : linteger);    /* just for copying for rmt request */
/*            END;
*/
/*
 * Variant part of a request on its own, as passed in the rqstdat
 * argument of asknode_$info / asknode_$get_info.  Members are named
 * after the Pascal case labels (1..8) quoted above.
 */
typedef union
{
	struct
	{
		unsigned long node;
		short diecnt;
	} one;		/* case 1: asknode_$who request */
	struct
	{
		/* stands in for the Pascal volx_t; size is an assumption of
		   this translation -- TODO confirm */
		char volx[256];
	} two;		/* case 2: asknode_$info volx request */
	struct
	{
		short unit;
	} three;	/* case 3: asknode_$info sm request */
	struct
	{
		uid_$t procuid;
		unsigned long procdat;
	} four;		/* case 4: asknode_$info proc2 request */
	struct
	{
		long fail_type;
	} five;		/* case 5: ask_failure_report */
	struct
	{
		unsigned short pid;
	} six;		/* case 6: asknode_$info proc1 request */
	struct
	{
		/* stands in for the Pascal ask_log_t record; size is an
		   assumption of this translation -- TODO confirm */
		char ask_log_stuff[256];
	} seven;	/* case 7: asknode_$log request */
	struct
	{
		unsigned long lword;
	} height;	/* case 8: copy helper for remote requests;
			   the name is presumably a typo for "eight" */
} asknode_$rqst_data_t;

/*
TYPE
/*    asknode_$txt_t = array [1..100] of char;
/*
/*    asknode_$reply_t = RECORD
/*        version : pinteger;
/*        kind    : asknode_$kind_t;
/*        status  : status_t;
/*        CASE integer OF
/*            1 : (cal_timezone_rec: cal_timezone_rec_t);   /* ask_cal */
/*            2 : (partner: node_t;                         /* ask_diskless */
/*                 diskless:boolean);
/*            3 : (net_stats_rec:network_$stats_rec_t);     /* ask_network_stats */
/*            4 : (uid:uid_t);                              /* ask_node_root */
/*            5 : (sm_cnt : array[0..3] of sm_$cnt_t);      /* ask_sm */
/*            6 : (btime:integer32;                         /* ask_time */
/*                 ctime:integer32);
/*            7 : (ed_uid: uid_t;                           /* ask_volx */
/*                 n_free: linteger;
/*                 n_blk : linteger);
/*            8 : (node:node_t);                            /* ask_who */
/*            9 : (uidlist_cnt: integer;                    /* ask_proc2list */
/*                 uidlist : array [1..asknode_$uidlist_max] of uid_t);
/*            10: (procinfo: array [1..asknode_$infomax] of char);/* ask_proc[1/2]info */
/*            11: (txt_len: integer;                        /* ask bldt */
/*                 txt: asknode_$txt_t);
/*            12: (ring_diag_info :                         /* ask_ring_info */
/*                    network_$psrv_ringinfo_t);
/*            13: (proc1info: proc2_$info_t);               /* ask_proc1info */
/*            14: (proc2info: proc2_$info_t);               /* ask_proc2info */
/*            15: (upid:   integer;                         /* ask_upids */
/*                 uppid:  integer;
/*                 upgid:  integer);
/*            16: (config_valid_cnt: integer;   /* cnt of valid flds in config */
/*                                              /*e.g. 1 says Mach_id is OK,  2 says MACH_ID & AUX_INFO are OK*/
/*                 config_mach_id:   integer;   /* Machine ID -- from prom_$machine_id */
/*                 config_aux_info:  integer;   /* aux_info   -- from prom_$machine_id */
/*                 config_disp_type: integer;   /* Display type */
/*                 config_peb_present:boolean); /* got a PEB??? */
/*
/*            END;
*/
/*
 * C rendering of the Pascal asknode_$reply_t record quoted above.
 * Only some of the sixteen Pascal variants are translated; the union
 * members keep the Pascal case numbers as names.
 */
typedef struct
{
	unsigned short version;
	asknode_$kind_t kind;
	status_$t status;	/* status reported inside the reply itself */
	union
	{
		struct
		{
			unsigned long partner;
			unsigned char diskless;
		} two;		/* case 2: ask_diskless */
		struct
		{
			/* case 3: ask_network_stats.  The Pascal record holds a
			   network_$stats_rec_t; only the counters below are
			   named and the ns1..ns5 arrays are filler between them.
			   NOTE(review): the offsets presumably assume 2-byte
			   alignment of unsigned long (nid directly after
			   ns1[2]) -- confirm on the target compiler. */
			unsigned char ns1[2];
			unsigned long nid;
			unsigned char ns2[24];
			unsigned long netxmit;
			unsigned char ns3[18];
			unsigned long netrcv;
			unsigned char ns4[28];
			unsigned long diskr;
			unsigned long diskw;
			unsigned char ns5[932];
		} three;
		struct
		{
			unsigned long btime;
			unsigned long ctime;
		} six;		/* case 6: ask_time */
		struct
		{
			short uidlist_cnt;
			proc2_$uid_list_t uidlist;
		} nine;		/* case 9: ask_proc2list */
		struct
		{
			proc2_$info_add_t proc1info;
		} thirteen;	/* case 13: ask_proc1info */
		struct
		{
			proc2_$info_add_t proc2info;
		} fourteen;	/* case 14: ask_proc2info */
	} data;
} asknode_$reply_t;

/*
TYPE
   asknode_$node_list_t = ARRAY [1..asknode_$who_max] OF node_t;
   asknode_$tzname_t    = ARRAY [1..4] of char;
*/
/* ---------------------------------------------------------------------- */
/*
PROCEDURE asknode_$info (
    IN  kind   : asknode_$kind_t;
    IN  node   : node_t;
    IN  rqstdat: univ asknode_$rqst_data_t;
    OUT reply  : asknode_$reply_t;
    OUT status : status_t
    ); EXTERN;
*/                           
/* asknode_$info returns a record containing requested info from the local
  or a remote node. */

/* ---------------------------------------------------------------------- */
/*
PROCEDURE asknode_$get_info (
    IN  kind   : asknode_$kind_t;
    IN  node   : node_t;
    IN  rqstdat: univ asknode_$rqst_data_t;
    IN  length : pinteger;
    OUT reply  : asknode_$reply_t;
    OUT status : status_t
    ); EXTERN;
*/
/* asknode_$get_info returns a record containing requested info from the local
  or a remote node. */

/* ---------------------------------------------------------------------- */
/*
PROCEDURE asknode_$read_failure_rec
   (out fail_rec: network_$failure_rec_t); EXTERN;
*/
/* --------------------------------------------------------------------- */
/*
PROCEDURE asknode_$report_failure; EXTERN;
*/
/* --------------------------------------------------------------------- */
/*
PROCEDURE asknode_$server; EXTERN;
*/
/* This routine is called to process an incoming network info request. */

/* --------------------------------------------------------------------- */
/*
PROCEDURE asknode_$who
   (OUT node_list : UNIV asknode_$node_list_t;
    IN  maxcnt : pinteger;
    OUT nodcnt : pinteger); EXTERN;
*/
/*This routine returns a list of the nodes that are currently responding.
 Today, it uses a ring broadcast message to solicit replies, then collects
 the responses.  In case some nodes miss the solicitation because of disk
 or other ring activity, this routine sends the WHO request "loopcnt" times. */

/* ---------------------------------------------------------------------- */

/*%EJECT*/
/*
 * Old-style declarations (no parameter lists) of the asknode entry
 * points; the Pascal signatures are quoted in the comments above.
 * getloads.c calls asknode_$info with the address of every argument.
 */
void asknode_$info();
void asknode_$get_info();
void asknode_$read_failure_rec();
void asknode_$report_failure();
void asknode_$server();
void asknode_$who();
//E*O*F asknode.h//

echo x - proc2.h
cat > "proc2.h" << '//E*O*F proc2.h//'

/* One more than proc1_$n_user_processes (which is not defined in this
   header).  Both macro bodies are parenthesized so they expand safely
   inside larger expressions such as 2 * proc2_$max_np. */
#define proc2_$max_np			(proc1_$n_user_processes+1)
#define proc2_$dont_reuse_stacks	(-1)


/* bits in proc2_$state field of info structure */

#define proc2_$waiting 1
#define proc2_$suspended 2
#define proc2_$susp_pending 4
#define proc2_$bound 8

/* Level 1 process ID, as used in proc1_$liste_t below. */
typedef unsigned char pid_t;

/* Number of entries in a proc1_$list_t. */
#define pid_tn 64

/* Subject identifier stored in proc2_$info_add_t: four uids plus a node
   id.  NOTE(review): the meaning of the four uid slots is not visible
   here -- confirm against the system acl insert file. */
typedef struct {
	uid_$t a[4];
	unsigned long nodeid;
} acl_$sid;

/* Level 1 process types, stored in the proc1_type field of
   proc1_$liste_t.  Values follow from list position, starting at 0
   for proc1_$nil. */
typedef  short enum {
           proc1_$nil,
           proc1_$level_1, 
           proc1_$level_2,
           proc1_$null_process,
           proc1_$wired_dxm,
           proc1_$page_purifier,
           proc1_$unwired_dxm,
           proc1_$net_receive_server,
           proc1_$net_paging_server,
           proc1_$net_request_server,
           proc1_$mem_lights_process,
           proc1_$initial_system_process,
           proc1_$router_process,
           proc1_$iic_guardian_process
} proc1_$type_t;

/* One entry of the level 1 process list: a process ID and its type. */
typedef struct {
      pid_t pid;		          /* level 1 process ID */
      proc1_$type_t proc1_type;   /* level 1 process type */
} proc1_$liste_t;

/* Fixed-size table of pid_tn level 1 process entries. */
typedef proc1_$liste_t   proc1_$list_t[pid_tn];

/*
 * Per-process information record.  asknode.h uses it as the payload of
 * the ask_proc1info and ask_proc2info replies, and getloads.c reads the
 * cpu_total field from it.
 * NOTE(review): linteger, pinteger, boolean, proc2_$state_t and
 * time_$clock_t are not defined in this header; presumably they come
 * from the /sys/ins insert files included by getloads.c -- confirm.
 */
typedef struct {
	uid_$t		stack_uid;	/* uid of user stack */
	linteger	stack_base;	/* base address of user stack */
	proc2_$state_t	procstate;	/* ready, waiting, etc. */
	pinteger	usr;		/* user sr */
	linteger	upc;		/* user pc */
	linteger	usp;		/* user stack pointer */
	linteger	usb;		/* user sb ptr (A6) */
	time_$clock_t	cpu_total;	/* cumulative cpu used by process */
	unsigned short	priority;	/* process priority */
	acl_$sid	sid;
	uid_$t		pgroup;
	boolean		is_server;
	boolean		pad;
	pinteger	min_pri;
	pinteger	max_pri;
	linteger	priv_faults;
	linteger	glob_faults;
	linteger	disk_page_io;
	linteger	net_page_io;
	uid_$t		orig_pgroup;
	short		orig_upgid;
} proc2_$info_add_t;


//E*O*F proc2.h//

echo x - tgl.c
cat > "tgl.c" << '//E*O*F tgl.c//'
#include <stdio.h>

/*
 * Demo driver for the remote load routines in getloads.c.
 *
 * Registers every node in nodes[] with loadav_init(), then every five
 * seconds asks loadav() for each node's figures since the previous call
 * and prints them.  Any non-zero return from either routine is printed
 * and the program exits with status 1; otherwise it loops forever.
 */
main()
{
	/* Replace these with the node IDs of your own network; the node
	   count is derived from the initializer. */
	static unsigned long nodes[] = {0x13919, 0x4950, 0x91a3};
	int nnodes = sizeof(nodes) / sizeof(nodes[0]);
	/* loadav() writes only the cpu, disk and net fields of its result,
	   so this struct must keep the same leading layout as nodestat_t
	   in getloads.c (double, long, long). */
	struct { double fc; long disk; long net; } stats;
	int i;
	int err;

	for (i = 0; i < nnodes; i++)
	{
		err = loadav_init(&nodes[i]);
		if (err != 0)
		{
			/* nodes[] holds unsigned long, hence %lx and not %x */
			printf("Error %x on node %lx\n", err, nodes[i]);
			exit(1);
		}
	}
	for (;;)
	{
		sleep(5);
		for (i = 0; i < nnodes; i++)
		{
			err = loadav(&nodes[i], &stats);
			if (err != 0)
			{
				printf("Loadav: Error %x on node %lx\n", err, nodes[i]);
				exit(1);
			}
			/* NOTE(review): loadav() stores idle cpu time divided by
			   elapsed time; whether that is a fraction or a percent
			   depends on the units of cal_$float_clock -- confirm
			   that the "(%)" label is right. */
			printf("Node %8lx: free cpu (%%) %6.2f, diskact %8ld, netact %8ld\n",
				nodes[i], stats.fc, stats.disk, stats.net);
		}
	}
	/* not reached: the loop above only ends through exit(1) */
}
//E*O*F tgl.c//

echo x - getloads.c
cat > "getloads.c" << '//E*O*F getloads.c//'
#include <stdio.h>
#include "/sys/ins/base.ins.c"
#include "/sys/ins/proc2.ins.c"
#include "/sys/ins/error.ins.c"
#include "/sys/ins/pfm.ins.c"
#include "/sys/ins/cal.ins.c"
#include "./proc2.h"
#include "./asknode.h"

/* NOTE(review): MAXNODES is not referenced anywhere in the visible
   code; the node list below is an unbounded linked list. */
#define MAXNODES 64

/*
 * Cumulative counters for one monitored node, as last sampled by
 * loadtot().  loadav() reports the difference between two samples.
 */
typedef struct nodestat_t
{
	double cpu;		/* cpu_total of the queried process, converted by cal_$float_clock */
	long disk;		/* diskr + diskw from the network stats reply */
	long net;		/* netrcv + netxmit from the network stats reply */
	unsigned long done;	/* ctime from the ask_time reply */
	unsigned long nid;	/* node id this entry describes */
	struct nodestat_t *next;	/* next registered node, or NULL */
} nodestat_t;

/* Head of the list of nodes registered through loadav_init(). */
static nodestat_t *ns = NULL;

/* NOTE(review): local declaration of malloc with a nodestat_t * return
   type; it will clash with any system header that also declares
   malloc -- confirm before adding such an include. */
extern nodestat_t *malloc();

int loadav_init(nid)
unsigned long *nid;
{
	nodestat_t *p;
	int i;

	p = ns;
	if ((ns = malloc(sizeof(nodestat_t))) == NULL)
	{
		ns = p;
		return(-1);
	}
	ns->nid = *nid;
	if (i = loadtot(ns))
	{
		free(ns);
		ns = p;
		return(i);
	}
	ns->next = p;
	return(0);
}
int loadav(nid,r)
unsigned long *nid;
nodestat_t *r;
{
	nodestat_t *p;
	nodestat_t *n;
	nodestat_t local;
	int i;
	double a;

	for (p = ns; p != NULL; p = p->next)
		if (p->nid == *nid) break;
	if (p == NULL) return(-1);
	n = &local;
	*n = *p;
	if (i=loadtot(p))
	{
		*p = *n;
		return(-2);
	}
	n->cpu = p->cpu - n->cpu;
	n->disk = p->disk - n->disk;
	n->net = p->net - n->net;
	n->done = p->done - n->done;
	a = n->done * 65536 * 4 / 1.0e06;
	r->cpu = n->cpu/a;
	r->disk = n->disk;
	r->net = n->net;
	return(0);
}
static int loadtot(p)
nodestat_t *p;
{
#define nullproc_pid 2

	asknode_$kind_t rmt;
	static asknode_$reply_t rep;
	asknode_$rqst_data_t rqst;
	status_$t st;
/*	time_$clock_t &nullcpu = rep.data.thirteen.proc1info.cpu_total;  sr10? rdm */
	time_$clock_t nullcpu;  /* rdm */


	rmt = ask_proc1info;
	rqst.pid = nullproc_pid;
	asknode_$info(&rmt,&p->nid,&rqst,&rep,&st);
	if (st.all != status_$ok) return(st.all);
	if (rep.status.all != status_$ok && rep.status.all != 0x190002) return(rep.status.all);
      nullcpu = rep.data.thirteen.proc1info.cpu_total;  /* rdm */
	cal_$float_clock(nullcpu,p->cpu);
	rmt = ask_network_stats;
	asknode_$info(&rmt,&p->nid,&rqst,&rep,&st);
	if (st.all != status_$ok) return(st.all);
	if (rep.status.all != status_$ok) return(rep.status.all);
	p->disk = rep.data.diskr + rep.data.diskw;
	p->net = rep.data.netrcv + rep.data.netxmit;
	rmt = ask_time;
	asknode_$info(&rmt,&p->nid,&rqst,&rep,&st);
	if (st.all != status_$ok) return(st.all);
	if (rep.status.all != status_$ok) return(rep.status.all);
	p->done = rep.data.six.ctime;
	return(0);
}
//E*O*F getloads.c//

exit 0
-- 
Org:    NIKHEF-H, National Institute for Nuclear and High-Energy Physics.
Mail:   Kruislaan 409, P.O. Box 41882, 1009 DB Amsterdam, the Netherlands
Phone:  (20)5925018 or 5925003                      Telex: 10262 (hef nl)
UUCP:   i91@nikhefh.hep.nl               BITNET: nikhefh!i91@mcvax.bitnet