i91@np1.hep.nl (Fons Rademakers) (03/05/89)
Some people were asking for the routines to get load statistics from remote nodes. You are lucky, I still had some lying around that do exactly that. Even better, there is a simple demo program included that uses these routines. To make it work on your system, replace the node numbers in the tgl.c file with the ones appropriate for your network. The program works on SR9.7 and SR10. For SR10, however, have a look at the lines containing the string "rdm". Since I do not yet have SR10 running, I had to make these changes. Concerning my own program "load_stat", there was a problem compiling and running it on SR10 systems. When compiled on SR9.7 it worked on SR10, but not when compiled on SR10. I finally had the chance to look at it on an SR10 system and found the problem pretty quickly. It seems that the SR10 Fortran compiler is not handling the initialization of the variable bitmap_size well. So to make it compile and run on SR10, replace the line data bitmap_size /1280, 1280/ with the lines bitmap_size(1) = 1280 bitmap_size(2) = 1280 and everything works fine. Could somebody (at Apollo) maybe look into this problem? Why is bitmap_size(1)=0, bitmap_size(2)=0 when the program starts? The ts of the ftn compiler used is: 3 FTN 1988/06/28 15:12:32 /com/ftn. BTW: with the remote load statistics routines it is trivial to change "load_stat" to a "remote_load_stat". Cheers, Fons Rademakers. ======================== remote load statistics routines ============= # This is a shell archive. Remove anything before this line, then # unpack it by saving it in a file and typing "sh file". (Files # unpacked will be owned by you and have default permissions.) # # This archive contains: # asknode.h proc2.h tgl.c getloads.c echo x - asknode.h cat > "asknode.h" << '//E*O*F asknode.h//' /* ASKNODE.INS.PAS, /us/ins, phl, 01/09/84 Inserts for node queries. 
*/ /*" -------------------------------------------------------------------- |THE FOLLOWING PROGRAMS ARE THE SOLE PROPERTY OF APOLLO COMPUTER INC.| | AND CONTAIN ITS PROPRIETARY AND CONFIDENTIAL INFORMATION. | -------------------------------------------------------------------- "*/ /* CHANGES: 01/09/84 phl added CONFIG variant -- add more stuff as needed. 10/14/83 gms added lword variant to asknode_$rqst_data_t. 08/30/83 spf added ask_upids function 08/29/83 spf upgraded proc1info data to proc2_$info_t and added proc2info to asknode_$reply_t. 08/25/83 spf added us_asknode_ins_pas declaration and conditional insert of network.ins.pas and proc1.ins.pas 08/24/83 spf added proc1info to asknode_$get_info 08/24/83 spf added asknode_$get_info 06/10/83 tvc added ask_ring_info 03/28/83 rod added ask_clear_net to clear statistics 11/15/82 phl added asknode key: sys_uids, to reply into uid_list. 09/22/82 phl added asknode keys: ask_bldt, ask_net_root 07/09/82 phl added proc2list, proc2fault, proc2info 07/09/82 phl changed info to take a univ rec instead of an integer 03/24/82 GMS reply.sm_cnt now array of sm_$cnt_t's. 03/15/82 GMS asknode_$cal, etc. all incorporated into asknode_$info. rqst, reply.version now = 2. 09/15/81 PHL added asknode_$cal. 03/12/81 PHL added asknode_$net_stats. 
*/ /* ---------------------------------------------------------------------- */ /* %IFDEF us_asknode_ins_pas %THEN %EXIT %ELSE %VAR us_asknode_ins_pas %ENDIF %IFDEF NOT us_network_ins_pas %THEN %include '/us/ins/network.ins.pas'; %ENDIF %IFDEF NOT us_proc1_ins_pas %THEN %include '/us/ins/proc2.ins.pas'; %ENDIF */ /* ---------------------------------------------------------------------- */ /* CONST asknode_$version = 2; asknode_$who_max = 1000; asknode_$uidlist_max = 25; asknode_$infomax = sizeof(network_$stats_rec_t); */ #define asknode_$version 2 #define asknode_$who_max 1000 #define asknode_$uidlist_max 25 #define asknode_$infomax 256 /* NOTE(review): the Pascal original defines this as sizeof(network_$stats_rec_t); 256 is a hard-coded value here -- confirm */ /*TYPE asknode_$kind_t = */ /* ADD NEW REQUEST KINDS TO END ONLY! */ /* (ask_who, who_r, */ /* asknode_$who */ /* ask_time, time_r, */ /* asknode_$[get_]info */ /* ask_node_root, node_root_r, */ /* asknode_$[get_]info */ /* ask_network_stats, network_stats_r, */ /* asknode_$[get_]info */ /* ask_cal, cal_r, */ /* asknode_$[get_]info */ /* ask_volx, volx_r, */ /* asknode_$[get_]info */ /* ask_diskless, diskless_r, */ /* asknode_$[get_]info */ /* ask_failure_report, failure_report_r, */ /* asknode_$read_failure_rec */ /* ask_sm, sm_r, */ /* asknode_$[get_]info */ /* ask_proc2list, proc2list_sr, */ /* asknode_$[get_]info */ /* ask_proc2info, proc2info_sr, */ /* asknode_$[get_]info */ /* ask_proc2fault,proc2fault_sr, */ /* asknode_$[get_]info */ /* ask_net_root, net_root_r, */ /* asknode_$[get_]info */ /* ask_bldt, bldt_r, */ /* asknode_$[get_]info */ /* ask_sysuids, sysuids_r, */ /* asknode_$[get_]info */ /* //, /, `node_data */ /* ask_clear_net, */ /* asknode_$[get_]info - clear network stats */ /* ask_ring_info, ask_ring_info_r, */ /* asknode_$[get_]info - produces results even if target can't page */ /* ask_proc1info, proc1info_sr, */ /* asknode_$[get_]info */ /* ask_upids, upids_sr, */ /* asknode_$[get_]info */ /* ask_log, log_r, */ /* ??? 
*/ /* ask_config, config_r); */ /* asknode_$[get_]info - return system configuration info */ /* C rendering of asknode_$kind_t: request kinds and their reply kinds, in the order listed in the Pascal declaration above; numbering starts at ask_who = 0 */ typedef short enum { ask_who = 0, who_r, ask_time, time_r, ask_node_root, node_root_r, ask_network_stats, network_stats_r, ask_cal, cal_r, ask_volx, volx_r, ask_diskless, diskless_r, ask_failure_report, failure_report_r, ask_sm, sm_r, ask_proc2list, proc2list_sr, ask_proc2info, proc2info_sr, ask_proc2fault,proc2fault_sr, ask_net_root, net_root_r, ask_bldt, bldt_r, ask_sysuids, sysuids_r, ask_clear_net, ask_ring_info, ask_ring_info_r, ask_proc1info, proc1info_sr, ask_upids, upids_sr, ask_log, log_r, ask_config, config_r } asknode_$kind_t; /* TYPE ask_log_t = record */ /* ring_cmd : integer; /* ringlog command */ /* netl_cmd : integer; /* netlog command */ /* node : node_t; /* to node for netlog */ /* sock : integer; /* to sock for netlog */ /* kind : integer; /* kinds to log */ /* end; */ /* TYPE asknode_$rqst_t = RECORD version : pinteger; kind : asknode_$kind_t; CASE integer OF 1 : (node : node_t; /* for asknode_$who request */ /* diecnt : integer); /* 2 : (volx : volx_t); /* for asknode_$info volx request */ /* 3 : (unit : integer); /* for asknode_$info sm request */ /* 4 : (procuid: uid_t; /* for asknode_$info proc2 request */ /* procdat: linteger); /* 5 : (fail_type : integer32);/* for ask_failure_report */ /* 6 : (pid : pid_t); /* for asknode_$info proc1 request */ /* 7 : (ask_log_stuff : ask_log_t); /* for asknode_$log request */ /* 8 : (lword : linteger); /* just for copying for rmt request */ /* END; */ /* C rendering of asknode_$rqst_t; the union members one..seven are named after the Pascal variant numbers 1..7 */ typedef struct { unsigned short version; asknode_$kind_t kind; union { struct { unsigned long node; short diecnt; } one; struct { char volx[256]; } two; struct { short unit; } three; struct { uid_$t procuid; unsigned long procdat; } four; struct { long fail_type; } five; struct { unsigned short pid; } six; struct { char ask_log_stuff[256]; } seven; struct { unsigned long lword; } height; /* NOTE(review): "height" is presumably a typo for "eight" (Pascal variant 8) -- renaming would change the interface */ } data; } asknode_$rqst_t; /* TYPE /* asknode_$rqst_data_t = RECORD CASE integer 
OF /* 1 : (node : node_t; /* for asknode_$who request */ /* diecnt : integer); /* 2 : (volx : volx_t); /* for asknode_$info volx request */ /* 3 : (unit : integer); /* for asknode_$info sm request */ /* 4 : (procuid: uid_t; /* for asknode_$info proc2 request */ /* procdat: linteger); /* 5 : (fail_type : integer32);/* for ask_failure_report */ /* 6 : (pid : pid_t); /* for asknode_$info proc1 request */ /* 7 : (ask_log_stuff : ask_log_t); /* for asknode_$log request */ /* 8 : (lword : linteger); /* just for copying for rmt request */ /* END; */ /* Request payload type named for the "rqstdat" argument in the Pascal declarations of asknode_$info and asknode_$get_info; one member per Pascal variant, so fields are reached as e.g. .six.pid */ typedef union { struct { unsigned long node; short diecnt; } one; struct { char volx[256]; } two; struct { short unit; } three; struct { uid_$t procuid; unsigned long procdat; } four; struct { long fail_type; } five; struct { unsigned short pid; } six; struct { char ask_log_stuff[256]; } seven; struct { unsigned long lword; } height; /* NOTE(review): presumably meant "eight" (Pascal variant 8) */ } asknode_$rqst_data_t; /* TYPE /* asknode_$txt_t = array [1..100] of char; /* /* asknode_$reply_t = RECORD /* version : pinteger; /* kind : asknode_$kind_t; /* status : status_t; /* CASE integer OF /* 1 : (cal_timezone_rec: cal_timezone_rec_t); /* ask_cal */ /* 2 : (partner: node_t; /* ask_diskless */ /* diskless:boolean); /* 3 : (net_stats_rec:network_$stats_rec_t); /* ask_network_stats */ /* 4 : (uid:uid_t); /* ask_node_root */ /* 5 : (sm_cnt : array[0..3] of sm_$cnt_t); /* ask_sm */ /* 6 : (btime:integer32; /* ask_time */ /* ctime:integer32); /* 7 : (ed_uid: uid_t; /* ask_volx */ /* n_free: linteger; /* n_blk : linteger); /* 8 : (node:node_t); /* ask_who */ /* 9 : (uidlist_cnt: integer; /* ask_proc2list */ /* uidlist : array [1..asknode_$uidlist_max] of uid_t); /* 10: (procinfo: array [1..asknode_$infomax] of char);/* ask_proc[1/2]info */ /* 11: (txt_len: integer; /* ask bldt */ /* txt: asknode_$txt_t); /* 12: (ring_diag_info : /* ask_ring_info */ /* network_$psrv_ringinfo_t); /* 13: (proc1info: proc2_$info_t); /* ask_proc1info */ /* 14: (proc2info: proc2_$info_t); /* ask_proc2info */ /* 15: 
(upid: integer; /* ask_upids */ /* uppid: integer; /* upgid: integer); /* 16: (config_valid_cnt: integer; /* cnt of valid flds in config */ /* /*e.g. 1 says Mach_id is OK, 2 says MACH_ID & AUX_INFO are OK*/ /* config_mach_id: integer; /* Machine ID -- from prom_$machine_id */ /* config_aux_info: integer; /* aux_info -- from prom_$machine_id */ /* config_disp_type: integer; /* Display type */ /* config_peb_present:boolean); /* got a PEB??? */ /* /* END; */ /* C rendering of asknode_$reply_t; only variants 2, 3, 6, 9, 13 and 14 of the Pascal record are declared. Member "three" stands in for network_$stats_rec_t: its members add up to 1024 bytes with 4-byte longs and no compiler padding, and the ns1..ns5 arrays are presumably filler that keeps nid, netxmit, netrcv, diskr and diskw at their offsets -- confirm against network.ins.pas */ typedef struct { unsigned short version; asknode_$kind_t kind; status_$t status; union { struct { unsigned long partner; unsigned char diskless; } two; struct { unsigned char ns1[2]; unsigned long nid; unsigned char ns2[24]; unsigned long netxmit; unsigned char ns3[18]; unsigned long netrcv; unsigned char ns4[28]; unsigned long diskr; unsigned long diskw; unsigned char ns5[932]; } three; struct { unsigned long btime; unsigned long ctime; } six; struct { short uidlist_cnt; proc2_$uid_list_t uidlist; } nine; struct { proc2_$info_add_t proc1info; } thirteen; struct { proc2_$info_add_t proc2info; } fourteen; } data; } asknode_$reply_t; /* TYPE asknode_$node_list_t = ARRAY [1..asknode_$who_max] OF node_t; asknode_$tzname_t = ARRAY [1..4] of char; */ /* ---------------------------------------------------------------------- */ /* PROCEDURE asknode_$info ( IN kind : asknode_$kind_t; IN node : node_t; IN rqstdat: univ asknode_$rqst_data_t; OUT reply : asknode_$reply_t; OUT status : status_t ); EXTERN; */ /* asknode_$info returns a record containing requested info from the local or a remote node. */ /* ---------------------------------------------------------------------- */ /* PROCEDURE asknode_$get_info ( IN kind : asknode_$kind_t; IN node : node_t; IN rqstdat: univ asknode_$rqst_data_t; IN length : pinteger; OUT reply : asknode_$reply_t; OUT status : status_t ); EXTERN; */ /* asknode_$get_info returns a record containing requested info from the local or a remote node. 
*/ /* ---------------------------------------------------------------------- */ /* PROCEDURE asknode_$read_failure_rec (out fail_rec: network_$failure_rec_t); EXTERN; */ /* --------------------------------------------------------------------- */ /* PROCEDURE asknode_$report_failure; EXTERN; */ /* --------------------------------------------------------------------- */ /* PROCEDURE asknode_$server; EXTERN; */ /* This routine is called to process an incoming network info request. */ /* --------------------------------------------------------------------- */ /* PROCEDURE asknode_$who (OUT node_list : UNIV asknode_$node_list_t; IN maxcnt : pinteger; OUT nodcnt : pinteger); EXTERN; */ /*This routine returns a list of the nodes that are currently responding. Today, it uses a ring broadcast message to solicit replies, then collects the responses. In case some nodes miss the solicitation because of disk or other ring activity, this routine sends the WHO request "loopcnt" times. */ /* ---------------------------------------------------------------------- */ /*%EJECT*/ /* Entry points, declared without parameter lists; the Pascal PROCEDURE comments above give the arguments */ void asknode_$info(); void asknode_$get_info(); void asknode_$read_failure_rec(); void asknode_$report_failure(); void asknode_$server(); void asknode_$who(); //E*O*F asknode.h// echo x - proc2.h cat > "proc2.h" << '//E*O*F proc2.h//' #define proc2_$max_np proc1_$n_user_processes+1 /* NOTE(review): expansion is unparenthesized, so 2*proc2_$max_np would bind as (2*proc1_$n_user_processes)+1 */ #define proc2_$dont_reuse_stacks -1 /* bits in proc2_$state field of info structure */ #define proc2_$waiting 1 #define proc2_$suspended 2 #define proc2_$susp_pending 4 #define proc2_$bound 8 typedef unsigned char pid_t; #define pid_tn 64 typedef struct { uid_$t a[4]; unsigned long nodeid; } acl_$sid; typedef short enum { proc1_$nil, proc1_$level_1, proc1_$level_2, proc1_$null_process, proc1_$wired_dxm, proc1_$page_purifier, proc1_$unwired_dxm, proc1_$net_receive_server, proc1_$net_paging_server, proc1_$net_request_server, proc1_$mem_lights_process, proc1_$initial_system_process, proc1_$router_process, proc1_$iic_guardian_process } 
proc1_$type_t; typedef struct { pid_t pid; /* level 1 process ID */ proc1_$type_t proc1_type; /* level 1 process type */ } proc1_$liste_t; typedef proc1_$liste_t proc1_$list_t[pid_tn]; typedef struct { uid_$t stack_uid; /* uid of user stack */ linteger stack_base; /* base address of user stack */ proc2_$state_t procstate; /* ready, waiting, etc. */ pinteger usr; /* user sr */ linteger upc; /* user pc */ linteger usp; /* user stack pointer */ linteger usb; /* user sb ptr (A6) */ time_$clock_t cpu_total; /* cumulative cpu used by process */ unsigned short priority; /* process priority */ acl_$sid sid; uid_$t pgroup; boolean is_server; boolean pad; pinteger min_pri; pinteger max_pri; linteger priv_faults; linteger glob_faults; linteger disk_page_io; linteger net_page_io; uid_$t orig_pgroup; short orig_upgid; } proc2_$info_add_t; //E*O*F proc2.h// echo x - tgl.c cat > "tgl.c" << '//E*O*F tgl.c//' #include <stdio.h> main() { static unsigned long nodes[3] = {0x13919, 0x4950, 0x91a3}; int i; int j; struct { double fc; long disk; long net; } stats; for (i=0; i < 3; i++) if (j=loadav_init(&nodes[i])) { printf("Error %x on node %x\n",j,nodes[i]); exit(1); } while (1) { sleep(5); for (i=0; i < 3; i++) { if (j=loadav(&nodes[i],&stats)) { printf("Loadav: Error %x on node %x\n",j,nodes[i]); exit(1); } printf("Node %8x: free cpu (%%) %6.2f, diskact %8d, netact %8d\n", nodes[i],stats.fc,stats.disk,stats.net); } } exit(2); } //E*O*F tgl.c// echo x - getloads.c cat > "getloads.c" << '//E*O*F getloads.c//' #include <stdio.h> #include "/sys/ins/base.ins.c" #include "/sys/ins/proc2.ins.c" #include "/sys/ins/error.ins.c" #include "/sys/ins/pfm.ins.c" #include "/sys/ins/cal.ins.c" #include "./proc2.h" #include "./asknode.h" #define MAXNODES 64 typedef struct nodestat_t { double cpu; long disk; long net; unsigned long done; unsigned long nid; struct nodestat_t *next; } nodestat_t; static nodestat_t *ns = NULL; extern nodestat_t *malloc(); int loadav_init(nid) unsigned long *nid; { 
nodestat_t *p; int i; p = ns; if ((ns = malloc(sizeof(nodestat_t))) == NULL) { ns = p; return(-1); } ns->nid = *nid; if (i = loadtot(ns)) { free(ns); ns = p; return(i); } ns->next = p; return(0); } int loadav(nid,r) unsigned long *nid; nodestat_t *r; { nodestat_t *p; nodestat_t *n; nodestat_t local; int i; double a; for (p = ns; p != NULL; p = p->next) if (p->nid == *nid) break; if (p == NULL) return(-1); n = &local; *n = *p; if (i=loadtot(p)) { *p = *n; return(-2); } n->cpu = p->cpu - n->cpu; n->disk = p->disk - n->disk; n->net = p->net - n->net; n->done = p->done - n->done; a = n->done * 65536 * 4 / 1.0e06; r->cpu = n->cpu/a; r->disk = n->disk; r->net = n->net; return(0); } static int loadtot(p) nodestat_t *p; { #define nullproc_pid 2 asknode_$kind_t rmt; static asknode_$reply_t rep; asknode_$rqst_data_t rqst; status_$t st; /* time_$clock_t &nullcpu = rep.data.thirteen.proc1info.cpu_total; sr10? rdm */ time_$clock_t nullcpu; /* rdm */ rmt = ask_proc1info; rqst.pid = nullproc_pid; asknode_$info(&rmt,&p->nid,&rqst,&rep,&st); if (st.all != status_$ok) return(st.all); if (rep.status.all != status_$ok && rep.status.all != 0x190002) return(rep.status.all); nullcpu = rep.data.thirteen.proc1info.cpu_total; /* rdm */ cal_$float_clock(nullcpu,p->cpu); rmt = ask_network_stats; asknode_$info(&rmt,&p->nid,&rqst,&rep,&st); if (st.all != status_$ok) return(st.all); if (rep.status.all != status_$ok) return(rep.status.all); p->disk = rep.data.diskr + rep.data.diskw; p->net = rep.data.netrcv + rep.data.netxmit; rmt = ask_time; asknode_$info(&rmt,&p->nid,&rqst,&rep,&st); if (st.all != status_$ok) return(st.all); if (rep.status.all != status_$ok) return(rep.status.all); p->done = rep.data.six.ctime; return(0); } //E*O*F getloads.c// exit 0 -- Org: NIKHEF-H, National Institute for Nuclear and High-Energy Physics. Mail: Kruislaan 409, P.O. 
Box 41882, 1009 DB Amsterdam, the Netherlands Phone: (20)5925018 or 5925003 Telex: 10262 (hef nl) UUCP: i91@nikhefh.hep.nl BITNET: nikhefh!i91@mcvax.bitnet