#!/usr/bin/perl -w
# Purpose of this document:
#   Part of the EU DataGrid project.
#   Implementation of an information provider that returns information about
#   PBS/LSF queues or a CONDOR pool.
#
# The software is distributed "AS IS" WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.
#
# Version:
#   $Revision: 1.68 $
#   Last Change: $Date: 2003/04/23 10:10:03 $ by $Author: meltzer $

=head1 NAME

ce-all - CE Information Provider

=head1 COPYRIGHT

(c) by
Florian Schintke E<lt>schintke@zib.deE<gt>,
Thomas Röblitz E<lt>roeblitz@zib.deE<gt>,
Jörg Meltzer E<lt>meltzer@zib.deE<gt>
Konrad-Zuse-Zentrum für Informationstechnik Berlin, 2001, 2002

=head1 AUTHOR

Author(s):
Thomas Röblitz E<lt>roeblitz@zib.deE<gt>
Florian Schintke E<lt>schintke@zib.deE<gt>
Jörg Meltzer E<lt>meltzer@zib.deE<gt>

=head1 Assumptions & Tools

=over 4

=item Assumptions

We assume that Globus has been installed.

=item Used Tools

B<Common:> host, which

B<PBS:> qstat

B<LSF:> lsid, bqueues, lshosts, bhosts, bjobs

B<Condor:> condor_q, condor_status, condor_version, condor_config_val

B<Globus:> globus-hostname

=back

=head1 Parameters

=over 4

=item ARGS

'-cluster E<lt>nameE<gt>' specifies the name of the server.

'-cluster-batch-system-bin-path E<lt>pathE<gt>' specifies the path where the
commands of the cluster batch system can be found.

'-globus-hostname-script E<lt>pathE<gt>' specifies the complete path to a
script that prints out the hostname of the gatekeeper
(e.g. 'globus-hostname').

'-globus-config-file E<lt>fileE<gt>' specifies the path + filename of the
Globus configuration files (e.g. globus-jobmanager.conf).

'-auth-users-from-grid-mapfile' reads authorized users from the
grid-mapfile rather than from the static configuration file.

'-host-info-bin' specifies the location of the hosts info script.

'-lrms E<lt>typeE<gt>' specifies the local Resource Management System
(default: PBS).

'-maxcputime E<lt>timeE<gt>' defines the maximum cpu time for a job submitted
to the ce (Condor only).

'-maxwalltime' defines the maximum wall clock time allowed for jobs
submitted to the ce (Condor only).

'-queue E<lt>namesE<gt>' specifies non rms submission queues or a set of
condor central managers (CONDOR_HOST).

'-static E<lt>fileE<gt>' specifies the name of the file that contains static
information.

'-rms-execution-queue E<lt>queueE<gt>' specifies the rms execution queue
(stats are only shown if set in -queue switch) (PBS & LSF only).

'-rms-submission-queues E<lt>queueE<gt> ...' specifies rms submission queues
(PBS & LSF only).

'-cesebind E<lt>fileE<gt> | queueregex se | queueregex se directory'
specifies the cesebind configuration.

'-ttl E<lt>secondsE<gt>' specifies the value for entryTtl.

=back

=head1 Notes

=head2 Notes-LSF

LSF allows various ways to configure a queue. We try to find out the CE's
values directly using the values reported by the bqueues command. Usually
we have a node by node configuration, so we view a queue as a set of nodes
and fill in missing bqueues values from the values of the nodes instead
(bhosts, lshosts commands).

Unless a queue specifies the MaxRunningJobs we take the value from the
nodes' MaxRunningJobs. In case the admin employed a user/group based policy
and absolutely no information for this attribute can be found, we use the
value of the total cpus.

TotalCPUs differ from queue to queue. The value is an accumulation over each
host taken from the bqueues -l field 'HOSTS: {hosts}'. Each host's cpus are
determined first from the lshosts command or second from the bhosts
MaxRunningJobs.

=head2 Notes-PBS

=head2 Notes-CONDOR

See README.Condor

=head1 Program Sequence

(when script is configured for all record types)

    set defaults & initialize variables
    parse cmdline
    obtain static attributes values
    create ce records
    create cluster record
    create subcluster records
    create filesystem records
    print ce records
    print cluster record
    print subcluster records
    print remote filessystem records
    call host info script

=cut

use POSIX;

# set global default values here
setGlobalDefaults();

# get Lrms
determineLrms();

# set Lrms default values here
setLrmsDefaults();

# parse command line parameters
processCommandLineParameters();

# collect raw static and dynamic values of the CE
getCeInformation();

# if cluster should be printed
if ($ClusterArg ne "") {
    # postprocess cluster, subCluster & fileSystem records
    unifyClusterData();

    # all lrms add the printable CE to its clusters clusterservicelist
    # add GlueForeignKey GlueCEUniqueIDs=ceid to clusters in %UnifiedCluster
    addCEs();

    unifySubClusterData();
    # add GlueChunkKeys to all subclusters
    addSubClusterGlueKeys();

    unifyFileSystemData();
    # add GlueChunkKeys to all filesystems
    addFileSystemGlueKeys();
}

# print CE-Information
printCeInformation();

if ($ClusterArg ne "") {
    # print cluster subCluster and filesystem records
    printClusters();
    printSubClusters();
    printFileSystems();
    printHostInfo();
}

# print ce se bindings
printCesebindings() if ($CesebindArg || @CESEBinds);

=head1 Configuration & initialisation functions

=for html

see subprocedures: setGlobalDefaults, determineLrms, setLrmsDefaults, processCommandLineParameters

=head2 setGlobalDefaults

Here we set some global defaults.

=cut

# Initialises every package-global default used by the provider: schema
# version, "undefined" sentinel values, command line argument holders and
# the (still empty) data hashes/arrays that the collection phase fills.
sub setGlobalDefaults {
    # these are hardcoded defaults
    $GlueSchemaVersionMajor = 1;
    $GlueSchemaVersionMinor = 1;

    # print only 1 subCluster / cluster & subclusterID=clusterID
    $Wp1SubClusterMode = 1;

    # do not create remote filesystem records for these filesystems
    # @ExcludedFileSystems = qw(tmpfs ramfs shm romfs cdfs iso9660 dvdfs);
    @IncludedFileSystems = qw(afs coda gfs intermezzo nfs knfs);

    $MaxCPUTimeUndefined = 999999; # set arbitrarily value if not otherwise specified
    $MaxCPUTimeServer = "-";
    $MaxWallTimeServer = "-";
    $DefaultCPUTimeServer = "-";
    $DefaultWallTimeServer = "-";
    $MaxRunningUndefined = 99999;
    $MaxQueuableUndefined = 999999;
    $EstimatedResponseUndefined = 999999;
    $WorstResponseUndefined = 9999999;
    $PriorityUndefined = "1";

    # number of jobs which indicate the average utilisation of a timeshared node.
    $AvgJobsTimeshared = 100;

    # threshold variables
    # average consumption of maxWallClockTime for a job 0< mWCTCF <1
    # This value is used to give better estimated response times.
    $MaxWallClockTimeConsumtionFactor = 0.5;

    # $IdleJobThreshold = 0 = ignore threshold
    # $IdleJobThreshold = 1 = do not allow immediate job start bypass
    # default: if we have more than 10% idle jobs,
    # we do not allow a bypass in the response time calculation even
    # if we have sufficient freecpus.
    $IdleJobThreshold = 0.1;

    # predefined values for command line parameters
    $ClusterArg = "";                  # name of the cluster
    $GrisPortArg = "";                 # gris port
    $MdsArg = "local";                 # name of the virtual organisation
    $StaticConfArg = "";               # location of static config file
    $TtlArg = 600;
    $GlobusConfigFileArg = "";         # location of globus config file
    $HostInfoBinArg = "";
    $AuthUsersFromGridMapfileArg = ""; # location of gridmap file
    $CbsBinPathArg = "";
    $CesebindArg = "";
    $Lrms = "pbs";                     # set your default lrms here
    @Queues = ();                      # show only these queues / pools
    @RemoteFiles = ();                 # configfiles containing subcluster and
                                       # filesystem information

    # these are the actual variables
    # set default values for common 'globally' computed attributes
    %AllCE = ();               # key: dn string of ce  data: CE Attributes
    @AllQueues = ();           # all queues hosted the cluster
    @AuthorizedUsersList = (); # authorized users from gridmapfile
    $CbsBinPath = "";          # the cluster batch system bin path
    @CESEBinds = ();           # the queue, se, mountpt triples/tuples
    $Cluster = "";             # name of the cluster
    %Clusters = ();            # all found clusters in static configfile
    %FileSystems = ();         # all found file systems in static configfile
                               # unresolved dn
    $GlobusProvisionalCeid = "-"; # ceid without queuename
    %Jobs = ();                # all jobs on the cluster
    $LrmsVersion = "-";        # version of the local resource management system
    %Nodes = ();               # all nodes on the cluster
    $Server = "";              # name of the central manager
    %SubCluster = ();          # all found subClusters in static/remote configfile
    %StaticCE = ();            # all CE specified in the static configfile
    %SE = ();                  # all SE specified by cese configfile
    %UnifiedCluster = ();      # all clusters with fully resolved keys
    %UnifiedSubCluster = ();   # all subClusters with fully resolved keys
    %UnifiedFileSystem = ();   # all fileSystems with fully resolved keys
    $ServerParam = "";

    # set defaults for rms computed values
    # rms submission queues will use these value as their response times.
    $GlobalMaxTotalJobs = 0;
    $GlobalEstimatedResponseTime = 0;
    $GlobalWorstResponseTime = 0;

    # default values for globus variables
    $GlobusGatekeeperHost = "";
    $GlobusGatekeeperPort = "";
}

=head2 determineLrms

try to get lrms information from the program name
and check for '-lrms' commandLine option

aborts: an unknown lrms is found in the commandline;
the lrms implied by the script name and the one given by the commandline
option contradict each other (they are mutually exclusive)

=cut

# Sets the global $Lrms to "pbs", "lsf" or "condor".
# Sources, in order of precedence: the suffix of the name the script was
# started under ($0), then the value following a '-lrms' switch in @ARGV,
# then whatever default $Lrms already holds.
# Dies when '-lrms' has no value, names an unsupported system, or
# contradicts the script name.
sub determineLrms {
    # lrms implied by the (possibly softlinked) script name
    my $tmpLrms = "";
    if ($0 =~ /pbs$/i) {
        $tmpLrms = "pbs";
    }
    elsif ($0 =~ /lsf$/i) {
        $tmpLrms = "lsf";
    }
    elsif ($0 =~ /condor$/i) {
        $tmpLrms = "condor";
    }

    # which LRMS shall I use?
    for (my $i = 0; $i <= $#ARGV; $i++) {
        next if $ARGV[$i] !~ /^-lrms$/;

        $i < $#ARGV
            or die "value for parameter -lrms is missing";

        # anchored: a value that merely contains a known name (e.g.
        # 'openpbs') must be rejected here with a meaningful message
        $ARGV[$i+1] =~ /^(?:pbs|lsf|condor)$/i
            or die "batch system '$ARGV[$i+1]' is not supported";

        $Lrms = lc $ARGV[$i+1];

        $tmpLrms eq "" or $tmpLrms eq $Lrms
            or die "LRMSType collision: Do not use switch '-lrms $Lrms' "
                 . "with softlinked $tmpLrms script.";
        last;
    }

    # the script name wins when it names an lrms
    $Lrms = $tmpLrms if $tmpLrms ne "";

    $Lrms eq "pbs" || $Lrms eq "lsf" || $Lrms eq "condor"
        or die "determineLrms: Lrms is unknown. Please set lrms type.\n";
}

=head2 setLrmsDefaults

B<PBS/LSF:> set wp4 rms variables

B<Condor:> set defaults for config files and policy variables

=cut

# Initialises the globals that only exist for the selected $Lrms.
sub setLrmsDefaults {
    # set LRMS specific parameter
    if (($Lrms eq "pbs") or ($Lrms eq "lsf")) {
        @RmsSubmissionQueues = (); # special treatment for these queues
        $RmsExecutionQueue = "-";
        %Lshosts = () if ($Lrms eq "lsf"); # lshost information
    }
    elsif ($Lrms eq "condor") {
        $CondorMaxTotalJobsFactor = 200;
        $maxWallTime = "01:00:00"; # unusual in condor, so use standard value
        $maxCpuTime = "01:00:00";
        %CondorHosts = ();
    }
}

=head2 processCommandLineParameters

parse command line parameters

aborts: Cluster batch systems bin path could not be found,
wp4 rms managed queues are misconfigured.

=cut

# Walks @ARGV and stores every recognised switch in its global.
# Unknown words are ignored. After parsing, the batch system bin path is
# normalised (normalizePathname) and, for PBS/LSF, the rms submission /
# execution queue pairing is validated.
# Dies when exactly one of "-rms-execution-queue" /
# "-rms-submission-queues" was given, or when normalizePathname dies.
sub processCommandLineParameters {
    # switches that take exactly one value and just store it
    my %valueOption = (
        "-grisport"                      => \$GrisPortArg,
        "-mds-vo-name"                   => \$MdsArg,
        "-static"                        => \$StaticConfArg,
        "-ttl"                           => \$TtlArg,
        "-globus-config-file"            => \$GlobusConfigFileArg,
        "-host-info-bin"                 => \$HostInfoBinArg,
        "-globus-gatekeeperport"         => \$GlobusGatekeeperPort,
        "-globus-gatekeeperhost"         => \$GlobusGatekeeperHost,
        "-auth-users-from-grid-mapfile"  => \$AuthUsersFromGridMapfileArg,
        "-cluster-batch-system-bin-path" => \$CbsBinPathArg,
    );

    # switches that collect all following words up to the next switch
    my %listOption = (
        "-remotefiles" => \@RemoteFiles,
        "-queue"       => \@Queues,
    );

    my $isRms = ($Lrms eq "pbs" || $Lrms eq "lsf");

  ARG:
    for (my $i = 0; $i <= $#ARGV; $i++) {
        my $arg = $ARGV[$i];

        # common parameters
        if ($arg eq "-help") {
            &usage();
            exit(-1);
        }

        if ($arg eq "-cluster") {
            # the cluster name is optional; without it "default" is used
            if ($i < $#ARGV && $ARGV[$i+1] =~ /^[^-]/) {
                $ClusterArg = $ARGV[++$i];
            }
            else {
                $ClusterArg = "default";
            }
            next ARG;
        }

        if (exists $valueOption{$arg}) {
            if ($i < $#ARGV) {
                ${ $valueOption{$arg} } = $ARGV[++$i];
            }
            else {
                warn "value for parameter $arg is missing.\n";
            }
            next ARG;
        }

        if (exists $listOption{$arg}) {
            my @values = _collectOptionValues($i);
            @values or warn "value for parameter $arg is missing.\n";
            push @{ $listOption{$arg} }, @values;
            $i += @values;
            next ARG;
        }

        if ($arg eq "-cesebind") {
            # test BEFORE advancing $i, otherwise '-cesebind file' as the
            # last argument is reported as missing
            if ($i >= $#ARGV) {
                warn ("value for parameter -cesebind is missing.\n");
                next ARG;
            }
            my $first = $ARGV[++$i];
            my @more  = _collectOptionValues($i);
            $i += @more;

            if (!@more) {
                # a single value is the name of a cesebind config file
                $CesebindArg = $first;
                next ARG;
            }

            # several values form one inline binding:
            # queueregex se [directory]
            $CesebindArg = "";
            my @binding = ($first, @more);
            if (@binding <= 3) {
                push (@CESEBinds, [@binding]);
            }
            else {
                warn "CESEBind information invalid @binding.\n";
            }
            next ARG;
        }

        if ($arg eq "-rms-execution-queue") {
            if ($isRms) {
                if ($i < $#ARGV) {
                    $RmsExecutionQueue = $ARGV[++$i];
                }
                else {
                    warn "value for parameter -rms-execution-queue is missing.\n";
                }
            }
            elsif ($Lrms eq "condor") {
                warn "switch 'rms-execution-queue ' cannot be used with condor.\n";
            }
            next ARG;
        }

        if ($arg eq "-rms-submission-queues") {
            if ($isRms) {
                my @values = _collectOptionValues($i);
                @values
                    or warn ("value for parameter -rms-submission-queues is missing.\n");
                push (@RmsSubmissionQueues, @values);
                $i += @values;
            }
            elsif ($Lrms eq "condor") {
                warn "parameter 'rms-submission-queues cannot be used with condor.\n";
            }
            next ARG;
        }

        if ($arg eq "-maxcputime") {
            if ($Lrms eq "condor") {
                if ($i < $#ARGV) {
                    $maxCpuTime = $ARGV[++$i];
                }
                else {
                    warn "value for parameter -maxcputime is missing.\n";
                }
            }
            else {
                warn "parameter -maxcputime can only be used with condor.\n";
            }
            next ARG;
        }

        if ($arg eq "-maxwalltime") {
            if ($Lrms eq "condor") {
                if ($i < $#ARGV) {
                    $maxWallTime = $ARGV[++$i];
                }
                else {
                    warn "value for parameter -maxwalltime is missing.\n";
                }
            }
            else {
                warn "parameter -maxwalltime can only be used with condor.\n";
            }
            next ARG;
        }
    }

    normalizePathname();

    if ($isRms) {
        # if a submission or an execution queue has been given both should be
        # die if only one
        if (($#RmsSubmissionQueues == -1) && ($RmsExecutionQueue ne "-")) {
            die "processCommandLineParameters: add at least one submission queue\n";
        }
        if (($#RmsSubmissionQueues > -1) && ($RmsExecutionQueue eq "-")) {
            die "processCommandLineParameters: specify an execution queue\n";
        }
    }
}

# Returns the words that follow $ARGV[$index] up to, but not including,
# the next word starting with '-' (or the end of @ARGV).
sub _collectOptionValues {
    my ($index) = @_;
    my @values = ();
    for (my $j = $index + 1; $j <= $#ARGV; $j++) {
        last if $ARGV[$j] =~ /^-/;
        push (@values, $ARGV[$j]);
    }
    return @values;
}

# Sets $CbsBinPath, the directory (with trailing '/') holding the batch
# system commands.
# If -cluster-batch-system-bin-path was given it is used (a trailing '/' is
# appended when missing); otherwise the directory is taken from the output
# of `which` for a command typical for the lrms.
# Dies when no path could be determined.
sub normalizePathname {
    # command looked up with `which`, per lrms
    my %probeCommand = (
        pbs    => "qstat",
        lsf    => "bqueues",
        condor => "condor_q",
    );

    if ($CbsBinPathArg ne "") {
        $CbsBinPath = ($CbsBinPathArg =~ /\/$/) ? $CbsBinPathArg
                                                : $CbsBinPathArg . "/";
    }
    elsif (exists $probeCommand{$Lrms}) {
        my $tool  = $probeCommand{$Lrms};
        my $found = `which $tool`;
        # only trust the capture of a match that actually succeeded
        $CbsBinPath = $1
            if defined $found && $found =~ /(.*)\Q$tool\E\n/;
    }

    if (!defined $CbsBinPath || $CbsBinPath eq "") {
        die "Unable to find the cluster batch system bin path.\n";
    }
}

=head1 Accumulation of static data

The procedures

=for html

getStaticData, readStaticInformation, getCeid

obtain all relevant static data, which is then merged into the CE (queue)
records.

=cut

# Collects everything that is known about the CE: static batch system
# data, configuration files, jobs and nodes, and then derives the Glue
# attributes of every queue in %AllCE. The order of the calls matters; each
# step relies on the data of the previous ones.
sub getCeInformation {
    # all lrms get value for LRMSVersion and determine available CE's
    # pbs and lsf get value for $Cluster & $Server
    # condor create CondorHosts hash
    getStaticData();

    # all lrms read static config, worker node, authUser information
    readStaticInformation();

    # copy matching records from static configfile to CE hashs
    staticCEDataToQueue();

    # append commandline cesebindings
    cesebindsToQueue();

    # integrate jobs info to queue record
    jobsToQueue() if ($Lrms ne "condor");

    # push nodenames to array NODES in the AllCE hash in AllCE{$dn}{NODES}
    nodesToQueue() if ($Lrms ne "condor");

    # add ComputingElement data to queues in the AllCE hash
    glueCEToQueue();

    # add cluster foreign key
    glueKeysToQueue();

    # add ComputingElementInfo data to queues in the AllCE hash
    glueCEInfoToQueue();

    # add policy data to queues in the AllCE hash
    glueCEPolicyToQueue();

    # add state data to queues in the AllCE hash
    glueCEStateToQueue();

    # calculate maximum values for rms execution queue
    computeExecutionQueueSizes();

    # add rms data to queues in the AllCE hash
    rmsStatetoQueue() if ($Lrms ne "condor");

    # enable queues for output depending on cmdline options
    activateQueues();
}

=head2 getStaticData

B<PBS>

post: the Cluster name is ClusterArg or if none given the server info
from qstat -B -f shell command

the LrmsVersion is either pbs_version info taken from
qstat -B -f shell command or "-" if info could not be found

the ServerParam is specified as @ and the name of the Cluster
$ServerParam = "@$Cluster"

[Max|Default][CPU|Wall]TimeServer is the value in seconds of
resources_[max|default].[cput|walltime] from "qstat -B -f $Server"
command or if the value is missing "-"

@AllQueues array contains queuenames from qstat -Q -f $ServerParam

aborts: $Cluster could not be determined,
no queues could be found on the cluster,
no node information found

B<LSF>

post: the [Server|Cluster] name is ClusterArg or the full
hostname determined by lsid shell command

the LrmsVersion value is taken from lsid shell command

node and the running jobs information are gathered here
in the hashs %Jobs and %Nodes since calling bjobs and bhost/lshost
shell calls are expensive in run-time

nodes and jobs have references to each other
(see Readme.Implementationdetails) which enables us
to view the queue as a set of nodes

@AllQueues contains all queues found by bqueues

aborts: $Cluster = "" Cluster could not be determined,
no queues could be found on the cluster,
no node information found

B<CONDOR>

post: the LrmsVersion value is taken from condor_version shell command

set Cluster attribute value to the condor_config_val CONDOR_HOST value
of the first pool of the pools specified by -queue or the default pool

@AllQueues contains the collector names (CENames) of all condor hosts
specified with
condor_config_val -pool CONDOR_HOST -name CONDOR_HOST COLLECTOR_NAME

%CondorHosts hash contains a map collector name (CE name) -> CONDOR_HOST

aborts: no pools could be found

=cut

sub getStaticData {
    if ($Lrms eq "pbs") {
        # full hostname for short hostname clusterargs
        $ClusterArg = getFullHostname($ClusterArg) if $ClusterArg ne "default";
        getPbsServerInfo();
        # aquire node values, build nodes hash
        getPbsCpuValues();
        $Cluster = $Server = getFullHostname($Server);
        # aquire job values, build jobs hash
        getPBSJobAttributes();
    }
    elsif ($Lrms eq "lsf") {
        getLsfServerInfo();
        # aquire node, job, values, build nodes, jobs hash
        generateLsfInformationPool();
    }
    elsif ($Lrms eq "condor") {
        # full hostname for short hostname clusterargs
        $ClusterArg = getFullHostname($ClusterArg) if $ClusterArg ne "default";

        # obtain the 'LRMSVersion' from the first line of condor_version;
        # $LrmsVersion keeps its "-" default unless that line matches
        if (open my $version, "$CbsBinPath" . "condor_version |") {
            my $firstLine = <$version>;
            $LrmsVersion = $1
                if defined $firstLine
                && $firstLine =~ /^\$CondorVersion: (\d+\.\d+\.\d+)/;
            close $version;
        }
        else {
            warn ("getStaticData could not open condor_version.\n");
        }
        warn "getStaticData: could not obtain LrmsVersion"
            if $LrmsVersion eq "-";

        # add the default pool to @Queues if no condor pools were specified
        # otherwise set the $ENV(CONDOR_CONFIG) to the first pools condor_config.
        setCondorDefaultPool();
        getCollectorNames();
        (@AllQueues)
            || die "getStaticData: no pools found on $Cluster.\n";
    }

    # cluster variable must be set
    $Cluster ne ""
        or die "getStaticData: no cluster information could be ".
               "found, set clustername \nmanually with '-cluster ' option\n";
}

# Returns the full hostname for $param as reported by the `host` command.
# Returns $param unchanged when it is empty or when `host` gives no usable
# answer (usage error, "not found", resolver diagnostics starting with ';;').
# A trailing dot is removed from the result.
sub getFullHostname {
    my $param = shift;
    return $param if $param eq "";

    my $answer = `host $param 2>&1`;
    my $host   = $param;
    my ($firstWord) = $answer =~ /^(\S+)\s/;
    if (defined $firstWord
        && $answer !~ /^host: usage error/
        && $answer !~ /not found/i      # "Host x not found: 3(NXDOMAIN)"
        && $answer !~ /^;;/) {          # resolver diagnostics
        $host = $firstWord;
    }

    # ignore last dot
    chop($host) if $host =~ /\.$/;
    return $host;
}

# obtain values for the attributes $Server and $LrmsVersion and all queuenames
# Also reads the server wide max_running and cpu/wall time limits.
# Dies when qstat cannot be started, when no queue is reported or when no
# cluster name is known afterwards.
sub getPbsServerInfo {
    my $clusterParam = ($ClusterArg ne "default") ? $ClusterArg : "";

    open my $qstat, "$CbsBinPath" . "qstat -B -f $clusterParam 2>&1 |"
        or die "getPbsServerInfo: could not open qstat.\n";
    while (<$qstat>) {
        $Cluster = $Server = $1, next if /^Server:\s+(\S+)/;
        $LrmsVersion = $1, next if /pbs_version\s+=\s+(\S+)/;
        $MaxRunningUndefined = $1, next if /^\s+max_running\s+=\s+(\d+)/;
        $MaxCPUTimeServer = convertHhMmSs($1), next
            if /^\s+resources_max.cput\s+=\s+(\S+)/;
        $MaxWallTimeServer = convertHhMmSs($1), next
            if /^\s+resources_max.walltime\s+=\s+(\S+)/;
        $DefaultCPUTimeServer = convertHhMmSs($1), next
            if /^\s+resources_default.cput\s+=\s+(\S+)/;
        $DefaultWallTimeServer = convertHhMmSs($1), next
            if /^\s+resources_default.walltime\s+=\s+(\S+)/;
    }
    close $qstat;

    # an explicitly given cluster name overrides what qstat reported
    $Server = $Cluster = $ClusterArg
        if $ClusterArg ne "" && $ClusterArg ne "default";

    # qstat server param
    $ServerParam = "\@$Server";

    open $qstat, "$CbsBinPath" . "qstat -Q -f $ServerParam 2>&1 |"
        or die "getPbsServerInfo: could not open qstat.\n";
    @AllQueues = map { /Queue:\s(\S+)/ } <$qstat>;
    close $qstat;

    # check for post conditions
    (@AllQueues) or die "getPbsServerInfo: could not find any queues.\n";
    die "getPbsServerInfo: could not determine clustername.\n"
        if $Cluster eq "";
    warn "getPbsServerInfo: could not determine LRMSVersion.\n"
        if $LrmsVersion eq "-";
}

# convert tripel hours:minutes:seconds into seconds
# Any other value is returned unchanged, except "-" which yields 0.
sub convertHhMmSs {
    return $1 * 3600 + $2 * 60 + $3 if $_[0] =~ /(\d+):(\d+):(\d+)/;
    return ($_[0] ne "-") ? $_[0] : 0;
}

# Collect information about nodes (total cpus, free cpus) managed by
# the server '$Server'.
# Parses `pbsnodes -a`; a node record ends at an empty line. Every job id
# found on a node is registered in %Jobs (as an empty record) and in
# $Nodes{node}{JOBS}. Nodes whose state contains "down" get no cpu data.
# NOTE(review): the job list of a "down" node still autovivifies
# $Nodes{node} with only a JOBS entry - confirm callers cope with that.
sub getPbsCpuValues {
    open my $pbsnodes, "$CbsBinPath" . "pbsnodes -a -s $Cluster |"
        or die "getPbsCpuValues: could not open pbsnodes.\n";
    my @lines = <$pbsnodes>;
    close $pbsnodes;

    # sentinel: makes sure the last record is stored even if the output
    # does not end with an empty line
    push (@lines, "\n");

    my ($node, $cpus, $state, $jobCount, $type) = ("-", 1, "", 0, "");

    foreach my $line (@lines) {
        $node = $1, next if $line =~ /^(\S+)/i;
        $state = "down", next if $line =~ /^\s+state\s+=\s+.*down.*/i;
        $state = $1, next if $line =~ /^\s+state\s+=\s+(\S+)/i;
        $cpus = $1, next if $line =~ /^\s+np\s+=\s+(\d+)/i;
        $type = $1, next if $line =~ /^\s+ntype\s+=\s+(\S+)/i;

        # PARSE Jobs line: "0/jobid, 1/jobid, ..."
        if ($line =~ /^\s+jobs\s+=\s+(\S+.*)$/i) {
            my $jobList = $1;
            foreach my $job (map { /^\s?\d+\/(\S+)/ } split(/,/, $jobList)) {
                $jobCount++;
                $Jobs{$job} = {};
                push (@{$Nodes{$node}{JOBS}}, $job);
            }
        }

        next unless $line =~ /^$/;

        # end of node record found
        if (($state !~ /down/i) && ($node ne "-")) {
            # STORE NODE data
            $Nodes{$node}{TOTALCPUS} = $cpus;
            $Nodes{$node}{NUMJOBS} = $jobCount;
            # type cluster or timeshared factor
            $Nodes{$node}{TYPE} = $type;
            $Nodes{$node}{JOBSLOTFACTOR} =
                ($type eq "cluster") ? 1 : 1/$AvgJobsTimeshared;
            $Nodes{$node}{MAXJOBS} =
                ($type eq "cluster") ? $cpus : $AvgJobsTimeshared*$cpus;
            # all queues share the node, setting flags
            $Nodes{$node}{QUEUE}{$_} = 1 for @AllQueues;
            if ($state !~ /offline/) {
                $Nodes{$node}{FREECPUS} =
                    max(floor($Nodes{$node}{JOBSLOTFACTOR}
                              * ($Nodes{$node}{MAXJOBS} - $jobCount)), 0);
            }
            else {
                $Nodes{$node}{FREECPUS} = 0;
            }
        }

        # set defaults for the next record
        ($node, $cpus, $state, $jobCount, $type) = ("-", 1, "", 0, "");
    }
}

# returns the maximum of two values
sub max {
    my $maxa = shift;
    my $maxb = shift;
    return ($maxa > $maxb) ? $maxa : $maxb;
}

# Fills the %Jobs records created by getPbsCpuValues with the attributes
# reported by `qstat -f`. Only jobs in state "R" that name a queue are
# kept; all others are removed from %Jobs.
sub getPBSJobAttributes {
    foreach my $job (sort keys %Jobs) {
        my $qstat;
        unless (open $qstat, "$CbsBinPath" . "qstat -f $job$ServerParam 2>&1 |") {
            warn "getPBSJobAttributes: unable to open qstat.\n";
            # nothing is known about the job, so it cannot be kept
            delete $Jobs{$job};
            next;
        }
        while (<$qstat>) {
            $Jobs{$job}{QUEUE} = $1, next if /^\s+queue\s=\s(\S+)/;
            $Jobs{$job}{JOBSTATE} = $1, next if /^\s+job_state\s+=\s+(\S+)/;
            $Jobs{$job}{USEDWALLTIME} = convertHhMmSs($1), next
                if /^\s+resources_used.walltime\s=\s(\S+)/;
            $Jobs{$job}{USEDCPUTIME} = convertHhMmSs($1), next
                if /^\s+resources_used.cput\s=\s(\S+)/;
            $Jobs{$job}{WALLTIME} = convertHhMmSs($1), next
                if /^\s+Resource_List.walltime\s+=\s+(\S+)/;
            $Jobs{$job}{CPUTIME} = convertHhMmSs($1), next
                if /^\s+Resource_List.cput\s+=\s+(\S+)/;
            $Jobs{$job}{NODECOUNT} = $1, next
                if /^\s+Resource_List.nodect\s+=\s+(\S+)/;
        }
        close $qstat;

        # store only running jobs that belong to a queue
        my $running = defined $Jobs{$job}{JOBSTATE}
                   && $Jobs{$job}{JOBSTATE} eq "R";
        unless ($running && exists $Jobs{$job}{QUEUE}) {
            delete $Jobs{$job};
            next;
        }

        # set to zero if missing
        $Jobs{$job}{USEDWALLTIME} = 0 unless exists $Jobs{$job}{USEDWALLTIME};
        $Jobs{$job}{USEDCPUTIME} = 0 unless exists $Jobs{$job}{USEDCPUTIME};
        # if no nodecount was found in qstat record assume job need 1 node
        $Jobs{$job}{NODECOUNT} = 1 unless exists $Jobs{$job}{NODECOUNT};
    }
}

# obtain values for the attributes $Cluster and $LrmsVersion
# and the names of all queues (@AllQueues) reported by bqueues.
# Dies when lsid/bqueues cannot be started, when no cluster name is known
# or when no queue is found.
sub getLsfServerInfo {
    # obtain values for the hostname and the LRMSVersion
    open my $lsid, "$CbsBinPath" . "lsid 2>&1 |"
        or die "getLsfServerInfo: unable to open lsid.\n";
    while (<$lsid>) {
        $Cluster = $1, last if /^My cluster name is (\S+)/;
        $LrmsVersion = $1, next if /^LSF ([^,]+), /;
    }
    close $lsid;

    $Cluster = $ClusterArg if $ClusterArg ne "" && $ClusterArg ne "default";
    $Cluster ne ""
        or die ("getLsfServerInfo: Clustername could not be determinated.\n");
    $LrmsVersion ne "-"
        or warn ("getLsfServerInfo: LrmsVersion could not be determinated.\n");

    # get all queuenames
    open my $bqueues, "$CbsBinPath" . "bqueues 2>&1 |"
        or die "getLsfServerInfo: unable to open bqueues.\n";
    # skip QUEUE_NAME line
    <$bqueues>;
    # store queues
    while (<$bqueues>) {
        push @AllQueues, $1 if /^(\S+)/;
    }
    close $bqueues;
    (@AllQueues) || die "getLsfServerInfo: no queues found.\n";
}

# Node and the running jobs information are gathered here
# in the hashs %Jobs and %Nodes, since calling bjobs and bhost/lshost shell calls
# are expensive in run-time.
# Nodes and jobs have references to each other, which enables us to view
# the queue as a set of nodes.
# Dies when one of the LSF commands cannot be started or reports no host.
sub generateLsfInformationPool {
    # --- lshosts: cpus and resources per (short) hostname ---------------
    open my $lshosts, "$CbsBinPath" . "lshosts 2>&1 |"
        or die "generateLsfInformationPool: unable to open lshosts command.\n";
    while (<$lshosts>) {
        next if ! /^(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+)\s+.*\((.*)\)/;
        my ($node, $cpus, $res) = ($1, $2, $3);
        # ignore domain names, use only short hostname
        ($node) = split /\./, $node;
        # must have totalCpus
        $Lshosts{$node}{CPUS} = $cpus if $cpus ne "-";
        $Lshosts{$node}{RES} = $res;
    }
    close $lshosts;
    if (0 == keys %Lshosts) {
        die "generateLsfInformationPool: Unable to find any hosts using lshosts command.\n";
    }

    # --- bhosts: state and job slots per node ---------------------------
    open my $bhosts, "$CbsBinPath" . "bhosts 2>&1 |"
        or die "generateLsfInformationPool: Unable to open bhosts command.\n";
    # skip HOST_NAME\s+STATUS\+JL/U header
    <$bhosts>;
    while (<$bhosts>) {
        next if ! /^(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\d+)/;
        my ($node, $state, $maxJobs, $numJobs) = ($1, $2, $3, $4);
        # ignore domain names, use only short hostname
        ($node) = split(/\./, $node);

        # If not defined totalCpus approximate with MaxRunningJobs.
        $Lshosts{$node}{CPUS} = $maxJobs unless exists $Lshosts{$node}{CPUS};
        my $cpuCount = $Lshosts{$node}{CPUS};
        # skip node if neither cpus nor maxjobs is defined
        next if $cpuCount !~ /^\d/ || $cpuCount == 0;

        next unless $state =~ /ok/ || $state =~ /closed/;

        # a non numeric MAX column (e.g. "-") counts as 0 job slots
        # NOTE(review): this yields 0 free cpus for such nodes - confirm
        # that this is intended for hosts without a slot limit.
        my $slots = ($maxJobs =~ /^\d+$/) ? $maxJobs : 0;

        # ideally each cpu serves 1 job, however if we have more
        # jobs allowed than cpus on a node we need to downgrade the
        # jobSlotFactor.
        my $jobslotFactor = ($cpuCount < $slots) ? $cpuCount / $slots : 1;

        $Nodes{$node}{QUEUE} = {};
        $Nodes{$node}{STATUS} = $state;
        $Nodes{$node}{MAXJOBS} = $maxJobs;
        $Nodes{$node}{NUMJOBS} = $numJobs;
        $Nodes{$node}{TOTALCPUS} = $cpuCount;
        $Nodes{$node}{FREECPUS} =
            max(ceil(($slots - $numJobs) * $jobslotFactor), 0);
        $Nodes{$node}{JOBSLOTFACTOR} = $jobslotFactor;
    }
    close $bhosts;
    (%Nodes)
        or die ("generateLsfInformationPool: Unable to find any host using bhost command.\n");

    # --- bjobs: running jobs --------------------------------------------
    open my $bjobs, "$CbsBinPath" . "bjobs -u all -r 2>&1 |"
        or die "generateLsfInformationPool: Unable to open bjobs command.\n";
    # skip JOBID USER STAT QUEUE line
    <$bjobs>;
    while (<$bjobs>) {
        next if ! /(\S+)\s+\S+\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)/;
        my ($job, $state, $queue, $node) = ($1, $2, $3, $4);
        # only the first letter of the state is stored
        $state = substr($state, 0, 1);
        # ignore nodes like lost_and_found where admin must run jobs manually
        next unless exists $Nodes{$node};
        $Jobs{$job}{JOBSTATE} = $state;
        $Jobs{$job}{QUEUE} = $queue;
        # assign Job to node and node to job
        $Jobs{$job}{NODE} = $node;
        $Nodes{$node}{JOBS}{$job} = $Jobs{$job};
    }
    close $bjobs;

    # --- bjobs -l: used cpu time per job --------------------------------
    open my $detailJobs, "$CbsBinPath" . "bjobs -l -u all -r 2>&1 |"
        or die "generateLsfInformationPool: Unable to open bjobs command.\n";
    my $jobId;
    while (<$detailJobs>) {
        $jobId = $1, next if /^Job\s\<(\d+)/;
        next if ! /^\s+The\sCPU\stime\sused\sis\s(\d+)\sseconds/;
        next if ! defined $jobId || ! exists $Jobs{$jobId};
        $Jobs{$jobId}{USEDWALLTIME} = $Jobs{$jobId}{USEDCPUTIME} = $1;
    }
    close $detailJobs;

    # --- bqueues -l: which nodes serve which queue ----------------------
    open my $bqueuesLong, "$CbsBinPath" . "bqueues -l 2>&1 |"
        or die "generateLsfInformationPool: Unable to open bqueues command.\n";
    my $queueName;
    while (my $line = <$bqueuesLong>) {
        $queueName = $1, next if $line =~ /^QUEUE:\s+(\S+)/;
        next if $line !~ /HOSTS:\s+(.+)$/;
        my $lsfhosts = $1;

        my ($takeAll, @hosts);
        if ($lsfhosts =~ /all hosts used by the LSF Batch system/) {
            # this is a global queue running on all nodes in the Lsf system
            @hosts = (keys %Lshosts);
            $takeAll = 1;
        }
        else {
            # this queue uses only nodes specified by $lsfhosts
            $takeAll = 0;
            @hosts = split(/\s+/, $lsfhosts);
        }

        my %hosts_avail_to_queue;
        my @group_names;
        foreach my $host (@hosts) {
            if ($host =~ m|/$|) {
                # a trailing '/' marks a host group
                chop($host);
                push(@group_names, $host);
            }
            else {
                my ($node) = split(/\./, $host);
                $hosts_avail_to_queue{$node} = 1; # indicate node is used
            }
        }

        if (scalar(@group_names) > 0) {
            # resolve the host groups to their member hosts
            open my $groupHosts,
                "$CbsBinPath" . "bhosts -w " . join(" ", @group_names) . " 2>&1 |"
                or die "generateLsfInformationPool: Unable to open bhosts command.\n";
            # skip HOST_NAME\s+STATUS\+JL/U header
            <$groupHosts>;
            while (my $hostLine = <$groupHosts>) {
                next if $hostLine !~ /^(\S+)\s+(\S+)\s+\S+\s+(\S+)\s+(\d+)/;
                # ignore domain names, use only short hostname
                my ($node) = split(/\./, $1);
                $hosts_avail_to_queue{$node} = 1;
            }
            close($groupHosts);
        }

        foreach my $host (keys %Lshosts) {
            next if ! exists $Lshosts{$host}{RES};
            $takeAll || defined $hosts_avail_to_queue{$host} or next;
            # set flag if node is used by queue $queueName
            $Nodes{$host}{QUEUE}{$queueName} = 1 if exists $Nodes{$host};
        }
    }
    close $bqueuesLong;
}

# Add the default pool to @Queues if no condor pools were specified
# Set the cluster variable to the first Condor pool in @Queues,
# if no ClusterArg was specified in commandline.
sub setCondorDefaultPool {
    # use the hosts default config & localconfigfile to print information
    # of STANDARD pool if no pools are specified.
    unless (@Queues) {
        my $condorConfigFile = "";
        # these 3 ways lead to condor_config
        if (exists $ENV{CONDOR_CONFIG}) {
            $condorConfigFile = $ENV{"CONDOR_CONFIG"};
        }
        elsif (-e "/etc/condor/condor_config") {
            # env CONDOR_CONFIG does not exist
            $condorConfigFile = "/etc/condor/condor_config";
        }
        else {
            # home directory of the user 'condor'
            my $condorHome = (getpwnam("condor"))[7];
            $condorConfigFile =
                (defined $condorHome ? $condorHome : "") . "/condor_config";
        }
        $ENV{"CONDOR_CONFIG"} = $condorConfigFile;

        # strip the newline before the name is handed to the shell again
        my $condorHost = `"$CbsBinPath"condor_config_val CONDOR_HOST 2>&1`;
        chomp($condorHost);
        $Cluster = getFullHostname($condorHost);
        push (@Queues, $Cluster);
    }
    else {
        $Cluster = getFullHostname($Queues[0]);
    }

    # only an explicitly given cluster name overrides the pool name
    $Cluster = $ClusterArg if $ClusterArg ne "" && $ClusterArg ne "default";
}

# Get all CENames from the pools specified by -queue switch
# and store them to @AllQueue array using the COLLECTOR_NAME attribute of the
# condor hosts local config file.
# Print a warning message if a collector name
# is undefined or a host could not be found.
# Pools that cannot be resolved are removed from @Queues; the order of the
# remaining pools is kept.
sub getCollectorNames {
    # check hostnames or condor_config location
    my @resolvedPools = ();
    foreach my $pool (@Queues) {
        if ($pool =~ /condor_config$/) {
            push (@resolvedPools, $pool);
            next;
        }
        my $answer = `host $pool 2>&1`;
        my ($name) = $answer =~ /^(\S+)/;
        # skip if host not found
        if (!defined $name || $answer =~ /not found/i) {
            warn "getCollectorNames:Host not found for $pool.\n";
            next;
        }
        chop($name) if $name =~ /\.$/;
        push (@resolvedPools, $name);
    }

    # now build CondorHost hash
    my @usablePools = ();
    foreach my $pool (@resolvedPools) {
        my $condorHost = `"$CbsBinPath"condor_config_val -pool $pool CONDOR_HOST`;
        chomp $condorHost;
        my $collectorname =
            `"$CbsBinPath"condor_config_val -pool $condorHost -name $condorHost COLLECTOR_NAME 2>&1`;
        chomp $collectorname;
        if ($collectorname eq ""
            || $collectorname =~ /^Not defined/
            || $collectorname =~ /^Can\'t find/) {
            warn "getCollectorNames:collectorname for pool $pool not found.\n";
            next;
        }
        # the collector name becomes a queue name: no whitespace allowed
        $collectorname =~ s/\s+/_/g;
        push (@AllQueues, $collectorname);
        # remember pools condorhost
        $CondorHosts{$collectorname} = $condorHost;
        push (@usablePools, $pool);
    }
    @Queues = @usablePools;
}

=head2 readStaticInformation

Here we perform all configfile reading.

=cut

# Read Information from Globus configfile (and create a provisional ceid),
# GridMapfile, staticConfigfile and remoteFile if they are known.
sub readStaticInformation {
    # If desired, read authorized users from grid mapfile. They are stored
    # in '@AuthorizedUsersList'.
    readAuthUsersFromGridMapfile() if $AuthUsersFromGridMapfileArg ne "";

    # read additional attributes from static config file
    readStaticConfigFile() if $StaticConfArg ne "";

    # read worker nodes information for subcluster info generation
    readRemoteFile($_) for @RemoteFiles;

    readCesebindFile() if $CesebindArg ne "";
}

=over 4

=for html

=item getCeid

We return a provisional CEID which is the same for each
queue in the following course of the program.
The string is gatekeeper_host:gatekeeper_port/jobmanager-lrms.
If we have to determine the gatekeeper host, gatekeeper port
and have a globus config file specified in commandline we read out the
globus variables otherwise we try to read the values from static config
files first CE using the current lrms.

aborts: GlobusGatekeeperHost or GlobusGatekeeperPort is undefined.

=cut

# Returns the provisional CEID "host:port/jobmanager-lrms" and caches it in
# $GlobusProvisionalCeid ("-" means: not computed yet).
# Host and port are taken, in this order, from the already known
# $GlobusGatekeeperHost/$GlobusGatekeeperPort, from the globus config file
# ($GlobusConfigFileArg, unless it is "-"), or from the first matching CE
# record of the static config file (%StaticCE).
# Dies when the globus config file cannot be opened or when host or port
# are still unknown afterwards.
sub getCeid {
    # once we have it return the globus provisional ceid
    return $GlobusProvisionalCeid if $GlobusProvisionalCeid ne "-";

    # get the ceid parameters
  GETPARAMS: {
        # nothing to do if we know ceids parameters
        last GETPARAMS if $GlobusGatekeeperHost ne "" && $GlobusGatekeeperPort ne "";

        # read from globus config file
        if ($GlobusConfigFileArg ne "-"){
            # obtain/calculate values for 'CEID'
            # ("or", not "||": "||" would bind to the file name string and
            # the die could never be reached)
            open DN,"<$GlobusConfigFileArg" or
                die "getCeid: could not open globus config file.\n";
            while (<DN>) {
                # change single quotes AND NO QUOTES in double-quotes for versions compatibility
                s/^\s*(\S*)\s*['"]?([^\s\n'"]*)['"]?/$1 \"$2\"/;
                chomp;
                $GlobusGatekeeperHost = $1
                    if (/^\s*-globus-gatekeeper-host\s+"(.+)"/i && $GlobusGatekeeperHost eq "");
                $GlobusGatekeeperPort = $1
                    if (/^\s*-globus-gatekeeper-port\s+"(.+)"/i && $GlobusGatekeeperPort eq "");
            } # while DN
            close DN;
            last GETPARAMS;
        }

        # take the ceid from first ce entry in static config file
        foreach (grep {exists $StaticCE{$_}{GlueCEHostingCluster}} keys %StaticCE){
            # short hostname should be supported
            # (\Q..\E: the dots of a host name must match literally)
            $StaticCE{$_}{GlueCEHostingCluster} =~ /^\Q$Cluster\E\./ ||
                $StaticCE{$_}{GlueCEHostingCluster} =~ /^\Q$Cluster\E$/ or next;
            if (/GlueCEUniqueID=(\S*):(\d+)\/jobmanager-$Lrms/){
                # get the globus variables from the first ce record that has
                # a hosting cluster reference to $Cluster and uses the same lrms and
                # has got no a regular expression in the ceid.
                $GlobusGatekeeperHost = $1;
                $GlobusGatekeeperPort = $2;
                last GETPARAMS;
            }
        }
        # all param searching failed
    }

    # check params
    die "getCeid: The globus gatekeeperport is unknown, set " .
        "-globus-gatekeeper-port \nin the globus" .
        " configfile or set -globus-gatekeeperport option.\n"
        if $GlobusGatekeeperPort eq "";
    die "getCeid: The globus gatekeeperhost is unknown, set " .
        "-globus-gatekeeper-host \nin the globus" .
        " configfile or set -globus-gatekeeperhost option.\n"
        if $GlobusGatekeeperHost eq "";

    #construct ceid
    $GlobusProvisionalCeid = sprintf "%s:%s/jobmanager-%s",
        $GlobusGatekeeperHost, $GlobusGatekeeperPort, $Lrms;
    return $GlobusProvisionalCeid;
}

=item readAuthUsersFromGridMapfile

Read a list of user records, which will be added to the
GlueCEAccessControlBaseRules of all found CE records.

=cut

# Appends the first double-quoted string of every line of the grid mapfile
# ($AuthUsersFromGridMapfileArg) to @AuthorizedUsersList.
# Only warns (and returns) when the file cannot be opened.
sub readAuthUsersFromGridMapfile {
    open GRIDMAP, "<$AuthUsersFromGridMapfileArg" or
        warn "readAuthUsersFromGridMapfile: could not open grid map file.\n" and return;
    while (<GRIDMAP>) {
        push @AuthorizedUsersList, $1 if /\"([^"]+)\"/;
    }
    close GRIDMAP;
}

=for html

=item readStaticConfigFile

We can redefine or add new CE/subcluster/cluster/filesystem information by
adding a static config files information to dynamically created CE records.

Read out static config file by fetching either Computing Elements,
cluster, subCluster or remoteFileSystems.
At first, check the line for an object identifier.
Second, scan the dn string for sub keys and store their value
Third, check found keys values and figure out what kind of record
the dn points to.
Fourth, now that we are context aware, read the attribute/value pairs and
store the attributename in the key of the hash of the identified hash and the
location specified by the full dn string and their attribute value to the
value of the same hash.

aborts: Static config file could not be opened.

=cut

# Fills %StaticCE, %FileSystems, %SubCluster and %Clusters, keyed by the
# complete "dn: ..." line of each record.  A record ends at the first blank
# line; lines starting with '#' inside a record are ignored.
sub readStaticConfigFile {
    my $fileSys = "-";
    my $cl      = "-";
    my $scl     = "-";
    my $ce      = "-";

    open STATIC, "<$StaticConfArg" or
        die "readStaticConfigFile: could not read static configuration file $StaticConfArg: $!\n";

    while (<STATIC>) {
        /^dn: .+/ || next;
        chomp;
        $lastDn = $_;

        #try to read a attribute or initialize with "-"
        $fileSys = /GlueHostRemoteFileSystemName=([^,\s]+)/ ? $1 : "-";
        $cl      = /GlueClusterUniqueID=([^,\s]+)/ ? $1 : "-";
        $scl     = /GlueSubClusterUniqueID=([^,\s]+)/ ? $1 : "-";
        $ce      = /GlueCEUniqueID=([^,\s]+)/ ? $1 : "-";

        if ($ce ne "-"){
            # found a computing element record
            $StaticCE{$lastDn}={};
            while (<STATIC>) {
                # skip comment
                next if /^#/;
                # store baserules in array
                push (@{$StaticCE{$lastDn}{GlueCEAccessControlBaseRule}},$1), next
                    if /^GlueCEAccessControlBaseRule:\s+(.+)$/i;
                # store ce attribute
                $StaticCE{$lastDn}{$1} = $2 if /^(Glue\S+):\s+(.+)$/i;
                # blank line indicates end of ce record
                last if /^$/;
            }
            next;
        }

        if ($fileSys ne "-"){
            # found a file system record
            if (!exists($FileSystems{$lastDn})){
                $FileSystems{$lastDn}={};
                while (<STATIC>) {
                    # skip comment
                    next if /^#/;
                    if (/^GlueInformationService/){
                        $FileSystems{$lastDn}{OBJECTCLASS_GlueInformationService}=1;
                    }
                    # read fileSysArg
                    if (/^(Glue\S+):\s+(.+)$/i){
                        # store filesystem attribute
                        $FileSystems{$lastDn}{$1} = $2;
                        next;
                    }
                    # end of filesystem record
                    last if /^$/;
                }
            } else {
                warn "readStaticConfigFile: Double fileSystem ";
                warn "definition of filesystem $fileSys.\n";
            }
            next;
        }

        if ($scl ne "-") {
            # found a subcluster record
            # do we already have a subcluster named scl?
            if (!exists $SubCluster{$lastDn}){
                $SubCluster{$lastDn}={};
                $SubCluster{$lastDn}{GlueSubClusterName} = $scl;
                $SubCluster{$lastDn}{GlueSubClusterUniqueID} = $scl;
                while (<STATIC>) {
                    # skip comment
                    next if /^#/;
                    if (/^GlueHostApplicationSoftwareRunTimeEnvironment:\s+(.+)$/i){
                        # store environments in array
                        push (@{$SubCluster{$lastDn}
                                {GlueHostApplicationSoftwareRunTimeEnvironment}},$1);
                        # set object class flag "OBJECTCLASS_
                        $SubCluster{$lastDn}{OBJECTCLASS_GlueHostApplicationSoftware} = 1;
                        next;
                    }
                    if (/^GlueInformationService/){
                        $SubCluster{$lastDn}{OBJECTCLASS_GlueInformationService}=1;
                    }
                    # store subcluster attribute
                    $SubCluster{$lastDn}{$1} = $2 if /^(Glue\S+):\s+(.+)$/i;
                    # set object class flags
                    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostArchitecture} = 1, next
                        if /^GlueHostArchitecture/i;
                    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostProcessor} = 1, next
                        if /^GlueHostProcessor/i;
                    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostOperatingSystem} = 1, next
                        if /^GlueHostOperatingSystem/i;
                    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostMainMemory} = 1, next
                        if /^GlueHostMainMemory/i;
                    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostNetworkAdapter} = 1, next
                        if /^GlueHostNetworkAdapter/i;
                    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostBenchmark} = 1, next
                        if /^GlueHostBenchmark/i;
                    # blank line indicates end of subcluster record
                    last if /^$/;
                }
            } else {
                warn "readStaticConfigFile: Double subcluster definition of subcluster $scl.\n";
            }
            next;
        } else {
            # found a cluster record
            # NOTE(review): every dn that is neither CE, filesystem nor
            # subcluster ends up here, even without a GlueClusterUniqueID
            # ($cl is then "-") -- confirm that this is intended.
            if (!exists $Clusters{$lastDn}){
                $Clusters{$lastDn}={};
                $Clusters{$lastDn}{GlueClusterName} = $cl;
                $Clusters{$lastDn}{GlueClusterUniqueID} = $cl;
                while (<STATIC>) {
                    # skip comments
                    next if /^#/;
                    # store cluster attribute
                    $Clusters{$lastDn}{OBJECTCLASS_GlueInformationService}=1
                        if /^GlueInformationService/;
                    $Clusters{$lastDn}{$1} = $2, next if /^(Glue\S+):\s+(.+)$/i;
                    # blank line indicates end of cluster record
                    last if /^$/;
                }
            } else {
                warn "readStaticConfigFile: Double cluster definition of cluster $cl.\n";
            }
        }
    }
    close STATIC;
}

=item
readRemoteFile(file)

This procedure enables us to include subcluster and filesystem information
from a periodically updated remote host config file (by admin).

DN-STRING construction for subcluster and filesystem record:
Read out remote host config file specified by first parameter.
There are two modes, if we allow only one subcluster (WP1 people can operate
with only one) then we create a subcluster record and set the subcluster
unique id rdn to the clusters unique id.
Otherwise, if we allow more than one subcluster then we process an additional
hostname parameter from the file, which represents the subcluster unique id.
We create the filesystem dn by adding the filesystem name rdn to the
subclusters dn.

ASSIGNING Attributes:
The attributes GlueSubClusterUniqueID unique id and GlueSubClusterName are
set after we found out the subclusters subcluster unique id rdn.
Then we fetch cpu mem and filesystem info (see README) and set attributes.

=back

=cut

# if (@RemoteFiles) we read each file and append its information to
# %SubCluster and/or %FileSystems
#
# As parsed below, a host record consists of a "hostname:" line, OS_* lines
# up to the first line containing "processor", cpu lines up to a line
# matching /^\s+total:/ and then memory/mount/df lines up to a "dn:" line.
# A record that is cut off by end of file stops the reading.
sub readRemoteFile {
    my $file = shift;
    my @cpuInfo = ();
    my @memInfo = ();
    my $lastDn = "";
    my $nextDn = "";

    open REMOTEFILE,"<$file" or
        warn "readRemoteFile: could not read $file\n" and return;

    while (<REMOTEFILE>){
        if (/^hostname:\s+(\S+)/){
            my $hostname = $1;
            # change back to $1 if worker nodes hostname should be taken
            if ($Wp1SubClusterMode){
                $nextDn = &getDnSclString($Cluster, $Cluster, $MdsArg);
                $SubCluster{$nextDn}{GlueSubClusterName} = $Cluster;
                $SubCluster{$nextDn}{GlueSubClusterUniqueID} = $Cluster;
            } else {
                $nextDn = &getDnSclString($hostname, $Cluster, $MdsArg);
                $SubCluster{$nextDn}{GlueSubClusterName} = $hostname;
                $SubCluster{$nextDn}{GlueSubClusterUniqueID} = $hostname;
            }
            # flush the previous host; storeCPUandMemInfo expects array
            # REFERENCES (passing the arrays themselves flattens them)
            &storeCPUandMemInfo($lastDn, \@cpuInfo, \@memInfo) if (@cpuInfo && @memInfo);
            @cpuInfo=();
            @memInfo=();
            $lastDn = $nextDn;
        }

        # operating system section; the defined() test ends the loop at end
        # of file instead of spinning forever on an undefined line
        while (defined($_ = <REMOTEFILE>) && $_ !~ /processor/){
            $SubCluster{$lastDn}{OBJECTCLASS_GlueHostOperatingSystem} = 1;
            $SubCluster{$lastDn}{GlueHostOperatingSystemName} = $1, next if
                /^OS_name:\s+(.*)/;
            $SubCluster{$lastDn}{GlueHostOperatingSystemRelease} = $1, next if
                /^OS_release:\s+(.*)/;
            $SubCluster{$lastDn}{GlueHostOperatingSystemVersion} = $1, next if
                /^OS_version:\s+#(.*)/;
        }
        last unless defined $_;         # file ended inside the OS section

        # cpu section: the first "processor" line up to the "total:" line
        push @cpuInfo, $_;
        while (defined($_ = <REMOTEFILE>) && $_ !~ /^\s+total:/){
            push @cpuInfo, $_;
        }
        last unless defined $_;         # file ended inside the cpu section

        # memory, mount and df section, terminated by the next "dn:" line
        while (defined($_ = <REMOTEFILE>)){
            push @memInfo, $_;
            last if /^dn:/;
        }
    }
    close REMOTEFILE;
    &storeCPUandMemInfo($lastDn, \@cpuInfo, \@memInfo) if ($lastDn ne "");
}

# Stores processor, memory and remote filesystem attributes of one host.
#   $lastDn  - key of the subcluster record in %SubCluster
#   $cpuInfo - array ref: cpu lines (processor / vendor_id / model name / cpu MHz)
#   $memInfo - array ref: MemTotal/SwapTotal lines, then mount lines
#              (device mountpoint type options ...), then df output starting
#              with its "Filesystem" header line.  The array is consumed.
# Writes %SubCluster{$lastDn} and, for mounted filesystems whose lower-cased
# type is listed in @IncludedFileSystems and whose device does not start
# with /dev, %FileSystems.
sub storeCPUandMemInfo{
    my ($lastDn,$cpuInfo, $memInfo)=@_;
    my %mountFilesys=();    # filesystem dn -> attributes found in the mount list
    my %dnOfServer=();      # mounted device/server -> filesystem dn
    my $line;

    # set values from var $cpuInfo
    $SubCluster{$lastDn}{GlueHostArchitectureSMPSize} =
        scalar(grep {/processor\s+:/} @$cpuInfo);
    $SubCluster{$lastDn}{OBJECTCLASS_GlueHostArchitectureSMPSize} = 1;
    foreach (@$cpuInfo){
        if (/^vendor_id\s*:\s*(.*)\n/) {        # vendor=string after colon
            $SubCluster{$lastDn}{OBJECTCLASS_GlueHostProcessor} = 1;
            $SubCluster{$lastDn}{GlueHostProcessorVendor} = $1;
        }
        if (/^model name\s*:\s*(.*)\n/) {       # model name=string after colon
            $SubCluster{$lastDn}{OBJECTCLASS_GlueHostProcessor} = 1;
            $SubCluster{$lastDn}{GlueHostProcessorModel} = $1;
        }
        if (/^cpu MHz\s*:\s*(\S+)\n/) {         # cspeed=floor (cpuinfo clkvalue)
            $SubCluster{$lastDn}{OBJECTCLASS_GlueHostProcessor} = 1;
            $SubCluster{$lastDn}{GlueHostProcessorClockSpeed}=floor($1);
        }
    }

    # memory lines (kB values are stored as MB); ends at SwapTotal
    # The "available" attributes are deliberately not published: it is
    # difficult to find homogenuous groups with "avail" attribs.
    while (@$memInfo){
        $line = shift @$memInfo;
        $SubCluster{$lastDn}{OBJECTCLASS_GlueHostMainMemory} = 1;
        if ($line =~ /^MemTotal:\s+(\d+) kB\n/){
            $SubCluster{$lastDn}{GlueHostMainMemoryRAMSize} = floor ($1/1024);
            next;
        }
        if ($line =~ /^SwapTotal:\s*(\d+) kB\n/ &&
            defined $SubCluster{$lastDn}{GlueHostMainMemoryRAMSize}){
            $SubCluster{$lastDn}{GlueHostMainMemoryVirtualSize} = floor
                ($SubCluster{$lastDn}{GlueHostMainMemoryRAMSize}+$1/1024);
            # last entry next is /proc/mounts info
            last;
        }
    }

    # mount list, ends at the header line of the df output
    while (@$memInfo){
        $line = shift @$memInfo;
        next unless $line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\S+).*/;
        my ($server, $mountPoint, $fsType, $options) = ($1, $2, $3, $4);
        last if $server eq "Filesystem";        # df record begins
        # must find filesystem in IncludedFilesystems
        my $lcType = lc $fsType;
        next if ! grep { $_ eq $lcType } @IncludedFileSystems;

        my $fsDn = "dn: GlueHostRemoteFileSystemName=$mountPoint, $lastDn";
        $mountFilesys{$fsDn}{GlueHostRemoteFileSystemServer} = $server;
        $mountFilesys{$fsDn}{GlueHostRemoteFileSystemName}   = $mountPoint;
        $mountFilesys{$fsDn}{GlueHostRemoteFileSystemType}   = $fsType;
        # read-only exactly when the mount options contain "ro"
        $mountFilesys{$fsDn}{GlueHostRemoteFileSystemReadOnly} =
            (grep { $_ eq "ro" } split(/,/, $options)) ? "TRUE" : "FALSE";
        $dnOfServer{$server} = $fsDn;
    }

    # copy entries that do not match /dev but are found in df and in /proc/mounts
    my $publish = sub {
        my ($server, $blocks) = @_;
        return unless exists $dnOfServer{$server} && $server !~ /^\/dev/;
        my $fsDn = $dnOfServer{$server};
        foreach my $attr (keys %{$mountFilesys{$fsDn}}) {
            $FileSystems{$fsDn}{$attr} = $mountFilesys{$fsDn}{$attr};
        }
        $FileSystems{$fsDn}{GlueHostRemoteFileSystemSize} = $blocks*1024
            if ($FileSystems{$fsDn}{GlueHostRemoteFileSystemType} eq "nfs");
    };

    while (@$memInfo){
        $line = shift @$memInfo;
        if ($line =~ /^(\S+)\s+(\d+)\.*/){              # one df line
            $publish->($1, $2);
        }
        # handle multilined filesystem info
        elsif ($line =~ /^(\S+)$/ && @$memInfo){
            my $server = $1;
            $line = shift @$memInfo;
            $publish->($server, $1) if $line =~ /\s+(\d+)\.*/;
        }
        else {
            last;
        }
    }
}

# Reads the CE-SE binding file $CesebindArg.  Every line that does not start
# with '#' has the form "queueregex se [directory]"; the binding is stored
# for each entry of @AllQueues that matches queueregex.
# Only warns (and returns) when the file cannot be opened.
sub readCesebindFile{
    open CESEBINDFILE,"<$CesebindArg" or
        warn "readCesebindFile: could not read $CesebindArg\n" and return;
    while (<CESEBINDFILE>){
        next if /^#/;
        # ce se binding with ($dir defined) or without ($dir undef) mountpoint
        next unless /^(\S+)\s+(\S+)(?:\s+(\S+))?/;
        my ($queueRegEx, $host, $dir) = ($1, $2, $3);
        foreach my $queue (grep {/$queueRegEx/} @AllQueues){
            &storeCESEBind($queue, $host, $dir);
        }
    }
    close CESEBINDFILE;
}

# Registers one CE-SE binding: appends a CESEBIND entry to the CE record of
# $queue in %AllCE and creates/updates the binding record in %SE.
# $dir (the access point) is optional.
sub storeCESEBind {
    my ($queue, $host, $dir) = @_;
    my $ce   = &getDnCEString(&getCeid(),$queue,$MdsArg);
    my $ceid = sprintf "%s-%s", &getCeid(), $queue;
    my $dn   = "dn: GlueCESEBindSEUniqueID=$host, GlueCESEBindGroupCEUniqueID=$ceid, mds-vo-name=$MdsArg, o=grid";
    if (defined $dir){
        push @{$AllCE{$ce}{CESEBIND}}, {
            "GlueCESEBindGroupSEUniqueID", $host,
            "GlueCESEBindCEAccesspoint", $dir};
        $SE{$dn}{GlueCESEBindCEAccesspoint} = $dir;
    } else {
        push @{$AllCE{$ce}{CESEBIND}}, {
            "GlueCESEBindGroupSEUniqueID", $host};
    }
    $SE{$dn}{GlueCESEBindCEUniqueID} = $ceid;
    $SE{$dn}{GlueCESEBindSEUniqueID} = $host;
}

=head1 Creation of CE Records

We add dynamical information from either cluster batch system commands,
or from static information we gathered before.

=for html

see subprocedures: staticCEDataToQueue, glueCEToQueue, glueCEInfoToQueue, glueCEPolicyToQueue, glueCEStateToQueue
=head2 staticCEDataToQueue

Resolve the static config file CE records dn string regular expression and
integrate the content to matching actual CE records.

=cut

# For every queue in @AllQueues: build the dn of its CE record and merge the
# matching static CE records into it.
sub staticCEDataToQueue{
    my $dn="";
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
        &storeStaticData($dn,$allQueue);
    }
}

# Returns the dn line of a CE record.
# Parameters: ceid without queue, queue name, mds-vo-name.
# ce is ceid without queue param
sub getDnCEString{
    return sprintf("dn: GlueCEUniqueID=%s-%s, mds-vo-name=%s, o=grid",@_);
}

# We append data from other queues that match the $CurrentDn with their CEID
# and and their Mds-vo-name.
# It is made possible for the administrator to configure multiple CE in
# using regular expressions in the static configfile.
#   $dn    - dn of the CE record in %AllCE that receives the values
#   $queue - name of the queue this record describes
sub storeStaticData {
    my ($dn,$queue) = @_;
    # Copy data if either ce is specified directly or if $queue matches
    # a static queues regular expression.
    foreach $staticDn (grep { &validCE($_,$queue) } keys %StaticCE){
        &copyCEValues($staticDn,$dn);
    }
}

# If the parameter contains GlueCEUniqueID substring the method
# will returns true if it matches the current CEID.
# The GlueCEUniqueID value of the dn is applied as a regular expression
# to "<ceid>-<queue>"; the mds-vo-name of the dn must be valid as well.
sub validCE{
    my ($dnString, $queue) = @_;
    # current CEID
    my $ceid=sprintf "%s-%s", &getCeid(), $queue;
    my $dnCE = ($dnString =~ /GlueCEUniqueID=([^,\s]+)/) ? $1 : "-";
    return $ceid =~/$dnCE/ && &validMds($dnString);
}

# If the parameter contains mds-vo-name substring the method
# will returns true if it matches the mdsArg.
# It will also return true if no mds-vo-name is found.
# The mds-vo-name of the dn is applied as a regular expression to $MdsArg.
sub validMds{
    my $dnString = shift;
    my $dnMds = ($dnString =~ /mds-vo-name=([^,\s]+)/) ? $1 : "-";
    return $MdsArg =~ /$dnMds/ || $dnMds eq "-";
}

# Append information from static configfile to current queue.
# Also set short variables so that computed attributes base on
# the static variables.
sub copyCEValues{
    # Copies every attribute of the static CE record $staticDn (%StaticCE)
    # into the CE record $dn (%AllCE) and raises the object class flag that
    # belongs to the attribute name.  CESEBIND entries are appended instead
    # of replaced.  Three attributes are mirrored into globals:
    #   GlueCEInfoLRMSVersion        -> $LrmsVersion
    #   GlueCEPolicyMaxWallClockTime -> $maxWallTimeSeconds
    #   GlueCEPolicyMaxCPUTime       -> $maxCpuTimeSeconds
    my ($staticDn, $dn) = @_;
    while (my ($key,$data) = each %{$StaticCE{$staticDn}}){
        if    ($key eq "GlueCEInfoLRMSVersion")        { $LrmsVersion = $data; }
        elsif ($key eq "GlueCEPolicyMaxWallClockTime") { $maxWallTimeSeconds = $data; }
        elsif ($key eq "GlueCEPolicyMaxCPUTime")       { $maxCpuTimeSeconds = $data; }

        if ($key eq "CESEBIND"){
            push @{$AllCE{$dn}{CESEBIND}}, $_ for @{$StaticCE{$staticDn}{CESEBIND}};
        } else {
            $AllCE{$dn}{$key} = $data;
        }

        # set Object Class Flags
        $AllCE{$dn}{OBJECTCLASS_GlueCEInfo} = 1, next
            if $key =~ /^GlueCEInfo/;
        $AllCE{$dn}{OBJECTCLASS_GlueCEState} = 1, next
            if $key =~ /^GlueCEState/;
        $AllCE{$dn}{OBJECTCLASS_GlueCEPolicy} = 1, next
            if $key =~ /^GlueCEPolicy/;
        $AllCE{$dn}{OBJECTCLASS_GlueCEAccessControlBase} = 1, next
            if $key =~ /^GlueCEAccessControlBase/;
        $AllCE{$dn}{OBJECTCLASS_GlueInformationService} = 1, next
            if $key =~ /^GlueInformationServiceURL/;
    }
}

=head2 cesebindsToQueue

Append ce se bindings specified by commandline tuples/triples
which are stored in Array CESEBinds.

=cut

# Every element of @CESEBinds is an array ref (queue regex, se [, directory]).
sub cesebindsToQueue{
    for (@CESEBinds){
        my ($queueRegEx, $se, $dir)=@$_;
        foreach $queue (grep {/$queueRegEx/} @AllQueues){
            &storeCESEBind($queue,$se, $dir);
        }
    }
}

=head2 jobsToQueue

For each CE add all job records to a CE when the jobs queue and the CEName
are equal.

=cut

# Appends the ids of all jobs in %Jobs whose QUEUE equals the queue name to
# the JOBS list of the queue's CE record in %AllCE.
sub jobsToQueue{
    my $dn="";
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
        # a job record without QUEUE belongs to no CE; testing defined()
        # first avoids an "uninitialized value" warning under -w
        foreach $job (grep { defined $Jobs{$_}{QUEUE} &&
                             $Jobs{$_}{QUEUE} eq $allQueue } keys %Jobs) {
            push @{$AllCE{$dn}{JOBS}}, $job;
        }
    }
}

=head2 nodesToQueue

For each CE add all node records to a CE that have a QUEUE flag for the CEName.

=cut

# Does nothing for condor; otherwise appends every node of %Nodes that
# carries a QUEUE flag for the queue to the NODES list of its CE record.
sub nodesToQueue{
    my $dn = "";
    return if ($Lrms eq "condor");
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
        foreach $node (grep {exists $Nodes{$_}{QUEUE}{$allQueue}} keys %Nodes){
            push @{$AllCE{$dn}{NODES}}, $node;
        }
    }
}

=head2 glueCEToQueue

For each CE add the GlueCE attributes to the CE.
=cut

# Sets GlueCEUniqueID, GlueCEName and GlueCEHostingCluster of every queue's
# CE record unless the attribute already exists.
sub glueCEToQueue{
    my $dn="";
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
        $AllCE{$dn}{GlueCEUniqueID} = sprintf("%s-%s", &getCeid(), $allQueue)
            unless exists $AllCE{$dn}{GlueCEUniqueID};
        $AllCE{$dn}{GlueCEName} = $allQueue
            unless exists $AllCE{$dn}{GlueCEName};
        $AllCE{$dn}{GlueCEHostingCluster} = $Cluster
            unless exists $AllCE{$dn}{GlueCEHostingCluster};
    }
}

=head2 glueKeysToQueue

For all CEs add the hostingclusters cluster id to the queues GlueForeignKeys.

=cut

# Adds "GlueClusterUniqueID=$Cluster" to the GlueForeignKey list of every
# queue's CE record that has no GlueClusterUniqueID key yet.
sub glueKeysToQueue{
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
        unless (grep /^GlueClusterUniqueID=/, @{$AllCE{$dn}{GlueForeignKey}}){
            push @{$AllCE{$dn}{GlueForeignKey}}, "GlueClusterUniqueID=$Cluster";
            $AllCE{$dn}{OBJECTCLASS_GlueKey}=1;
        }
    }
}

=head2 glueCEInfoToQueue

For each CE add the GlueCEInfo attributes to the CE.

Count CPUs of each of the queues nodes.

=cut

# Sets host name, gatekeeper port, LRMS type/version and the total number
# of CPUs of every queue's CE record (existing attributes are kept).
#   pbs:    sum of TOTALCPUS over all nodes
#   lsf:    sum of TOTALCPUS over the nodes flagged for the queue
#   condor: sum of the Cpus values printed by condor_status -long
sub glueCEInfoToQueue{
    my $dn = "";
    my $cpus = 0;
    foreach $allQueue (@AllQueues){
        $cpus = 0;
        $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
        $AllCE{$dn}{OBJECTCLASS_GlueCEInfo} = 1;
        if (&getCeid() =~ /(\S+):(\d+)/){
            $AllCE{$dn}{GlueCEInfoHostName} = $1
                unless exists $AllCE{$dn}{GlueCEInfoHostName};
            $AllCE{$dn}{GlueCEInfoGatekeeperPort} = $2
                unless exists $AllCE{$dn}{GlueCEInfoGatekeeperPort};
        }
        $AllCE{$dn}{GlueCEInfoLRMSType} = $Lrms
            unless exists $AllCE{$dn}{GlueCEInfoLRMSType};
        $AllCE{$dn}{GlueCEInfoLRMSVersion} = $LrmsVersion
            unless exists $AllCE{$dn}{GlueCEInfoLRMSVersion};

        if ($Lrms eq "pbs"){
            $cpus += $Nodes{$_}{TOTALCPUS}
                for grep {defined $Nodes{$_}{TOTALCPUS}} keys %Nodes;
        } elsif ($Lrms eq "lsf"){
            # add cpus if queue uses $node (nodes without a cpu count are
            # skipped, as in the pbs branch)
            foreach $node (grep { exists $Nodes{$_}{QUEUE}{$allQueue} } keys %Nodes){
                $cpus += $Nodes{$node}{TOTALCPUS} if defined $Nodes{$node}{TOTALCPUS};
            }
        } elsif ($Lrms eq "condor") {
            $Server=$CondorHosts{$allQueue};
            $cpus+=$_ for
                `"$CbsBinPath"condor_status -long -pool $Server 2>&1` =~ /Cpus\s+=\s+(\d+)/gi;
        }
        $AllCE{$dn}{GlueCEInfoTotalCPUs} = $cpus
            unless exists $AllCE{$dn}{GlueCEInfoTotalCPUs};
    }
}

=head2 glueCEPolicyToQueue

For each CE add the GlueCEPolicy attributes to the CE.

B<PBS:> Process 'qstat -Q -f queue@server' for policy information.

 Get GlueCEPolicyPriority from line
     Priority = aPriority
 Get GlueCEPolicyMaxTotalJobs from line
     max_queuable = aMaxQueuable
 Get GlueCEPolicyMaxRunningJobs from line
     max_running = aMaxRunning
 Get GlueCEMaxCPUTime and GlueCEMaxWallClockTime from line
     resources_max.cput = aCPUTime
     resources_max.walltime = aWallClockTime
 (for full description and fallbacks see README)

B<LSF:> Process 'bqueues ClusterParam -l' for policy information.

 Get GlueCEMaxCPUTime and GlueCEMaxWallClockTime from line following
     CPULIMIT RUNLIMIT
 and GlueCEPolicyPriority and GlueCEPolicyMaxRunningJobs from line following
     PRIO NICE STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SSUSP USUSP RSV
 We also set GlueCEPolicyMaxTotalJobs to GlueCEPolicyMaxRunningJobs.
 (for full description and fallbacks see README)

B<Condor:> Process condor_status and condor_config_val for policy information.

 Get GlueCEPolicyMax[CPU|Wall]Time from commandline option or default value.
 Get GlueCEPolicyMaxRunningJobs by counting machines in condor_status command.
 Get GlueCEPolicyMaxTotalJobs as product of CondorMaxTotalJobsFactor and
 GlueCEPolicyMaxRunningJobs.
 (if defined CondorMaxTotalJobsFactor is dynamic
 `condor_config_val MAX_JOBS_RUNNING`).
 Get GlueCEPolicyPriority from $PriorityUndefined variable.

=cut

# Attributes that already exist in a CE record (e.g. from the static config
# file) are never overwritten.
sub glueCEPolicyToQueue{
    my $dn = "";
    my $queueName = "";
    my $maxRunning = "-";
    my $maxQueuable = "-";

    if ($Lrms eq "pbs"){
        foreach $allQueue (@AllQueues){
            $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
            $foundOneOrMoreQueues=0;
            open QUEUES, "${CbsBinPath}qstat -Q -f $allQueue$ServerParam 2>&1 |" or
                die ("glueCEPolicyToQueue: could not open qstat.\n");
            # parse queue
            while (<QUEUES>) {
                # if error due to unknown queue, leave the loop
                warn ($_), last if /unknown queue/i;
                # We found a queue!
                if (/^Queue:\s+\S+/) {
                    #INIT
                    # initialization of dynamic attributes
                    $maxQueuable = "-";
                    $maxRunning = "-";
                    $priority = "-";
                    $maxCpuTime = "-";
                    $maxWallTime = "-";
                    # save queue information
                    $foundOneOrMoreQueues = 1;
                }
                #FETCH PARAM
                # Parse different values if we are interested in this queue.
                $priority = $1, next if /^\s+Priority\s+=\s+(\S+)/i;
                $maxQueuable = $1, next if /^\s+max_queuable\s+=\s+(\S+)/i;
                $maxRunning = $1, next if /^\s+max_running\s+=\s+(\S+)/i;
                # resources_max.* is only taken while nothing else was seen,
                # resources_default.* always replaces the current value
                if (/^\s+resources_max\.cput\s+=\s+(\S+)/i) {
                    $maxCpuTime = $1 if $maxCpuTime eq "-";
                    next;
                }
                $maxCpuTime = $1, next if /^\s+resources_default\.cput\s+=\s+(\S+)/i;
                if (/^\s+resources_max\.walltime\s+=\s+(\S+)/i){
                    $maxWallTime = $1 if $maxWallTime eq "-";
                    next;
                }
                $maxWallTime = $1, next if /^\s+resources_default\.walltime\s+=\s+(\S+)/i;
            }
            close QUEUES;

            #STORE QUEUE DATA
            if ($foundOneOrMoreQueues) {
                &storeMaxCpuAndWallTime($dn,$maxCpuTime,$maxWallTime);
                $AllCE{$dn}{OBJECTCLASS_GlueCEPolicy} = 1;
                $AllCE{$dn}{GlueCEPolicyPriority} =
                    ($priority ne "-") ? $priority : $PriorityUndefined
                    unless exists $AllCE{$dn}{GlueCEPolicyPriority};
                $AllCE{$dn}{GlueCEPolicyMaxRunningJobs} =
                    ($maxRunning ne "-") ? $maxRunning : $MaxRunningUndefined
                    unless exists $AllCE{$dn}{GlueCEPolicyMaxRunningJobs};
                $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} =
                    ($maxQueuable ne "-") ? $maxQueuable : $MaxQueuableUndefined
                    unless exists $AllCE{$dn}{GlueCEPolicyMaxTotalJobs};
            }
        }
    }

    if ($Lrms eq "lsf"){
        $dn="";
        my %limits;     # stores xxxLIMIT info
        # no difference in lsf
        $maxRunning = \$maxQueuable;
        open BQUEUES, "$CbsBinPath" . "bqueues -l 2>&1 |" or
            die "glueCEPolicyToQueue: could not open bqueues.\n";
        while (<BQUEUES>) {
            if (/^QUEUE:\s+(\S+)/) {
                $queueName = $1;
                # INIT
                $dn = &getDnCEString(&getCeid(),$queueName,$MdsArg);
                %limits = ();
                $priority = "-";
                $maxQueuable = "-";
                next;
            }
            # FETCH PARAMS
            # skip lines until next queue record
            next if $queueName eq "";
            if (/^PRIO\s+NICE\s+STATUS/) {
                # the values stand on the line below the header; keep the
                # "-" defaults when that line does not have the expected form
                my $valueLine = <BQUEUES>;
                ($priority,$maxQueuable) = ($1,$2)
                    if defined $valueLine &&
                       $valueLine =~ /^\s+(\d+)\s+\d+\s+\S+\s+(\S+)/;
                next;
            }
            if (/\w+LIMIT/) {
                chomp;
                # find last attributes for this queue
                my (@limit_list_names) = split('\s\s+',$_);
                $_ = <BQUEUES>;
                $_ = "" unless defined $_;
                chomp;
                my (@limit_list_values) = split('\s\s+',$_);
                for(my $i=0;$i<@limit_list_names;$i++) {
                    my $limit_name = $limit_list_names[$i];
                    $limit_name =~ s/^\s+//;
                    $limit_name =~ s/\s+$//;
                    my $limit_value =
                        defined $limit_list_values[$i] ? $limit_list_values[$i] : "";
                    $limit_value =~ s/^\s+//;
                    $limit_value =~ s/\s+$//;
                    if ($limit_name ne '' &&
                        $limit_value =~ /^\S+\s+min\s+of\s+\S+$/) {
                        $limits{$limit_name} = $limit_value;
                    }
                }
            }
            next unless /^SCHEDULING PARAMETERS/;

            # store attributes
            $limits{'CPULIMIT'} = "" if !defined $limits{'CPULIMIT'};
            $limits{'RUNLIMIT'} = "" if !defined $limits{'RUNLIMIT'};
            $AllCE{$dn}{OBJECTCLASS_GlueCEPolicy} = 1;
            &storeMaxCpuAndWallTime($dn,$limits{'CPULIMIT'},$limits{'RUNLIMIT'});
            $AllCE{$dn}{GlueCEPolicyPriority} = $priority
                unless exists $AllCE{$dn}{GlueCEPolicyPriority};
            $AllCE{$dn}{GlueCEPolicyMaxRunningJobs} =
                ($$maxRunning ne "-") ? $$maxRunning : $MaxRunningUndefined
                unless exists $AllCE{$dn}{GlueCEPolicyMaxRunningJobs};
            $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} =
                ($maxQueuable ne "-") ? $maxQueuable : $MaxQueuableUndefined
                unless exists $AllCE{$dn}{GlueCEPolicyMaxTotalJobs};
            $queueName = "";
        }
        close BQUEUES;
    }

    if ($Lrms eq "condor"){
        foreach $allQueue (@AllQueues){
            $Server=$CondorHosts{$allQueue};
            $dn = &getDnCEString(&getCeid(),$allQueue,$MdsArg);
            $AllCE{$dn}{OBJECTCLASS_GlueCEPolicy} = 1;
            $AllCE{$dn}{GlueCEPolicyMaxRunningJobs} =
                `"$CbsBinPath"condor_status -pool $Server` =~ /\s+Total\s+(\d+)/ ?
                $1 : $MaxRunningUndefined
                unless defined $AllCE{$dn}{GlueCEPolicyMaxRunningJobs};
            # convert maxcputime & maxwalltime to seconds, int values
            &storeMaxCpuAndWallTime($dn,$maxCpuTime,$maxWallTime);
            $AllCE{$dn}{GlueCEPolicyPriority} = $PriorityUndefined
                unless exists $AllCE{$dn}{GlueCEPolicyPriority};
            $AllCE{$dn}{GlueCEPolicyMaxCPUTime} = $maxCpuTimeSeconds
                unless exists $AllCE{$dn}{GlueCEPolicyMaxCPUTime};
            $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} = $maxWallTimeSeconds
                unless exists $AllCE{$dn}{GlueCEPolicyMaxWallClockTime};

            #maxTotalJobs = maxRunningJobs * CondorMaxTotalJobsFactor
            #CondorMaxTotalJobsFactor = condor_config_val MAX_JOBS_RUNNING
            # The factor is computed on its own: written as
            # "a * cmd =~ /re/ ? $1 : f" the multiplication would only be
            # part of the condition and the product would never be stored.
            unless (defined $AllCE{$dn}{GlueCEPolicyMaxTotalJobs}) {
                my $factor =
                    (`"$CbsBinPath"condor_config_val -pool $Server MAX_JOBS_RUNNING 2>/dev/null` =~ /^(\d+)/)
                    ? $1 : $CondorMaxTotalJobsFactor;
                $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} =
                    $AllCE{$dn}{GlueCEPolicyMaxRunningJobs} * $factor;
            }
        }
    }
}

# This procedure stores maxWall-, maxCpuTime values
#   $dn          - key of the CE record in %AllCE
#   $maxCpuTime  - cpu time limit as reported by the batch system
#   $maxWallTime - wall clock limit as reported by the batch system
# The limits are converted to seconds (convertHhMmSs for pbs and condor,
# convertMinutes for lsf) and left in $maxCpuTimeSeconds and
# $maxWallTimeSeconds.  GlueCEPolicyMaxCPUTime and
# GlueCEPolicyMaxWallClockTime are only set when they do not exist yet.
sub storeMaxCpuAndWallTime {
    my ($dn,$maxCpuTime,$maxWallTime) = @_;

    if ($Lrms eq "pbs" ){
        $maxCpuTimeSeconds = &convertHhMmSs($maxCpuTime);
        $maxWallTimeSeconds = &convertHhMmSs($maxWallTime);
        # cpu time: queue value, else server value, else the default
        unless (exists $AllCE{$dn}{GlueCEPolicyMaxCPUTime}) {
            $AllCE{$dn}{GlueCEPolicyMaxCPUTime} =
                ($maxCpuTimeSeconds > 0) ? $maxCpuTimeSeconds :
                ($MaxCPUTimeServer ne "-") ? $MaxCPUTimeServer :
                $MaxCPUTimeUndefined;
        }
        # wall time: queue value, else server value, else the cpu time
        unless (exists $AllCE{$dn}{GlueCEPolicyMaxWallClockTime}) {
            $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} =
                ($maxWallTimeSeconds > 0) ? $maxWallTimeSeconds :
                ($MaxWallTimeServer ne "-") ? $MaxWallTimeServer :
                (0 != $AllCE{$dn}{GlueCEPolicyMaxCPUTime}) ?
                $AllCE{$dn}{GlueCEPolicyMaxCPUTime} :
                $MaxCPUTimeUndefined;
        }
    } elsif ( $Lrms eq "condor") {
        $maxCpuTimeSeconds = &convertHhMmSs($maxCpuTime);
        $maxWallTimeSeconds = &convertHhMmSs($maxWallTime);
        unless (exists $AllCE{$dn}{GlueCEPolicyMaxCPUTime}) {
            $AllCE{$dn}{GlueCEPolicyMaxCPUTime} =
                ($maxCpuTimeSeconds > 0) ? $maxCpuTimeSeconds : $MaxCPUTimeUndefined;
        }
        unless (exists $AllCE{$dn}{GlueCEPolicyMaxWallClockTime}) {
            $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} =
                ($maxWallTimeSeconds > 0) ? $maxWallTimeSeconds : $MaxCPUTimeUndefined;
        }
    } elsif ($Lrms eq "lsf") {
        $maxCpuTimeSeconds = &convertMinutes($maxCpuTime);
        $maxWallTimeSeconds= &convertMinutes($maxWallTime);
        unless (exists $AllCE{$dn}{GlueCEPolicyMaxCPUTime}) {
            $AllCE{$dn}{GlueCEPolicyMaxCPUTime} =
                ($maxCpuTimeSeconds > 0) ? $maxCpuTimeSeconds : $MaxCPUTimeUndefined;
        }
        # wall time: queue value, else the cpu time
        unless (exists $AllCE{$dn}{GlueCEPolicyMaxWallClockTime}) {
            $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} =
                ($maxWallTimeSeconds > 0) ? $maxWallTimeSeconds :
                (0 != $AllCE{$dn}{GlueCEPolicyMaxCPUTime}) ?
                $AllCE{$dn}{GlueCEPolicyMaxCPUTime} :
                $MaxCPUTimeUndefined;
        }
    }
}

# convert minutes into seconds
# "<n> min ..." yields n*60; "-", an empty or a missing value yields 0;
# every other value is returned unchanged.
sub convertMinutes {
    my $minute = shift || 0;
    return ($minute =~ /(\S+) min/) ? $1 * 60 :
           ($minute ne "-") ? $minute : 0;
}

=head2 glueCEStateToQueue

For each CE add the GlueCEState attributes to the CE.

=for html

see subprocedures: checkImmediateJobStart, checkIllegalValues

B<PBS:> Process the 'qstat -Q -f queue@server' command.

 GlueCEStateFreeCPUs is the sum of the free cpus of all nodes.
 Get GlueCEStateRunningJobs from line
     state_count = ... Running:num_running Exiting:num_exiting
 Get GlueCEStateTotalJobs from line
     total_jobs = num_total_jobs
 GlueCEStateWaitingJobs is the difference between total and running jobs.
 For GlueCEStateWorstResponseTime & GlueCEStateEstimatedResponseTime see README.
 Get GlueCEStateStatus by processing the lines
     enabled = is_enabled
     started = is_started

B<LSF:> Process 'bqueues -l -m cluster'.

 GlueCEStateFreeCPUs is the sum of the free cpus of all nodes.
 Get GlueCEStateRunningJobs, GlueCEStateStatus, GlueCEStateTotalJobs from
 the line following
     PRIO NICE STATUS MAX JL/U JL/P JL/H NJOBS PEND RUN SSUSP USUSP RSV
 GlueCEStateWaitingJobs is the difference between total and running jobs.
 For GlueCEStateWorstResponseTime & GlueCEStateEstimatedResponseTime see README.
 Get GlueCEStateStatus by processing the lines
     enabled = is_enabled
     started = is_started

=for html

see: setLSFCPUPower, setLsfRemainingResponse

B<Condor:> Process 'condor_q -global -pool $Server'.

 Get GlueCEStateFreeCPUs from command `condor_status -avail -long`
 counting Cpus.
 Get GlueCEStateRunningJobs, GlueCEStateTotalJobs, GlueCEStateWaitingJobs
 from command `condor_q -global` looking for regexp
     numJobs jobs; numIdle idle, numRunning running.
 GlueCEStateStatus is defined as "Production".
 GlueCEStateWorstResponseTime and GlueCEStateEstimatedResponseTime are
 calculated in procedure computeResponseTimes with a static approximation.

=for html

see: computeResponseTimes

=cut

# Collects the dynamic GlueCEState* attributes (job counts, status, free
# CPUs, response times) of the queues and stores them in %AllCE.  Which
# batch system is queried depends on $Lrms ("pbs", "lsf" or "condor").
# Job counts, status and free CPUs are only written when the attribute does
# not exist yet; checkImmediateJobStart may reset the response times to 0.
# Takes no parameters; dies if the qstat/bqueues pipe cannot be opened.
sub glueCEStateToQueue{
    my $dn = "";
    my $queue = "";             # ce name
    my $status = "";            # Production, Closed, Queueing, Draining
    my $freeCpus = 0;           # free Cpu based on Nodes Computation power
    my $totalJobs = 0;
    my $running = 0;
    my $estimatedJobTime = "";

    if ($Lrms eq "pbs"){
        foreach $allQueue (@AllQueues){
            $dn = &getDnCEString(&getCeid,$allQueue,$MdsArg);
            $foundOneOrMoreQueues=0;
            open QUEUES, "${CbsBinPath}qstat -Q -f $allQueue$ServerParam 2>&1 |"
                or die "glueCEStateToQueue: could not open qstat.\n";
            # parse queue
            while(<QUEUES>) {
                # if error due to unknown queue, leave the loop
                warn ($_), last if /unknown queue/i;
                # We found a queue!
                if (/^Queue:\s+(\S+)/) {
                    #INIT
                    $totalJobs = 0;
                    $running = 0;
                    # Use a false value: the string "false" is true in Perl
                    # and would report the queue as enabled and started.
                    $enabled = $started = 0;
                    #save queue information
                    $foundOneOrMoreQueues = 1;
                    next;
                }
                #FETCH PARAM
                $totalJobs = $1, next if /total_jobs\s+=\s+(\S+)/i;
                $enabled = $1 eq "True", next if /enabled\s+=\s+(\S+)/i;
                $started = $1 eq "True", next if /started\s+=\s+(\S+)/i;
                $running= $1, next if /(\d+)\s+Exiting:\d+\s$/;
            }
            close QUEUES;

            #STORE QUEUE DATA
            if ($foundOneOrMoreQueues){
                # indicate to show Objectclasses of CEstate attributes
                $AllCE{$dn}{OBJECTCLASS_GlueCEState} = 1;
                $AllCE{$dn}{GlueCEStateTotalJobs} = $totalJobs
                    unless exists $AllCE{$dn}{GlueCEStateTotalJobs};
                $AllCE{$dn}{GlueCEStateRunningJobs} = $running
                    unless exists $AllCE{$dn}{GlueCEStateRunningJobs};
                $AllCE{$dn}{GlueCEStateWaitingJobs} =
                    $AllCE{$dn}{GlueCEStateTotalJobs} - $AllCE{$dn}{GlueCEStateRunningJobs}
                    unless exists $AllCE{$dn}{GlueCEStateWaitingJobs};
                $AllCE{$dn}{GlueCEStateStatus} =
                    ($enabled && $started) ? "Production" :
                    ($enabled)             ? "Queueing"   :
                    ($started)             ? "Draining"   : "Closed"
                    unless exists $AllCE{$dn}{GlueCEStateStatus};
                $freeCpus = 0;
                $freeCpus += $Nodes{$_}{FREECPUS} for keys %Nodes;
                $AllCE{$dn}{GlueCEStateFreeCPUs} = $freeCpus
                    unless exists $AllCE{$dn}{GlueCEStateFreeCPUs};
                &checkImmediateJobStart($dn);
                #&checkIlcheckIllegalValues($dn);
            }
        }

        # determine response Time based on remaining walltime of all running jobs
        # traverse all jobs, increase response time based on the jobs queue
        # This value is global, because all pbs queues share the same nodes.
        $avgResponseRunningJobs=0;
        foreach $job (keys %Jobs){
            exists $Jobs{$job}{"QUEUE"} || next;
            $jobQueue = $Jobs{$job}{"QUEUE"};
            $dn = &getDnCEString(&getCeid,$jobQueue,$MdsArg);
            # determine computing power of queue: Min (TotalCpus, MaxRunning)
            $maxCpus = min($AllCE{$dn}{GlueCEInfoTotalCPUs},
                           $AllCE{$dn}{GlueCEPolicyMaxRunningJobs});
            # A queue without computing power (or an unknown queue) cannot
            # contribute; skipping it also avoids a fatal division by zero.
            defined $maxCpus && $maxCpus > 0 || next;
            # 1st part: compute average responseTime and ignore queued jobs
            # if job defines a Walltime
            if (exists $Jobs{$job}{WALLTIME}){
                $avgResponseRunningJobs +=
                    ($Jobs{$job}{WALLTIME} - $Jobs{$job}{USEDWALLTIME}) / $maxCpus;
            }
            # else use queue walltime
            else {
                $avgResponseRunningJobs +=
                    ($AllCE{$dn}{GlueCEPolicyMaxWallClockTime} - $Jobs{$job}{USEDWALLTIME}) / $maxCpus;
            }
        }

        # determine response times
        foreach $dn (keys %AllCE){
            $idle = $AllCE{$dn}{GlueCEStateWaitingJobs};
            # determine computing power of queue: Min (TotalCpus, MaxRunning)
            $maxCpus = &min($AllCE{$dn}{GlueCEInfoTotalCPUs},
                            $AllCE{$dn}{GlueCEPolicyMaxRunningJobs});
            $queuedWallTimeSumWorst=0;
            $queuedWallTimeSumEstimated=0;
            foreach $ce (keys %AllCE){
                $ceIdle = $AllCE{$ce}{GlueCEStateWaitingJobs};
                $leveledIdle = &min($ceIdle,$idle);
                # all queued jobs must be submitted before new Job is assigned to a node
                $queuedWallTimeSumWorst +=
                    $ceIdle * $AllCE{$ce}{GlueCEPolicyMaxWallClockTime};
                # We assume a fair round robin mechanism, all other queues are allowed
                # to submit a queued job before the queue can submit another one.
                # We also assume that the queued jobs use only a certain factor of
                # the maxWallClockTime.
                $queuedWallTimeSumEstimated +=
                    $leveledIdle * $AllCE{$ce}{GlueCEPolicyMaxWallClockTime}
                    * $MaxWallClockTimeConsumtionFactor;
            }
            if ($maxCpus > 0 ) {
                $estimatedResponse = $avgResponseRunningJobs +
                    $queuedWallTimeSumEstimated * $MaxWallClockTimeConsumtionFactor / $maxCpus;
                $worstResponse = $avgResponseRunningJobs +
                    $queuedWallTimeSumWorst / $maxCpus;
            } else {
                $estimatedResponse = $EstimatedResponseUndefined;
                $worstResponse = $WorstResponseUndefined;
            }
            $AllCE{$dn}{GlueCEStateWorstResponseTime} = max (floor ($worstResponse),0)
                unless exists $AllCE{$dn}{GlueCEStateWorstResponseTime};
            $AllCE{$dn}{GlueCEStateEstimatedResponseTime} = max (floor ($estimatedResponse),0)
                unless exists $AllCE{$dn}{GlueCEStateEstimatedResponseTime};
        }
    } elsif ($Lrms eq "lsf"){
        $queue="";
        $dn="";
        $foundOneOrMoreQueues=0;
        open BQUEUES, "$CbsBinPath" . "bqueues -l 2>&1 |"
            or die "glueCEStateToQueue: could not open bqueues.\n";
        while(<BQUEUES>) {
            if (/^QUEUE:\s+(\S+)/) {
                $queueName = $1;
                # INIT
                $queue = $queueName;
                $dn = &getDnCEString(&getCeid,$queue,$MdsArg);
                $status = "-";
                $freeCpus = 0;
                $running = "-";
                $totalJobs = "-";
                $idle = "-";
                $estimatedResponse = "-";
                $worstResponse = "-";
            }
            # FETCH PARAMS
            next if ! /^PRIO\s+NICE\s+STATUS/;
            # the values belonging to the header are on the following line
            $nextLine = <BQUEUES>;
            defined $nextLine || last;
            if ($nextLine =~ /^\s+(\d+)\s+\d+\s+(\S+)\s+(\S+)\s+\S+\s+\S+\s+\S+\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
                $priority = $1;
                $status = $2;
                $totalJobs = $4;
                $running = $6;
                # Recompute both flags for every queue; otherwise one
                # Open:Active queue would make all later queues look the same.
                $enabled = ($status =~ /Open/) ? 1 : 0;
                $started = ($status =~ /Active/) ? 1 : 0;
                $idle = $totalJobs-$running;

                # STORE QUEUE DATA
                $AllCE{$dn}{OBJECTCLASS_GlueCEState} = 1;
                $AllCE{$dn}{GlueCEStateStatus} =
                    ($enabled && $started) ? "Production" :
                    ($enabled)             ? "Queueing"   :
                    ($started)             ? "Draining"   : "Closed"
                    unless exists $AllCE{$dn}{GlueCEStateStatus};
                $AllCE{$dn}{GlueCEStateTotalJobs} = $totalJobs
                    unless exists $AllCE{$dn}{GlueCEStateTotalJobs};
                $AllCE{$dn}{GlueCEStateRunningJobs} = $running
                    unless exists $AllCE{$dn}{GlueCEStateRunningJobs};
                $AllCE{$dn}{GlueCEStateWaitingJobs} =
                    $AllCE{$dn}{GlueCEStateTotalJobs} - $AllCE{$dn}{GlueCEStateRunningJobs}
                    unless exists $AllCE{$dn}{GlueCEStateWaitingJobs};
                # determine Free CPUs
                foreach $node (grep { exists $Nodes{$_}{QUEUE}{$queue} } keys %Nodes) {
                    $freeCpus += $Nodes{$node}{FREECPUS};
                }
                $AllCE{$dn}{GlueCEStateFreeCPUs}=floor ($freeCpus)
                    unless exists $AllCE{$dn}{GlueCEStateFreeCPUs};
                #&checkIlcheckIllegalValues($dn);
            }
        }
        close BQUEUES;
        &setLsfCPUPower;
        &setLsfRemainingResponse;

        # compute response times based on queued jobs of a queue
        foreach $dn (keys %AllCE){
            &checkImmediateJobStart($dn);
            unless (exists $AllCE{$dn}{GlueCEStateEstimatedResponseTime}){
                $estimatedJobTime = $AllCE{$dn}{GlueCEPolicyMaxWallClockTime}*
                    $MaxWallClockTimeConsumtionFactor;
                if ($AllCE{$dn}{CPUPOWER} > 0){
                    $AllCE{$dn}{GlueCEStateEstimatedResponseTime} =
                        max( floor ( $AllCE{$dn}{RESPONSETIME} +
                                     $AllCE{$dn}{GlueCEStateWaitingJobs} *
                                     $estimatedJobTime / $AllCE{$dn}{CPUPOWER}), 0);
                } else {
                    $AllCE{$dn}{GlueCEStateEstimatedResponseTime} = $EstimatedResponseUndefined;
                }
            }
            unless (exists $AllCE{$dn}{GlueCEStateWorstResponseTime}){
                if ($AllCE{$dn}{CPUPOWER} > 0){
                    $AllCE{$dn}{GlueCEStateWorstResponseTime} =
                        max( floor ( ($AllCE{$dn}{GlueCEStateRunningJobs} +
                                      $AllCE{$dn}{GlueCEStateWaitingJobs}) *
                                     $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} /
                                     $AllCE{$dn}{CPUPOWER}), 0);
                } else {
                    $AllCE{$dn}{GlueCEStateWorstResponseTime} = $WorstResponseUndefined;
                }
            }
        }
    }

    if ($Lrms eq "condor"){
        foreach $allQueue (@AllQueues){
            $Server=$CondorHosts{$allQueue};
            $dn = &getDnCEString(&getCeid, $allQueue, $MdsArg);
            $AllCE{$dn}{OBJECTCLASS_GlueCEState} = 1;
            #set GlueCEState Jobattributes
            # (captures: total jobs, idle jobs, running jobs)
            @_= `"$CbsBinPath"condor_q -global -pool $Server 2>&1` =~
                /(\d+) jobs; (\d+) idle, (\d+) running/;
            $AllCE{$dn}{GlueCEStateTotalJobs} = (@_) ? $_[0] : 0
                unless exists $AllCE{$dn}{GlueCEStateTotalJobs};
            $AllCE{$dn}{GlueCEStateRunningJobs} = (@_) ? $_[2] : 0
                unless exists $AllCE{$dn}{GlueCEStateRunningJobs};
            $AllCE{$dn}{GlueCEStateWaitingJobs} = (@_) ? $_[1] : 0
                unless exists $AllCE{$dn}{GlueCEStateWaitingJobs};
            #level maxRunning and maxTotalJobs if necessary
            #&checkIlcheckIllegalValues($dn);
            $AllCE{$dn}{GlueCEStateStatus} = "Production"
                unless exists $AllCE{$dn}{GlueCEStateStatus};
            $freeCpus = 0;
            $freeCpus += $_ for
                `"$CbsBinPath"condor_status -avail -long -pool $Server 2>&1` =~
                /Cpus\s+=\s+(\d+)/gi;
            $AllCE{$dn}{GlueCEStateFreeCPUs}=$freeCpus
                unless exists $AllCE{$dn}{GlueCEStateFreeCPUs};
            # set estimated and worst responsetime
            &computeResponseTimes($dn);
            #check for immediate jobstart if there is enough available computing power
            &checkImmediateJobStart($dn);
        }
    }
}

=for html

=over 4

=item checkImmediateJobStart(dn)

All Lrms check if they can immediately run a job.
If we have free cpus and the queue allows more jobs to run then we can use
a bypass and set the response times to 0.
Also Idle Jobs must be less than GlueCEPolicyMaxTotalJobs * IdleJobThreshold,
because too high values of idle jobs would indicate that an average job would
not come to execution even if there were free cpus to process the job.

=for html

see: glueCEStateToQueue

=cut

# Resets both response times of the CE record $dn in %AllCE to 0 when the
# number of free CPUs exceeds GlueCEInfoTotalCPUs * $IdleJobThreshold and
# fewer jobs are running than GlueCEPolicyMaxRunningJobs allows.
sub checkImmediateJobStart {
    my $dn=shift;
    if ($AllCE{$dn}{GlueCEStateFreeCPUs} >
            $AllCE{$dn}{GlueCEInfoTotalCPUs} * $IdleJobThreshold
        && $AllCE{$dn}{GlueCEStateRunningJobs} <
            $AllCE{$dn}{GlueCEPolicyMaxRunningJobs}) {
        $AllCE{$dn}{GlueCEStateWorstResponseTime} = 0;
        $AllCE{$dn}{GlueCEStateEstimatedResponseTime} = 0;
    }
}

=for html

=item checkIllegalValues(dn)

All Lrms check if number of running jobs is greater than max_running jobs
and total jobs greater than max_total jobs. The max value is increased to
match the actual value of the CE specified by param dn.

=cut

# Raises GlueCEPolicyMaxRunningJobs / GlueCEPolicyMaxTotalJobs of the CE
# record $dn to the observed running / total job counts when those are larger.
sub checkIllegalValues{
    my $dn = shift;
    $AllCE{$dn}{GlueCEPolicyMaxRunningJobs} =
        &max($AllCE{$dn}{GlueCEPolicyMaxRunningJobs},
             $AllCE{$dn}{GlueCEStateRunningJobs});
    $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} =
        &max($AllCE{$dn}{GlueCEStateTotalJobs},
             $AllCE{$dn}{GlueCEPolicyMaxTotalJobs});
}

# returns the minimum of two values
# (numeric comparison; the second value is returned when both are equal)
sub min{
    my $mina=shift;
    my $minb=shift;
    return ($mina < $minb)? $mina : $minb;
}

=for html

=item setLsfCPUPower

We compute for every queue a value for the CPUPOWER attribute.
We traverse the queues nodes and accumulate the cpu power of the nodes.
Each queue can use 1/number_hosted_queues part of a cpu.
If all nodes are used exclusivly by a queue then CPUPOWER = TotalCpus.
=cut

# Sets $AllCE{dn}{CPUPOWER} for every record in %AllCE: for each node listed
# in the record's NODES array, min(MAXJOBS, TOTALCPUS) of that node divided
# by the number of queues the node hosts is added.  WASTEDCPUS is reset to 0.
sub setLsfCPUPower{
    # set effective CPUPOWER for each queue
    foreach my $dn (keys %AllCE){
        $AllCE{$dn}{CPUPOWER} = 0;
        $AllCE{$dn}{WASTEDCPUS} = 0;
        foreach my $node (@{$AllCE{$dn}{NODES}}){
            my $hostedQueues = scalar (keys %{$Nodes{$node}{QUEUE}});
            # a node without any hosted queue would cause a fatal division
            # by zero and cannot contribute computing power anyway
            $hostedQueues > 0 || next;
            $AllCE{$dn}{CPUPOWER} +=
                min ($Nodes{$node}{MAXJOBS}, $Nodes{$node}{TOTALCPUS})
                / $hostedQueues;
        }
    }
}

=for html

=item setLsfRemainingResponse(dn)

We compute the remaining wallClockTime for each queue

=cut

# Sets $AllCE{dn}{RESPONSETIME} for every record in %AllCE to the sum over
# the record's JOBS of (GlueCEPolicyMaxWallClockTime - USEDWALLTIME) / CPUPOWER.
# Jobs without USEDWALLTIME get USEDWALLTIME = 0 in %Jobs.  Jobs are skipped
# when CPUPOWER is not positive or the wall clock limit is already used up.
# Any argument is ignored: all queues are always processed.  (No shift is
# done, because the sub is called as "&setLsfRemainingResponse;" and would
# otherwise remove an element from the caller's @_.)
sub setLsfRemainingResponse{
    my $responseTime=0;
    # compute partial response time of a queue
    foreach my $dn (keys %AllCE){
        $AllCE{$dn}{RESPONSETIME}=0;
        foreach my $job (@{$AllCE{$dn}{JOBS}}){
            #assume job has not been started
            unless (exists $Jobs{$job}{USEDWALLTIME}) {
                $Jobs{$job}{USEDWALLTIME}=0;
            }
            $AllCE{$dn}{CPUPOWER} > 0 || next;
            $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} > $Jobs{$job}{USEDWALLTIME} || next;
            $responseTime =
                ($AllCE{$dn}{GlueCEPolicyMaxWallClockTime} - $Jobs{$job}{USEDWALLTIME})
                / $AllCE{$dn}{CPUPOWER};
            $AllCE{$dn}{RESPONSETIME} += $responseTime;
        }
    }
}

=for html

=item computeResponseTimes(dn)

This general formula is used if no jobinfo is used to compute estimated-
worstResponseTime.
We add the GlueCEStateWorstResponseTime and GlueCEStateEstimatedResponseTime
attribute to the CE specified by param dn.
=back

=cut

# Static approximation of the response times of the CE record $dn:
#   worst     = MaxWallClockTime * MaxTotalJobs / TotalCPUs
#   estimated = MaxWallClockTime * (Running/4 + Waiting/2) / TotalCPUs
# Values already defined are kept.  Without CPUs the "undefined" markers
# $WorstResponseUndefined / $EstimatedResponseUndefined are stored.
# The computed values are truncated to whole numbers, as the other response
# time computations in this file also store integers only.
sub computeResponseTimes {
    my $dn = shift;
    if ($AllCE{$dn}{GlueCEInfoTotalCPUs} != 0){
        unless (defined $AllCE{$dn}{GlueCEStateWorstResponseTime}){
            $AllCE{$dn}{GlueCEStateWorstResponseTime} = int(
                $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} *
                $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} /
                $AllCE{$dn}{GlueCEInfoTotalCPUs});
        }
        unless (defined $AllCE{$dn}{GlueCEStateEstimatedResponseTime}){
            $AllCE{$dn}{GlueCEStateEstimatedResponseTime} = int(
                $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} *
                ($AllCE{$dn}{GlueCEStateRunningJobs} / 4 +
                 $AllCE{$dn}{GlueCEStateWaitingJobs} / 2) /
                $AllCE{$dn}{GlueCEInfoTotalCPUs});
        }
    } else {
        $AllCE{$dn}{GlueCEStateWorstResponseTime} = $WorstResponseUndefined
            unless defined $AllCE{$dn}{GlueCEStateWorstResponseTime};
        $AllCE{$dn}{GlueCEStateEstimatedResponseTime} = $EstimatedResponseUndefined
            unless defined $AllCE{$dn}{GlueCEStateEstimatedResponseTime};
    }
}

=head2 computeExecutionQueueSizes

This procedure is used by rms and determines the suggested values
for execution queue attributes.

=cut

# Sums GlueCEPolicyMaxTotalJobs and both response times of all rms
# submission queues into $GlobalMaxTotalJobs, $GlobalEstimatedResponseTime
# and $GlobalWorstResponseTime.  A record of %AllCE belongs to a submission
# queue when its dn ends with "<queue>, mds-vo-name=<MdsArg>, o=grid".
sub computeExecutionQueueSizes {
    $GlobalMaxTotalJobs=0;
    $GlobalEstimatedResponseTime=0;
    $GlobalWorstResponseTime=0;
    my $mdsRemainder = "mds-vo-name=$MdsArg, o=grid";
    foreach my $queue (keys %AllCE) {
        foreach my $rmsSubmissionQueue (@RmsSubmissionQueues){
            # queue and VO names are literal text, not patterns
            $queue =~ /\Q$rmsSubmissionQueue\E, \Q$mdsRemainder\E$/ || next;
            $GlobalMaxTotalJobs += $AllCE{$queue}{GlueCEPolicyMaxTotalJobs};
            $GlobalEstimatedResponseTime += $AllCE{$queue}{GlueCEStateEstimatedResponseTime};
            $GlobalWorstResponseTime += $AllCE{$queue}{GlueCEStateWorstResponseTime};
            last;
        }
    }
}

=head2 rmsStatetoQueue

Add WP4 rms state information (submission/execution/normal) of a queue using
the RMSSTATE flag. Shared attributes of rms submission queues are recalculated,
since they depend on execution queues values.
=cut

# Tags every queue of @AllQueues in %AllCE with RMSSTATE "normal",
# "execution" (queue equals $RmsExecutionQueue) or "submission" (queue is
# listed in @RmsSubmissionQueues).  Submission queues are additionally
# recalculated by calculateSubmissionQueue.
sub rmsStatetoQueue{
    my $dn="";
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid,$allQueue,$MdsArg);
        $AllCE{$dn}{RMSSTATE}="normal";
        if ($allQueue eq $RmsExecutionQueue) {
            $AllCE{$dn}{RMSSTATE}="execution";
        }
        # the loop body runs at most once: only for a submission queue
        foreach (grep { $_ eq $allQueue } @RmsSubmissionQueues) {
            $AllCE{$dn}{RMSSTATE}="submission";
            &calculateSubmissionQueue($dn);
            last;
        }
    }
}

=over 4

=item calculateSubmissionQueue(dn)

Set running jobs of the CE specified by parameter dn to execution queues
running jobs.
Level maximums for running and total jobs.
Decrease wall clock time of a submission queue to execution queue value.

=back

=cut

# Recalculates the shared attributes of the submission queue record $dn from
# the record of the execution queue ($RmsExecutionQueue) and from the
# $Global* sums.  Does nothing when %AllCE has no record for $dn.
sub calculateSubmissionQueue{
    my $dn = shift;
    my $recordRmsQueue = $AllCE{&getDnCEString(&getCeid,$RmsExecutionQueue,$MdsArg)};
    exists $AllCE{$dn} || return;

    # running jobs are those of the execution queue
    $AllCE{$dn}{GlueCEStateRunningJobs}= $$recordRmsQueue{GlueCEStateRunningJobs};
    if ($AllCE{$dn}{GlueCEStateRunningJobs} > $AllCE{$dn}{GlueCEPolicyMaxRunningJobs}){
        $AllCE{$dn}{GlueCEPolicyMaxRunningJobs}= $$recordRmsQueue{GlueCEPolicyMaxRunningJobs};
    }

    # waiting and total jobs of the execution queue are added
    $AllCE{$dn}{GlueCEStateWaitingJobs} += $$recordRmsQueue{GlueCEStateWaitingJobs};
    $AllCE{$dn}{GlueCEStateTotalJobs} += $$recordRmsQueue{GlueCEStateTotalJobs};
    if ($AllCE{$dn}{GlueCEStateWaitingJobs} + $AllCE{$dn}{GlueCEStateRunningJobs}
            > $AllCE{$dn}{GlueCEStateTotalJobs}){
        $AllCE{$dn}{GlueCEStateTotalJobs} =
            $AllCE{$dn}{GlueCEStateWaitingJobs} + $AllCE{$dn}{GlueCEStateRunningJobs};
    }

    # time limits can not exceed those of the execution queue
    if ($AllCE{$dn}{GlueCEPolicyMaxWallClockTime} > $$recordRmsQueue{GlueCEPolicyMaxWallClockTime}){
        $AllCE{$dn}{GlueCEPolicyMaxWallClockTime} = $$recordRmsQueue{GlueCEPolicyMaxWallClockTime};
    }
    if ($AllCE{$dn}{GlueCEPolicyMaxCPUTime} > $$recordRmsQueue{GlueCEPolicyMaxCPUTime}){
        $AllCE{$dn}{GlueCEPolicyMaxCPUTime} = $$recordRmsQueue{GlueCEPolicyMaxCPUTime};
    }

    # max total jobs: the larger of the global sum and the execution queue value
    if ($GlobalMaxTotalJobs > $$recordRmsQueue{GlueCEPolicyMaxTotalJobs}){
        $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} = $GlobalMaxTotalJobs;
    } else {
        $AllCE{$dn}{GlueCEPolicyMaxTotalJobs} = $$recordRmsQueue{GlueCEPolicyMaxTotalJobs};
    }
    $AllCE{$dn}{GlueCEStateWorstResponseTime} = $GlobalWorstResponseTime;
    $AllCE{$dn}{GlueCEStateEstimatedResponseTime} = $GlobalEstimatedResponseTime;
}

=head2 activateQueues

Activate all queues set in '-queue' for printing using PRINTABLEQUEUE flag.

=cut

# Sets the PRINTABLEQUEUE flag of a queue record when no '-queue' list was
# given at all, when the queue is listed in @Queues or @RmsSubmissionQueues,
# or when the lrms is condor.
sub activateQueues{
    my $dn="";
    foreach $allQueue (@AllQueues){
        $dn = &getDnCEString(&getCeid,$allQueue,$MdsArg);
        $AllCE{$dn}{PRINTABLEQUEUE} = 1, next if ! @Queues;
        $AllCE{$dn}{PRINTABLEQUEUE} = 1, next if grep { $_ eq $allQueue } @Queues;
        $AllCE{$dn}{PRINTABLEQUEUE} = 1, next if grep { $_ eq $allQueue } @RmsSubmissionQueues;
        $AllCE{$dn}{PRINTABLEQUEUE} = 1, next if $Lrms eq "condor";
    }
}

=head1 Cluster Attributes Integration

We allow multiple sources of record definition, the admin may create
multiple cluster / subCluster / remoteFileystem records each with
different regular expressions. Therefore we have to unify these records.

=for html

see: unifyClusterData, addCEs, unifySubClusterData, unifyFileSystemData

=head2 unifyClusterData

This procedure resolves the regular expressions in %Clusters hash
and copies entries to %UnifiedCluster.

=cut

# Copies every valid record of %Clusters into %UnifiedCluster.
sub unifyClusterData {
    foreach $cl (keys %Clusters){
        &validCluster($cl) or next;
        &copyClusterValues($cl);
    }
}

# Returns true if the GlueClusterUniqueID part of the parameter (used as a
# regular expression, "-" if missing) matches the dn of the current cluster
# and the mds-vo-name part is either missing or accepted by validMds.
sub validCluster{
    my $dnString = shift;
    # cluster id and virtual organisation of the record, "-" if missing
    my $dnCl = ($dnString =~ /GlueClusterUniqueID=([^,\s]+)/) ? $1 : "-";
    my $mdsvo = ($dnString =~ /mds-vo-name=([^,\s]+)/) ? $1 : "-";
    my $cl = &getDnClString($Cluster, $MdsArg);
    return $cl =~ /$dnCl/ && (&validMds($dnString) || $mdsvo eq "-");
}

# returns the cluster dn string matching to (clID, Mds-name)
sub getDnClString{
    return sprintf("dn: GlueClusterUniqueID=%s, mds-vo-name=%s, o=grid",@_);
}

# copies entries from unresolved clusters hash to resolved unifiedCluster hash.
# All records end up under the dn of the current cluster ($Cluster, $MdsArg).
sub copyClusterValues{
    my $dnString = shift;
    my $dnCl = &getDnClString($Cluster, $MdsArg);
    while (($key,$data) = each %{$Clusters{$dnString}}) {
        $UnifiedCluster{$dnCl}{$key} = $data;
    }
}

=head2 addCEs

Add hosted CE's to the cluster.

=cut

# Adds the GlueCEUniqueID of every printable CE to the GlueClusterService
# and GlueForeignKey lists of its hosting cluster and supplies default
# values for GlueClusterUniqueID and GlueClusterName.
sub addCEs{
    my $cl="";
    foreach $cl (keys %UnifiedCluster){
        $UnifiedCluster{$cl}{GlueForeignKey}=[];
    }
    foreach $dn (keys %AllCE){
        exists $AllCE{$dn}{PRINTABLEQUEUE} || next;
        # add the queue to its clusters clusterservicelist
        $cl = &getDnClString($AllCE{$dn}{GlueCEHostingCluster}, $MdsArg);
        #deprecated attribute
        push @{$UnifiedCluster{$cl}{GlueClusterService}},
            $AllCE{$dn}{GlueCEUniqueID};
        push @{$UnifiedCluster{$cl}{GlueForeignKey}},
            "GlueCEUniqueID=". $AllCE{$dn}{GlueCEUniqueID};
        $UnifiedCluster{$cl}{OBJECTCLASS_GlueKey}=1;
        $UnifiedCluster{$cl}{GlueClusterUniqueID} = $Cluster
            unless exists $UnifiedCluster{$cl}{GlueClusterUniqueID};
        $UnifiedCluster{$cl}{GlueClusterName} = $Cluster
            unless exists $UnifiedCluster{$cl}{GlueClusterName};
    }
}

=head2 unifySubClusterData

This procedure resolves the regular expressions in %SubCluster hash
and copies entries to %UnifiedSubCluster.

=cut

# Copies every valid record of %SubCluster into %UnifiedSubCluster.
sub unifySubClusterData{
    foreach $scl (keys %SubCluster){
        # Copy data if either ce is specified directly or if $queue matches
        # a static queues regular expression.
        &validSubCluster($scl) || next;
        &copySubClusterValues($scl);
    }
}

# returns true if dnString contains a subClusterUniqueID and a matching or
# missing clusterUniqueID.
sub validSubCluster{
    my $dnString = shift;
    # subcluster and cluster id of the record, "-" if missing
    my $dnScl = ($dnString =~ /GlueSubClusterUniqueID=([^,\s]+)/) ? $1 : "-";
    my $cl = ($dnString =~ /GlueClusterUniqueID=([^,\s]+)/) ? $1 : "-";
    return $dnScl ne "-" && (&validCluster($dnString) || $cl eq "-");
}

# copies entries from unresolved subclusters hash to resolved
# unifiedSubCluster hash.
sub copySubClusterValues{
    my $dnString = shift;
    my $dnScl = "";
    my $cl = ($dnString =~ /GlueClusterUniqueID=([^,\s]+)/) ? $1 : "";
    my $scl = ($dnString =~ /GlueSubClusterUniqueID=([^,\s]+)/) ? $1 : "-";
    # subclusteruniqueid = clusteruniqueid
    if ($Wp1SubClusterMode) {
        $scl=$Cluster;
    }
    foreach (keys %UnifiedCluster){
        # test for the missing cluster id first, so that an empty pattern
        # is never evaluated
        next if $cl ne "" && $Cluster !~ /$cl/;
        $dnScl=&getDnSclString($scl, $Cluster, $MdsArg);
        while (($key,$data) = each %{$SubCluster{$dnString}}){
            $UnifiedSubCluster{$dnScl}{$key} = $data;
        }
    }
}

# returns the subcluster dn string matching to (sclID, clID, Mds-name)
sub getDnSclString{
    return sprintf("dn: GlueSubClusterUniqueID=%s, GlueClusterUniqueID=%s, mds-vo-name=%s, o=grid", @_);
}

# Adds the GlueChunkKey (cluster id) and the GlueKey objectclass flag to
# every record of %UnifiedSubCluster.
sub addSubClusterGlueKeys{
    for (keys %UnifiedSubCluster){
        #extract the ClusterID and add it to the chunk keys
        push @{$UnifiedSubCluster{$_}{GlueChunkKey}},
            (/(GlueClusterUniqueID=[^,\s]+)/)[0];
        $UnifiedSubCluster{$_}{OBJECTCLASS_GlueKey}=1;
    }
}

=head2 unifyFileSystemData

This procedure resolves the regular expressions in %FileSystems hash
and copies entries to %UnifiedFileSystem.

=cut

# Copies every valid record of %FileSystems into %UnifiedFileSystem.
sub unifyFileSystemData {
    foreach $fs (grep { &validFileSystem($_) } keys %FileSystems){
        # Copy data if either ce is specified directly or if $queue matches
        # a static queues regular expression.
        &copyFileSystemValues($fs);
    }
}

# returns true if parameter contains a GlueHostRemoteFileSystemName entry and
# either a matching subcluster or missing subcluster entry
sub validFileSystem{
    my $dnString = shift;
    my $fs = ($dnString =~ /GlueHostRemoteFileSystemName=([^,\s]+)/) ? $1 : "";
    my $scl = ($dnString =~ /GlueSubClusterUniqueID=([^,\s]+)/) ? $1 : "";
    return $fs ne "" && (&validSubCluster($dnString) || $scl eq "");
}

# Copies entries from unresolved fileSystems hash to resolved
# unifiedFileSystem hash.
sub copyFileSystemValues{
    my $dnString = shift;
    my $dnFs = "";
    my $scl = ($dnString =~ /GlueSubClusterUniqueID=([^,\s]+)/) ? $1 : "";
    my $fs = ($dnString =~ /GlueHostRemoteFileSystemName=([^,\s]+)/) ? $1 : "";
    # append filesystem record to each subcluster record
    foreach (keys %UnifiedSubCluster){
        # test for the missing subcluster id first, so that an empty pattern
        # is never evaluated
        next if $scl ne ""
            && $UnifiedSubCluster{$_}{GlueSubClusterUniqueID} !~ /$scl/;
        $dnFs=&getDnFSString($fs, $UnifiedSubCluster{$_}{GlueSubClusterUniqueID},
                             $Cluster, $MdsArg);
        while (($key,$data) = each %{$FileSystems{$dnString}}){
            $UnifiedFileSystem{$dnFs}{$key}=$data;
        }
    }
}

# returns the filesystem dn string matching to (fsName, sclID, clID, Mds-name)
sub getDnFSString{
    return sprintf("dn: GlueHostRemoteFileSystemName=%s, GlueSubClusterUniqueID=%s, GlueClusterUniqueID=%s, mds-vo-name=%s, o=grid",@_);
}

# Adds the GlueChunkKey (subcluster id) and the GlueKey objectclass flag to
# every record of %UnifiedFileSystem.
sub addFileSystemGlueKeys{
    for (keys %UnifiedFileSystem){
        #extract the SubClusterID and add it to the chunk keys
        push @{$UnifiedFileSystem{$_}{GlueChunkKey}},
            (/(GlueSubClusterUniqueID=[^,\s]+)/)[0];
        $UnifiedFileSystem{$_}{OBJECTCLASS_GlueKey}=1;
    }
}

=head1 Print Procedures

Each record type has got its own print procedure.

=for html

see: printCeInformation, printClusters, printSubclusters, printFileSystems, printHostInfo

=head2 printCeInformation

This procedure traverses the hash of all CE and calls the printCE procedure
if lrms, rms state information and the mds-vo-name of the CE apply to the
current environment. If we use WP4 rms queues the execution queue should
only be printed if set in '-queue'.

=cut

# Prints the record of every CE accepted by showCE.  For pbs and lsf a CE
# needs an RMSSTATE; the execution queue is only printed when no rms
# execution queue is configured ("-") or when it is listed in @Queues.
sub printCeInformation{
    my $queue = "";
    # all LRMS traverse the queues records
    foreach $ce (grep {&showCE ($_)} keys %AllCE){
        # get Queuename
        $queue = ($ce =~ /jobmanager-\S+-([^,\s]+)/) ? $1 : "";
        #CONDOR
        &printCE($ce), next if $Lrms eq "condor";
        if ($Lrms eq "pbs"|| $Lrms eq "lsf"){
            exists ($AllCE{$ce}{RMSSTATE}) || next;
            &printCE($ce), next if $RmsExecutionQueue eq "-";
            #normal queueing
            if ($AllCE{$ce}{RMSSTATE} eq "normal" ||
                $AllCE{$ce}{RMSSTATE} eq "submission"){
                # always print normal and submission queues
                &printCE($ce);
            } elsif ($AllCE{$ce}{RMSSTATE} eq "execution"){
                # print executionqueue if set in -queue
                &printCE($ce) if scalar (grep {$queue eq $_} @Queues);
            }
        }
    }
}

# Returns true if the dn $ce contains the mds-vo-name $MdsArg and
# "jobmanager-$Lrms" (case insensitive) and the CE is flagged as printable.
# Both values are matched as literal text, not as patterns.
sub showCE{
    my $ce=shift;
    return ($ce =~ /\Q$MdsArg\E/ &&
            $ce =~ /jobmanager-\Q$Lrms\E/i &&
            exists $AllCE{$ce}{PRINTABLEQUEUE});
}

=head2 printCE

This procedure prints the CE information.
We print all data for the ce specified by parameter $ce.

 1. print the dn
 2. print the objectclasses
    - print cetop,
    - ce
    - schemaversion
    - all other classes using the tags $AllCE{$ce}{OBJECTCLASS_class}
 3. print the attributes
    - schemaversion
    - remaining attributes in sorted order
 4.
print timestamps

=cut

# Prints the complete record of the CE $ce to STDOUT: dn, objectclasses,
# attributes grouped by their prefix, the information service URL(s) and
# the timestamps (which end the record with an empty line).
sub printCE{
    my $ce=shift;
    print "$ce\n";
    &printCEObjectClasses($ce);
    &printGlueCE($ce);
    &printGlueCEInfo($ce);
    &printGlueCEState($ce);
    &printGlueCEPolicy($ce);
    &printGlueCEAccessControlBase($ce);
    &printGlueForeignKey($ce);
    print "GlueInformationServiceURL: ", $AllCE{$ce}{GlueInformationServiceURL},"\n"
        if defined $AllCE{$ce}{GlueInformationServiceURL};
    print "GlueInformationServiceURL: ldap://$Cluster:$GrisPortArg/mds-vo-name=$MdsArg, o=grid\n"
        if $GrisPortArg ne "";
    &printTimestamps;
}

# Print the Computing Elements object classes
# Note only objectclasses of the found attributes will be printed.
# The flags for GlueCEAccessControlBase and GlueInformationService are set
# here, depending on @AuthorizedUsersList and $GrisPortArg.
sub printCEObjectClasses{
    my $ce=shift;
    if (%{$AllCE{$ce}}){
        print "ObjectClass: GlueCETop\n";
        print "ObjectClass: GlueCE\n";
        print "ObjectClass: GlueSchemaVersion\n";
    }
    $AllCE{$ce}{OBJECTCLASS_GlueCEAccessControlBase} = 1 if @AuthorizedUsersList;
    $AllCE{$ce}{OBJECTCLASS_GlueInformationService} = 1 if $GrisPortArg ne "";
    print "ObjectClass: $_\n"
        for sort map /^OBJECTCLASS_(\S+)/, keys %{$AllCE{$ce}};
}

# Print all GlueCE attributes.
# (the schema version followed by GlueCEUniqueID*, GlueCEName* and
# GlueCEHostingCluster* in sorted order)
sub printGlueCE{
    my $ce=shift;
    &printSchemaVersion;
    for (sort keys %{$AllCE{$ce}}){
        if (/^GlueCEUniqueID/ || /^GlueCEName/ || /^GlueCEHostingCluster/) {
            print "$_: $AllCE{$ce}{$_}\n";
        }
    }
}

# Print the Glue schema version.
sub printSchemaVersion{
    print "GlueSchemaVersionMajor: $GlueSchemaVersionMajor\n";
    print "GlueSchemaVersionMinor: $GlueSchemaVersionMinor\n";
}

# Print all GlueCEInfo attributes.
sub printGlueCEInfo{
    my $ce=shift;
    print "$_: $AllCE{$ce}{$_}\n"
        for sort grep /^GlueCEInfo/, keys %{$AllCE{$ce}};
}

# Print all GlueCEState attributes.
sub printGlueCEState{
    my $ce=shift;
    print "$_: $AllCE{$ce}{$_}\n"
        for sort grep /^GlueCEState/, keys %{$AllCE{$ce}};
}

# Print all GlueCEPolicy attributes.
sub printGlueCEPolicy{
    my $ce=shift;
    print "$_: $AllCE{$ce}{$_}\n"
        for sort grep /^GlueCEPolicy/, keys %{$AllCE{$ce}};
}

# Print all GlueCEAccessControlBase attributes.
# Merge Info from static config file and gridmap file.
sub printGlueCEAccessControlBase{
    my $ce=shift;
    for (@{$AllCE{$ce}{GlueCEAccessControlBaseRule}}, @AuthorizedUsersList) {
        print "GlueCEAccessControlBaseRule: $_\n";
    }
}

# Print one GlueForeignKey line per entry of the CE's GlueForeignKey list.
sub printGlueForeignKey{
    my $ce=shift;
    print "GlueForeignKey: $_\n" for @{$AllCE{$ce}{GlueForeignKey}};
}

# Print create/modify timestamp (current time) and the entryTtl ($TtlArg),
# followed by the empty line that ends a record.
sub printTimestamps{
    $now = &timestamp;
    print "createTimestamp: $now\n";
    print "modifyTimestamp: $now\n";
    print "entryTtl: $TtlArg\n\n";
}

# Returns the current UTC time formatted as YYYYMMDDhhmmssZ.
sub timestamp {
    my @time = gmtime;
    return sprintf("%04d%02d%02d%02d%02d%02dZ",$time[5]+1900,
                   $time[4]+1,$time[3],$time[2],$time[1],$time[0]);
}

=head2 printClusters

If the information provider is configured to print cluster records
we will append the records matching to the CE's GlueCEHostingClusters.
Note prints only the first cluster if $Wp1SubClusterMode is set.

 1. print the dn
 2. print the objectclasses
    - print clustertop,
    - cluster
    - schemaversion
    - all other classes using the tags $UnifiedCluster{$cl}{OBJECTCLASS_class}
 3. print the attributes
    - schemaversion
    - remaining attributes in sorted order
 4. print timestamps

=cut

# Prints every record of %UnifiedCluster that hosts at least one CE of
# %AllCE (GlueCEHostingCluster starts with the GlueClusterUniqueID) and that
# has not been printed before.  Printed records are tagged with PRINTED.
sub printClusters{
    my $clID="";
    my $printFlag=0;
    foreach $cl (sort keys %UnifiedCluster) {
        $clID = defined $UnifiedCluster{$cl}{GlueClusterUniqueID}
            ? $UnifiedCluster{$cl}{GlueClusterUniqueID} : "";
        # true if ANY CE is hosted by this cluster (a plain assignment inside
        # a loop would only remember the result for the last CE)
        $printFlag = grep {
            defined $AllCE{$_}{GlueCEHostingCluster} &&
            $AllCE{$_}{GlueCEHostingCluster} =~ /^$clID/
        } keys %AllCE;
        # print cluster if a matching hostingcluster was found and
        # cluster has not already been printed
        $printFlag && ! exists $UnifiedCluster{$cl}{PRINTED} || next;

        # PRINT CLUSTER RECORD
        print "$cl\n";
        &printClusterObjectClasses($cl);
        &printSchemaVersion;
        foreach $key (sort grep { !/^OBJECTCLASS_/ } keys %{$UnifiedCluster{$cl}}){
            if ($key eq "GlueForeignKey"){
                print "$key: $_\n" for sort @{$UnifiedCluster{$cl}{GlueForeignKey}};
            } elsif ($key eq "GlueClusterService"){
                print "$key: $_\n" for @{$UnifiedCluster{$cl}{GlueClusterService}};
            } else {
                print "$key: $UnifiedCluster{$cl}{$key}\n";
            }
        }
        print "GlueInformationServiceURL: ldap://$Cluster:$GrisPortArg/mds-vo-name=$MdsArg, o=grid\n"
            if $GrisPortArg ne "";
        &printTimestamps;
        # tag cluster as printed
        $UnifiedCluster{$cl}{PRINTED}=1;
        last if $Wp1SubClusterMode;
    }
}

# Print the Clusters object classes
# Note only objectclasses of the found attributes will be printed.
sub printClusterObjectClasses{
    my $cl=shift;
    if (%{$UnifiedCluster{$cl}}){
        print "ObjectClass: GlueClusterTop\n";
        print "ObjectClass: GlueCluster\n";
        print "ObjectClass: GlueSchemaVersion\n";
    }
    $UnifiedCluster{$cl}{OBJECTCLASS_GlueInformationService}=1 if $GrisPortArg ne "";
    print "ObjectClass: $_\n"
        for sort map /^OBJECTCLASS_(\S+)/, keys %{$UnifiedCluster{$cl}};
}

=head2 printSubClusters

If the information provider is configured to print cluster records
we will append the subCluster records matching to the CE's
GlueCEHostingClusters from hash %UnifiedSubCluster.
Note prints only the first cluster if $Wp1SubClusterMode is set.

 1. print the dn
 2. print the objectclasses
    - print clustertop,
    - subcluster
    - schemaversion
    - all other classes using the tags $UnifiedSubCluster{$scl}{OBJECTCLASS_class}
 3. print the attributes
    - schemaversion
    - remaining attributes in sorted order
 4.
print timestamps

=cut

# Prints the subcluster records for every printable CE (only for the first
# one if $Wp1SubClusterMode is set).
sub printSubClusters{
    foreach $ce (sort keys %AllCE){
        $AllCE{$ce}{PRINTABLEQUEUE} || next;
        &printGlueSubCluster($ce);
        last if $Wp1SubClusterMode;
    }
}

# Prints a subcluster record from %UnifiedSubCluster
# if CE's HostingCluster matches the Subclusters GlueClusterUniqueID
# and the record had not been tagged as printed.
sub printGlueSubCluster{
    my $ce=shift;
    my $mdsScl = "-";
    my $cl = "-";
    my $sclDn = "-";
    my $sclID = "-";
    my $mds = ($ce =~ /mds-vo-name=([^,\s]+)/) ? $1 : "-";
    foreach $subc (sort keys %UnifiedSubCluster) {
        # skip records that have already been printed (leaving the loop here
        # would hide every unprinted record that sorts after a printed one)
        next if exists $UnifiedSubCluster{$subc}{PRINTED};
        $sclDn = "-";
        # determine subClusterUniqueID, if it does not exist we skip the record
        $sclID = ($subc =~ /GlueSubClusterUniqueID=([^,\s]+)/) ? $1 : "-";
        $sclID ne "-" || next;
        # get unifiedSubClusters cluster
        $cl = ($subc =~ /GlueClusterUniqueID=([^,\s]+)/) ? $1 : "-";
        # get unifiedSubClusters mds-vo-name
        $mdsScl = ($subc =~ /mds-vo-name=([^,\s]+)/) ? $1 : "-";
        $mds =~ /$mdsScl/ || $mdsScl eq "-" || next;
        $AllCE{$ce}{GlueCEHostingCluster} =~ /$cl/ || $cl eq "-" || next;
        # compute unifiedSubClusters dn string
        $sclDn=&getDnSclString($sclID,$AllCE{$ce}{GlueCEHostingCluster},$mds);

        # PRINT THE RECORD
        print "$sclDn\n";
        &printSubClusterObjectClasses($subc);
        &printSchemaVersion;
        foreach $key (sort keys %{$UnifiedSubCluster{$subc}}){
            $data=$UnifiedSubCluster{$subc}{$key};
            # multi valued attributes are stored as array references
            if ($key =~ /^GlueHostApplicationSoftwareRunTimeEnvironment/i){
                print "GlueHostApplicationSoftwareRunTimeEnvironment: $_\n"
                    for sort @$data;
                next;
            }
            if ($key =~ /^GlueChunkKey/){
                print "GlueChunkKey: $_\n" for sort @$data;
                next;
            }
            print "$key: $data\n" if $key !~ /^OBJECTCLASS_/;
        }
        print "GlueInformationServiceURL: ldap://$Cluster:$GrisPortArg/mds-vo-name=$MdsArg, o=grid\n"
            if $GrisPortArg ne "";
        &printTimestamps;
        # tag subcluster as printed
        $UnifiedSubCluster{$subc}{PRINTED}=1;
        last if $Wp1SubClusterMode;
    }
}

# Print the subClusters object classes
# Note only objectclasses of the found attributes will be printed.
sub printSubClusterObjectClasses{
    my $scl=shift;
    if (%{$UnifiedSubCluster{$scl}}){
        print "ObjectClass: GlueClusterTop\n";
        print "ObjectClass: GlueSubCluster\n";
        print "ObjectClass: GlueSchemaVersion\n";
    }
    $UnifiedSubCluster{$scl}{OBJECTCLASS_GlueInformationService} = 1
        if $GrisPortArg ne "";
    print "ObjectClass: $_\n"
        for sort map /^OBJECTCLASS_(\S+)/, keys %{$UnifiedSubCluster{$scl}};
}

=head2 printFileSystems

Print all fileSystems from hash %UnifiedFileSystem.

 1. print the dn
 2. print the objectclasses
    - clustertop
    - hostremotefilesystem
    - schemaversion
 3. print the attributes
    - schemaversion
    - remaining attributes in sorted order
    - all other classes using the tags $UnifiedFileSystem{$fileSys}{OBJECTCLASS_class}
 4.
print timestamps

=cut

# Prints every record of %UnifiedFileSystem: dn, objectclasses, schema
# version, the attributes in sorted order and the timestamps.
sub printFileSystems{
    foreach $fileSys (sort keys %UnifiedFileSystem) {
        print "$fileSys\n";
        &printFileSystemObjectClasses($fileSys);
        &printSchemaVersion;
        # print attributes
        foreach $attr (sort grep !/^OBJECTCLASS_/, keys %{$UnifiedFileSystem{$fileSys}}){
            if ($attr eq "GlueChunkKey"){
                print "GlueChunkKey: $_\n"
                    for sort @{$UnifiedFileSystem{$fileSys}{$attr}};
            } else {
                print "$attr: $UnifiedFileSystem{$fileSys}{$attr}\n";
            }
        }
        &printTimestamps;
    }
}

# Print the fileSystem object classes
# Note only objectclasses of the found attributes will be printed.
sub printFileSystemObjectClasses{
    my $fileSys=shift;
    if (%{$UnifiedFileSystem{$fileSys}}){
        print "ObjectClass: GlueClusterTop\n";
        print "ObjectClass: GlueHostRemoteFileSystem\n";
        print "ObjectClass: GlueSchemaVersion\n";
    }
    print "ObjectClass: $_\n"
        for sort map /^OBJECTCLASS_(\S+)/, keys %{$UnifiedFileSystem{$fileSys}};
}

=head2 printHostInfo

Print host information if HostInfoBinArg is specified in commandline.

=cut

# Runs the host info script through the shell.  Output buffering of STDOUT
# is switched off around the call, so that what was printed before is
# flushed before the script writes its own output.
sub printHostInfo{
    if ($HostInfoBinArg ne "") {
        $| = 1;
        system("$HostInfoBinArg --ce $ClusterArg --basename \"mds-vo-name=$MdsArg,o=grid\"");
        $| = 0;
    }
}

=head2 printCesebindings

Print CE SE binding record.

=cut

# Prints for every shown CE with a CESEBIND list the GlueCESEBindGroup
# record, followed by all records of %SE whose key contains the
# GlueCEUniqueID of the CE.
sub printCesebindings{
    foreach $ce (grep { &showCE ($_) && exists $AllCE{$_}{CESEBIND} } keys %AllCE){
        my $ceId = $AllCE{$ce}{GlueCEUniqueID};
        print "dn: GlueCESEBindGroupCEUniqueID=" . $ceId .
            ", mds-vo-name=$MdsArg, o=grid\n";
        print "Objectclass: GlueGeneralTop\n";
        print "Objectclass: GlueCESEBindGroup\n";
        print "GlueCESEBindGroupCEUniqueID: ".$ceId."\n";
        foreach $rec (@{$AllCE{$ce}{CESEBIND}}){
            print "GlueCESEBindGroupSEUniqueID: " .
                $$rec{GlueCESEBindGroupSEUniqueID} . "\n";
        }
        print "Objectclass: GlueSchemaVersion\n";
        &printSchemaVersion;
        &printTimestamps;
        # the CE id is literal text, not a pattern
        foreach $se (grep {/\Q$ceId\E/} keys %SE){
            print "$se\n";
            print "Objectclass: GlueGeneralTop\n";
            print "Objectclass: GlueSchemaVersion\n";
            &printSchemaVersion;
            print "Objectclass: GlueCESEBind\n";
            while (my ($key, $data) = each %{$SE{$se}}) {
                print "$key: $data\n";
            }
            &printTimestamps;
        }
    }
}

# Writes the usage message to STDERR.
# NOTE(review): several options end in "<space>'" (e.g. '-cluster '); the
# argument placeholders appear to have been stripped from this text together
# with its line breaks - restore them from the original distribution.
sub usage {
    warn <<__USAGE__;
usage: ce-all|ce-condor|ce-lsf|ce-pbs [OPTIONS]
OPTIONS:
  '-auth-users-from-grid-mapfile'
      reads authorized users from the grid-mapfile rather than
      from the static configuration file
  '-cesebind | queueregex se | queueregex se directory'
      specifies the cesebind configuration
  '-cluster '
      specifies the name of the server
  '-cluster-batch-system-bin-path '
      specifies the path where the commands of the cluster batch
      system can be found
  '-globus-config-file '
      specifies the path + filename of the Globus configuration
      files (e.g. globus-jobmanager.conf)
  '-globus-gatekeeperhost '
      specifies the globus gatekeeper hostname
  '-globus-gatekeeperport '
      specifies the globus gatekeeper port
  '-grisport '
      specifies the grisport, adds GlueInformationServiceURL to CE
      Cluster and SubCluster records
  '-help'
      print this help message
  '-host-info-bin'
      specifies the location of the hosts info script.
  '-lrms '
      specifies the my Ressource Management System (default: PBS)
  '-maxcputime '
      defines the maximum cpu time for a job submitted to the ce
      [Condor only]
  '-maxwalltime'
      defines the maximum wall clock time allowed for jobs submitted
      to the CE [Condor only]
  '-mds-vo-name '
      specifies the name of the virtual organisation
  '-queue '
      specifies non rms submission queues or a set of condor central
      managers (CONDOR_HOST)
  '-remotefiles '
      specifies a set of configfiles that contain remote hosts
      subcuster and filesystem information
  '-rms-execution-queue '
      specifies the rms execution queue (stats are only shown if set
      in -queue switch) [PBS & LSF only]
  '-rms-submission-queues ...'
      specifies rms submission queues [PBS & LSF only]
  '-static '
      specifies the name of the file that contains static information
  '-ttl '
      specifies the value for entryTtl
__USAGE__
}