Commit 68c174d1 authored by Maiken
Browse files

Merge branch 'devfix3718' into 'next'

Fix for BUGZ-3718 - better handling of missing LRMS commands

See merge request nordugrid/arc!569
parents 9776f344 cc55f707
......@@ -483,7 +483,7 @@ sub cluster_info ($) {
my %lrms_cluster;
configure_condor_env(%$config) or die "Condor executables or config file not found\n";
configure_condor_env(%$config) or error("Condor executables (in condor_bin_path) or config file (condor_config) not found, check configuration. Exiting...");
collect_node_data();
collect_job_data();
......@@ -529,7 +529,7 @@ sub queue_info ($$) {
warning("Option 'condor_requirements' is not defined for queue $qname") unless $qdef;
debug("===Requirements for queue $qname: $qdef");
configure_condor_env(%$config) or die "Condor executables or config file not found\n";
configure_condor_env(%$config) or error("Condor executables (in condor_bin_path) or config file (condor_config) not found, check configuration. Exiting...");
collect_node_data();
collect_job_data();
......
......@@ -39,7 +39,9 @@ our $total_cpus="0";
sub lsf_env($$){
my ($path)=shift;
error("lsf_bin_path not defined, cannot continue. Exiting...") unless defined $path;
$lsf_profile_path=shift;
error("lsf_profile_path not defined, cannot continue. Exiting...") unless defined $lsf_profile_path;
$lsf_profile=`source $lsf_profile_path`;
$lshosts_command="$path/lshosts -w";
......@@ -74,8 +76,7 @@ sub read_lsfnodes ($){
my ($cpu_count) = 0;
unless (open LSFHOSTSOUTPUT, "$lshosts_command |") {
debug("Error in executing lshosts command: $lshosts_command");
die "Error in executing lshosts: $lshosts_command\n";
error("Error in executing lshosts command: $lshosts_command");
}
while (my $line= <LSFHOSTSOUTPUT>) {
......@@ -100,8 +101,7 @@ sub read_lsfnodes ($){
close LSFHOSTSOUTPUT;
unless (open LSFBHOSTSOUTPUT, "$bhosts_command |") {
debug("Error in executing bhosts command: $bhosts_command");
die "Error in executing bhosts: $bhosts_command\n";
error("Error in executing bhosts command: $bhosts_command");
}
while (my $line= <LSFBHOSTSOUTPUT>) {
......@@ -145,8 +145,7 @@ sub queue_info_user ($$$) {
$user = "-u " . $user;
}
unless (open BQOUTPUT, "$bqueues_command $user $qname|") {
debug("Error in executing bqueues command: $bqueues_command $user $qname");
die "Error in executing bqueues: $bqueues_command \n";
error("Error in executing bqueues command: $bqueues_command $user $qname");
}
while (my $line= <BQOUTPUT>) {
......@@ -178,8 +177,7 @@ sub queue_info_user ($$$) {
$lrms_queue{maxwalltime} = "";
unless (open BQOUTPUT, "$bqueuesl_command $user $qname|") {
debug("Error in executing bqueues command: $bqueuesl_command $user $qname");
die "Error in executing bqueues: $bqueuesl_command \n";
error("Error in executing bqueues command: $bqueuesl_command $user $qname");
}
my $lastline ="";
while (my $line= <BQOUTPUT>) {
......@@ -225,8 +223,7 @@ sub get_jobinfo($){
my %job;
unless (open BJOUTPUT, "$bjobs_command $id|") {
debug("Error in executing bjobs command: $bjobs_command $id");
die "Error in executing bjobs: $bjobs_command \n";
error("Error in executing bjobs command: $bjobs_command $id");
}
while (my $line= <BJOUTPUT>) {
......
......@@ -60,6 +60,7 @@ sub get_pbs_version ($) {
# path to LRMS commands
my ($config) = shift;
my ($path) = $$config{pbs_bin_path};
error("pbs_bin_path not defined, cannot continue. Exiting...") unless defined $path;
# determine the flavour and version of PBS
my $qmgr_string=`$path/qmgr -c "list server"`;
......@@ -378,6 +379,7 @@ sub cluster_info ($) {
# Path to LRMS commands
my ($config) = shift;
my ($path) = $$config{pbs_bin_path};
error("pbs_bin_path not defined, cannot continue. Exiting...") unless defined $path;
# Return data structure %lrms_cluster{$keyword}
#
......@@ -504,6 +506,7 @@ sub queue_info ($$) {
# Path to LRMS commands
my ($config) = shift;
my ($path) = $$config{pbs_bin_path};
error("pbs_bin_path not defined, cannot continue. Exiting...") unless defined $path;
# Name of the queue to query
my ($qname) = shift;
......@@ -766,6 +769,7 @@ sub jobs_info ($$@) {
# Path to LRMS commands
my ($config) = shift;
my ($path) = $$config{pbs_bin_path};
error("pbs_bin_path not defined, cannot continue. Exiting...") unless defined $path;
# Name of the queue to query
my ($qname) = shift;
......@@ -917,6 +921,7 @@ sub users_info($$@) {
# Path to LRMS commands
my ($config) = shift;
my ($path) = $$config{pbs_bin_path};
error("pbs_bin_path not defined, cannot continue. Exiting...") unless defined $path;
# Name of the queue to query
my ($qname) = shift;
......
......@@ -43,6 +43,7 @@ sub slurm_read_config($){
my ($path) = ($$config{slurm_bin_path} or $$config{SLURM_bin_path} or "/usr/bin");
# get SLURM config, store dictionary in scont_config
my %scont_config;
checkbin("$path/scontrol");
open (SCPIPE,"$path/scontrol show config| grep -Ev \"primary|Configuration|^\$\"|");
while(<SCPIPE>){
chomp;
......@@ -67,6 +68,7 @@ sub slurm_read_jobs($){
my ($path) = ($$config{slurm_bin_path} or $$config{SLURM_bin_path} or "/usr/bin");
# get SLURM jobs, store dictionary in scont_jobs
my %scont_jobs;
checkbin("$path/squeue");
open (SCPIPE,"$path/squeue -a -h -t all -o \"JobId=%i TimeUsed=%M Partition=%P JobState=%T ReqNodes=%D ReqCPUs=%C TimeLimit=%l Name=%j NodeList=%N\"|");
while(<SCPIPE>){
my %job;
......@@ -100,6 +102,7 @@ sub slurm_read_partitions($){
my ($path) = ($$config{slurm_bin_path} or $$config{SLURM_bin_path} or "/usr/bin");
# get SLURM partitions, store dictionary in scont_part
my %scont_part;
checkbin("$path/sinfo");
open (SCPIPE,"$path/sinfo -a -h -o \"PartitionName=%P TotalCPUs=%C TotalNodes=%D MaxTime=%l DefTime=%L\"|");
while(<SCPIPE>){
my %part;
......@@ -142,6 +145,7 @@ sub slurm_read_cpuinfo($){
# get SLURM partitions, store dictionary in scont_part
my %sinfo_cpuinfo;
my $cpuinfo;
checkbin("$path/sinfo");
open (SCPIPE,"$path/sinfo -a -h -o \"cpuinfo=%C\"|");
while(<SCPIPE>){
my $string = $_;
......@@ -163,6 +167,7 @@ sub slurm_read_nodes($){
my ($path) = ($$config{slurm_bin_path} or $$config{SLURM_bin_path} or "/usr/bin");
# get SLURM nodes, store dictionary in scont_nodes
my %scont_nodes;
checkbin("$path/scontrol");
open (SCPIPE,"$path/scontrol show node --oneliner|");
while(<SCPIPE>){
my %record;
......@@ -312,6 +317,11 @@ sub slurm_expand_nodes($){
return $enodes;
}
# Verify that a SLURM command can actually be run before piping from it.
#
# Argument: the full path to the binary (e.g. "$path/scontrol").
#
# Reports through error() when the path is not a regular file, and also
# when the file exists but is not executable by the current user. The
# second check matters because callers open the command as a pipe without
# checking the result of open(), so a non-executable binary would
# otherwise yield silently empty data.
# error() is defined elsewhere in this file; the messages assume it
# terminates the process -- TODO confirm.
sub checkbin ($) {
    my $apbin = shift;

    error("Can't find $apbin , check slurm_bin_path. Exiting...")
        unless -f $apbin;

    # Re-test the path itself rather than the cached stat buffer ("_"),
    # since error() may perform file tests of its own.
    error("$apbin is not executable, check slurm_bin_path. Exiting...")
        unless -x $apbin;
}
############################################
# Public subs
......
......@@ -43,9 +43,9 @@ sub get_lrms_info($) {
$options = shift;
$path = ($options->{slurm_bin_path} or "/usr/bin");
# slurm_init_check();
slurm_init_check($path);
slurm_get_data();
cluster_info();
......@@ -72,16 +72,19 @@ sub get_lrms_info($) {
# Private subs
##########################################
#sub slurm_init_check() {
#
#$log->info("Verifying slurm commands...");
#
#my @slurm_commands = ('scontrol','squeue','sinfo');
#
#foreach my $slurmcmd (@slurm_commands) {
# unless (-e "$path/$slurmcmd") {$log->error("$slurmcmd command not found. Exiting...")};
# }
#}
# Sanity check run before querying SLURM: confirm that the scontrol,
# squeue and sinfo tools are each present as a regular file under the
# directory passed as the only argument.
# A missing tool is reported through $log->error and the loop then moves
# on to the next tool; whether $log->error terminates the process is up
# to the logger, which is defined outside this sub.
sub slurm_init_check($) {
    my ($path) = @_;

    $log->info("Verifying slurm commands...");

    for my $slurmcmd (qw(scontrol squeue sinfo)) {
        next if -f "$path/$slurmcmd";
        $log->error("$path/$slurmcmd command not found. Check slurm_bin_path in configuration. Exiting...");
    }
}
sub nodes_info() {
my $lrms_nodes = {};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment