Commit 76e372f4 authored by Maiken

Merge branch 'dev-netstat-ss-mr' into 'master'

Fix for bugzilla #3700 - ARC1ClusterInfo.pm uses netstat

See merge request nordugrid/arc!134
parents bdb07e0c 97537c31
......@@ -1602,28 +1602,11 @@ sub collect($) {
}
}
# check if WS interface is actually running
# done with netstat but I'd like to be smarter
# this only works if the effective user is root
# TODO: find a better way to do this. Ask A-REX?
# changed by request of aleksandr. Only if root is running arex.
if ($> == 0) {
my $netstat=`netstat -antup`;
if ( $? != 0 ) {
# push @{$healthissues{unknown}}, "Checking if ARC WS interface is running: error in executing netstat. Infosys will assume the service is in ok HealthState";
$log->verbose("Checking if ARC WS interface is running: error in executing netstat. Infosys will assume AREX WSRF/XBES running properly");
} else {
# searches if arched is listed in netstat output
# best way would be ask arched if its service is up...?
if( $netstat !~ m/arched/ ) {
push @{$healthissues{critical}}, "arched A-REX endpoint not found with netstat" ;
}
}
} else {
# push @{$healthissues{unknown}}, "user ".getpwuid($>)." cannot run netstat -p. Infosys will assume the service is in ok HealthState";
$log->verbose("Checking if ARC WS interface is running: user ".getpwuid($>)." cannot run netstat -p. Infosys will assume AREX WSRF/XBES is running properly");
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other unknown)) {
......@@ -1771,6 +1754,12 @@ sub collect($) {
# check if gridftpd is running, by checking pidfile existence
push @{$healthissues{critical}}, 'gridfptd pidfile does not exist' unless (-e $config->{gridftpd}{pidfile});
# check health status by using port probe in hostinfo
my $gridftpdport = $config->{gridftpd}{port};
if (defined $host_info->{ports}{gridftpd}{$gridftpdport} and $host_info->{ports}{gridftpd}{$gridftpdport} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{gridftpd}{$gridftpdport}}[0]}} , @{$host_info->{ports}{gridftpd}{$gridftpdport}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other)) {
......@@ -1897,6 +1886,11 @@ sub collect($) {
: 'Grid manager is down';
}
}
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
my @infos;
......@@ -2024,6 +2018,11 @@ sub collect($) {
}
}
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other)) {
......@@ -2137,26 +2136,9 @@ sub collect($) {
}
}
# check if WS interface is actually running
# done with netstat but I'd like to be smarter
# this only works if the effective user is root
# TODO: find a better way to do this. Ask A-REX?
# changed by request of aleksandr. Only checks if it's root
if ($> == 0) {
my $netstat=`netstat -antup`;
if ( $? != 0 ) {
# push @{$healthissues{unknown}}, "Checking if ARC WS interface is running: error in executing netstat. Infosys will assume the service is in ok HealthState";
$log->verbose("Checking if ARC WS interface is running: error in executing netstat. Infosys will assume EMIES is running properly");
} else {
# searches if arched is listed in netstat output
# best way would be ask arched if its service is up...?
if( $netstat !~ m/arched/ ) {
push @{$healthissues{critical}}, "arched A-REX endpoint not found with netstat. EMIES cannot be enabled." ;
}
}
} else {
# push @{$healthissues{unknown}}, "user ".getpwuid($>)." cannot run netstat -p. Infosys will assume EMIES is in ok HeathState";
$log->verbose("Checking if ARC WS interface is running: user ".getpwuid($>)." cannot run netstat -p. Infosys will assume EMIES is running properly");
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
......@@ -2257,6 +2239,11 @@ sub collect($) {
}
}
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other)) {
......@@ -2365,26 +2352,9 @@ sub collect($) {
}
}
# check if WS interface is actually running
# done with netstat but I'd like to be smarter
# this only works if the effective user is root
# TODO: find a better way to do this. Ask A-REX?
# changed by request of aleksandr. Only checks if it's root
if ($> == 0) {
my $netstat=`netstat -antup`;
if ( $? != 0 ) {
# push @{$healthissues{unknown}}, "Checking if ARC WS interface is running: error in executing netstat. Infosys will assume the service is in ok HealthState";
$log->verbose("Checking if ARC WS interface is running: error in executing netstat. Infosys will assume EMIES is running properly");
} else {
# searches if arched is listed in netstat output
# best way would be ask arched if its service is up...?
if( $netstat !~ m/arched/ ) {
push @{$healthissues{critical}}, "arched A-REX endpoint not found with netstat. EMIES cannot be enabled." ;
}
}
} else {
# push @{$healthissues{unknown}}, "user ".getpwuid($>)." cannot run netstat -p. Infosys will assume EMIES is in ok HeathState";
$log->verbose("Checking if ARC WS interface is running: user ".getpwuid($>)." cannot run netstat -p. Infosys will assume EMIES is running properly");
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
......@@ -2493,6 +2463,11 @@ sub collect($) {
$log->verbose("Checking if ARC WS interface is running: user ".getpwuid($>)." cannot run netstat -p. Infosys will assume EMIES is running properly");
}
# check health status by using port probe in hostinfo
if (defined $host_info->{ports}{arched}{'443'} and $host_info->{ports}{arched}{'443'} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other unknown)) {
......@@ -2553,7 +2528,7 @@ sub collect($) {
# don't publish if no EMIES endpoint configured
$arexceps->{ARCRESTComputingEndpoint} = $getARCRESTComputingEndpoint if ($emiesenabled);
#
#
## NorduGrid local submission
#
my $getNorduGridLocalSubmissionEndpoint = sub {
......@@ -2579,8 +2554,8 @@ sub collect($) {
$cep->{Technology} = 'direct';
$cep->{InterfaceName} = 'org.nordugrid.local';
$cep->{InterfaceVersion} = [ '1.0' ];
$cep->{Capability} = [ @{$epscapabilities->{'org.nordugrid.local'}}, @{$epscapabilities->{'common'}} ];
$cep->{Implementor} = "NorduGrid";
$cep->{Capability} = [ @{$epscapabilities->{'org.nordugrid.local'}}, @{$epscapabilities->{'common'}} ];
$cep->{Implementor} = "NorduGrid";
$cep->{ImplementationName} = "nordugrid-arc";
$cep->{ImplementationVersion} = $config->{arcversion};
......@@ -2805,9 +2780,14 @@ sub collect($) {
$ep->{QualityLevel} = "production";
# How to calculate health for this interface?
my %healthissues;
# check health status by using port probe in hostinfo
my $ldapport = $config->{infosys}{ldap}{port} if defined $config->{infosys}{ldap}{port};
if (defined $host_info->{ports}{slapd}{$ldapport} and $host_info->{ports}{slapd}{$ldapport} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{slapd}{$ldapport}}[0]}} , @{$host_info->{ports}{slapd}{$ldapport}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other)) {
......@@ -2870,9 +2850,14 @@ sub collect($) {
$ep->{QualityLevel} = "production";
# How to calculate health for this interface?
my %healthissues;
# check health status by using port probe in hostinfo
my $ldapport = $config->{infosys}{ldap}{port} if defined $config->{infosys}{ldap}{port};
if (defined $host_info->{ports}{slapd}{$ldapport} and $host_info->{ports}{slapd}{$ldapport} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{slapd}{$ldapport}}[0]}} , @{$host_info->{ports}{slapd}{$ldapport}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other)) {
......@@ -2938,6 +2923,12 @@ sub collect($) {
# How to calculate health for this interface?
my %healthissues;
# check health status by using port probe in hostinfo
my $ldapport = $config->{infosys}{ldap}{port} if defined $config->{infosys}{ldap}{port};
if (defined $host_info->{ports}{slapd}{$ldapport} and $host_info->{ports}{slapd}{$ldapport} ne 'ok') {
push @{$healthissues{@{$host_info->{ports}{slapd}{$ldapport}}[0]}}, @{$host_info->{ports}{slapd}{$ldapport}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other)) {
......@@ -3019,28 +3010,13 @@ sub collect($) {
}
}
# check if WS interface is actually running
# done with netstat but I'd like to be smarter
# this only works if the effective user is root
# TODO: find a better way to do this. Ask A-REX?
# changed by request of aleksandr. Only if root is running arex.
if ($> == 0) {
my $netstat=`netstat -antup`;
if ( $? != 0 ) {
# push @{$healthissues{ok}}, "Checking if ARC WS interface is running: error in executing netstat. Infosys will assume the service is in ok HealthState";
$log->verbose("Checking if ARC WS interface is running: error in executing netstat. Infosys will assume ARIS WSRFGLUE2 is running properly");
} else {
# searches if arched is listed in netstat output
# best way would be ask arched if its service is up...?
if( $netstat !~ m/arched/ ) {
push @{$healthissues{critical}}, "arched A-REX endpoint not found with netstat" ;
}
}
} else {
# push @{$healthissues{ok}}, "user ".getpwuid($>)." cannot run netstat -p. Infosys will assume the service is in ok HealthState";
$log->verbose("Checking if ARC WS interface is running: user ".getpwuid($>)." cannot run netstat -p. Infosys will assume ARIS WSRFGLUE2 is is running properly");
# check health status by using port probe in hostinfo
# a-rex port hardcoded in ARC6
if (defined $host_info->{ports}{arched}{'443'}) {
push @{$healthissues{@{$host_info->{ports}{arched}{'443'}}[0]}} , @{$host_info->{ports}{arched}{'443'}}[1];
}
if (%healthissues) {
my @infos;
for my $level (qw(critical warning other unknown)) {
......
......@@ -393,7 +393,10 @@ sub get_host_info($$) {
my $host_opts = {};
$host_opts->{localusers} = $localusers;
$host_opts->{processes} = ['arched', 'gridftpd'];
$host_opts->{processes} = ['arched', 'gridftpd','slapd'];
$host_opts->{ports}{'arched'} = ['443'];
$host_opts->{ports}{'gridftpd'} = [$config->{gridftpd}{port}];
$host_opts->{ports}{'slapd'} = [$config->{infosys}{ldap}{port}];
$host_opts->{x509_host_cert} = $config->{x509_host_cert};
$host_opts->{x509_cert_dir} = $config->{x509_cert_dir};
$host_opts->{wakeupperiod} = $config->{wakeupperiod};
......
......@@ -19,6 +19,9 @@ our $host_options_schema = {
x509_cert_dir => '*',
wakeupperiod => '*',
processes => [ '' ],
ports => {
'*' => [ '*' ] #process name, ports
},
localusers => [ '' ],
control => {
'*' => {
......@@ -60,6 +63,11 @@ our $host_info_schema = {
cache_total => '', # unit: MB
globusversion => '*',
processes => { '*' => '' },
ports => {
'*' => { # process name
'*' => [ '' ] # port -> [port status, error message ]
}
},
gm_alive => '',
localusers => {
'*' => {
......@@ -67,7 +75,7 @@ our $host_info_schema = {
diskfree => '' # unit: MB
}
},
EMIversion => [ '' ] # taken from /etc/emi-version if exists
EMIversion => [ '' ] # taken from /etc/emi-version if exists
};
our $log = LogUtils->getLogger(__PACKAGE__);
......@@ -107,7 +115,7 @@ sub enddate {
# assuming here that the file exists and is a well-formed certificate.
my $stdout =`$openssl x509 -noout -enddate -in '$certfile' 2>&1`;
if ($?) {
$log->info("openssl error: $stdout");
$log->info("openssl error: $stdout");
return undef;
}
chomp ($stdout);
......@@ -122,6 +130,61 @@ sub enddate {
}
}
# Probe whether the given service processes have sockets on their ports.
#
# Arguments:
#   $processes - arrayref of process names to look for
#   $ports     - hashref mapping process name -> arrayref of port numbers
#
# Returns a hashref of the form
#   { <process> => { <port> => [ <status>, <message> ] } }
# where <status> is 'ok' or 'critical'. Returns undef when a netstat/ss
# binary was found but exited with a non-zero status.
#
# The probe parses the output of `netstat -antup` (or `ss -antup`). It is
# only attempted when the effective user is root; for any other user, or
# when neither tool is found in PATH, every port is reported as 'ok' and
# the message explains why no real probe was done.
sub get_ports_info {
    my ($processes, $ports) = @_;

    my $portsstatus  = {};
    my $errormessage = '';

    my $userisroot = ($> == 0) ? 1 : 0;
    unless ($userisroot) {
        $errormessage = "Checking if ARC ports are open: user ".getpwuid($>)." cannot access process names. Infosys will assume AREX interfaces are running properly;";
        $log->verbose($errormessage);
    }

    my $netcommand = '';
    my $stdout     = '';

    if ($userisroot) {
        # Pick the first netstat or ss found while walking PATH in order;
        # within one directory netstat is preferred over ss.
        my $searchpath = defined $ENV{PATH} ? $ENV{PATH} : '';
        DIR: for my $dir (split /:/, $searchpath) {
            for my $tool (qw(netstat ss)) {
                my $candidate = "$dir/$tool";
                # -f rules out a directory that merely has the execute bit set
                next unless -f $candidate and -x _;
                $netcommand = $candidate;
                last DIR;
            }
        }

        if ($netcommand eq '') {
            $errormessage = $errormessage." Could not find neither netstat nor ss command, cannot probe open ports, assuming services are up;";
            $log->verbose("Could not find neither netstat nor ss command, cannot probe open ports, assuming services are up");
        } else {
            # stderr is folded into stdout so it can be logged on failure
            $stdout = `$netcommand -antup 2>&1`;
            if ($?) {
                $log->info("$netcommand error: $stdout");
                return undef;
            }
        }
    }

    # Without usable command output nothing can be verified: assume 'ok'.
    my $cannotprobe = ($netcommand eq '' or not $userisroot) ? 1 : 0;

    for my $process (@$processes) {
        my $procports = $ports->{$process};
        next unless defined $procports;

        for my $port (@$procports) {
            # An unset port (e.g. missing in the configuration) cannot be probed.
            next unless defined $port;

            # The port must directly follow the address separator ':' and
            # must not be followed by another digit, so that 443 does not
            # match 4430, 1443, a PID or part of an IP address. '.' does not
            # match newlines, so port and process name must share a line.
            # Both values are quoted so they are matched literally.
            if ($cannotprobe or $stdout =~ m/:\Q$port\E(?!\d).*\Q$process\E/) {
                $portsstatus->{$process}{$port} = ['ok', $errormessage ];
            } else {
                my $porterrormessage = $errormessage. " $netcommand: process $process is not listening on port $port;";
                $portsstatus->{$process}{$port} = ['critical', $porterrormessage ];
            }
        }
    }

    return $portsstatus;
}
# Hostcert, issuer CA, trustedca, issuercahash, enddate ...
sub get_cert_info {
......@@ -317,7 +380,7 @@ sub get_host_info {
# Opting to publish the least free space on any of the cache
# disks -- at least this has a simple meaning and is useful to
# diagnose if a disk gets full.
$host_info->{cache_free} = $res{freemin};
$host_info->{cache_free} = $res{freemin};
# Only accurate if caches are on filesystems of their own
$host_info->{cache_total} = $res{totalsum};
}
......@@ -345,6 +408,8 @@ sub get_host_info {
$host_info->{processes} = Sysinfo::processid(@{$options->{processes}});
$host_info->{ports} = get_ports_info($options->{processes},$options->{ports});
# gets EMI version from /etc/emi-version if any.
my $EMIversion;
if (-r "/etc/emi-version") {
......@@ -360,8 +425,8 @@ sub get_host_info {
#### TEST ##### TEST ##### TEST ##### TEST ##### TEST ##### TEST ##### TEST ####
sub test {
my $options = { x509_host_cert => '/etc/grid-security/hostcert.pem',
x509_cert_dir => '/etc/grid-security/certificates',
my $options = { x509_host_cert => '/home/pflorido/build/certs/hostcert.pem',
x509_cert_dir => '/home/pflorido/build/certs',
control => {
'.' => {
sessiondir => [ '/home', '/boot' ],
......@@ -377,7 +442,12 @@ sub test {
'/dummy/control /boot' ],
libexecdir => '/usr/libexec/arc',
runtimedir => '/home/grid/runtime',
processes => [ qw(bash ps init grid-manager bogous) ],
processes => [ qw(bash ps init grid-manager bogous cupsd slapd) ],
ports => {
cupsd => ['631'],
gridftpd => ['3811'],
slapd => ['133','389','2135']
},
localusers => [ qw(root bin daemon) ] };
require Data::Dumper; import Data::Dumper qw(Dumper);
LogUtils::level('DEBUG');
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment