Commit 72caa1e5 authored by Maiken's avatar Maiken

Merge branch 'arex-ganglia-failrate-fix' into 'master'

Arex ganglia failrate fix of ever-increasing number

See merge request !1006
parents d0eb95b7 9374ff12
Pipeline #7837 passed with stage
in 109 minutes and 42 seconds
......@@ -17,13 +17,15 @@ static Arc::Logger& logger = Arc::Logger::getRootLogger();
JobStateList::JobStateList(int _limit):limit(_limit){
failures = 0;
length = 0;
this_node = NULL;
oldhead = NULL;
head = NULL;
tail = NULL;
}
JobStateList::~JobStateList(){}
JobStateList::JobNode::JobNode(JobStateList* _sl, JobNode* _prev, JobNode* _next, int _isfailed, std::string _job_id):
JobStateList::JobNode::JobNode(JobStateList* _sl, JobNode* _prev, JobNode* _next, bool _isfailed, std::string _job_id):
sl(_sl),prev(_prev),next(_next),isfailed(_isfailed),job_id(_job_id){
//update the previously last node in the list to instead point to NULL, now point to the new node
if(prev)prev->next = this;
......@@ -51,15 +53,18 @@ JobStateList::JobNode::~JobNode(){}
void JobStateList::setFailure(int _isfailed,std::string _job_id){
void JobStateList::setFailure(bool _isfailed,std::string _job_id){
JobStateList::JobNode* this_node = NodeInList(_job_id);
//check if the node is already in the list, and if it is update the failure status
this_node = NodeInList(_job_id);
if(this_node){
//existing job in the list
//update the failure-state of the node
this_node->isfailed=_isfailed;
if(_isfailed)failures++;
if(!this_node->isfailed && _isfailed){
//update the failure-state of the node
//only update once (i.e. if node was not failed before)
this_node->isfailed=_isfailed;
if(_isfailed)failures++;
}
}
else{
if(head==NULL){
......@@ -77,11 +82,11 @@ JobStateList::JobNode::~JobNode(){}
if(length>limit){
//list is now 1 too long, remove the old head of the list (oldest job)
JobStateList::JobNode* oldhead = head;
oldhead = head;
head = oldhead->next;
length--;
if (oldhead->isfailed)failures--;
delete oldhead;
oldhead = NULL;
}
}
}
......@@ -90,7 +95,6 @@ JobStateList::JobNode::~JobNode(){}
JobsMetrics::JobsMetrics():enabled(false),proc(NULL) {
job_counter = 0;
job_fail_counter = 0;
std::memset(jobs_in_state, 0, sizeof(jobs_in_state));
std::memset(jobs_in_state_changed, 0, sizeof(jobs_in_state_changed));
......@@ -108,6 +112,7 @@ JobsMetrics::JobsMetrics():enabled(false),proc(NULL) {
}
JobsMetrics::~JobsMetrics() {
delete jobstatelist;
}
void JobsMetrics::SetEnabled(bool val) {
......@@ -131,7 +136,6 @@ void JobsMetrics::SetGmetricPath(const char* path) {
/*
## - failed jobs -- of the last 100 jobs, the number of failed jobs
## - fail ratio -- the ratio of failed jobs to all jobs (redundant)
## - job states -- number of jobs in different A-REX internal stages
*/
......
......@@ -30,7 +30,7 @@ namespace ARex {
JobStateList::JobNode* next;
JobStateList::JobNode* prev;
JobNode(JobStateList* _sl, JobNode* _prev=NULL, JobNode* _next=NULL, int _state=0,std::string _job_id="");
JobNode(JobStateList* _sl, JobNode* _prev=NULL, JobNode* _next=NULL, bool _isfailed=false, std::string _job_id="");
~JobNode(void);
};
......@@ -44,14 +44,16 @@ namespace ARex {
int failures;
int length;
JobStateList::JobNode* this_node;
JobStateList::JobNode* oldhead;
JobStateList::JobNode* tail;
JobStateList::JobNode* head;
void setFailure(int st,std::string job_id);
void setFailure(bool _isfailed, std::string _job_id);
JobStateList::JobNode* NodeInList(std::string job_id);
JobStateList::JobNode* NodeInList(std::string _job_id);
JobStateList(int limit);
JobStateList(int _limit);
~JobStateList(void);
......@@ -69,7 +71,6 @@ class JobsMetrics {
time_t time_lastupdate;
unsigned long long int job_counter;
unsigned long long int job_fail_counter;
unsigned long long int jobs_in_state[JOB_STATE_UNDEFINED];
unsigned long long int jobs_state_old_new[JOB_STATE_UNDEFINED+1][JOB_STATE_UNDEFINED];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment