KaliVeda
Toolkit for HIC analysis
KV_CCIN2P3_Slurm.cpp
1 //Created by KVClassFactory on Mon Jan 24 16:54:04 2022
2 //Author: John Frankland,,,
3 
4 #include "KV_CCIN2P3_Slurm.h"
5 #include "TSystem.h"
6 #include "TEnv.h"
7 #include "KVDataAnalyser.h"
8 #include "KVDataAnalysisTask.h"
9 #include "KVDataSetAnalyser.h"
10 #include "KVSimDirAnalyser.h"
11 
12 using namespace std;
13 
15 
16 
17 
22  : KVBatchSystem(name), fMultiJobs(kTRUE)
23 {
24  // Constructor: forwards 'name' to the KVBatchSystem base class and starts in multi-jobs mode.
25  // The default job time and memory are read from the ROOT environment (gEnv), with built-in
26  // fallbacks of "00:05:00" and "3G" when the corresponding keys are not defined.
27  fDefJobTime = gEnv->GetValue("GE.BatchSystem.DefaultJobTime", "00:05:00");
28  fDefJobMem = gEnv->GetValue("GE.BatchSystem.DefaultJobMemory", "3G");
29  fTimeSet = fMemSet = kFALSE; // neither time nor memory has been explicitly requested yet
30  // Default number of runs per job in multi-jobs mode (1 unless overridden in the environment)
31  SetRunsPerJob(gEnv->GetValue("GE.BatchSystem.RunsPerJob", 1));
32 }
33 
34 
35 
36 
39 
41 {
42  // Reset the state of this object so that a new job submission command can be built:
42  // the time/memory "already set" flags are cleared and multi-jobs mode is switched back on.
42  // NOTE(review): original line 43 is absent from this listing - presumably the call to the
42  // base-class Clear(); confirm against the repository.
44  fTimeSet = fMemSet = kFALSE;
45  fMultiJobs = kTRUE;
46 }
47 
48 
49 
50 
54 
56 {
57  // Set the time requested for the batch job, stored under the "--time " key of fParList.
58  // An empty string (e.g. SetJobTime() with no argument) selects the default time fDefJobTime.
58  // NOTE(review): the original comment said "CPU time"; check whether "--time " is a CPU-time
58  // or a total run-time limit for the batch system in use.
59  KVString tmp(time);
60  if (tmp == "") tmp = fDefJobTime;
61  // Normalise to "hh:mm:ss": pad with leading "00:" fields when fewer than three are given
62  if (tmp.GetNValues(":") == 2) tmp.Prepend("00:"); // "mm:ss" -> "00:mm:ss"
63  else if (tmp.GetNValues(":") == 1) tmp.Prepend("00:00:"); // "ss" -> "00:00:ss"
64  fParList.SetValue("--time ", tmp);
65  fTimeSet = kTRUE; // CheckJobParameters() will no longer prompt for the time
66 }
67 
68 
69 
74 
76 {
77  // Set the maximum memory requested for the job, stored under the "--mem " key of fParList.
78  // The string must include the units, e.g. "100M", "1G".
79  // If mem="", the default value fDefJobMem is used instead.
80  KVString tmp(mem);
81  if (tmp == "") tmp = fDefJobMem;
82  fParList.SetValue("--mem ", tmp);
83  fMemSet = kTRUE; // CheckJobParameters() will no longer prompt for the memory
84 }
85 
86 
87 
90 
92 {
93  // Print the list of jobs returned by GetListOfJobs() using its ls() method.
93  // The list is deleted here, i.e. this method treats the returned pointer as owned by the caller.
93  // NOTE(review): the pointer is used without a null check - confirm GetListOfJobs() never returns null.
94  KVList* j = GetListOfJobs();
95  j->ls();
96  delete j;
97 }
98 
99 
100 
103 
105 {
106  // Checks the job and interactively asks for any parameter (time, memory) not yet set.
106  // In the code visible here the return value is always kTRUE.
107  // NOTE(review): original line 108 is absent from this listing - presumably a call to the
109  // base-class CheckJobParameters(); confirm against the repository.
110  if (!fTimeSet) ChooseJobTime();
111 
112  if (!fMemSet) ChooseJobMemory();
113 
114  return kTRUE;
115 }
116 
117 
118 
120 
122 { // Prompt on stdout for the job time and read the answer (one line) from stdin
123  KVString tmp = "";
124  cout << "Enter max CPU time per job (ss/mn:ss/hh:mn:ss) ["
125  << fDefJobTime << "] : ";
126  cout.flush(); // make sure the prompt is displayed before blocking on input
127  tmp.ReadToDelim(cin);
128  if (!tmp.Length()) {
129  SetJobTime(); // empty answer: fall back to the default time shown in the prompt
130  return;
131  }
132  else
133  SetJobTime(tmp);
134 }
135 
136 
137 
139 
141 { // Prompt on stdout for the job memory and read the answer (one line) from stdin
142  KVString tmp = "";
143  cout << "Enter max memory per job (xKB/xMB/xGB) ["
144  << fDefJobMem.Data() << "] : ";
145  cout.flush(); // make sure the prompt is displayed before blocking on input
146  tmp.ReadToDelim(cin);
147  SetJobMemory(tmp.Data()); // an empty answer makes SetJobMemory() use the default value
148 }
149 
150 
151 
154 
156 {
157 // Returns the job time string stored under the "--time " key of fParList (see SetJobTime())
158  return fParList.GetStringValue("--time ");
159 }
160 
161 
162 
165 
167 {
168 // Returns the job memory string stored under the "--mem " key of fParList (see SetJobMemory())
169  return fParList.GetStringValue("--mem ");
170 }
171 
172 
173 
174 
178 
180 {
181  // Store the batch-system settings in the given TEnv: multi-jobs flag, current run list
182  // (multi-jobs mode only), job time and job memory.
182  // (according to the original comment, this method is used by KVDataAnalyser::WriteBatchEnvFile)
182  // NOTE(review): original line 183 is absent from this listing - presumably the call to the
182  // base-class WriteBatchEnvFile(env); confirm against the repository.
184  env->SetValue("BatchSystem.MultiJobs", MultiJobsMode());
185  if (MultiJobsMode()) env->SetValue("BatchSystem.CurrentRunList", fCurrJobRunList.AsString());
186  env->SetValue("BatchSystem.Time", GetJobTime());
187  env->SetValue("BatchSystem.Memory", GetJobMemory());
188  // If simulated data is being analysed, the files to analyse are to be copied to the
189  // scratch disk of the batch job (make sure enough disk space is requested).
189  // The flag is written unconditionally, whatever the type of analysis.
190  env->SetValue("SimDirAnalyser.CopyFilesToWorkingDirectory", true);
191 }
192 
193 
194 
195 
199 
201 {
202  // Restore the batch-system settings from the given TEnv: multi-jobs flag, current run list
203  // (multi-jobs mode only), job time and job memory.
203  // (according to the original comment, this method is used by KVDataAnalyser::ReadBatchEnvFile)
203  // NOTE(review): original line 204 is absent from this listing - presumably the call to the
203  // base-class ReadBatchEnvFile(env); confirm against the repository.
205  SetMultiJobsMode(env->GetValue("BatchSystem.MultiJobs", kFALSE));
206  if (MultiJobsMode()) fCurrJobRunList.SetList(env->GetValue("BatchSystem.CurrentRunList", ""));
207  SetJobTime(env->GetValue("BatchSystem.Time", "")); // missing key => "" => default job time
208  SetJobMemory(env->GetValue("BatchSystem.Memory", "")); // missing key => "" => default job memory
209 }
210 
211 
212 
213 
217 
219 {
220  // If option="log", print information for the batch log file (memory request shown below)
221  // If option="all", print detailed information on the batch system
221  // NOTE(review): original lines 223 and 227 are absent from this listing, which leaves the
221  // 'else' below without a statement; the missing code must be restored from the repository.
222  if (!strcmp(option, "log")) {
224  cout << "* MEM_REQ: " << GetJobMemory() << " *" << endl;
225  }
226  else
228 }
229 
230 
231 
232 
248 
250 {
251  // PRIVATE method called by SubmitTask() at moment of job submission.
252  // Depending on the current environment, the default job submission options
253  // may be changed by this method.
254  //
255  // This method overrides and augments KVBatchSystem::ChangeDefJobOpt (which
256  // changes the options as a function of the type of analysis task).
257  // Here we add the CCIN2P3-specific case where the job is launched from a directory
258  // on the /sps/ semi-permanent storage facility, or where the data being analysed is
259  // stored in a repository on /sps/.
260  //
261  // In that case the resource 'sps' must be declared.
262  // As the 'sbatch' command does not allow multiple '-L' resource declarations,
263  // the whole list has to be built here: the 'xrootd' resource is always declared,
264  // and 'sps' is appended to the same '-L' option if needed.
265  // NOTE(review): original lines 266 and 268 are absent from this listing - presumably the
265  // base-class call and the declaration of 'rootdir'; confirm against the repository.
267  KVString taskname = da->GetAnalysisTask()->GetName(); // NOTE(review): unused in the code visible here
269  Bool_t repIsSPS = rootdir.BeginsWith("/sps/"); // data repository located on /sps/ ?
270 
271  KVString wrkdir(gSystem->WorkingDirectory());
272  KVString oldoptions(GetDefaultJobOptions());
273 
274  KVString ressource_list(" -L xrootd");
275  Bool_t NeedToAddSPS = wrkdir.Contains("/sps/"); // working directory anywhere under a /sps/ path ?
276  if ((NeedToAddSPS || repIsSPS)) {
277  ressource_list += ",sps";
278  }
279  oldoptions += ressource_list; // append the single '-L' declaration to the existing options
280  SetDefaultJobOptions(oldoptions.Data());
281 }
282 
283 
284 
285 
292 
294 {
295  // Batch-system dependent sanitization of job names.
296  // Every ':' character appearing in the current job name (fCurrJobName)
297  // is replaced with '_'.
298  // NOTE(review): the original comment justified this by "Grid Engine does not allow ':'";
299  // confirm that the same restriction is wanted for the batch system handled by this class.
300 
301  fCurrJobName.ReplaceAll(":", "_");
302 }
303 
304 
305 
310 
312 {
313  // Processes the job requests for the batch system.
314  // In normal mode, this submits one job for the data analyser fAnalyser.
315  // In multi-jobs mode, this submits one job for each group of GetRunsPerJob() runs (or files,
315  // for simulated data) in the list associated to fAnalyser.
315  // Nothing is submitted if CheckJobParameters() fails, nor in multi-jobs mode when fAnalyser
315  // is neither a KVDataSetAnalyser nor a KVSimDirAnalyser.
316 
317  if (!CheckJobParameters()) return;
318 
319  if (MultiJobsMode()) {
320  if (fAnalyser->InheritsFrom("KVDataSetAnalyser")) {
321  // submit one job for every GetRunsPerJob() runs in the run list
322  KVDataSetAnalyser* ana = dynamic_cast<KVDataSetAnalyser*>(fAnalyser);
323  KVNumberList runs = ana->GetRunList(); // local copy of the full run list
324  runs.Begin();
325  Int_t remaining_runs = runs.GetNValues();
326  fCurrJobRunList.Clear();
327  while (remaining_runs && !runs.End()) {
328  Int_t run = runs.Next();
329  remaining_runs--;
330  fCurrJobRunList.Add(run);
331  if ((fCurrJobRunList.GetNValues() == GetRunsPerJob()) || runs.End()) {
332  // submit job for GetRunsPerJob() runs (or fewer if we have reached the end of run list 'runs')
333  ana->SetRuns(fCurrJobRunList, kFALSE);
334  ana->SetFullRunList(runs);
335  SubmitJob();
336  fCurrJobRunList.Clear();
337  }
338  }
339  ana->SetRuns(runs, kFALSE); // give the analyser back its complete run list
340  }
341  else if (fAnalyser->InheritsFrom("KVSimDirAnalyser")) {
342  // here we understand "run" to mean "file": files are numbered 1, 2, ... in list order
343  KVSimDirAnalyser* ana = dynamic_cast<KVSimDirAnalyser*>(fAnalyser);
344  TList* file_list = ana->GetFileList();
345  Int_t remaining_runs = ana->GetNumberOfFilesToAnalyse();
346  fCurrJobRunList.Clear();
347  TList cur_file_list; // files for the job currently being assembled
348  TObject* of;
349  TIter it(file_list);
350  Int_t file_no = 1;
351  while ((of = it())) {
352  cur_file_list.Add(of);
353  fCurrJobRunList.Add(file_no);
354  remaining_runs--;
355  file_no++;
356  if ((fCurrJobRunList.GetNValues() == GetRunsPerJob()) || (remaining_runs == 0)) {
357  // submit job for GetRunsPerJob() files (or fewer if we have reached the end of the list)
358  ana->SetFileList(&cur_file_list);
359  SubmitJob();
360  fCurrJobRunList.Clear();
361  cur_file_list.Clear();
362  }
363  }
364  ana->SetFileList(file_list); // give the analyser back its complete file list
365  }
366  }
367  else {
368  SubmitJob();
369  }
370 
371 }
372 
373 
374 
386 
388 {
389  // Returns the name of the batch job, either during submission of batch jobs or when an analysis
390  // task is running in batch mode (access through the gBatchSystem global pointer).
391  //
392  // In multi-jobs mode (and when the analyser is not itself running in batch mode), the job name
393  // is generated from the base name set by SetJobName() plus the extension "_Rxxxx-yyyy", where
394  // "xxxx" and "yyyy" are the first and last runs which will be analysed by the current job
394  // (or just "_Rxxxx" when the current job handles a single run).
395  //
396  // Depending on the batch system, some sanitization of the job name may be required,
397  // e.g. to remove "illegal" characters. This is done by SanitizeJobName()
398  // before the job name is returned.
399 
400  if (!fAnalyser) {
401  // no analyser: stand-alone batch submission ? Use the base name as it is
402  fCurrJobName = fJobName;
403  }
404  else {
405  // replace any special symbols with their current values
406  fCurrJobName = fAnalyser->ExpandAutoBatchName(fJobName.Data());
407  if (MultiJobsMode() && !fAnalyser->BatchMode()) {
408  KVString tmp;
409  if (fCurrJobRunList.GetNValues() > 1)
410  tmp.Form("_R%d-%d", fCurrJobRunList.First(), fCurrJobRunList.Last());
411  else
412  tmp.Form("_R%d", fCurrJobRunList.First());
413  fCurrJobName += tmp;
414  }
415  }
416  SanitizeJobName();
417  return fCurrJobName.Data(); // pointer into the member string fCurrJobName
418 }
419 
420 
421 
434 
436 {
437  // Fill the list with all relevant parameters for the batch system,
438  // set to their default (time, memory) or current (multi-jobs mode, runs per job) values.
439  //
440  // Parameters defined in the code visible here are:
441  // JobTime [string]
442  // JobMemory [string]
443  // MultiJobsMode [bool]
444  // RunsPerJob [int]
445  // The original comment also listed EMailOnStart [bool], EMailOnEnd [bool] and
446  // EMailAddress [string].
447  // NOTE(review): original line 449 is absent from this listing - presumably a base-class call
448  // which adds the e-mail parameters; confirm against the repository.
450  nl.SetValue("JobTime", fDefJobTime);
451  nl.SetValue("JobMemory", fDefJobMem);
452  nl.SetValue("MultiJobsMode", MultiJobsMode());
453  nl.SetValue("RunsPerJob", fRunsPerJob);
454 }
455 
456 
457 
460 
462 {
463  // Use the parameters in the list to set the job time, job memory, multi-jobs mode and
464  // number of runs per job of the batch system (keys as in GetBatchSystemParameterList()).
464  // NOTE(review): original line 465 is absent from this listing - presumably the call to the
464  // base-class SetBatchSystemParameters(nl); confirm against the repository.
466  SetJobTime(nl.GetStringValue("JobTime"));
467  SetJobMemory(nl.GetStringValue("JobMemory"));
468  SetMultiJobsMode(nl.GetBoolValue("MultiJobsMode"));
469  SetRunsPerJob(nl.GetIntValue("RunsPerJob"));
470 }
471 
472 
int Int_t
bool Bool_t
char Char_t
constexpr Bool_t kFALSE
constexpr Bool_t kTRUE
const char Option_t
R__EXTERN TEnv * gEnv
const char rootdir[]
Option_t Option_t option
R__EXTERN TSystem * gSystem
Base class for interface to a batch job management system.
Definition: KVBatchSystem.h:78
virtual void WriteBatchEnvFile(TEnv *)
virtual void Print(Option_t *="") const
virtual void ChangeDefJobOpt(KVDataAnalyser *da)
virtual void ReadBatchEnvFile(TEnv *)
virtual void SetBatchSystemParameters(const KVNameValueList &)
Use the parameters in the list to set all relevant parameters for batch system.
virtual void GetBatchSystemParameterList(KVNameValueList &)
virtual void Clear(Option_t *opt="")
virtual Bool_t CheckJobParameters()
Checks the job and asks for the job name if needed.
Manager class which sets up and runs data analysis tasks.
virtual KVString GetRootDirectoryOfDataToAnalyse() const
KVDataAnalysisTask * GetAnalysisTask() const
Pilots user analysis of experimental data.
void SetFullRunList(const KVNumberList &nl)
void SetRuns(const KVNumberList &nl, Bool_t check=kTRUE)
const KVNumberList & GetRunList() const
Extended TList class which owns its objects by default.
Definition: KVList.h:28
Handles lists of named parameters with different types, a list of KVNamedParameter objects.
Int_t GetIntValue(const Char_t *name) const
void SetValue(const Char_t *name, value_type value)
Bool_t GetBoolValue(const Char_t *name) const
const Char_t * GetStringValue(const Char_t *name) const
Strings used to represent a set of ranges of values.
Definition: KVNumberList.h:85
Bool_t End(void) const
Definition: KVNumberList.h:199
Int_t GetNValues() const
void Begin(void) const
Int_t Next(void) const
Class piloting analyses of simulated data.
void SetFileList(TList *l) override
Int_t GetNumberOfFilesToAnalyse() const override
TList * GetFileList() const
Extension of ROOT TString class which allows backwards compatibility with ROOT v3....
Definition: KVString.h:73
Int_t GetNValues(TString delim) const
Definition: KVString.cpp:886
Interface to CCIN2P3 Slurm batch job management system.
virtual void Print(Option_t *="") const
virtual void ChangeDefJobOpt(KVDataAnalyser *)
const Char_t * GetJobName() const
const Char_t * GetJobTime(void) const
returns the parameter string corresponding to the job CPU time
virtual void Clear(Option_t *opt="")
Clear previously set parameters in order to create a new job submission command.
virtual Bool_t CheckJobParameters()
Checks the job and asks for any missing parameters.
void SetJobMemory(const Char_t *h="")
virtual void GetBatchSystemParameterList(KVNameValueList &)
const Char_t * GetJobMemory(void) const
returns the parameter string corresponding to the job Memory
void ChooseJobMemory(void)
void SetJobTime(const Char_t *h="")
virtual void SetBatchSystemParameters(const KVNameValueList &)
Use the parameters in the list to set all relevant parameters for batch system.
void PrintJobs(Option_t *opt="")
Print list of owner's jobs.
virtual void ReadBatchEnvFile(TEnv *)
virtual void SanitizeJobName() const
virtual void WriteBatchEnvFile(TEnv *)
void ls(Option_t *option="") const override
virtual const char * GetValue(const char *name, const char *dflt) const
virtual void SetValue(const char *name, const char *value, EEnvLevel level=kEnvChange, const char *type=nullptr)
void Clear(Option_t *option="") override
void Add(TObject *obj) override
const char * GetName() const override
Ssiz_t Length() const
std::istream & ReadToDelim(std::istream &str, char delim='\n')
const char * Data() const
TString & Prepend(char c, Ssiz_t rep=1)
void Form(const char *fmt,...)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
virtual const char * WorkingDirectory()
UInt_t GetListOfJobs(TFile *file, TList &jobdirs)
ClassImp(TPyArg)