KaliVeda
Toolkit for HIC analysis
KVDataSetManager.cpp
1 /*
2 $Id: KVDataSetManager.cpp,v 1.17 2007/10/01 15:03:38 franklan Exp $
3 $Revision: 1.17 $
4 $Date: 2007/10/01 15:03:38 $
5 $Author: franklan $
6 */
7 
8 #include "KVBase.h"
9 #include "KVDataSetManager.h"
10 #include "KVDataRepositoryManager.h"
11 #include "KVString.h"
12 #include "TObjString.h"
13 #include "TObjArray.h"
14 #include "Riostream.h"
15 #include "TEnv.h"
16 #include "KVBase.h"
17 #include "TPluginManager.h"
18 #include "TError.h"
19 
// Macro building a numeric file mode from its three permission digits
// (user, group, other): e.g. the octal mode 664 (= u+rw, g+rw, o+r) is CHMODE(6,6,4).
// Every argument is parenthesised so that expressions such as CHMODE(4|2, 4|2, 4)
// expand with the intended precedence.
#define CHMODE(u,g,o) (((u) << 6) + ((g) << 3) + (o))
23 
24 using namespace std;
25 
27 
28 KVDataSetManager* gDataSetManager;
29 
30 
32 
34 {
35  fNavailable = 0;
36  fRepository = 0;
37  fCacheAvailable = kFALSE;
38  fMaxCacheTime = 0;
39  fCacheFileName = "";
40  fDataSets.SetOwner();
41  fTasks.SetOwner();
42 }
43 
44 
45 
47 
48 KVDataSetManager::~KVDataSetManager()
49 {
50 }
51 
52 
53 
67 
69 {
70  //Initialisation of dataset manager for the repository 'dr'.
71  //
72  //If dr=0x0 (default) then all known datasets are 'available', otherwise
73  //we check availability of datasets based on data present in repository.
74  //
75  //Initialise all possible data analysis tasks,
76  //then set list of possible tasks for each available dataset.
77  //
78  //Returns kTRUE if all goes well.
79  //
80  //For remote data repositories we return kFALSE if no datasets are available
81  //(as the access to these is read-only)
82 
83  fRepository = dr;
84 
85  ReadUserGroups();
86 
87  if (!ReadDataSetList())
88  return kFALSE;
89 
90  //use caching for dataset availability ?
91  if (dr) {
92  fCacheAvailable = gEnv->GetValue(Form("%s.DataRepository.CacheAvailable", dr->GetName()),
93  kFALSE);
94  fMaxCacheTime = (UInt_t)gEnv->GetValue(Form("%s.DataRepository.MaxCacheTime", dr->GetName()),
95  0);
96  }
97  //name of cache file
98  if (dr) fCacheFileName.Form("%s.available.datasets", dr->GetName());
99 
100  //check which datasets are available
101  CheckAvailability();
102  if (!GetNavailable() && dr && dr->IsRemote()) return kFALSE;
103 
104  if (!ReadTaskList())
105  return kFALSE;
106 
107  //set available data analysis tasks for available datasets
108  if (GetNavailable()) {
109  for (Int_t i = 1; i <= GetNavailable(); i++)
110  GetAvailableDataSet(i)->SetAnalysisTasks(&fTasks);
111  }
112 
113  // stand-alone dataset manager: make it the default
114  if (!dr) gDataSetManager = this;
115 
116  return kTRUE;
117 }
118 
119 
120 
125 
127 {
128  //Sets up list of user groups defining restricted access to certain datasets.
129  //Definition of different user groups is given in $KVROOT/KVFiles/.kvrootrc
130 
131  //UserGroups env var contains whitespace-separated list of group names
132  TString groups = gEnv->GetValue("UserGroup", "");
133  if (groups == "") {
134  //cout << "No value for UserGroup" << endl;
135  return;
136  }
137  //split into array of group names
138  TObjArray* toks = groups.Tokenize(' ');
139  TObjString* group_name;
140  TIter next_name(toks);
141  fUserGroups.Clear();
142  while ((group_name = (TObjString*) next_name())) {
143  //for each group_name, the env var 'group_name.Users' contains a whitespace-separated list of user names
144  //we store this string in the fUserGroups parameter list with parameter name 'group_name'
145  TString users =
146  gEnv->
147  GetValue(Form("%s.Users", group_name->String().Data()), "");
148  fUserGroups.SetValue(group_name->String().Data(), users);
149  }
150  delete toks;
151 }
152 
153 
154 
158 
160 {
161  //Initialise list of all known datasets from informations in $KVROOT/KVFIles/.kvrootrc
162  //(and user's .kvrootrc)
163 
164  KVString manip_list = gEnv->GetValue("DataSet", "");
165 
166  fDataSets.Clear();
167 
168  TObjArray* manips = manip_list.Tokenize(" ");
169  TIter next(manips);
170  TObjString* manip;
171  while ((manip = (TObjString*)next())) {
172 
173  KVDataSet* ds = NewDataSet();
174  ds->SetName(manip->GetString().Data());
175  ds->SetTitle(gEnv->GetValue(Form("%s.DataSet.Title", manip->GetString().Data()), "Experimental dataset"));
176  ds->SetDataPathSubdir(gEnv->GetValue(Form("%s.DataSet.RepositoryDir", manip->GetString().Data()), manip->GetString().Data()));
177  ds->SetUserGroups(gEnv->GetValue(Form("%s.DataSet.UserGroup", manip->GetString().Data()), ""));
178  ds->SetRepository(fRepository);
179  fDataSets.Add(ds);
180 
181  }
182 
183  delete manips;
184 
185  return kTRUE;
186 }
187 
188 
189 
193 
195 {
196  //Initialise list of all known analysis tasks from informations in $KVROOT/KVFIles/.kvrootrc
197  //(and user's .kvrootrc)
198 
199  KVString task_list = gEnv->GetValue("DataAnalysisTask", "");
200 
201  fTasks.Clear();
202 
203  task_list.Begin(" ");
204  while (!task_list.End()) {
205 
207  TString name = task_list.Next();
208  dat->SetName(name.Data());
209  dat->SetTitle(gEnv->GetValue(Form("%s.DataAnalysisTask.Title", name.Data()), ""));
210  dat->SetPrereq(gEnv->GetValue(Form("%s.DataAnalysisTask.Prereq", name.Data()), ""));
211  dat->SetOutputDataType(gEnv->GetValue(Form("%s.DataAnalysisTask.Output", name.Data()), ""));
212  dat->SetDataAnalyser(gEnv->GetValue(Form("%s.DataAnalysisTask.Analyser", name.Data()), "KVDataAnalyser"));
213  dat->SetWithUserClass(gEnv->GetValue(Form("%s.DataAnalysisTask.UserClass", name.Data()), kFALSE));
214  dat->SetUserBaseClass(gEnv->GetValue(Form("%s.DataAnalysisTask.UserClass.Base", name.Data()), ""));
215  dat->SetStatusUpdateInterval(gEnv->GetValue(Form("%s.DataAnalysisTask.StatusUpdateInterval", name.Data()), 1000));
216  fTasks.Add(dat);
217 
218  }
219 
220  return kTRUE;
221 }
222 
223 
224 
230 
232 {
233  //Print list of datasets
234  //If opt="" (default) all datasets are shown with full information
235  //if opt="available" only available datasets are shown, each with a number which can
236  //be used with GetAvailableDataSet(Int_t) in order to retrieve the corresponding dataset.
237 
238  TString Sopt(opt);
239  Sopt.ToUpper();
240  if (Sopt.BeginsWith("AVAIL")) {
241  if (!fNavailable) {
242  cout << " *** No available datasets ***" <<
243  endl;
244  return;
245  }
246  else {
247  for (int i = 1; i <= fNavailable; i++) {
248  KVDataSet* ds = GetAvailableDataSet(i);
249  cout << "\t" << i << ". " << ds->GetTitle() << endl;
250  }
251  }
252  return;
253  }
254  if (fDataSets.GetSize()) {
255  TIter next(&fDataSets);
256  KVDataSet* ds;
257  while ((ds = (KVDataSet*) next()))
258  ds->ls();
259  }
260 }
261 
262 
263 
285 
287 {
288  //Check availability of datasets in repository associated to this data set manager
289  //
290  //If caching is activated for the parent repository, i.e. if
291  //
292  // [repository name].DataRepository.CacheAvailable: yes
293  //
294  //then instead of directly checking the existence of the directories for each dataset,
295  //we use the cached information written in the file
296  //KVBase::WorkingRepository()/[repository name].available.datasets
297  //unless (1) it doesn't exist, or (2) the file is older than the maximum
298  //cache time (in seconds) defined by
299  //
300  // [repository name].DataRepository.MaxCacheSeconds:
301  //
302  //In either of these 2 cases, we check the existence of the directories and update/
303  //create the cache file.
304  //
305  //If the repository appears to be empty (perhaps because we are using a remote access
306  //protocol to check it, and the protocol has some problems...), then as a last resort we
307  //we will use the cache if it exists, whatever its age.
308 
309  if (fCacheAvailable) {
310  //caching of dataset availability is activated
311  if (CheckCacheStatus()) {
312  //cache file exists and is not out of date
313  if (ReadAvailableDatasetsFile()) return;
314  }
315  }
316 
317  // print (repository-dependent) warning/informational message
318  if (fRepository) fRepository->PrintAvailableDatasetsUpdateWarning();
319 
320  //open temporary file
321  ofstream tmp_file;
322  TString tmp_file_path = fCacheFileName;
323  KVBase::OpenTempFile(tmp_file_path, tmp_file);
324 
325  fNavailable = 0;
326  if (fDataSets.GetSize()) {
327  TIter next(&fDataSets);
328  KVDataSet* ds;
329  while ((ds = (KVDataSet*) next())) {
330  //The results of this check are written in $KVROOT/KVFiles/[repository name].available.datasets
331  //This file may be read by KVRemoteDataSetManager::CheckAvailability when this
332  //data repository is accessed as a remote data repository from a remote machine.
333  //In this case we do not want the identity of the user to influence the contents of the file.
334  //Therefore even for 'unavailable' datasets we write the available datatypes (if any)
335  //in the file.
336  tmp_file << ds->GetName() << " : ";
337  ds->CheckAvailable();
338  tmp_file << ds->GetAvailableDataTypes() << endl;
339  if (ds->IsAvailable()) {
340  fNavailable++;
341  }
342  }
343 
344  //close temp file
345  tmp_file.close();
346  //if datasets are found, then we copy the temporary file to KVFiles directory,
347  //overwriting any previous version. if no datasets were found, we try the cache
348  //file (if it exists)
349  if (fNavailable && fRepository) { //if no repository is associated, no need to keep file
350  TString runlist = KVBase::GetWORKDIRFilePath(fCacheFileName.Data());
351  gSystem->CopyFile(tmp_file_path, runlist, kTRUE);
352  //set access permissions to 664
353  gSystem->Chmod(runlist.Data(), CHMODE(6, 6, 4));
354  }
355 
356  //delete temp file
357  gSystem->Unlink(tmp_file_path);
358 
359  if (!fNavailable) {
360  //no datasets found when checking file system ?
361  //can we rely on the cache file ?
362  ReadAvailableDatasetsFile();
363  }
364  else {
365  //now set up array of available datasets' indices
366  fIndex.clear();
367  next.Reset();
368  Int_t j(0);
369  while ((ds = (KVDataSet*) next())) {
370  if (ds->IsAvailable()) {
371  fIndex.push_back(j);
372  }
373  j++;
374  }
375  }
376  }
377 }
378 
379 
380 
381 
384 
386 {
387  //Return pointer to DataSet using index in list of all datasets, index>=0
388  if (fDataSets.GetSize() && index < fDataSets.GetSize())
389  return (KVDataSet*) fDataSets.At(index);
390  return 0;
391 }
392 
393 
394 
397 
399 {
400  //Return pointer to DataSet using name
401  return (KVDataSet*) fDataSets.FindObjectByName(name);
402 }
403 
404 
405 
410 
412 {
413  //Return pointer to available DataSet using index of available datasets
414  //Note this index begins at 1, and corresponds to the number printed next to the dataset
415  //when Print("available") is called
416  if (fNavailable && index && index <= fNavailable)
417  return GetDataSet(fIndex[index - 1]);
418  return 0;
419 }
420 
421 
422 
425 
427 {
428  //Return pointer to named data analysis task
429  return (KVDataAnalysisTask*) fTasks.FindObjectByName(name);
430 }
431 
432 
433 
437 
439  const Char_t* username)
440 {
441  //Check in list of groups fUserGroups if the user name 'username' is part of the group 'groupname'.
442  //If 'username' is not given (default) we use current user info (gSystem->GetUserInof()->fUser).
443 
444  TString Username = strcmp(username,
445  "") ? username : gSystem->GetUserInfo()->
446  fUser.Data();
447 
448  if (fUserGroups.HasParameter(groupname)) {
449  if (fUserGroups.GetTStringValue(groupname).Contains(Username.Data()))
450  return kTRUE;
451  }
452  return kFALSE;
453 }
454 
455 
456 
459 
461 {
462  //Creates and returns pointer to new data set object
463  return (new KVDataSet);
464 }
465 
466 
467 
473 
475 {
476  //Called when the physical state of the repository has changed i.e. a subdirectory for
477  //a new dataset or datatype has been added or removed. We update the available datatsets,
478  //datatypes and analysis tasks.
479 
480  //check which datasets are available
481  CheckAvailability();
482 
483  //set available data analysis tasks for available datasets
484  if (GetNavailable()) {
485  for (Int_t i = 1; i <= GetNavailable(); i++)
486  GetAvailableDataSet(i)->SetAnalysisTasks(&fTasks);
487  }
488 }
489 
490 
491 
498 
500 {
501  //Opens file KVBase::WorkingDirectory()/[repository name].available.datasets
502  //containing cached info on available datasets and
503  //associated subdirectories in data repository.
504  //Opens file for reading, & if all goes well returns kTRUE.
505  //Returns kFALSE in case of problems.
506 
507  return KVBase::SearchAndOpenKVFile(KVBase::GetWORKDIRFilePath(fCacheFileName), fDatasets);
508 }
509 
510 
511 
517 
519 {
520  //Opens and reads file containing cached info on available datasets, and sets
521  //the availability of the concerned datasets.
522  //Returns kTRUE if all goes well.
523  //Returns kFALSE if no cache exists or if file cannot be opened.
524  if (OpenAvailableDatasetsFile()) {
525  Info("ReadAvailableDataSetsFile",
526  "Reading cached information in file %s", fCacheFileName.Data());
527  //read file
528  TString line;
529  line.ReadLine(fDatasets);
530  while (fDatasets.good()) {
531 
532  TObjArray* toks = line.Tokenize(": ,");
533 
534  //first entry is dataset name
535  TString datasetname = ((TObjString*) toks->At(0))->String();
536  KVDataSet* dataset = GetDataSet(datasetname.Data());
537 
538  if (dataset) { //check dataset is known to local version of KaliVeda
539  //in case of remote repository, there may be datasets in the remote repository which are not defined here
540  if (toks->GetEntries() > 1 && dataset->CheckUserCanAccess()) {
541  //AVAILABLE DATASET
542  dataset->SetAvailable();
543  fNavailable++;
544  for (int i = 1; i < toks->GetEntries(); i++) {
545  //each following entry is a subdirectory name
546  dataset->AddAvailableDataType(((TObjString*) toks->At(i))->String().
547  Data());
548  }
549  }
550  else {
551  //UNAVAILABLE DATASET (no subdirs)
552  dataset->SetAvailable(kFALSE);
553  }
554  }
555 
556  delete toks;
557  line.ReadLine(fDatasets);
558  }
559 
560  //close file
561  fDatasets.close();
562  fDatasets.clear();
563 
564  if (fNavailable) {
565  TIter next(&fDataSets);
566  //now set up array of available datasets' indices
567  fIndex.clear();
568  Int_t j(0);
569  KVDataSet* ds;
570  while ((ds = (KVDataSet*) next())) {
571  if (ds->IsAvailable()) {
572  fIndex.push_back(j);
573  }
574  j++;
575  }
576  }
577  //all is OK
578  return kTRUE;
579  }
580  //we could not find/open the cache file
581  return kFALSE;
582 }
583 
584 
585 
586 
591 
593 {
594  //We check the status of the available datasets cache file.
595  //We return kTRUE if the file exists & was last modified
596  //less than fMaxCacheTime seconds ago.
597 
598  TString fullpath;
599  Info("KVDataSetManager::CheckCacheStatus", "Checking for available datasets cache file...");
600  if (KVBase::SearchKVFile(KVBase::GetWORKDIRFilePath(fCacheFileName), fullpath)) {
601 
602  // file exists - how old is it ?
603  FileStat_t file_info;
604  gSystem->GetPathInfo(fullpath.Data(), file_info);
605  TDatime file_date(file_info.fMtime);
606  TDatime now;
607  UInt_t file_age = now.Convert() - file_date.Convert();
608  Info("KVDataSetManager::CheckCacheStatus", "...file found. It is %u seconds old", file_age);
609  if (file_age < fMaxCacheTime) {
610  Info("KVDataSetManager::CheckCacheStatus", "Using cached file");
611  return kTRUE;
612  }
613  else
614  Info("KVDataSetManager::CheckCacheStatus", "File is too old (max time=%u). Update will be performed.", fMaxCacheTime);
615  }
616  else
617  Info("KVDataSetManager::CheckCacheStatus", "...no file found");
618  return kFALSE;
619 }
620 
621 
622 
628 
630 {
631  // This method returns a pointer to the analysis task whose description (title) contains
632  // all of the whitespace-separated keywords (which may be regular expressions)
633  // given in the string "keywords". The comparison is case-insensitive.
634 
635  //case-insensitive search for matches in list of all analysis tasks, based on 'title' attribute
636  return (KVDataAnalysisTask*)GetAnalysisTaskList()->FindObjectAny("title", keywords, kTRUE, kFALSE);
637 }
638 
639 
int Int_t
unsigned int UInt_t
bool Bool_t
char Char_t
constexpr Bool_t kFALSE
constexpr Bool_t kTRUE
const char Option_t
R__EXTERN TEnv * gEnv
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
static void OpenTempFile(TString &base, std::ofstream &fp)
Definition: KVBase.cpp:822
static const Char_t * GetWORKDIRFilePath(const Char_t *namefile="")
Definition: KVBase.cpp:121
static Bool_t SearchKVFile(const Char_t *name, TString &fullpath, const Char_t *kvsubdir="")
Definition: KVBase.cpp:533
static Bool_t SearchAndOpenKVFile(const Char_t *name, KVSQLite::database &dbfile, const Char_t *kvsubdir="")
Definition: KVBase.cpp:644
Define and manage data analysis tasks.
virtual void SetDataAnalyser(const Char_t *d)
void SetOutputDataType(const KVString &p)
virtual void SetPrereq(const Char_t *p)
virtual void SetStatusUpdateInterval(Long64_t n)
virtual void SetUserBaseClass(const Char_t *d)
virtual void SetWithUserClass(Bool_t w=kTRUE)
Base class for managing repositories of experimental data.
virtual Bool_t IsRemote() const
Returns kTRUE for remote repositories, kFALSE for local repositories.
Manage all datasets contained in a given data repository.
virtual void Print(Option_t *opt="") const
virtual void ReadUserGroups()
virtual Bool_t ReadDataSetList()
virtual Bool_t Init(KVDataRepository *=0)
KVDataAnalysisTask * GetAnalysisTaskAny(const Char_t *keywords) const
virtual Bool_t OpenAvailableDatasetsFile()
virtual void Update()
virtual Bool_t CheckCacheStatus()
virtual Bool_t ReadAvailableDatasetsFile()
virtual Bool_t CheckUser(const Char_t *groupname, const Char_t *username="")
virtual KVDataAnalysisTask * GetTask(const Char_t *name)
Return pointer to named data analysis task.
KVDataSet * GetDataSet(Int_t) const
Return pointer to DataSet using index in list of all datasets, index>=0.
virtual KVDataSet * NewDataSet()
Creates and returns pointer to new data set object.
virtual Bool_t ReadTaskList()
virtual void CheckAvailability()
virtual KVDataSet * GetAvailableDataSet(Int_t) const
Manage an experimental dataset corresponding to a given experiment or campaign.
Definition: KVDataSet.h:146
virtual void SetUserGroups(const Char_t *groups)
Definition: KVDataSet.h:219
void ls(Option_t *opt="") const override
Print dataset information.
Definition: KVDataSet.cpp:394
virtual void SetDataPathSubdir(const Char_t *s)
Definition: KVDataSet.h:197
virtual const Char_t * GetAvailableDataTypes() const
Definition: KVDataSet.h:213
virtual void AddAvailableDataType(const Char_t *)
Definition: KVDataSet.cpp:507
virtual void CheckAvailable()
Definition: KVDataSet.cpp:460
virtual Bool_t IsAvailable() const
Definition: KVDataSet.h:223
virtual void SetAvailable(Bool_t yes=kTRUE)
Definition: KVDataSet.h:228
void SetName(const char *name) override
Definition: KVDataSet.cpp:707
virtual Bool_t CheckUserCanAccess()
Definition: KVDataSet.cpp:1460
void SetRepository(KVDataRepository *)
Set pointer to data repository in which dataset is stored.
Definition: KVDataSet.cpp:1491
Extension of ROOT TString class which allows backwards compatibility with ROOT v3....
Definition: KVString.h:73
void Begin(TString delim) const
Definition: KVString.cpp:565
Bool_t End() const
Definition: KVString.cpp:634
KVString Next(Bool_t strip_whitespace=kFALSE) const
Definition: KVString.cpp:695
UInt_t Convert(Bool_t toGMT=kFALSE) const
virtual const char * GetValue(const char *name, const char *dflt) const
void Reset()
virtual void SetTitle(const char *title="")
const char * GetName() const override
const char * GetTitle() const override
virtual void SetName(const char *name)
Int_t GetEntries() const override
TObject * At(Int_t idx) const override
const TString & GetString() const
TString & String()
const char * Data() const
void ToUpper()
TObjArray * Tokenize(const TString &delim) const
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
virtual int Chmod(const char *file, UInt_t mode)
virtual int CopyFile(const char *from, const char *to, Bool_t overwrite=kFALSE)
virtual int GetPathInfo(const char *path, FileStat_t &buf)
virtual UserGroup_t * GetUserInfo(const char *user=nullptr)
virtual int Unlink(const char *name)
TLine * line
void Info(const char *location, const char *fmt,...)
const char * String
Long_t fMtime
ClassImp(TPyArg)
size_t fIndex