# CompareDatasetCount.py -- forked from GonzalezFJR/xtools
'''
[CompareDatasetCount.py] by Xuan (jrgonzal@cern.ch)
Description: Execute this program to compare the number in the 'Count' histogram
contained in your nanoAOD, corresponding to the total number of events of the processed trees,
with the number of events of the dataset in DAS.
You need to have the 'rootfileReader' and 'crabTools' packages available for import.
Usage:
python CompareDatasetCount.py dataset/data2017.txt --path nanoAODcrab/ --prodName prod
python CompareDatasetCount.py dataset/data2017.txt --path nanoAOD/data2017/
python CompareDatasetCount.py --dataset dataset --path nanoAOD/data2017/
'''
from crabTools.GetEventsDataset import GetEntriesDAS
from crabTools.SubmitDatasets import ReadLines
from rootfileReader.rootfileReader import SearchFiles, GetAllTrees, GetOnlyCount, FixStringLength, GetFiles
import argparse
# ---------------------------------------------------------------------------
# Command-line interface.
# The three boolean switches come first, then the string-valued options, and
# finally the optional positional text file that lists the datasets.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Check events with events in das')
parser.add_argument('--verbose', '-v', action='store_true',
                    help='Activate the verbosing')
parser.add_argument('--pretend', '-p', action='store_true',
                    help='Create the files but not send the jobs')
parser.add_argument('--test', '-t', action='store_true',
                    help='Sends only one or two jobs, as a test')
parser.add_argument('--dataset', '-d', default='',
                    help='Check this dataset')
parser.add_argument('--prodName', '-n', default='',
                    help='Name of your production')
parser.add_argument('--options', '-o', default='',
                    help='Options to pass to your producer')
parser.add_argument('--path', default='./',
                    help='path to files')
parser.add_argument('file', default='', nargs='?',
                    help='txt file with datasets')

args = parser.parse_args()

# Copy the parsed values into the module-level names the rest of the script
# reads.  `dotest` and `options` are parsed but not used by the code shown
# in this file.
verbose, doPretend, dotest = args.verbose, args.pretend, args.test
datasetName, prodName, options = args.dataset, args.prodName, args.options
path, fname = args.path, args.file

# True when a dataset was given explicitly with --dataset / -d.
doDataset = datasetName != ''

# An explicit --dataset takes precedence; otherwise the datasets are read
# from the positional text file with ReadLines.
if datasetName == '':
    dataset = ReadLines(fname)
else:
    dataset = datasetName
# ---------------------------------------------------------------------------
# Compare the 'Count' obtained from the local files of every sample with the
# number of events DAS reports for the matching dataset.
#
# This section runs unchanged under Python 2 and Python 3: every print call
# receives one pre-formatted string, and no list-only dict methods are used.
# The doubled spaces in the messages reproduce exactly what the former
# Python 2 print statements emitted (an item separator after a string that
# already ended in a space).
# ---------------------------------------------------------------------------
print('Getting info from DAS...')
dic = GetEntriesDAS(dataset, verbose, doPretend)
# {dataset : [nfiles, nev]} or [nfiles, nev]
# NOTE(review): according to the comment above, a query for a single dataset
# (--dataset) may return a bare [nfiles, nev] pair instead of a dict --
# confirm against GetEntriesDAS.  Wrap that pair so the per-dataset lookup
# below works in both cases instead of failing on the missing .keys().
if datasetName != '' and not isinstance(dic, dict):
    dic = {datasetName: dic}
datasets = list(dic)

print('Searching files of prod  %s  in  %s' % (prodName, path))
dicfiles = SearchFiles(path, prodName)
# {samplename : path}
samples = sorted(dicfiles)

for s in samples:
    if verbose:
        print('Getting count for sample  %s' % (s,))

    # Without a production name the files are collected with GetFiles;
    # with one, GetAllTrees is used on the sample's path.
    if prodName == "":
        trees = GetFiles(dicfiles[s], s)
    else:
        trees = GetAllTrees(dicfiles[s])
    count = GetOnlyCount(trees)

    for d in datasets:
        # Replace '/' and '-' by '_' in the dataset name, then keep only the
        # datasets whose transformed name contains the sample name.
        dname = ('%s' % d).replace('/', '_').replace('-', '_')
        if s not in dname:
            continue

        nfiles, nev = dic[d]
        # Percentage of the DAS events found locally.  A dataset for which
        # DAS reports 0 events yields 'nan' rather than aborting the whole
        # comparison with a ZeroDivisionError.
        fr = float(count) / nev * 100 if nev else float('nan')
        print('[%i] [%i] %s : %1.2f %s'
              % (nev, count, FixStringLength(s), fr, '%'))