#!/usr/bin/env python
"""Parse the output of the swmgr run archive.

Archive URL: https://pc-ads-01.roma1.infn.it/KV/index.php
The table has a long long line with all the entries (cut+paste from page src).

Usage:
    swmgrparser.py [infile [outfile]]

    infile  (def: usatlas1-data) is the file with the web page extract
    outfile (def: stdout) is the file where to write collected data,
            summary table, ...

The program asks for the certificate password so that it can query the
protected pages.
Entries are filtered according to the dates hard-coded in SELECTED_DATES
(TODO: remove/improve that).

Runs under Python 2.6+ and Python 3.

For info contact Marco Mambelli: marco@hep.uchicago.edu
"""

# e.g. line (as found in this file, without any markup):
# uct2-c0259E9C6191-FE6E-1234-B2EA-35BE93849C85DC3 Z -> e e G4 event generation (python JT)30-10-2007 16:10OKlogfile

import getpass
import os
import re
import sys
import time

try:  # Python 2
    from commands import getstatusoutput
except ImportError:  # Python 3
    from subprocess import getstatusoutput


DEFAULT_INFILE = 'usatlas1-data'
BASEURL = "https://pc-ads-01.roma1.infn.it/KV/"

# Only sessions whose first test started on one of these days are reported.
SELECTED_DATES = ("14-11-2007", "15-11-2007")

# Relative URLs that identify the kind of a table cell (see add_element).
HOST_URL = 'protected/viewhost.php?hid='
SESSION_URL = 'index.php?sid='
INFO_URL = 'protected/viewinfo.php?id='
LOG_URL = 'protected/viewlog.php?id='

# NOTE(review): the four literals below are empty, and `pat` / `pat1` look
# truncated -- presumably the HTML tags they contained were lost from this
# copy of the file.  They are kept exactly as found rather than guessed.
# str.split('') raises "ValueError: empty separator", so the original markup
# must be restored (or passed explicitly to parse_entries) before a real run.
ROW_START = ''      # prefix of the input line that holds the table rows
ROW_SEP = ''        # separator between table rows (one row per test)
CELL_SEP = ''       # separator between the cells of one row
HOST_ROW_SEP = ''   # separator between the rows of the host-info table

# Link cell -> groups handed to add_element, which reads mdata[0] as the URL
# and mdata[1] as the link text.  NOTE(review): only one group is left here.
pat = re.compile(r'(.+)', re.I)
# Host-info row -> (property name, property value).
pat1 = re.compile(r']*>([^<]*)]*>([^<]*)', re.I)
# Timing summary line of a job log -> (amount, unit).
pat2 = re.compile(r'Tot=([^\[]*)\[([^\]]*)\]')

# Maps the test description found in the archive to
# (position within a session, short name, description).
TESTTYPES = {
    'DC3 Z -> e e G4 event generation (python JT)':
        (0, 'evgen', 'DC3 Z -> e e G4 event generation (python JT)'),
    'DC3 Z -> e e G4 event atlasG4 (python JT)':
        (1, 'simul', 'DC3 Z -> e e G4 event atlasG4 (python JT)'),
    'DC3 Z -> e e Digitization (python JT)':
        (2, 'digit', 'DC3 Z -> e e Digitization (python JT)'),
    'DC3 Z -> e e event reconstruction (python JT)':
        (3, 'reco', 'DC3 Z -> e e event reconstruction (python JT)'),
    'AtlasProduction 12.0.6 Validation':
        (4, 'valid', 'AtlasProduction 12.0.6 Validation'),
}

# Properties listed on the host-info page.
CPUINFOS = ['Hostname',
            'IP number',
            'OS name',
            'OS version',
            'OS distribution',
            'Architecture',
            'Compiler version',
            'Python version',
            'Number of CPUs',
            'CPU name',
            'Bogomips',
            'RAM',
            ]

# Subset of CPUINFOS written to the CSV table.
CPUINFOS2 = ['Hostname',
             'Architecture',
             'Number of CPUs',
             'CPU name',
             'Bogomips',
             'RAM',
             ]

# Sessions collected by addToSession, keyed by session id.  Each value holds:
#   'id', 'host' (hostname, hostid), and the 5-slot lists 'tids', 'tres',
#   'ttime', 'ttimestring'; addExtraInfo adds 'cpuinfo', 'ce', 'tottime' and
#   replaces 'ttime' with the measured timings.
sdata = {}

# Report destination; None means sys.stdout.  Set by main().
outfile = None

# Shell expression that expands to the certificate password ('' = none).
# main() stores the real password in the MYCERTVALUE environment variable so
# that it never appears on a command line.
cert_pass = ''


def printout(instr=''):
    """Write *instr* and a newline to the report (outfile or stdout)."""
    stream = outfile if outfile is not None else sys.stdout
    stream.write("%s\n" % (instr,))


def add_element(mdic, mdata, mstr=''):
    """Store one parsed table cell into the test dictionary *mdic*.

    mdata[0] selects what is stored: 'time', 'flag', or one of the URLs
    below; mdata[1] is the associated value.  *mstr* is the raw cell text.
    Unrecognised keys only produce a warning.

    Possible URLS:
    protected/viewhost.php?hid=4153
    index.php?sid=9E9C6191-FE6E-1234-B2EA-35BE93849C85&reload
    protected/viewinfo.php?id=31992
      DC3 Z -> e e G4 event generation (python JT)
    protected/viewlog.php?id=31992
    """
    mkey = mdata[0]
    if mkey == 'time':
        mdic['time'] = mdata[1]
        mdic['timestring'] = mstr
    elif mkey.startswith(HOST_URL):
        mdic['hostid'] = mkey[len(HOST_URL):]
        mdic['hostname'] = mdata[1]
    elif mkey.startswith(SESSION_URL):
        mdic['session'] = mdata[1]
    elif mkey.startswith(INFO_URL):
        mdic['type'] = mdata[1]
    elif mkey.startswith(LOG_URL):
        mdic['id'] = mkey[len(LOG_URL):]
        # The status cell is not closed in the generated HTML, so the
        # status text ends up in the same cell as the log link.
        if '>OK<' in mstr:
            mdic['complete'] = True
        elif '>FAILED<' in mstr:
            mdic['complete'] = False
        else:
            printout("WARN: no completion string in: %s" % (mstr,))
    elif mkey == 'flag':
        mdic['complete'] = mdata[1]
    else:
        printout("WARN: add_element failed: %s" % (mstr,))


def _parse_row(row, cell_sep, link_pat):
    """Return the test dictionary built from the cells of one table row."""
    entry = {}
    for cell in row.split(cell_sep):
        if not cell:
            continue
        match = link_pat.search(cell)
        if match is not None:
            add_element(entry, match.groups(), cell)
            continue
        # Not a link: either the timestamp (after a 4-character prefix),
        # e.g. 30-10-2007 16:10, or a stand-alone status cell.
        stamp = cell[4:]
        try:
            parsed = time.strptime(stamp, "%d-%m-%Y %H:%M")
        except ValueError:
            parsed = None
        if parsed is not None:
            add_element(entry, ('time', parsed), stamp)
        elif cell.endswith('>OK'):
            # in case generated HTML is fixed
            add_element(entry, ('flag', True))
        elif cell.endswith('>FAILED'):
            add_element(entry, ('flag', False))
        else:
            printout("Warn: no valid data in :%s:" % (cell,))
    return entry


def parse_entries(lines, row_start=ROW_START, row_sep=ROW_SEP,
                  cell_sep=CELL_SEP, link_pat=pat):
    """Parse the archive table into {row index: test dictionary}.

    lines     -- lines of the saved web page
    row_start -- prefix of the line holding the table; when several lines
                 match, the last one is used
    row_sep   -- separator between rows, each row is an entry
    cell_sep  -- separator between the cells of a row
    link_pat  -- compiled pattern yielding (url, text) for link cells

    Rows without any usable cell are returned as empty dictionaries.
    Raises ValueError if a separator is empty (str.split).
    """
    rows = []
    for line in lines:
        if line.startswith(row_start):
            rows = line.split(row_sep)
    return dict((index, _parse_row(row, cell_sep, link_pat))
                for index, row in enumerate(rows))


def addToSession(sid, elem):
    """Add the test *elem* to session *sid* in the global ``sdata``.

    A session includes 5 tests; the slot is chosen from elem['type'] through
    TESTTYPES.  Returns True when the test was stored, False (after a
    warning) when its type is unknown or its slot is already taken.
    Raises KeyError if *elem* lacks the host, id, completion or time fields.
    """
    host = (elem['hostname'], elem['hostid'])
    if sid in sdata:
        selem = sdata[sid]
        if selem['host'] != host:
            printout("Warning, host differs within session: %s from %s"
                     % (host, selem['host']))
    else:
        selem = {'tids': [None, None, None, None, None],
                 'tres': [None, None, None, None, None],
                 'ttime': [None, None, None, None, None],
                 'ttimestring': ['', '', '', '', ''],
                 'host': host,
                 'id': sid,
                 }
    # .get(): a test without a 'type' is reported, not turned into a KeyError.
    testtype = TESTTYPES.get(elem.get('type'))
    if testtype is None:
        printout("WARN: Unknown type: %s" % (elem.get('type'),))
        return False
    slot = testtype[0]
    if selem['tids'][slot]:
        printout("WARN: Duplicate entry in %s for %s" % (sid, testtype))
        return False
    selem['tids'][slot] = elem['id']
    selem['tres'][slot] = elem['complete']
    selem['ttime'][slot] = elem['time']
    selem['ttimestring'][slot] = elem['timestring']
    # A new session is registered only once it holds a valid test.
    sdata[sid] = selem
    return True


def printsession(key, value):
    """Print the summary of session *key* (a value of ``sdata``)."""
    printout("Session: %s" % (key, ))
    printout("%sindex.php?sid=%s" % (BASEURL, key))
    printout("Hostname: %s" % (value['host'][0], ))
    printout("%sprotected/viewhost.php?hid=%s" % (BASEURL, value['host'][1]))
    printout("Test ids (evgen, sim, digit, reco, valid): %s"
             % (value['tids'],))
    printout("Succesful: %s" % (value['tres'],))
    printout("Time (first, last): %s, %s"
             % (value['ttimestring'][0], value['ttimestring'][4]))
    printout(' '.join([i for i in value['tids'] if i]))


def select_sessions(sessions, dates=SELECTED_DATES):
    """Return the sessions whose first test started on one of *dates*.

    *dates* are "dd-mm-yyyy" strings compared against the start of the
    first test's time string.
    """
    wanted = tuple(dates)
    return dict((key, value) for key, value in sessions.items()
                if value['ttimestring'][0].startswith(wanted))


def _shell_quote(text):
    """Return *text* single-quoted so that the shell takes it literally."""
    return "'" + text.replace("'", "'\\''") + "'"


def curl_command(path, pipeline='', password=None):
    """Build the shell command that downloads BASEURL + *path* with curl.

    path     -- page relative to BASEURL
    pipeline -- shell text appended after the curl call (e.g. ' | grep x')
    password -- shell expression expanding to the certificate password;
                defaults to the module-level ``cert_pass``.  When empty,
                no --pass option is emitted (otherwise curl would take the
                following option as the pass phrase).
    """
    if password is None:
        password = cert_pass
    pass_opt = ''
    if password:
        # Double quotes: the variable is expanded but never word-split.
        pass_opt = ' --pass "%s"' % (password,)
    return ('curl -k --cert "$HOME/.globus/usercert.pem"'
            ' --key "$HOME/.globus/userkey.pem"%s'
            ' --capath /etc/grid-security/certificates %s 2>&1%s'
            % (pass_opt, _shell_quote(BASEURL + path), pipeline))


def _parse_site_name(out):
    """Return the text between 'OSG_SITE_NAME=' and the next '<', or ''."""
    marker = 'OSG_SITE_NAME='
    start = out.find(marker)
    if start == -1:
        return ''
    name = out[start + len(marker):]
    end = name.find('<')
    if end != -1:
        name = name[:end]
    return name


def _parse_timing(out):
    """Return the 'Tot=' time of a log summary line in seconds, 0 if none."""
    match = pat2.search(out)
    if match is None:
        return 0
    amount, unit = match.groups()
    try:
        seconds = float(amount)
    except ValueError:
        return 0
    if unit == 'min':
        seconds *= 60.0
    return seconds


# Example of a host-info line (as found in this file, without any markup):
# Hostnameuct2-c025IP number10.1.2.25OS nameLinuxOS version2.6.9-42.0.3.EL.cernsmpOS distributionScientific Linux CERN SLC release 4.4 (Beryllium)Architecturei686Compiler versiongcc version 3.4.6 20060404 (Red Hat 3.4.6-3)Python version2.4.2Number of CPUs4CPU nameDual Core AMD Opteron(tm) Processor 285Bogomips5175.56RAM0
def addExtraInfo(sdic):
    """Query the protected pages and add host and timing data to *sdic*.

    Sets sdic['cpuinfo'] (CPUINFOS name -> value, '' when unknown),
    sdic['ce'] (OSG site name from the first test's log, '' when unknown),
    sdic['ttime'] (seconds per test, 0 when unavailable) and
    sdic['tottime'] (sum of the first four timings, 0 unless all four are
    available).  Runs curl through the shell; see curl_command.
    """
    hostname, hostid = sdic['host'][0], sdic['host'][1]
    _, out = getstatusoutput(curl_command('%s%s' % (HOST_URL, hostid)))
    dline = ''
    for line in out.split('\n'):
        if 'Hostname' in line and hostname in line:
            dline = line
            break
    cpuinfo = dict((name, '') for name in CPUINFOS)
    if not dline:
        printout("WARN: No valid info from for host : %s ( %s )"
                 % (hostname, hostid))
    else:
        for cell in dline.split(HOST_ROW_SEP):
            match = pat1.search(cell)
            if match is not None:
                name, value = match.groups()
                cpuinfo[name] = value
    sdic['cpuinfo'] = cpuinfo

    # Always defined, so that printTupleCSV works without an evgen log.
    sdic['ce'] = ''
    timeinfo = [0, 0, 0, 0, 0]
    for i in range(5):
        tid = sdic['tids'][i]
        if not tid:
            continue
        log_path = '%s%s' % (LOG_URL, tid)
        if i == 0:
            # The site name is taken from the log of the first test only.
            _, out = getstatusoutput(
                curl_command(log_path, ' | grep OSG_SITE_NAME '))
            sdic['ce'] = _parse_site_name(out)
        if sdic['tres'][i]:
            # Timings are only meaningful for tests that completed.
            _, out = getstatusoutput(curl_command(
                log_path, ' | grep "StatSvc.*INFO Time User" | tail -n 1'))
            timeinfo[i] = _parse_timing(out)
    sdic['ttime'] = timeinfo
    if (timeinfo[3] != 0 and timeinfo[2] != 0
            and timeinfo[1] != 0 and timeinfo[0] != 0):
        sdic['tottime'] = (timeinfo[3] + timeinfo[2]
                           + timeinfo[1] + timeinfo[0])
    else:
        sdic['tottime'] = 0


def csv_header():
    """Return the header line matching the rows written by printTupleCSV."""
    return ("Session ID, Start time, CE, "
            + "".join(["%s, " % (name,) for name in CPUINFOS2])
            + "Time evgen, time sim, time digit, time reco, time val, "
            "Tot time")


def printTupleCSV(key, value):
    """Print one CSV row for session *key* (after addExtraInfo)."""
    fields = [key, value['ttimestring'][0], value.get('ce', '')]
    fields.extend([value['cpuinfo'][name] for name in CPUINFOS2])
    fields.extend([value['ttime'][i] for i in range(5)])
    fields.append(value['tottime'])
    printout(", ".join(["%s" % (field,) for field in fields]))


def _report(lines):
    """Parse *lines* and write the whole report through printout."""
    # each row is an entry
    data = parse_entries(lines)
    printout("Total number of entries: %s" % (len(data),))

    for entry in data.values():
        if not entry:
            printout("WARN: empty test")
            continue
        try:
            addToSession(entry['session'], entry)
        except Exception:
            # Show the offending entry before the error propagates.
            printout(entry)
            raise
    printout("Total sessions: %s" % (len(sdata),))

    # selection
    printout("Printing only sessions started on: %s"
             % (", ".join(SELECTED_DATES),))
    seldata = select_sessions(sdata)
    printout("Total selected sessions: %s" % (len(seldata),))
    printout("Sessions detail")
    for key, value in seldata.items():
        printsession(key, value)
        printout()

    # For a quick test, reduce seldata to a single session here.

    printout()
    printout("Gathering extra info (host, timings)")
    print("Remote query... please wait")
    printout()
    for key, value in seldata.items():
        addExtraInfo(value)
        printout(key)
        printout(value)
        printout()

    printout()
    printout("CSV tuples")
    printout()
    printout(csv_header())
    for key, value in seldata.items():
        printTupleCSV(key, value)
    printout()
    print("")


def main(argv=None):
    """Run the parser: ``argv`` is [prog, infile, outfile], both optional."""
    global outfile, cert_pass
    if argv is None:
        argv = sys.argv
    infilename = DEFAULT_INFILE
    outfilename = None
    if len(argv) > 1:
        infilename = argv[1]
    if len(argv) > 2:
        outfilename = argv[2]

    with open(infilename, 'r') as infile:
        lines = infile.readlines()

    password = getpass.getpass('Input Cert Password:')
    if password:
        # Hand the password to curl through the environment so that it does
        # not show up in the command line.
        os.environ['MYCERTVALUE'] = password
        cert_pass = '$MYCERTVALUE'
    else:
        cert_pass = ''

    print("Processing entries in : %s" % (infilename,))
    if outfilename:
        print("Writing to:  %s" % (outfilename,))
        outfile = open(outfilename, 'w')
    else:
        print("Writing to stdout")
    try:
        _report(lines)
    finally:
        if outfile is not None:
            outfile.close()
            outfile = None


if __name__ == "__main__":
    main()