#!/bin/env python
"""This program parses the output of the swmgr run archive: https://pc-ads-01.roma1.infn.it/KV/index.php
The table has a long long line with all the entries (cut+paste from page src)
Usage: swmgrparser.py [infile [outfile]]
infile (def: usatlas1-data) is the file with the web page extract
outfile (def: stdout) is the file where to write collected data, summary table, ...
The program asks for the certificate password so that it can query protected information.
Entries are filtered according to dates hardcoded in the src file (TODO: remove/improve that).
For info contact Marco Mambelli: marco@hep.uchicago.edu
"""
""" e.g. line:
uct2-c025 | 9E9C6191-FE6E-1234-B2EA-35BE93849C85 | DC3 Z -> e e G4 event generation (python JT) | 30-10-2007 16:10 | OK | logfile |
"""
import re
import time
import getpass
import os
import sys
# --- Command line handling: swmgrparser.py [infile [outfile]] ---------------
# Name of the input file used when none is given on the command line.
DEFAULT_INFILE = 'usatlas1-data'

# Start from the default so that the name reported later is never empty.
infilename = DEFAULT_INFILE
outfilename = None
if len(sys.argv) > 1:
    infilename = sys.argv[1]
if len(sys.argv) > 2:
    outfilename = sys.argv[2]
f = open(infilename, 'r')

# All report output goes through printout(): to the output file when one was
# requested on the command line, to stdout otherwise.
if outfilename:
    outfile = open(outfilename, 'w')

    def printout(instr=''):
        """Write instr followed by a newline to the output file."""
        outfile.write("%s\n" % (instr,))
else:
    def printout(instr=''):
        """Print instr on stdout."""
        print(instr)

# The password is stored in the environment and cert_pass is replaced by a
# shell reference to it, so command strings built from cert_pass carry the
# reference and not the password itself.
cert_pass = getpass.getpass('Input Cert Password:')
if cert_pass:
    os.environ['MYCERTVALUE'] = cert_pass
    cert_pass = '$MYCERTVALUE'
def add_element(mdic, mdata, mstr=''):
    """Record one parsed table cell in the entry dictionary mdic.

    mdic  -- dictionary describing one table row; it is updated in place
    mdata -- sequence (key, value); key is the href of the link found in the
             cell, or one of the pseudo keys 'time' and 'flag'
    mstr  -- raw text the pair was extracted from; stored as 'timestring'
             for 'time', searched for the completion marker for log links
             and quoted in warnings

    Possible URLS:
    protected/viewhost.php?hid=4153
    index.php?sid=9E9C6191-FE6E-1234-B2EA-35BE93849C85&reload
    protected/viewinfo.php?id=31992 DC3 Z -> e e G4 event generation (python JT)
    protected/viewlog.php?id=31992

    Keys written to mdic: 'time' and 'timestring', 'hostid' and 'hostname',
    'session', 'type', 'id' and 'complete'. An unrecognised key only
    produces a warning through printout().
    """
    host_url = 'protected/viewhost.php?hid='
    session_url = 'index.php?sid='
    info_url = 'protected/viewinfo.php?id='
    log_url = 'protected/viewlog.php?id='

    mkey = mdata[0]
    if mkey == 'time':
        mdic['time'] = mdata[1]
        mdic['timestring'] = mstr
    elif mkey.startswith(host_url):
        mdic['hostid'] = mkey[len(host_url):]
        mdic['hostname'] = mdata[1]
    elif mkey.startswith(session_url):
        # the link text, not the URL, is used as session identifier
        mdic['session'] = mdata[1]
    elif mkey.startswith(info_url):
        mdic['type'] = mdata[1]
    elif mkey.startswith(log_url):
        mdic['id'] = mkey[len(log_url):]
        # missing, parsing workaround: the completion status is looked up in
        # the raw text that carries the log link
        if '>OK<' in mstr:
            mdic['complete'] = True
        elif '>FAILED<' in mstr:
            mdic['complete'] = False
        else:
            printout("WARN: no completion string in: %s" % (mstr,))
    elif mkey == 'flag':
        mdic['complete'] = mdata[1]
    else:
        printout("WARN: add_element failed: %s" % (mstr,))
# --- Read the page extract and turn every table row into a dictionary -------
# NOTE(review): in the reviewed copy of this file the HTML markers used below
# were empty or broken (the row separator was an unterminated string, the
# cell separator was '' which makes str.split raise ValueError, and the link
# pattern had a single group although add_element reads two values). They
# are reconstructed here from the way the parsed pieces are used (cells are
# sliced with [4:] and tested with endswith('>OK')) -- TODO confirm against
# the real page source.
ROW_LINE_PREFIX = '<tr>'    # start of the long line holding the whole table
ROW_SEPARATOR = '</tr>'     # splits that line into rows
CELL_SEPARATOR = '</td>'    # splits a row into cells
CELL_PREFIX_LEN = 4         # leading characters dropped before parsing a date

# f is the input file opened while the command line was processed; its name
# is reported directly so the message is right for the default file too.
print("Processing entries in : %s" % (f.name,))
if outfilename:
    print("Writing to:  %s" % (outfilename,))
else:
    print("Writing to stdout")

a1 = f.readlines()
f.close()
a2 = []
for i in a1:
    # when several lines qualify, the last one wins
    if i.startswith(ROW_LINE_PREFIX):
        a2 = i.split(ROW_SEPARATOR)

# each row is an entry
ctr = 0
data = {}
# group 1: link target, group 2: link text
pat = re.compile(r'<a href="([^"]*)">(.+)</a>', re.I)
for i in a2:
    data_dic = {}
    for j in i.split(CELL_SEPARATOR):
        if not j:
            continue
        m1 = pat.search(j)
        if m1 is not None:
            # cell holding a link: host, session, test type or log file
            add_element(data_dic, m1.groups(), j)
            continue
        # cell holding a date, e.g. 30-10-2007 16:10
        timestring = j[CELL_PREFIX_LEN:]
        try:
            t1 = time.strptime(timestring, "%d-%m-%Y %H:%M")
        except ValueError:
            t1 = None
        if t1 is not None:
            add_element(data_dic, ('time', t1), timestring)
        # status cell on its own, in case generated HTML is fixed
        elif j.endswith('>OK'):
            add_element(data_dic, ('flag', True))
        elif j.endswith('>FAILED'):
            # a failed test must be recorded as not complete
            add_element(data_dic, ('flag', False))
        else:
            printout("Warn: no valid data in :%s:" % (j,))
    data[ctr] = data_dic
    ctr += 1
printout("Total number of entries: %s" % (ctr,))
# Sessions collected so far, keyed by session id; filled by addToSession().
sdata = {}
# Test description -> (slot in the per-session lists, short name, description)
TESTTYPES = {
    'DC3 Z -> e e G4 event generation (python JT)': (0, 'evgen', 'DC3 Z -> e e G4 event generation (python JT)'),
    'DC3 Z -> e e G4 event atlasG4 (python JT)': (1, 'simul', 'DC3 Z -> e e G4 event atlasG4 (python JT)'),
    'DC3 Z -> e e Digitization (python JT)': (2, 'digit', 'DC3 Z -> e e Digitization (python JT)'),
    'DC3 Z -> e e event reconstruction (python JT)': (3, 'reco', 'DC3 Z -> e e event reconstruction (python JT)'),
    'AtlasProduction 12.0.6 Validation': (4, 'valid', 'AtlasProduction 12.0.6 Validation'),
}


def addToSession(sid, elem):
    """Add the test described by elem to session sid in sdata.

    A session includes 5 tests; each one has a fixed slot (see TESTTYPES)
    in the session lists 'tids', 'tres', 'ttime' and 'ttimestring'.

    sid  -- session identifier
    elem -- entry dictionary with the keys 'hostname', 'hostid', 'type',
            'id', 'complete', 'time' and 'timestring'

    Returns True when the test was stored, False (after a warning) when the
    test type is unknown or the slot is already taken. A host differing
    from the one recorded for the session only produces a warning.
    Raises KeyError if elem lacks one of the keys listed above.
    """
    host = (elem['hostname'], elem['hostid'])
    if sid in sdata:
        selem = sdata[sid]
        if selem['host'] != host:
            printout("Warning, host differs within session: %s from %s" % (host, selem['host']))
    else:
        selem = {
            'tids': [None] * 5,
            'tres': [None] * 5,
            'ttime': [None] * 5,
            'ttimestring': [''] * 5,
            'host': host,
            'id': sid,
        }
    try:
        testtype = TESTTYPES[elem['type']]
    except KeyError:
        printout("WARN: Unknown type: %s" % (elem['type'],))
        return False
    slot = testtype[0]
    if selem['tids'][slot]:
        printout("WARN: Duplicate entry in %s for %s" % (sid, testtype))
        return False
    selem['tids'][slot] = elem['id']
    selem['tres'][slot] = elem['complete']
    selem['ttime'][slot] = elem['time']
    selem['ttimestring'][slot] = elem['timestring']
    # a new session is registered only once its first test has been accepted
    sdata[sid] = selem
    return True
# Fold every parsed entry into the record of the session it belongs to.
for entry in data.values():
    if entry:
        try:
            addToSession(entry['session'], entry)
        except:
            # show the offending entry, then let the error propagate
            printout(entry)
            raise
    else:
        printout("WARN: empty test")
printout("Total sessions: %s" % (len(sdata),))
# Root of the run archive; the links printed for a session start with it.
BASEURL = "https://pc-ads-01.roma1.infn.it/KV/"
def printsession(key, value):
    """Print the summary of one session through printout().

    key   -- session identifier
    value -- session record holding 'host', 'tids', 'tres' and 'ttimestring'

    The output lists the session and host links, the test ids, the test
    results, the time strings of the first and the last test and finally
    the ids of the available tests on a single line.
    """
    host = value['host']
    test_ids = value['tids']
    stamps = value['ttimestring']
    emit = printout
    emit("Session: %s" % (key, ))
    emit("%sindex.php?sid=%s" % (BASEURL, key))
    emit("Hostname: %s" % (host[0], ))
    emit("%sprotected/viewhost.php?hid=%s" % (BASEURL, host[1]))
    emit("Test ids (evgen, sim, digit, reco, valid): %s" % (test_ids,))
    emit("Succesful: %s" % (value['tres'],))
    emit("Time (first, last): %s, %s" % (stamps[0], stamps[4]))
    # empty slots are left out of the plain id list
    emit(' '.join(filter(None, test_ids)))
# selection
# Start dates (dd-mm-yyyy) of the sessions to report. They are kept in one
# place so that the announcement and the filter below cannot drift apart.
SELECTED_DATES = ("14-11-2007", "15-11-2007")
printout("Printing only sessions started on: %s" % (", ".join(SELECTED_DATES),))
seldata = {}
for key, value in sdata.items():
    # a session is dated by the time string stored in its first test slot
    if value['ttimestring'][0].startswith(SELECTED_DATES):
        seldata[key] = value
printout("Total selected sessions: %s" % (len(seldata),))
printout("Sessions detail")
for key, value in seldata.items():
    printsession(key, value)
    printout()
# Extra selection for tests
# comment it for the real program
#k1, v1 = seldata.items()[1]
#seldata={}
#seldata[k1]=v1
printout()
printout("Gathering extra info (host, timings)")
print("Remote query... please wait")
printout()
import commands
"""
e.g. host information table (field | value):
Hostname | uct2-c025 |
IP number | 10.1.2.25 |
OS name | Linux |
OS version | 2.6.9-42.0.3.EL.cernsmp |
OS distribution | Scientific Linux CERN SLC release 4.4 (Beryllium) |
Architecture | i686 |
Compiler version | gcc version 3.4.6 20060404 (Red Hat 3.4.6-3) |
Python version | 2.4.2 |
Number of CPUs | 4 |
CPU name | Dual Core AMD Opteron(tm) Processor 285 |
Bogomips | 5175.56 |
RAM | 0 |
"""
# One row of the host table: a name cell followed by a value cell.
# NOTE(review): in the reviewed copy the pattern read
# ']*>([^<]*) | ]*>([^<]*) | ', which looks like the remains of the
# expression below after its HTML tags were stripped; as written it also
# matched a single blank with both groups unset. TODO confirm against the
# real page source.
pat1 = re.compile(r'<td[^>]*>([^<]*)</td>\s*<td[^>]*>([^<]*)</td>', re.I)
# Timing summary found in a log line: 'Tot=<value>[<unit>]'.
pat2 = re.compile(r'Tot=([^\[]*)\[([^\]]*)\]')
# Names of the fields listed in the host table.
CPUINFOS = [
    'Hostname',
    'IP number',
    'OS name',
    'OS version',
    'OS distribution',
    'Architecture',
    'Compiler version',
    'Python version',
    'Number of CPUs',
    'CPU name',
    'Bogomips',
    'RAM',
]
def _shell_quote(text):
    """Return text quoted as a single POSIX shell word."""
    return "'" + str(text).replace("'", "'\\''") + "'"


def _curl_command(page, ident, pipeline=''):
    """Return the shell command downloading BASEURL/protected/<page><ident>.

    pipeline is appended verbatim (e.g. ' | grep ...') after the curl call.
    """
    if cert_pass:
        # cert_pass is a shell reference to the password; the double quotes
        # keep its expansion in one word. Without a password the option is
        # left out, otherwise it would swallow the next argument.
        pass_opt = '--pass "%s" ' % (cert_pass,)
    else:
        pass_opt = ''
    # ident comes from the parsed page, so the URL is quoted for the shell
    url = '%sprotected/%s%s' % (BASEURL, page, ident)
    return ('curl -k --cert $HOME/.globus/usercert.pem '
            '--key $HOME/.globus/userkey.pem %s'
            '--capath /etc/grid-security/certificates %s 2>&1%s'
            % (pass_opt, _shell_quote(url), pipeline))


def _site_name(out):
    """Return the text between 'OSG_SITE_NAME=' and the next '<' in out.

    Returns None when out does not contain the marker.
    """
    _head, found, tail = out.partition('OSG_SITE_NAME=')
    if not found:
        return None
    return tail.partition('<')[0]


def _parse_timing(out):
    """Return the time found by pat2 in out, in seconds; 0 if there is none."""
    m1 = pat2.search(out)
    if m1 is None:
        return 0
    try:
        seconds = float(m1.group(1))
    except ValueError:
        return 0
    if m1.group(2) == 'min':
        seconds *= 60.0
    return seconds


def addExtraInfo(sdic):
    """Complete the session record sdic with information queried remotely.

    sdic -- session record holding 'host', 'tids' and 'tres'

    Keys written to sdic:
    'cpuinfo' -- dictionary with the CPUINFOS fields of the host ('' where
                 the host page gave no value)
    'ce'      -- site name found in the log of the first test, '' if the
                 test or the name is missing
    'ttime'   -- list with the time in seconds found in the log of each
                 successful test, 0 otherwise; it REPLACES the list of
                 parsed start times stored under the same key
    'tottime' -- sum of the first four timings, 0 unless all four are set

    Every query runs curl through the shell with the user certificate.
    """
    hostname = sdic['host'][0]
    hostid = sdic['host'][1]

    # host description: the line naming both the field and the host is used
    _status, out = commands.getstatusoutput(
        _curl_command('viewhost.php?hid=', hostid))
    dline = ''
    for line in out.split('\n'):
        if 'Hostname' in line and hostname in line:
            dline = line
            break
    cpuinfo = dict.fromkeys(CPUINFOS, '')
    if not dline:
        printout("WARN: No valid info from for host : %s ( %s )" % (hostname, hostid))
    else:
        # NOTE(review): the original split the line on a separator literal
        # that is empty in the reviewed copy (str.split('') raises
        # ValueError); every name/value pair of the line is collected
        # instead.
        for name, content in pat1.findall(dline):
            cpuinfo[name] = content
    sdic['cpuinfo'] = cpuinfo

    # site name, taken from the log of the first test; the key is always set
    # because the CSV output reads it for every session
    sdic['ce'] = ''
    if sdic['tids'][0]:
        _status, out = commands.getstatusoutput(
            _curl_command('viewlog.php?id=', sdic['tids'][0],
                          ' | grep OSG_SITE_NAME'))
        site = _site_name(out)
        if site is None:
            printout("WARN: unable to find CE name: %s" % (out,))
        else:
            sdic['ce'] = site

    # timing of every test that completed successfully
    timeinfo = [0, 0, 0, 0, 0]
    for slot in range(5):
        if sdic['tids'][slot] and sdic['tres'][slot]:
            _status, out = commands.getstatusoutput(
                _curl_command('viewlog.php?id=', sdic['tids'][slot],
                              ' | grep "StatSvc.*INFO Time User" | tail -n 1'))
            timeinfo[slot] = _parse_timing(out)
    sdic['ttime'] = timeinfo

    # the total covers the first four tests and needs all of them
    if timeinfo[3] != 0 and timeinfo[2] != 0 and timeinfo[1] != 0 and timeinfo[0] != 0:
        sdic['tottime'] = timeinfo[3] + timeinfo[2] + timeinfo[1] + timeinfo[0]
    else:
        sdic['tottime'] = 0
# Query the extra information of every selected session and dump the record.
for key in seldata:
    value = seldata[key]
    addExtraInfo(value)
    for item in (key, value, ''):
        printout(item)
# dic elements:
# sdic['cpuinfo'] = cpuinfo
# CPUINFOS = ['Hostname',
# 'IP number',
# 'OS name',
# 'OS version',
# 'OS distribution',
# 'Architecture',
# 'Compiler version',
# 'Python version',
# 'Number of CPUs',
# 'CPU name',
# 'Bogomips',
# 'RAM',
# ]
# sdic['ttime'] = timeinfo
# sdic['tottime'] = 0
# print "Session: %s" % (key, )
# print "%sindex.php?sid=%s" % (BASEURL, key)
# print "Hostname: %s" % (value['host'][0], )
# print "%sprotected/viewhost.php?hid=%s" % (BASEURL, value['host'][1])
# print "Test ids (evgen, sim, digit, reco, valid): %s" % (value['tids'],)
# print "Succesful: %s" % (value['tres'],)
# print "Time (first, last): %s, %s" % (value['ttimestring'][0], value['ttimestring'][4])
# print ' '.join([i for i in value['tids'] if i])
# Host fields exported to the CSV table. Left out on purpose: 'IP number',
# 'OS name', 'OS version', 'OS distribution', 'Compiler version' and
# 'Python version'.
CPUINFOS2 = [
    'Hostname',
    'Architecture',
    'Number of CPUs',
    'CPU name',
    'Bogomips',
    'RAM',
]
printout()
printout("CSV tuples")
printout()
# The header has one name per value written by printTupleCSV: session id,
# time string of the first test, CE, host fields, five timings and total.
hline = ", ".join(
    ["Session ID", "Start time", "CE"]
    + CPUINFOS2
    + ["Time evgen", "time sim", "time digit", "time reco", "time val", "Tot time"]
)
printout(hline)
def printTupleCSV(key, value):
    """Print one session as a line of values separated by ', '.

    key   -- session identifier
    value -- session record holding 'ttimestring', 'cpuinfo', 'ttime',
             'tottime' and, when it was found, 'ce'

    Columns: session id, time string of the first test, CE, the CPUINFOS2
    fields of the host, the five timings and the total time. A record
    without 'ce' gets an empty CE column instead of raising KeyError.
    """
    fields = [key, value['ttimestring'][0], value.get('ce', '')]
    fields.extend([value['cpuinfo'][name] for name in CPUINFOS2])
    fields.extend([value['ttime'][slot] for slot in range(5)])
    fields.append(value['tottime'])
    printout(", ".join(["%s" % (field,) for field in fields]))
# One CSV line per selected session, then a closing blank line in the report
# and one on the terminal.
for key in seldata:
    printTupleCSV(key, seldata[key])
printout()
print("")