#!/usr/bin/env python
#!/bin/env python
# on the Mac env is in /usr/bin
# cpwrapper.py - Marco Mambelli
"""Wrapper for copy tools.

Depending on URLs and other information, tries to invoke the correct client
with the correct command parameters.
The program invoked by the wrapper must not require interactive responses.

Protect the arguments from expansion or being separated if they include spaces.

URLs (as presented in NG document) have an extended form like:
method://[host[:port]][;param][/service_path]/data_path
"""

"""Collection of URL examples or syntax specification
"""


class UrlType:
    pass


# Protocol identifiers returned by _classify().
URL_SRM = 1
URL_SRMv1 = 2
URL_SRMv2 = 3
URL_GSIFTP = 4
URL_FTP = 5
URL_HTTP = 6
URL_PNFS = 7
URL_FILE = 8

# Port inserted into SRM URLs that do not specify one.
_SRM_DEFAULT_PORT = "8443"


def _fix_pre(url):
    """Collapse a run of four slashes into three.

    Normalizes 'file:////path' to 'file:///path' before classification.
    """
    return url.replace('////', '///')


def _host_part(url):
    """Return the 'host[:port][;param]' component of a URL, or ''.

    The component is the text between the second and the third '/' of
    'method://host/...'. URLs with fewer than two slashes (e.g. a bare local
    file name) have no such component and yield an empty string instead of
    raising IndexError.
    """
    url_t = url.split('/', 3)
    if len(url_t) < 3:
        return ''
    return url_t[2]


def _classify(url):
    """Return the URL_* constant matching the URL scheme.

    Returns None when the scheme is not one of the recognized ones.
    """
    if url.startswith('file:'):
        if url.startswith('file:///pnfs'):
            return URL_PNFS
        # check for CARTOR, XROOT and other non POSIX FS
        return URL_FILE
    if url.startswith('gsiftp:'):
        return URL_GSIFTP
    if url.startswith('srm:'):
        # does SFN has to be uppercase?
        url_t = url.split('?SFN=')
        if len(url_t) > 1:
            # The part before '?SFN=' ends with the service path, which
            # names the SRM version.
            if url_t[0].endswith('/srm/managerv2'):
                return URL_SRMv2
            if url_t[0].endswith('/srm/managerv1'):
                return URL_SRMv1
        return URL_SRM
    if url.startswith('ftp:'):
        return URL_FTP
    if url.startswith('http:'):
        return URL_HTTP
    return None


def _fix_post(url, protocol, cli=''):
    """Return a complete URL, adding missing default parts.

    url      - URL to complete
    protocol - URL_* constant as returned by _classify() (may be None)
    cli      - name of the client that will receive the URL

    For file URLs handed to srmcp, 'file:///' becomes 'file:////'.
    For SRM URLs without a port, the default port is inserted after the host
    (before any ';param'). All other URLs are returned unchanged.
    """
    # more frequent protocols should come first even if there is no change
    # in the URL?
    if protocol == URL_FILE:
        if cli == 'srmcp':
            return url.replace('file:///', 'file:////')
        return url
    if protocol in (URL_SRMv2, URL_SRM, URL_SRMv1):
        # fix missing port number
        url_t = url.split('/', 3)
        if len(url_t) < 3:
            # Malformed SRM URL without a host component: nothing to fix.
            return url
        host = url_t[2]
        if host.find(':') < 0:
            idx = host.find(';')
            if idx < 0:
                url_t[2] = "%s:%s" % (host, _SRM_DEFAULT_PORT)
            else:
                # Keep ';param' after the inserted port.
                url_t[2] = "%s:%s%s" % (host[:idx], _SRM_DEFAULT_PORT,
                                        host[idx:])
            # should a manager always be added?
            # should SRM be promoted to SRMv2?
            url = '/'.join(url_t)
        return url
    # GSIFTP, FTP, HTTP, PNFS and unknown protocols need no change.
    return url


def _get_lcgcp_setype(protocol):
    """Return the lcg-cp storage element type for a protocol, or ''."""
    if protocol == URL_SRM or protocol == URL_SRMv1:
        return 'srmv1'
    if protocol == URL_SRMv2:
        return 'srmv2'
    #TODO: is gsiftp considered 'se' or is it something different?
    return ''


def _hostopt(cli, src, dst):
    """Return additional host-specific options for some commands.

    cli - client name
    src - source URL
    dst - destination URL

    Returns an option string (with trailing space) or ''.
    """
    has_hostopt = ['srmcp']
    if cli not in has_hostopt:
        return ''
    ret = ''
    if cli == 'srmcp':
        # BNL firewall requires -streams_num=1
        idx = _host_part(src).find('bnl.gov')
        if idx < 0:
            idx = _host_part(dst).find('bnl.gov')
        if idx >= 0:
            ret += '-streams_num=1 '
    return ret


def getURLsOpt(cli, srcurl, dsturl):
    """Set the command line parameters to optimize the success probability
    of the transfer.

    cli - command to invoke the client (the first string is the name of
          the client)
    srcurl - URL of the source (first parameter)
    dsturl - URL of the destination (second parameter)

    Returns a tuple (srcURL, dstURL, options): the modified URLs and a string
    with additional options (each option followed by a space, '' if none).
    """
    clicomm = cli.strip().split(' ', 1)[0]
    srcurl = _fix_pre(srcurl)
    dsturl = _fix_pre(dsturl)
    optadd = ''
    # fix URL and parameters for picky programs
    require_changes = ['lcg-cp', 'srmcp']
    if clicomm in require_changes:
        srcp = _classify(srcurl)
        dstp = _classify(dsturl)
        srcurl = _fix_post(srcurl, srcp, clicomm)
        dsturl = _fix_post(dsturl, dstp, clicomm)
        optadd += _hostopt(clicomm, srcurl, dsturl)
        if clicomm == 'srmcp':
            if srcp == URL_SRMv2 or dstp == URL_SRMv2:
                optadd += '-2 '
            # should I add -1 or is it the default?
            # what if one is v2 and one is v1?
        elif clicomm == 'lcg-cp':
            optadd += '-b '
            tmpt = _get_lcgcp_setype(srcp)
            if tmpt:
                optadd += "-T %s " % (tmpt,)
            tmpt = _get_lcgcp_setype(dstp)
            if tmpt:
                optadd += "-U %s " % (tmpt,)
    return srcurl, dsturl, optadd


def getCL(cli, srcurl, dsturl, optstr=''):
    """Build the complete command line for the transfer.

    cli - command to invoke the client (the first string is the name of
          the client)
    srcurl - URL of the source (first parameter)
    dsturl - URL of the destination (second parameter)
    optstr - additional option string

    Returns the string "cli optstr added_options srcurl dsturl".

    #TODO:?: check for duplicate parameter, hopefully it is fine
    #TODO:?: should I add -b to lcg-cp? - yes
    """
    # use other function to get the modified values (URLs and options)
    srcurl, dsturl, optadd = getURLsOpt(cli, srcurl, dsturl)
    return "%s %s %s %s %s" % (cli, optstr, optadd, srcurl, dsturl)


def fixCL(cline):
    """Separate the arguments of a command line and pass them to getCL.

    Assumes a space separated command line:
    first is the name of the client (executable)
    second to the last is the source for the copy
    last is the destination for the copy

    Command lines with fewer than three tokens are returned unchanged.
    """
    cline_t = cline.split(' ')
    if len(cline_t) < 3:
        return cline
    # The destination is the LAST token (index -1), not the second one.
    return getCL(cline_t[0], cline_t[-2], cline_t[-1],
                 ' '.join(cline_t[1:-2]))


def _run_shell(cline):
    """Run cline through the shell and return (exit_code, output).

    stderr is merged into stdout and one trailing newline is stripped from
    the output. The first element is the real exit code of the shell, so it
    can be passed to sys.exit() directly. Uses subprocess so that it works
    on both Python 2 and Python 3.
    """
    import subprocess
    # NOTE(review): the command line is interpreted by the shell, so URLs
    # containing ';', '?' or spaces must be protected by the caller.
    proc = subprocess.Popen(cline, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    out = proc.communicate()[0]
    if out.endswith('\n'):
        out = out[:-1]
    return proc.returncode, out


def _main():
    """Run as a wrapper for a program with the same name located in a
    different directory of the PATH.

    Removes the wrapper's own directory from PATH, rebuilds the command line
    (the last two arguments are assumed to be source and destination URLs),
    runs it, prints its output and exits with its exit code.
    """
    import os
    import sys
    # remove current directory from PATH
    mypath = os.getenv('PATH')
    if mypath:
        try:
            should_be_file = __file__
        except NameError:
            # how to find directory of the script?
            should_be_file = sys.argv[0]
        if not should_be_file:
            print("Copy wrapper error. Remove it from the PATH and reexecute the command.")
            sys.exit(1)
        local_dir = os.path.abspath(os.path.dirname(should_be_file))
        mypath_t = [i for i in mypath.split(os.path.pathsep)
                    if os.path.abspath(i) != local_dir]
        os.environ['PATH'] = os.path.pathsep.join(mypath_t)
    # invoke the correct command
    # First remove the path from the executable name
    sys.argv[0] = os.path.basename(sys.argv[0])
    if len(sys.argv) < 3:
        # Not enough arguments for a copy: just invoke it
        cline = ' '.join(sys.argv)
    else:
        # last 2 arguments are assumed source and destination URLs for a copy
        myargs = sys.argv
        cline = getCL(myargs[0], myargs[-2], myargs[-1],
                      ' '.join(myargs[1:-2]))
    # Two spaces after the colon match the historical output format.
    print("Command:  %s" % (cline,))
    ec, out = _run_shell(cline)
    print(out)
    sys.exit(ec)


# If main, execute (run as a wrapper for a program with the same name in a
# different directory in the path)!
if __name__ == '__main__':
    _main()