###########################################################################################################################################################
# Script: XML splitter script to split an exported DataStage XML file containing jobs into individual job files, placed in their respective subfolders
# Date Author Description/ Log
# ======== ============== ================
# 20181005 Denzil D'souza Developed the script
###########################################################################################################################################################
import os
import xml.etree.ElementTree as ET
import sys
import time
startTime = time.asctime(time.localtime(time.time()))

# Root folder for the split files; a sub folder named after the input file is
# appended below. Raw string, because "\T", "\p" and "\g" are not valid escape
# sequences in a normal string literal.
OUTPUT_ROOT = r"c:\Temp\python\git"

# Top-level tags whose child <Record> elements are each written to their own
# file, wrapped in a fresh copy of the parent tag.
RECORD_CONTAINER_TAGS = (
    'DataElements', 'StageTypes', 'TableDefinitions', 'Transforms',
    'Routines', 'QSRuleAssemblies', 'ParameterSets', 'DataConnections',
)
# Top-level tags that are written whole, one file per element.
WHOLE_ELEMENT_TAGS = ('Job', 'SharedContainer')


def _write_split_file(folder, title, header, element, footer):
    """Write one element to ``<folder>\\<title>.xml`` and print the path.

    folder  -- target directory; created if it does not exist yet
    title   -- identifier used as the file name (":" is replaced by "_")
    header  -- text written before the serialised element
    element -- xml.etree.ElementTree.Element to serialise
    footer  -- text written after the serialised element
    """
    os.makedirs(folder, exist_ok=True)
    fullFilePath = folder + "\\" + (title + ".xml").replace(":", "_")
    print(fullFilePath)
    # ET.tostring() returns us-ascii bytes by default (non-ASCII characters
    # become character references), so decoding as ASCII is lossless. Writing
    # everything through one text-mode handle keeps the line endings uniform;
    # appending the bytes in binary mode skipped newline translation.
    body = ET.tostring(element).decode("ascii")
    with open(fullFilePath, "w") as f:
        f.write(header + body + footer)


# --- Command line validation -------------------------------------------------
noofargs = len(sys.argv)
if noofargs != 2:
    print ("Please supply datastage project XML file name to split as an input value.")
    sys.exit(1)  # non-zero status so callers can detect the failure
inputFile = sys.argv[1]
if not os.path.isfile(inputFile):
    print (inputFile, " XML file does not exist")
    sys.exit(1)

# --- Load the export ----------------------------------------------------------
mytree = ET.parse(inputFile)
myroot = mytree.getroot()
with open(inputFile) as f:
    lines = f.readlines()

# The first three physical lines are copied to the top of every output file and
# the last line to the bottom.
# NOTE(review): presumably the XML declaration, the opening root tag, the
# export header and the closing root tag -- confirm against a real export.
# A file with fewer than three lines raises IndexError here.
header = lines[0] + lines[1] + lines[2]
footer = lines[-1]

# Name the output folder after the input file itself, ignoring any directory
# part and stripping only the final extension.
basePath = OUTPUT_ROOT + "\\" + os.path.splitext(os.path.basename(inputFile))[0]

# --- Split ----------------------------------------------------------------------
for elem in myroot:
    # Folder taken from the most recent 'Category' property seen under this
    # top-level element; empty until one is found.
    filePath = ""
    for subelem in elem:
        for prop in subelem:
            if prop.attrib.get('Name') == 'Category':
                # An empty <Property Name="Category"/> has text None.
                filePath = basePath + (prop.text or "")
                os.makedirs(filePath, exist_ok=True)
        if elem.tag in RECORD_CONTAINER_TAGS and subelem.tag == "Record":
            title = subelem.attrib.get("Identifier")
            if title is None:
                print("Skipping", elem.tag, "record without an Identifier attribute")
                continue
            if elem.tag == "TableDefinitions":
                # Keep only the part after the last backslash of the identifier.
                title = title.split("\\")[-1]
            _write_split_file(
                filePath or basePath,  # no Category seen: use the base folder
                title,
                header + "<" + elem.tag + ">",
                subelem,
                "</" + elem.tag + ">" + footer,
            )
    if elem.tag in WHOLE_ELEMENT_TAGS:
        title = elem.attrib.get("Identifier")
        if title is None:
            print("Skipping", elem.tag, "element without an Identifier attribute")
            continue
        _write_split_file(filePath or basePath, title, header, elem, footer)

endTime = time.asctime(time.localtime(time.time()))
print("Extraction started at : ", startTime, " Extraction ended at : ", endTime)