rougail/src/rougail/xmlreflector.py

91 lines
3.4 KiB
Python

# coding: utf-8
from os.path import join, isfile, basename, isdir
from os import listdir
#from io import BytesIO
from lxml.etree import DTD, parse, tostring # , XMLParser
from .i18n import _
from .error import DictConsistencyError
class XMLReflector(object):
"""Helper class for loading the Creole XML file,
parsing it, validating against the Creole DTD,
writing the xml result on the disk
"""
def __init__(self):
self.dtd = None
def parse_dtd(self, dtdfilename):
"""Loads the Creole DTD
:raises IOError: if the DTD is not found
:param dtdfilename: the full filename of the Creole DTD
"""
if not isfile(dtdfilename):
raise IOError(_("no such DTD file: {}").format(dtdfilename))
with open(dtdfilename, 'r') as dtdfd:
self.dtd = DTD(dtdfd)
def parse_xmlfile(self, xmlfile):
"""Parses and validates some Creole XML against the Creole DTD
:returns: the root element tree object
"""
# document = parse(BytesIO(xmlfile), XMLParser(remove_blank_text=True))
document = parse(xmlfile)
if not self.dtd.validate(document):
raise DictConsistencyError(_("not a valid xml file: {}").format(xmlfile))
return document.getroot()
def load_xml_from_folders(self, xmlfolders):
"""Loads all the XML files located in the xmlfolders' list
:param xmlfolders: list of full folder's name
"""
documents = []
if not isinstance(xmlfolders, list):
xmlfolders = [xmlfolders]
for xmlfolder in xmlfolders:
if isinstance(xmlfolder, list) or isinstance(xmlfolder, tuple):
# directory group : collect files from each
# directory and sort them before loading
group_files = []
for idx, subdir in enumerate(xmlfolder):
if isdir(subdir):
for filename in listdir(subdir):
group_files.append((filename, idx, subdir))
else:
group_files.append(basename(subdir), idx, dirname(subdir))
def sort_group(file1, file2):
if file1[0] == file2[0]:
# sort by initial xmlfolder order if same name
return file1[1].__cmp__(file2[1])
# sort by filename
elif file1[0] > file2[0]:
return 1
else:
return -1
group_files.sort(sort_group)
filenames = [join(f[2], f[0]) for f in group_files]
elif isdir(xmlfolder):
filenames = []
for filename in listdir(xmlfolder):
filenames.append(join(xmlfolder, filename))
filenames.sort()
else:
filenames = [xmlfolder]
for xmlfile in filenames:
if xmlfile.endswith('.xml'):
#xmlfile_path = join(xmlfolder, xmlfile)
documents.append((xmlfile, self.parse_xmlfile(xmlfile)))
return documents
def save_xmlfile(self, xmlfilename, xml): # pylint: disable=R0201
"""Write a bunch of XML on the disk
"""
with open(xmlfilename, 'w') as xmlfh:
xmlfh.write(tostring(xml, pretty_print=True, encoding="UTF-8", xml_declaration=True).decode('utf8'))