# coding: utf-8
from os.path import join, isfile, basename, isdir, dirname
from os import listdir
#from io import BytesIO

from lxml.etree import DTD, parse, tostring # , XMLParser

from .i18n import _
from .error import DictConsistencyError


HIGH_COMPATIBILITY = True


class XMLReflector(object):
    """Helper class for loading the Creole XML file,
    parsing it, validating against the Creole DTD,
    writing the xml result on the disk
    """
    def __init__(self):
        # Set by parse_dtd(); parse_xmlfile() calls self.dtd.validate(), so
        # parse_dtd() has to be called before any XML file is parsed.
        self.dtd = None

    def parse_dtd(self, dtdfilename):
        """Loads the Creole DTD

        :raises IOError: if the DTD is not found

        :param dtdfilename: the full filename of the Creole DTD
        """
        if not isfile(dtdfilename):
            raise IOError(_("no such DTD file: {}").format(dtdfilename))
        with open(dtdfilename, 'r') as dtdfd:
            self.dtd = DTD(dtdfd)

    def parse_xmlfile(self, xmlfile):
        """Parses and validates some Creole XML against the Creole DTD

        :param xmlfile: the XML source handed to lxml's ``parse``
        :raises DictConsistencyError: if the document does not validate
                                      against the loaded DTD
        :returns: the root element tree object
        """
        document = parse(xmlfile)
        if not self.dtd.validate(document):
            raise DictConsistencyError(_("not a valid xml file: {}").format(xmlfile))
        return document.getroot()

    @staticmethod
    def _group_filenames(xmlfolder):
        """Returns the full paths of the files of a directory group

        Every entry of the group is either a directory (all its files are
        collected) or a single file. The result is sorted by file name;
        files sharing the same name keep the order of the group entries.

        :param xmlfolder: list or tuple of directory and/or file names
        """
        group_files = []
        for idx, subdir in enumerate(xmlfolder):
            if isdir(subdir):
                for filename in listdir(subdir):
                    group_files.append((filename, idx, subdir))
            else:
                # not a directory: treat the entry as a single file
                group_files.append((basename(subdir), idx, dirname(subdir)))
        # sort by file name first, then by position in the group
        group_files.sort(key=lambda entry: (entry[0], entry[1]))
        return [join(entry[2], entry[0]) for entry in group_files]

    @classmethod
    def _candidate_filenames(cls, xmlfolder):
        """Returns the ordered list of file paths designated by ``xmlfolder``

        :param xmlfolder: a directory group (list or tuple), a directory
                          name or a single file name
        """
        if isinstance(xmlfolder, (list, tuple)):
            # directory group : collect files from each
            # directory and sort them before loading
            return cls._group_filenames(xmlfolder)
        if isdir(xmlfolder):
            return sorted(join(xmlfolder, filename)
                          for filename in listdir(xmlfolder))
        return [xmlfolder]

    def load_xml_from_folders(self, xmlfolders):
        """Loads all the XML files located in the xmlfolders' list

        Only the files whose name ends with ``.xml`` are parsed.

        :param xmlfolders: list of full folder's name; each item may also be
                           a file name or a list/tuple of folders (directory
                           group). A value that is not a list is treated as
                           a single item.
        :returns: list of ``(filename, root element)`` tuples
        """
        documents = []
        if not isinstance(xmlfolders, list):
            xmlfolders = [xmlfolders]
        for xmlfolder in xmlfolders:
            for xmlfile in self._candidate_filenames(xmlfolder):
                if xmlfile.endswith('.xml'):
                    documents.append((xmlfile, self.parse_xmlfile(xmlfile)))
        return documents

    def save_xmlfile(self, xmlfilename, xml): # pylint: disable=R0201
        """Write a bunch of XML on the disk

        :param xmlfilename: the full filename of the file to write
        :param xml: the XML element (or tree) to serialize
        """
        # tostring() returns UTF-8 encoded bytes here; write them as-is in
        # binary mode so the content always matches the XML declaration,
        # whatever the locale's default encoding is.
        with open(xmlfilename, 'wb') as xmlfh:
            xmlfh.write(tostring(xml, pretty_print=True, encoding="UTF-8", xml_declaration=True))