2019-11-23 08:17:35 +01:00
|
|
|
# coding: utf-8
|
|
|
|
from os.path import join, isfile, basename, isdir
|
|
|
|
from os import listdir
|
2019-11-26 20:33:24 +01:00
|
|
|
#from io import BytesIO
|
2019-11-23 08:17:35 +01:00
|
|
|
|
2019-11-26 20:33:24 +01:00
|
|
|
from lxml.etree import DTD, parse, tostring # , XMLParser
|
2019-11-23 08:17:35 +01:00
|
|
|
|
|
|
|
from .i18n import _
|
|
|
|
from .error import CreoleDictConsistencyError
|
|
|
|
|
|
|
|
# Module-level flag; it is not read anywhere in the visible part of this file.
# NOTE(review): presumably consumed by other modules of the package - confirm.
HIGH_COMPATIBILITY = True
|
|
|
|
|
|
|
|
class XMLReflector(object):
    """Helper class for loading the Creole XML files.

    It parses the files, validates them against the Creole DTD and
    writes XML results on the disk.

    ``parse_dtd`` has to be called before any parsing method, because
    validation relies on ``self.dtd`` which is ``None`` until then.
    """

    def __init__(self):
        # Filled in by parse_dtd(); None means "no DTD loaded yet".
        self.dtd = None

    def parse_dtd(self, dtdfilename):
        """Loads the Creole DTD

        :param dtdfilename: the full filename of the Creole DTD
        :raises IOError: if the DTD is not found
        """
        if not isfile(dtdfilename):
            raise IOError(_("no such DTD file: {}").format(dtdfilename))
        with open(dtdfilename, 'r') as dtdfd:
            self.dtd = DTD(dtdfd)

    def parse_xmlfile(self, xmlfile):
        """Parses and validates some Creole XML against the Creole DTD

        :param xmlfile: the XML source, handed over as is to lxml's ``parse``
        :raises CreoleDictConsistencyError: if the document does not
                                            validate against the loaded DTD
        :returns: the root element tree object
        """
        # FIXME zephir2
        # document = parse(BytesIO(xmlfile), XMLParser(remove_blank_text=True))
        document = parse(xmlfile)
        if not self.dtd.validate(document):
            raise CreoleDictConsistencyError(_("not a valid xml file: {}").format(xmlfile))
        return document.getroot()

    def load_xml_from_folders(self, xmlfolders, from_zephir):
        """Loads all the XML files located in the xmlfolders' list

        :param xmlfolders: a folder name, a file name, or a list of them.
                           An item of the list may itself be a list or a
                           tuple of folders/files (a "directory group"):
                           the files of a group are merged and sorted by
                           file name, then by position of their folder in
                           the group.
                           When `from_zephir` is true, every item is given
                           directly to `parse_xmlfile`.
        :param from_zephir: if true, do not walk through folders; parse
                            each item of `xmlfolders` and name the results
                            ``generate_<index>``
        :returns: a list of ``(name, root element)`` tuples
        """
        if from_zephir:
            # parse_xmlfile() only accepts the XML source: forwarding an
            # unsupported `from_zephir` keyword raised a TypeError.
            return [('generate_{}'.format(idx), self.parse_xmlfile(xmlfile))
                    for idx, xmlfile in enumerate(xmlfolders)]

        if not isinstance(xmlfolders, list):
            xmlfolders = [xmlfolders]

        documents = []
        for xmlfolder in xmlfolders:
            for xmlfile in self._list_candidate_files(xmlfolder):
                # only files with the .xml extension are loaded
                if xmlfile.endswith('.xml'):
                    documents.append((xmlfile, self.parse_xmlfile(xmlfile)))
        return documents

    @staticmethod
    def _list_candidate_files(xmlfolder):
        """Returns the ordered list of file paths found for one entry
        of the folders' list (directory group, directory or single file)
        """
        # `dirname` is not part of the module-level imports
        from os.path import dirname

        if isinstance(xmlfolder, (list, tuple)):
            # directory group : collect files from each
            # directory and sort them before loading
            group_files = []
            for idx, subdir in enumerate(xmlfolder):
                if isdir(subdir):
                    group_files.extend((filename, idx, subdir)
                                       for filename in listdir(subdir))
                else:
                    # not a directory: handle the entry as a single file
                    group_files.append((basename(subdir), idx, dirname(subdir)))
            # sort by filename, then by initial xmlfolder order if same name
            group_files.sort(key=lambda entry: (entry[0], entry[1]))
            return [join(folder, filename)
                    for filename, _idx, folder in group_files]

        if isdir(xmlfolder):
            return sorted(join(xmlfolder, filename)
                          for filename in listdir(xmlfolder))

        return [xmlfolder]

    def save_xmlfile(self, xmlfilename, xml):  # pylint: disable=R0201
        """Write a bunch of XML on the disk

        :param xmlfilename: the full filename of the file to write
        :param xml: the lxml element (or tree) to serialize
        """
        # tostring() already returns UTF-8 encoded bytes carrying an XML
        # declaration: write them untouched in binary mode, so that the
        # file content always matches the declared encoding whatever the
        # locale's default encoding is.
        with open(xmlfilename, 'wb') as xmlfh:
            xmlfh.write(tostring(xml, pretty_print=True, encoding="UTF-8", xml_declaration=True))
|