87 lines
3.1 KiB
Python
87 lines
3.1 KiB
Python
"""load XML file from directory
|
|
|
|
Created by:
|
|
EOLE (http://eole.orion.education.fr)
|
|
Copyright (C) 2005-2018
|
|
|
|
Forked by:
|
|
Cadoles (http://www.cadoles.com)
|
|
Copyright (C) 2019-2021
|
|
|
|
distributed under the GPL-2 or later license
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
"""
|
|
from typing import List
|
|
from os.path import join, isfile
|
|
from os import listdir
|
|
|
|
from lxml.etree import DTD, parse, XMLSyntaxError # pylint: disable=E0611
|
|
|
|
from .i18n import _
|
|
from .error import DictConsistencyError
|
|
|
|
|
|
class XMLReflector:
    """Helper class for loading the Creole XML files.

    The Creole DTD is read once when the object is built; every XML file
    found by ``load_xml_from_folders`` is then parsed and validated
    against that DTD.
    """

    def __init__(self,
                 rougailconfig: 'RougailConfig',
                 ) -> None:
        """Loads the Creole DTD

        :param rougailconfig: configuration object; its ``'dtdfilename'``
                              entry is the full filename of the Creole DTD
        :raises IOError: if the DTD is not found
        """
        dtdfilename = rougailconfig['dtdfilename']
        if not isfile(dtdfilename):
            raise IOError(_("no such DTD file: {}").format(dtdfilename))
        with open(dtdfilename, 'r') as dtdfd:
            self.dtd = DTD(dtdfd)

    def load_xml_from_folders(self,
                              xmlfolders: List[str],
                              ):
        """Loads all the XML files located in the xmlfolders' list

        The ``.xml`` files of every folder are collected first, then
        parsed and validated in alphabetical order of their file name,
        whichever folder they come from.

        This is a generator: nothing is read (and no error is raised)
        until it is iterated.

        :param xmlfolders: list of full folder's name
        :returns: a generator of ``(xml file path, root element)`` tuples
        :raises DictConsistencyError: with code 78 if the same file name
                                      is found twice, 77 if no XML file is
                                      found at all, 52 if a file cannot be
                                      parsed as XML, 43 if a file does not
                                      validate against the DTD
        """
        # file name -> full path; the file name is the key so that the same
        # name appearing in two folders is detected as a duplicate
        filenames = {}
        for xmlfolder in xmlfolders:
            for filename in listdir(xmlfolder):
                if not filename.endswith('.xml'):
                    continue
                if filename in filenames:
                    # translate the template first, then interpolate: an
                    # already-formatted string can never match a catalog entry
                    msg = _('duplicate xml file name {}').format(filename)
                    raise DictConsistencyError(msg, 78, [xmlfolder])
                filenames[filename] = join(xmlfolder, filename)
        if not filenames:
            # Report every folder that was searched. The loop variable must
            # not be used here: it is unbound when xmlfolders is empty, and
            # otherwise only names the last folder.
            raise DictConsistencyError(_('there is no XML file'), 77, list(xmlfolders))
        for filename in sorted(filenames):
            xmlfile = filenames[filename]
            try:
                document = parse(xmlfile)
            except XMLSyntaxError as err:
                msg = _('not a XML file: {}').format(err)
                raise DictConsistencyError(msg, 52, [xmlfile]) from err
            if not self.dtd.validate(document):
                # only the first entry of the DTD error log is reported
                dtd_error = self.dtd.error_log.filter_from_errors()[0]
                msg = _('not a valid XML file: {}').format(dtd_error)
                raise DictConsistencyError(msg, 43, [xmlfile])
            yield xmlfile, document.getroot()
|