rougail/src/rougail/xmlreflector.py

80 lines
2.9 KiB
Python
Raw Normal View History

2021-01-11 22:34:16 +01:00
"""load XML file from directory
2021-01-30 08:15:26 +01:00
Created by:
EOLE (http://eole.orion.education.fr)
Copyright (C) 2005-2018
Forked by:
Cadoles (http://www.cadoles.com)
Copyright (C) 2019-2021
distributed with GPL-2 or later license
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2021-01-11 22:34:16 +01:00
"""
2020-12-24 16:02:20 +01:00
from typing import List
from os.path import join, isfile
2019-11-23 08:17:35 +01:00
from os import listdir
2021-01-11 22:34:16 +01:00
from lxml.etree import DTD, parse, XMLSyntaxError # pylint: disable=E0611
2019-11-23 08:17:35 +01:00
from .i18n import _
2020-07-20 18:13:53 +02:00
from .error import DictConsistencyError
2019-11-23 08:17:35 +01:00
2020-12-26 17:06:56 +01:00
class XMLReflector:
    """Helper class for loading the Creole XML files,
    parsing them and validating them against the Creole DTD
    """
    def __init__(self,
                 rougailconfig: 'RougailConfig',
                 ) -> None:
        """Loads the Creole DTD

        :param rougailconfig: configuration object; its 'dtdfilename' entry
                              is the full filename of the Creole DTD
        :raises IOError: if the DTD is not found
        """
        dtdfilename = rougailconfig['dtdfilename']
        if not isfile(dtdfilename):
            raise IOError(_("no such DTD file: {}").format(dtdfilename))
        with open(dtdfilename, 'r') as dtdfd:
            # the parsed DTD is kept to validate every document loaded later
            self.dtd = DTD(dtdfd)

    def load_xml_from_folders(self,
                              xmlfolders: List[str],
                              ):
        """Loads all the XML files located in the xmlfolders' list

        This is a generator: folders are only read, and errors only raised,
        while the result is being iterated. Folders are processed in the
        given order and, inside a folder, files are processed in sorted
        filename order.

        :param xmlfolders: list of full folder's name
        :returns: yields a (filename, root element) tuple for each '.xml' file
        :raises DictConsistencyError: error 77 if a folder contains no '.xml'
                                      file, error 52 if a file cannot be
                                      parsed as XML, error 43 if a document
                                      is not valid against the DTD
        """
        for xmlfolder in xmlfolders:
            filenames = sorted(join(xmlfolder, filename)
                               for filename in listdir(xmlfolder)
                               if filename.endswith('.xml'))
            if not filenames:
                raise DictConsistencyError(_('there is no XML file'), 77, [xmlfolder])
            for xmlfile in filenames:
                try:
                    document = parse(xmlfile)
                except XMLSyntaxError as err:
                    # interpolate after _() so the string handed to the
                    # translation function stays constant, as in __init__
                    msg = _('not a XML file: {}').format(err)
                    raise DictConsistencyError(msg, 52, [xmlfile]) from err
                if not self.dtd.validate(document):
                    # only the first entry of the filtered error log is reported
                    dtd_error = self.dtd.error_log.filter_from_errors()[0]
                    msg = _('not a valid XML file: {}').format(dtd_error)
                    raise DictConsistencyError(msg, 43, [xmlfile])
                yield xmlfile, document.getroot()