rougail/creole/dtd_parser.py

116 lines
4.6 KiB
Python

# -*- coding: utf-8 -*-
from .i18n import _
from tiramisu import option
# Spellings that parse_dtd turns into the matching Python object when it
# meets them in an attribute value list or as an attribute default; any
# other spelling is kept as a plain string.
CONVERT_VALUE = {str(value): value for value in (True, False, None)}
# Names that parse_dtd rejects (ValueError) when they appear as the second
# token of an <!ATTLIST line.
forbidden_name = ('level',)
def parse_dtd(filename):
    """Parse a DTD file and return a dict describing its elements.

    The parser is line-oriented: each line is split on whitespace and only
    lines whose first token is ``<!ELEMENT`` or ``<!ATTLIST`` are used, so a
    declaration has to fit on one line. An ``<!ATTLIST`` line must come
    after the ``<!ELEMENT`` line of the element it refers to (otherwise a
    ``KeyError`` is raised) and must hold at least five tokens (otherwise an
    ``IndexError`` is raised).

    Dict structure:

    - key: name of element
    - values:

      - type: option type of the element text; None when the element has
        no ``#PCDATA`` content, ``option.UnicodeOption`` when it has
        ``#PCDATA`` and no trailing comment, otherwise the type given by
        the trailing comment
      - options: list of subelements
      - needs: dict of mandatory (``#REQUIRED``) attributes; each entry has
        'values' (None for CDATA or the list of possible values) and
        'type'
      - optionals: dict of the other attributes; each entry has 'values',
        'default' (None for ``#IMPLIED``) and 'type'

    A trailing ``<!--Name-->`` token (written without spaces) gives the
    type: a name ending in ``Option`` is looked up on the ``option``
    module, any other name is kept as a string.

    Example:

        {'container':
            {'type': None,
             'options': ['service', 'interface', 'package', 'file',
                         'disknod'],
             'needs': {'name': {'values': None, 'type': None}},
             'optionals': {'group': {'values': None, 'default': None,
                                     'type': None},
                           'id': {'values': None, 'default': None,
                                  'type': None}}}
        }

    :param filename: path of the DTD file to read
    :return: dict mapping each element name to its description
    :raises ValueError: if a comment names ``ChoiceOption`` or an option
        type that the ``option`` module does not provide, or if an
        ``<!ATTLIST`` line uses a name listed in ``forbidden_name``
    :raises Exception: if an attribute type is neither ``CDATA`` nor a
        parenthesised ``a|b|c`` list
    """
    def parse_option(content):
        """Return the subelement names found in an element content model."""
        # Drop grouping and occurrence markers; only names and the '|' and
        # ',' separators are left.
        for marker in ('(', '*', ')', '>', ' ', '+'):
            content = content.replace(marker, '')
        names = []
        for alternative in content.split('|'):
            names.extend(alternative.split(','))
        if names == ['EMPTY']:
            names = []
        return names

    def is_comment(token):
        """Tell whether a token is a complete ``<!--...-->`` comment."""
        return token.startswith('<!--') and token.endswith('-->')

    def parse_comment(comment):
        """Return the type named by a ``<!--...-->`` token, else None."""
        if not is_comment(comment):
            return None
        comment = comment[4:-3]
        if not comment.endswith('Option'):
            # comment is the attribute name, the option type its value
            return comment
        if comment == 'ChoiceOption':
            raise ValueError(_(u'Do not write "ChoiceOption" in comments'))
        try:
            return getattr(option, comment)
        except AttributeError:
            raise ValueError(_(u"Unvalid comment content: must match a valid attribute name"))

    dtd_load = {}
    # The context manager closes the file even when a line is rejected
    # with an exception.
    with open(filename) as fh:
        for line in fh:
            sline = line.split()
            if not sline:
                continue
            # for element line
            if sline[0] == '<!ELEMENT':
                has_comment = is_comment(sline[-1])
                # A trailing comment is not part of the content model.
                content = sline[2:-1] if has_comment else sline[2:]
                options = parse_option(' '.join(content))
                type_ = None
                if '#PCDATA' in options:
                    options.remove('#PCDATA')
                    if has_comment:
                        type_ = parse_comment(sline[-1])
                    else:
                        type_ = option.UnicodeOption
                dtd_load[sline[1]] = {'type': type_, 'options': options,
                                      'needs': {}, 'optionals': {}}
            # for attlist line
            elif sline[0] == '<!ATTLIST':
                # NOTE(review): sline[1] is the element name although the
                # message talks about attributes -- confirm whether the
                # attribute name (sline[2]) was meant.
                if sline[1] in forbidden_name:
                    raise ValueError(_(u'Using name {0} is forbidden in attributes').format(sline[1]))
                # possible value
                if sline[3] == 'CDATA':
                    values = None
                else:
                    if not sline[3].startswith('(') or not sline[3].endswith(')'):
                        raise Exception(_(u'Not a valid list'))
                    values = [CONVERT_VALUE.get(val, val)
                              for val in sline[3][1:-1].split('|')]
                # comment
                type_ = parse_comment(sline[-1])
                # default value or state value (needs or optionals)
                state = sline[4]
                if state.startswith('#REQUIRED'):
                    dtd_load[sline[1]]['needs'][sline[2]] = {'values': values,
                                                             'type': type_}
                else:
                    if state.startswith('#IMPLIED'):
                        default = None
                    else:
                        # Strip the quotes and the closing '>' around the
                        # literal default value.
                        default = state.replace('"', '').replace("'", '').replace(
                            '>', '').strip()
                        default = CONVERT_VALUE.get(default, default)
                    dtd_load[sline[1]]['optionals'][sline[2]] = {'values': values,
                                                                 'default': default,
                                                                 'type': type_}
    return dtd_load