77 lines
2.5 KiB
Python
77 lines
2.5 KiB
Python
# coding: utf-8
|
|
import re
|
|
import unicodedata
|
|
from entities import entities
|
|
|
|
# ______________________________________________________________________________
|
|
|
|
# Encoding used to decode byte strings before accent stripping and to encode
# non-str labels before they are checked.
ENCODING = 'utf-8'
|
|
|
|
def strip_accents(string):
    """
    Return ``string`` with its accents removed, as an ASCII byte string.

    The text is decomposed with NFKD normalization, then encoded to ASCII
    with the ``'ignore'`` error handler, which drops every character that
    has no ASCII representation (combining accents included).

    :param string: byte string encoded in ``ENCODING``, or already-decoded
        text (used as-is)
    :return: ASCII-only byte string
    """
    text_type = type(u'')
    if not isinstance(string, text_type):
        # Only non-text input needs decoding: asking for an already-decoded
        # string to be decoded again raises TypeError.
        string = text_type(string, ENCODING)
    return unicodedata.normalize('NFKD', string).encode('ASCII', 'ignore')
|
|
|
|
def normalize_entities():
    """
    Build the normalized form of every name in ``entities``.

    Each name has its accents stripped and is then lower-cased; the order of
    ``entities`` is kept.

    :return: list of the normalized entity names
    """
    return [strip_accents(raw_name).lower() for raw_name in entities]
|
|
|
|
# Normalized (accent-stripped, lower-cased) entity names, computed once when
# the module is imported. Built in the same order as `entities`, so a position
# found here designates the matching original name in `entities`.
NORM_ENTITIES = normalize_entities()
|
|
|
|
# ______________________________________________________________________________
|
|
|
|
def parse_string(text):
    """
    Extract the words of a text.

    A word is a maximal run of the characters allowed by the pattern's
    character class (ASCII letters, underscore and a few accented letters);
    anything else acts as a separator. The text is not passed through
    ``strip_accents``: accents are left untouched.

    :param text: text to split into words
    :return: list of the words, in order of appearance
    """
    word_pattern = re.compile('([a-zA-Zéèàùêôëö_]+)')
    return word_pattern.findall(text)
|
|
|
|
def is_in_entities(text):
    """
    Look up a word among the normalized entity names.

    The word is lower-cased before the lookup; its accents are not stripped.

    :param text: word to look up
    :return: position of the lower-cased word in ``NORM_ENTITIES``, or
        ``None`` when it is not there
    """
    lowered = text.lower()
    try:
        return NORM_ENTITIES.index(lowered)
    except ValueError:
        # list.index signals "not found" with ValueError.
        return None
|
|
|
|
def is_correct(libelle, name, family=False):
    """
    Check a label for capitalization and entity-spelling problems.

    Two checks are made:

    * the label must not start with a lower-case letter (labels starting
      with ``ejabberd`` or ``phpMyAdmin`` are tolerated);
    * every word of the label that matches an entry of ``entities`` once
      lower-cased must be written exactly like that entry. Labels containing
      one of the known false-positive markers are exempt from this check.

    :param libelle: label to check; ``None`` or an empty string yields no
        message, a value that is not a ``str`` is encoded with ``ENCODING``
    :param name: name inserted in the messages to identify the label
    :param family: when true, spelling messages are prefixed with
        ``famille [name]`` instead of ``%%name``
    :return: list of message strings, empty when nothing is wrong
    """
    if libelle is not None and not isinstance(libelle, str):
        libelle = unicode.encode(libelle, ENCODING)
    ret = []
    if not libelle:
        return ret

    # FIXME: known false positives
    if libelle[0].islower() and \
            not libelle.startswith(('ejabberd', 'phpMyAdmin')):
        ret.append('%%%%%s : phrase sans majuscule' % name)

    # FIXME: known false positives
    # The exemption depends on the label only, so it is evaluated once
    # instead of once per mismatching word.
    false_positive_markers = (
        'ipsec.conf',
        'test-rvp',
        'bareos-',
        'bacula-',
        '/var/log/zephir',
        'exemple : eolebase',
    )
    if any(marker in libelle for marker in false_positive_markers):
        return ret

    for text in parse_string(libelle):
        text_index = is_in_entities(text)
        if text_index is None:
            continue
        expected = entities[text_index]
        if str(text) == str(expected):
            continue
        # Decode/encode round trip kept from the original code: it rejects
        # an entity name that is not valid ENCODING.
        ent = unicode(expected, ENCODING).encode(ENCODING)
        if family:
            ret.append('famille [%s] : %s => %s' % (str(name), text, ent))
        else:
            ret.append('%%%%%s : %s => %s' % (str(name), text, ent))
    return ret
|
|
# ______________________________________________________________________________
|
|
|