# coding: utf-8
"""Check labels for capitalization and for the spelling of entity names.

The reference spellings come from the project-local ``entities`` list.
This module relies on the Python 2 ``unicode`` builtin.
"""
import re
import unicodedata

from entities import entities

# ______________________________________________________________________________
ENCODING = 'utf-8'


def strip_accents(string):
    """Return ``string`` as an ASCII byte string with accents removed.

    The text is NFKD-decomposed, then encoded to ASCII with errors ignored,
    so combining marks and every other non-ASCII character are dropped.

    :param string: byte string encoded in ``ENCODING``, or an already
        decoded ``unicode`` object
    :return: ASCII-only byte string
    """
    # unicode(x, encoding) raises TypeError when x is already unicode, so
    # only byte strings are decoded.
    if not isinstance(string, unicode):
        string = unicode(string, ENCODING)
    return unicodedata.normalize('NFKD', string).encode('ASCII', 'ignore')


def normalize_entities():
    """Return every entry of ``entities`` without accents and lower-cased.

    :return: list of normalized names, in the same order as ``entities``
    """
    return [strip_accents(entity).lower() for entity in entities]


def _build_entity_index(names):
    """Map each name to the index of its first occurrence in ``names``."""
    index_by_name = {}
    for position, entity_name in enumerate(names):
        # setdefault keeps the first index, matching list.index().
        index_by_name.setdefault(entity_name, position)
    return index_by_name


NORM_ENTITIES = normalize_entities()
# Lookup table built once at import; it is not refreshed if NORM_ENTITIES is
# modified afterwards.
_NORM_ENTITY_INDEX = _build_entity_index(NORM_ENTITIES)

# ______________________________________________________________________________

# NOTE(review): on a Python 2 byte string this character class is applied
# byte by byte, so the accented letters are matched through their UTF-8
# bytes -- confirm this is the intended input type.
_WORD_RE = re.compile('([a-zA-Zéèàùêôëö_]+)')

# Known false positives: labels allowed to start with a lower-case letter.
_LOWERCASE_START_ALLOWED = ('ejabberd', 'phpMyAdmin')

# Known false positives: labels containing one of these substrings are not
# checked for entity spelling.
_ENTITY_CHECK_EXEMPT = (
    'ipsec.conf',
    'test-rvp',
    'bareos-',
    'bacula-',
    '/var/log/zephir',
    'exemple : eolebase',
)


def parse_string(text):
    """Return the words found in ``text``.

    A word is a maximal run of characters accepted by ``_WORD_RE``: ASCII
    letters, underscores and the accented letters listed in the pattern.
    Accents are left untouched.

    :param text: text to split into words
    :return: list of matched words, in order of appearance
    """
    return _WORD_RE.findall(text)


def is_in_entities(text):
    """Return the index in ``entities`` of the entry matching ``text``.

    ``text`` is lower-cased and compared with the normalized (accent-free,
    lower-cased) entity names; ``NORM_ENTITIES`` and ``entities`` share the
    same ordering, so the index is valid for both.

    :param text: word to look up
    :return: index of the first matching entry, or ``None`` if there is none
    """
    return _NORM_ENTITY_INDEX.get(text.lower())


def is_correct(libelle, name, family=False):
    """Return the list of spelling problems found in a label.

    Two checks are made:

    * the label must not start with a lower-case character, unless it starts
      with one of ``_LOWERCASE_START_ALLOWED``;
    * every word that matches an entity name case-insensitively must be
      written exactly like the reference entry in ``entities``, unless the
      label contains one of ``_ENTITY_CHECK_EXEMPT``.

    :param libelle: label to check; ``None``, a byte string, or a ``unicode``
        object (which is encoded with ``ENCODING`` before checking)
    :param name: identifier inserted in every message
    :param family: when true, entity messages start with ``famille [name]``
        instead of ``%%name``
    :return: list of message strings, empty when nothing is wrong or when
        the label is ``None`` or empty
    """
    if libelle is None:
        return []
    if not isinstance(libelle, str):
        libelle = unicode.encode(libelle, ENCODING)
    if libelle == '':
        return []

    ret = []
    if (libelle[0].islower()
            and not libelle.startswith(_LOWERCASE_START_ALLOWED)):
        ret.append('%%%%%s : phrase sans majuscule' % name)

    # FIXME: known false positives. The exemption depends only on the label,
    # so it is tested once here, not once per word.
    if any(marker in libelle for marker in _ENTITY_CHECK_EXEMPT):
        return ret

    for text in parse_string(libelle):
        text_index = is_in_entities(text)
        if text_index is None:
            continue
        expected = entities[text_index]
        if str(text) == str(expected):
            continue
        # Decode/encode round trip: yields the entry as ENCODING bytes and
        # raises if the entry is not valid in that encoding.
        ent = unicode(expected, ENCODING).encode(ENCODING)
        if family:
            ret.append('famille [%s] : %s => %s' % (str(name), text, ent))
        else:
            ret.append('%%%%%s : %s => %s' % (str(name), text, ent))
    return ret

# ______________________________________________________________________________