Improve certificate name normalization: remove Unicode characters, etc. (#906)

* Accented characters are replaced with their non-accented versions (ä -> a)
* Spaces are replaced with '-' (previously they were removed)
* Multiple non-alphanumeric characters are collapsed into one '-'
This commit is contained in:
Marti Raudsepp 2017-09-08 20:52:22 +03:00 committed by kevgliss
parent e72efce071
commit dafed86179
4 changed files with 40 additions and 13 deletions

View File

@ -62,7 +62,6 @@ def get_sequence(name):
def get_or_increase_name(name):
name = '-'.join(name.strip().split(' '))
certificates = Certificate.query.filter(Certificate.name.ilike('{0}%'.format(name))).all()
if not certificates:
@ -138,7 +137,7 @@ class Certificate(db.Model):
# when destinations are appended they require a valid name.
if kwargs.get('name'):
self.name = get_or_increase_name(kwargs['name'])
self.name = get_or_increase_name(defaults.text_to_slug(kwargs['name']))
else:
self.name = get_or_increase_name(
defaults.certificate_name(self.cn, self.issuer, self.not_before, self.not_after, self.san))

View File

@ -1,8 +1,25 @@
import re
import unicodedata
from cryptography import x509
from flask import current_app
from lemur.constants import SAN_NAMING_TEMPLATE, DEFAULT_NAMING_TEMPLATE
def text_to_slug(value):
    """Normalize a string to a "slug" value.

    Character accents are stripped (ä => a), every run of characters other than
    ASCII letters, digits and '.' is collapsed into a single '-', and dashes at
    the start or end of the result are removed. The IDNA ACE prefix 'xn--' is
    kept as is, in any letter case.

    :param value: text to normalize
    :return: the normalized slug; empty when nothing usable remains
    """
    # Strip all character accents (ä => a): decompose Unicode characters and then drop combining chars.
    decomposed = unicodedata.normalize('NFKD', value)
    value = ''.join(c for c in decomposed if not unicodedata.combining(c))
    # Replace all remaining non-alphanumeric characters with '-'. Multiple characters get collapsed into a single dash.
    # Except, keep 'xn--' used in IDNA domain names as is. The lookbehind rejects any match that would end right
    # after the prefix, and it accepts both letter cases because ACE prefixes are compared case-insensitively
    # ('XN--MNCHEN-3YA.DE' must survive just like 'xn--mnchen-3ya.de').
    value = re.sub(r'[^A-Za-z0-9.]+(?<![Xx][Nn]--)', '-', value)
    # '-' in the beginning or end of string looks ugly.
    return value.strip('-')
def certificate_name(common_name, issuer, not_before, not_after, san):
"""
Create a name for our certificate. A naming standard
@ -25,21 +42,13 @@ def certificate_name(common_name, issuer, not_before, not_after, san):
temp = t.format(
subject=common_name,
issuer=issuer,
issuer=issuer.replace(' ', ''),
not_before=not_before.strftime('%Y%m%d'),
not_after=not_after.strftime('%Y%m%d')
)
disallowed_chars = ''.join(c for c in map(chr, range(256)) if not c.isalnum())
disallowed_chars = disallowed_chars.replace("-", "")
disallowed_chars = disallowed_chars.replace(".", "")
temp = temp.replace('*', "WILDCARD")
for c in disallowed_chars:
temp = temp.replace(c, "")
# white space is silly too
return temp.replace(" ", "-")
return text_to_slug(temp)
def signing_algorithm(cert):

View File

@ -22,7 +22,6 @@ from lemur.tests.vectors import VALID_ADMIN_HEADER_TOKEN, VALID_USER_HEADER_TOKE
def test_get_or_increase_name(session, certificate):
from lemur.certificates.models import get_or_increase_name
assert get_or_increase_name('test name') == 'test-name'
assert get_or_increase_name(certificate.name) == '{0}-1'.format(certificate.name)
certificate.name = 'test-cert-11111111'

View File

@ -42,6 +42,19 @@ def test_cert_issuer(client):
assert issuer(INTERNAL_VALID_LONG_CERT) == 'Example'
def test_text_to_slug(client):
    """Check text_to_slug against separator, accent, symbol and IDNA inputs."""
    from lemur.common.defaults import text_to_slug

    # (input, expected slug) pairs, checked in order.
    expectations = [
        ('test - string', 'test-string'),
        # Accented characters are decomposed
        ('föö bär', 'foo-bar'),
        # Melt away the Unicode Snowman
        ('\u2603', ''),
        ('\u2603test\u2603', 'test'),
        ('snow\u2603man', 'snow-man'),
        # IDNA-encoded domain names should be kept as-is
        ('xn--i1b6eqas.xn--xmpl-loa9b3671b.com', 'xn--i1b6eqas.xn--xmpl-loa9b3671b.com'),
    ]
    for raw, expected in expectations:
        assert text_to_slug(raw) == expected
def test_create_name(client):
from lemur.common.defaults import certificate_name
from datetime import datetime
@ -59,3 +72,10 @@ def test_create_name(client):
datetime(2015, 5, 12, 0, 0, 0),
True
) == 'SAN-example.com-ExampleInc-20150507-20150512'
assert certificate_name(
'xn--mnchen-3ya.de',
'Vertrauenswürdig Autorität',
datetime(2015, 5, 7, 0, 0, 0),
datetime(2015, 5, 12, 0, 0, 0),
False
) == 'xn--mnchen-3ya.de-VertrauenswurdigAutoritat-20150507-20150512'