From 72f6fdb17d3ad0bba4796fdc668db739246aa36b Mon Sep 17 00:00:00 2001 From: Marti Raudsepp Date: Wed, 19 Dec 2018 17:59:48 +0200 Subject: [PATCH] Properly handle Unicode in issuer name sanitization If the point of sanitization is to get rid of all non-alphanumeric characters, then Unicode characters should probably be forbidden too. We can re-use the same sanitization function as used for cert 'name'. --- lemur/common/defaults.py | 38 +++++++++++++++++------------------- lemur/tests/conftest.py | 7 ++++++- lemur/tests/test_defaults.py | 32 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 21 deletions(-) diff --git a/lemur/common/defaults.py b/lemur/common/defaults.py index e9bbc6e6..72e863c1 100644 --- a/lemur/common/defaults.py +++ b/lemur/common/defaults.py @@ -7,18 +7,21 @@ from lemur.extensions import sentry from lemur.constants import SAN_NAMING_TEMPLATE, DEFAULT_NAMING_TEMPLATE -def text_to_slug(value): - """Normalize a string to a "slug" value, stripping character accents and removing non-alphanum characters.""" +def text_to_slug(value, joiner='-'): + """ + Normalize a string to a "slug" value, stripping character accents and removing non-alphanum characters. + A series of non-alphanumeric characters is replaced with the joiner character. + """ # Strip all character accents: decompose Unicode characters and then drop combining chars. value = ''.join(c for c in unicodedata.normalize('NFKD', value) if not unicodedata.combining(c)) - # Replace all remaining non-alphanumeric characters with '-'. Multiple characters get collapsed into a single dash. - # Except, keep 'xn--' used in IDNA domain names as is. - value = re.sub(r'[^A-Za-z0-9.]+(?