Merge pull request #2763 from castrapel/metrics_errors_acme
Better exception handling, logging, and metrics for ACME flow
This commit is contained in:
commit
e313b74813
|
@ -10,13 +10,21 @@ from dyn.tm.session import DynectSession
|
||||||
from dyn.tm.zones import Node, Zone, get_all_zones
|
from dyn.tm.zones import Node, Zone, get_all_zones
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
|
|
||||||
|
from lemur.extensions import metrics, sentry
|
||||||
|
|
||||||
|
|
||||||
def get_dynect_session():
|
def get_dynect_session():
|
||||||
dynect_session = DynectSession(
|
try:
|
||||||
current_app.config.get('ACME_DYN_CUSTOMER_NAME', ''),
|
dynect_session = DynectSession(
|
||||||
current_app.config.get('ACME_DYN_USERNAME', ''),
|
current_app.config.get('ACME_DYN_CUSTOMER_NAME', ''),
|
||||||
current_app.config.get('ACME_DYN_PASSWORD', ''),
|
current_app.config.get('ACME_DYN_USERNAME', ''),
|
||||||
)
|
current_app.config.get('ACME_DYN_PASSWORD', ''),
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
sentry.captureException()
|
||||||
|
metrics.send('get_dynect_session_fail', 'counter', 1)
|
||||||
|
current_app.logger.debug("Unable to establish connection to Dyn", exc_info=True)
|
||||||
|
raise
|
||||||
return dynect_session
|
return dynect_session
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,10 +38,12 @@ def _has_dns_propagated(name, token):
|
||||||
for txt_record in rdata.strings:
|
for txt_record in rdata.strings:
|
||||||
txt_records.append(txt_record.decode("utf-8"))
|
txt_records.append(txt_record.decode("utf-8"))
|
||||||
except dns.exception.DNSException:
|
except dns.exception.DNSException:
|
||||||
|
metrics.send('has_dns_propagated_fail', 'counter', 1)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
for txt_record in txt_records:
|
for txt_record in txt_records:
|
||||||
if txt_record == token:
|
if txt_record == token:
|
||||||
|
metrics.send('has_dns_propagated_success', 'counter', 1)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
@ -46,10 +56,12 @@ def wait_for_dns_change(change_id, account_number=None):
|
||||||
status = _has_dns_propagated(fqdn, token)
|
status = _has_dns_propagated(fqdn, token)
|
||||||
current_app.logger.debug("Record status for fqdn: {}: {}".format(fqdn, status))
|
current_app.logger.debug("Record status for fqdn: {}: {}".format(fqdn, status))
|
||||||
if status:
|
if status:
|
||||||
|
metrics.send('wait_for_dns_change_success', 'counter', 1)
|
||||||
break
|
break
|
||||||
time.sleep(20)
|
time.sleep(20)
|
||||||
if not status:
|
if not status:
|
||||||
# TODO: Delete associated DNS text record here
|
# TODO: Delete associated DNS text record here
|
||||||
|
metrics.send('wait_for_dns_change_fail', 'counter', 1)
|
||||||
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
|
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -67,6 +79,7 @@ def get_zone_name(domain):
|
||||||
if z.name.count(".") > zone_name.count("."):
|
if z.name.count(".") > zone_name.count("."):
|
||||||
zone_name = z.name
|
zone_name = z.name
|
||||||
if not zone_name:
|
if not zone_name:
|
||||||
|
metrics.send('dyn_no_zone_name', 'counter', 1)
|
||||||
raise Exception("No Dyn zone found for domain: {}".format(domain))
|
raise Exception("No Dyn zone found for domain: {}".format(domain))
|
||||||
return zone_name
|
return zone_name
|
||||||
|
|
||||||
|
@ -99,6 +112,8 @@ def create_txt_record(domain, token, account_number):
|
||||||
"Record already exists: {}".format(domain, token, e), exc_info=True
|
"Record already exists: {}".format(domain, token, e), exc_info=True
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
metrics.send('create_txt_record_error', 'counter', 1)
|
||||||
|
sentry.captureException()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
change_id = (fqdn, token)
|
change_id = (fqdn, token)
|
||||||
|
@ -122,6 +137,8 @@ def delete_txt_record(change_id, account_number, domain, token):
|
||||||
try:
|
try:
|
||||||
all_txt_records = node.get_all_records_by_type('TXT')
|
all_txt_records = node.get_all_records_by_type('TXT')
|
||||||
except DynectGetError:
|
except DynectGetError:
|
||||||
|
sentry.captureException()
|
||||||
|
metrics.send('delete_txt_record_error', 'counter', 1)
|
||||||
# No Text Records remain or host is not in the zone anymore because all records have been deleted.
|
# No Text Records remain or host is not in the zone anymore because all records have been deleted.
|
||||||
return
|
return
|
||||||
for txt_record in all_txt_records:
|
for txt_record in all_txt_records:
|
||||||
|
@ -178,6 +195,7 @@ def get_authoritative_nameserver(domain):
|
||||||
|
|
||||||
rcode = response.rcode()
|
rcode = response.rcode()
|
||||||
if rcode != dns.rcode.NOERROR:
|
if rcode != dns.rcode.NOERROR:
|
||||||
|
metrics.send('get_authoritative_nameserver_error', 'counter', 1)
|
||||||
if rcode == dns.rcode.NXDOMAIN:
|
if rcode == dns.rcode.NXDOMAIN:
|
||||||
raise Exception('%s does not exist.' % sub)
|
raise Exception('%s does not exist.' % sub)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -28,6 +28,7 @@ from lemur.authorizations import service as authorization_service
|
||||||
from lemur.common.utils import generate_private_key
|
from lemur.common.utils import generate_private_key
|
||||||
from lemur.dns_providers import service as dns_provider_service
|
from lemur.dns_providers import service as dns_provider_service
|
||||||
from lemur.exceptions import InvalidAuthority, InvalidConfiguration, UnknownProvider
|
from lemur.exceptions import InvalidAuthority, InvalidConfiguration, UnknownProvider
|
||||||
|
from lemur.extensions import metrics, sentry
|
||||||
from lemur.plugins import lemur_acme as acme
|
from lemur.plugins import lemur_acme as acme
|
||||||
from lemur.plugins.bases import IssuerPlugin
|
from lemur.plugins.bases import IssuerPlugin
|
||||||
from lemur.plugins.lemur_acme import cloudflare, dyn, route53
|
from lemur.plugins.lemur_acme import cloudflare, dyn, route53
|
||||||
|
@ -47,7 +48,9 @@ class AcmeHandler(object):
|
||||||
try:
|
try:
|
||||||
self.all_dns_providers = dns_provider_service.get_all_dns_providers()
|
self.all_dns_providers = dns_provider_service.get_all_dns_providers()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
current_app.logger.error("Unable to fetch DNS Providers: {}".format(e))
|
metrics.send('AcmeHandler_init_error', 'counter', 1)
|
||||||
|
sentry.captureException()
|
||||||
|
current_app.logger.error(f"Unable to fetch DNS Providers: {e}")
|
||||||
self.all_dns_providers = []
|
self.all_dns_providers = []
|
||||||
|
|
||||||
def find_dns_challenge(self, authorizations):
|
def find_dns_challenge(self, authorizations):
|
||||||
|
@ -94,6 +97,7 @@ class AcmeHandler(object):
|
||||||
current_app.logger.debug("Finalizing DNS challenge for {0}".format(authz_record.authz[0].body.identifier.value))
|
current_app.logger.debug("Finalizing DNS challenge for {0}".format(authz_record.authz[0].body.identifier.value))
|
||||||
dns_providers = self.dns_providers_for_domain.get(authz_record.host)
|
dns_providers = self.dns_providers_for_domain.get(authz_record.host)
|
||||||
if not dns_providers:
|
if not dns_providers:
|
||||||
|
metrics.send('complete_dns_challenge_error_no_dnsproviders', 'counter', 1)
|
||||||
raise Exception("No DNS providers found for domain: {}".format(authz_record.host))
|
raise Exception("No DNS providers found for domain: {}".format(authz_record.host))
|
||||||
|
|
||||||
for dns_provider in dns_providers:
|
for dns_provider in dns_providers:
|
||||||
|
@ -102,7 +106,15 @@ class AcmeHandler(object):
|
||||||
account_number = dns_provider_options.get("account_id")
|
account_number = dns_provider_options.get("account_id")
|
||||||
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
|
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
|
||||||
for change_id in authz_record.change_id:
|
for change_id in authz_record.change_id:
|
||||||
dns_provider_plugin.wait_for_dns_change(change_id, account_number=account_number)
|
try:
|
||||||
|
dns_provider_plugin.wait_for_dns_change(change_id, account_number=account_number)
|
||||||
|
except Exception:
|
||||||
|
metrics.send('complete_dns_challenge_error', 'counter', 1)
|
||||||
|
sentry.captureException()
|
||||||
|
current_app.logger.debug(
|
||||||
|
f"Unable to resolve DNS challenge for change_id: {change_id}, account_id: "
|
||||||
|
f"{account_number}", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
for dns_challenge in authz_record.dns_challenge:
|
for dns_challenge in authz_record.dns_challenge:
|
||||||
response = dns_challenge.response(acme_client.client.net.key)
|
response = dns_challenge.response(acme_client.client.net.key)
|
||||||
|
@ -114,6 +126,7 @@ class AcmeHandler(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
if not verified:
|
if not verified:
|
||||||
|
metrics.send('complete_dns_challenge_verification_error', 'counter', 1)
|
||||||
raise ValueError("Failed verification")
|
raise ValueError("Failed verification")
|
||||||
|
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
@ -129,7 +142,9 @@ class AcmeHandler(object):
|
||||||
try:
|
try:
|
||||||
orderr = acme_client.finalize_order(order, deadline)
|
orderr = acme_client.finalize_order(order, deadline)
|
||||||
except AcmeError:
|
except AcmeError:
|
||||||
current_app.logger.error("Unable to resolve Acme order: {}".format(order), exc_info=True)
|
sentry.captureException()
|
||||||
|
metrics.send('request_certificate_error', 'counter', 1)
|
||||||
|
current_app.logger.error(f"Unable to resolve Acme order: {order}", exc_info=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
pem_certificate = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
|
pem_certificate = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
|
||||||
|
@ -196,6 +211,7 @@ class AcmeHandler(object):
|
||||||
|
|
||||||
for domain in order_info.domains:
|
for domain in order_info.domains:
|
||||||
if not self.dns_providers_for_domain.get(domain):
|
if not self.dns_providers_for_domain.get(domain):
|
||||||
|
metrics.send('get_authorizations_no_dns_provider_for_domain', 'counter', 1)
|
||||||
raise Exception("No DNS providers found for domain: {}".format(domain))
|
raise Exception("No DNS providers found for domain: {}".format(domain))
|
||||||
for dns_provider in self.dns_providers_for_domain[domain]:
|
for dns_provider in self.dns_providers_for_domain[domain]:
|
||||||
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
|
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
|
||||||
|
@ -284,6 +300,8 @@ class AcmeHandler(object):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# If this fails, it's most likely because the record doesn't exist (It was already cleaned up)
|
# If this fails, it's most likely because the record doesn't exist (It was already cleaned up)
|
||||||
# or we're not authorized to modify it.
|
# or we're not authorized to modify it.
|
||||||
|
metrics.send('cleanup_dns_challenges_error', 'counter', 1)
|
||||||
|
sentry.captureException()
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def get_dns_provider(self, type):
|
def get_dns_provider(self, type):
|
||||||
|
@ -378,12 +396,15 @@ class ACMEIssuerPlugin(IssuerPlugin):
|
||||||
try:
|
try:
|
||||||
order = acme_client.new_order(pending_cert.csr)
|
order = acme_client.new_order(pending_cert.csr)
|
||||||
except WildcardUnsupportedError:
|
except WildcardUnsupportedError:
|
||||||
|
metrics.send('get_ordered_certificate_wildcard_unsupported', 'counter', 1)
|
||||||
raise Exception("The currently selected ACME CA endpoint does"
|
raise Exception("The currently selected ACME CA endpoint does"
|
||||||
" not support issuing wildcard certificates.")
|
" not support issuing wildcard certificates.")
|
||||||
try:
|
try:
|
||||||
authorizations = self.acme.get_authorizations(acme_client, order, order_info)
|
authorizations = self.acme.get_authorizations(acme_client, order, order_info)
|
||||||
except ClientError:
|
except ClientError:
|
||||||
current_app.logger.error("Unable to resolve pending cert: {}".format(pending_cert.name), exc_info=True)
|
sentry.captureException()
|
||||||
|
metrics.send('get_ordered_certificate_error', 'counter', 1)
|
||||||
|
current_app.logger.error(f"Unable to resolve pending cert: {pending_cert.name}", exc_info=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
authorizations = self.acme.finalize_authorizations(acme_client, authorizations)
|
authorizations = self.acme.finalize_authorizations(acme_client, authorizations)
|
||||||
|
@ -418,6 +439,8 @@ class ACMEIssuerPlugin(IssuerPlugin):
|
||||||
try:
|
try:
|
||||||
order = acme_client.new_order(pending_cert.csr)
|
order = acme_client.new_order(pending_cert.csr)
|
||||||
except WildcardUnsupportedError:
|
except WildcardUnsupportedError:
|
||||||
|
sentry.captureException()
|
||||||
|
metrics.send('get_ordered_certificates_wildcard_unsupported_error', 'counter', 1)
|
||||||
raise Exception("The currently selected ACME CA endpoint does"
|
raise Exception("The currently selected ACME CA endpoint does"
|
||||||
" not support issuing wildcard certificates.")
|
" not support issuing wildcard certificates.")
|
||||||
|
|
||||||
|
@ -430,7 +453,13 @@ class ACMEIssuerPlugin(IssuerPlugin):
|
||||||
"order": order,
|
"order": order,
|
||||||
})
|
})
|
||||||
except (ClientError, ValueError, Exception) as e:
|
except (ClientError, ValueError, Exception) as e:
|
||||||
current_app.logger.error("Unable to resolve pending cert: {}".format(pending_cert), exc_info=True)
|
sentry.captureException()
|
||||||
|
metrics.send('get_ordered_certificates_pending_creation_error', 'counter', 1)
|
||||||
|
current_app.logger.error(f"Unable to resolve pending cert: {pending_cert}", exc_info=True)
|
||||||
|
|
||||||
|
error = e
|
||||||
|
if globals().get("order") and order:
|
||||||
|
error += f" Order uri: {order.uri}"
|
||||||
certs.append({
|
certs.append({
|
||||||
"cert": False,
|
"cert": False,
|
||||||
"pending_cert": pending_cert,
|
"pending_cert": pending_cert,
|
||||||
|
@ -459,14 +488,17 @@ class ACMEIssuerPlugin(IssuerPlugin):
|
||||||
"pending_cert": entry["pending_cert"],
|
"pending_cert": entry["pending_cert"],
|
||||||
})
|
})
|
||||||
except (PollError, AcmeError, Exception) as e:
|
except (PollError, AcmeError, Exception) as e:
|
||||||
|
sentry.captureException()
|
||||||
|
metrics.send('get_ordered_certificates_resolution_error', 'counter', 1)
|
||||||
order_url = order.uri
|
order_url = order.uri
|
||||||
|
error = f"{e}. Order URI: {order_url}"
|
||||||
current_app.logger.error(
|
current_app.logger.error(
|
||||||
"Unable to resolve pending cert: {}. "
|
f"Unable to resolve pending cert: {pending_cert}. "
|
||||||
"Check out {} for more information.".format(pending_cert, order_url), exc_info=True)
|
f"Check out {order_url} for more information.", exc_info=True)
|
||||||
certs.append({
|
certs.append({
|
||||||
"cert": False,
|
"cert": False,
|
||||||
"pending_cert": entry["pending_cert"],
|
"pending_cert": entry["pending_cert"],
|
||||||
"last_error": e,
|
"last_error": error,
|
||||||
})
|
})
|
||||||
# Ensure DNS records get deleted
|
# Ensure DNS records get deleted
|
||||||
self.acme.cleanup_dns_challenges(
|
self.acme.cleanup_dns_challenges(
|
||||||
|
|
Loading…
Reference in New Issue