Merge pull request #2765 from castrapel/metrics_errors_acme
Metrics / Exception Reporting - LetsEncrypt / Dynect flow, ELB source sync
This commit is contained in:
commit
db4c4846b9
|
@ -5,7 +5,7 @@ import dns.exception
|
|||
import dns.name
|
||||
import dns.query
|
||||
import dns.resolver
|
||||
from dyn.tm.errors import DynectCreateError, DynectGetError
|
||||
from dyn.tm.errors import DynectCreateError, DynectDeleteError, DynectGetError, DynectUpdateError
|
||||
from dyn.tm.session import DynectSession
|
||||
from dyn.tm.zones import Node, Zone, get_all_zones
|
||||
from flask import current_app
|
||||
|
@ -51,17 +51,23 @@ def _has_dns_propagated(name, token):
|
|||
|
||||
def wait_for_dns_change(change_id, account_number=None):
|
||||
fqdn, token = change_id
|
||||
number_of_attempts = 10
|
||||
number_of_attempts = 20
|
||||
for attempts in range(0, number_of_attempts):
|
||||
status = _has_dns_propagated(fqdn, token)
|
||||
current_app.logger.debug("Record status for fqdn: {}: {}".format(fqdn, status))
|
||||
if status:
|
||||
metrics.send('wait_for_dns_change_success', 'counter', 1)
|
||||
break
|
||||
time.sleep(20)
|
||||
time.sleep(10)
|
||||
if not status:
|
||||
# TODO: Delete associated DNS text record here
|
||||
metrics.send('wait_for_dns_change_fail', 'counter', 1)
|
||||
sentry.captureException(
|
||||
extra={
|
||||
"fqdn": fqdn, "txt_record": token}
|
||||
)
|
||||
metrics.send('wait_for_dns_change_error', 'counter', 1,
|
||||
metric_tags={'fqdn': fqdn, 'txt_record': token})
|
||||
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
|
||||
return
|
||||
|
||||
|
@ -105,7 +111,7 @@ def create_txt_record(domain, token, account_number):
|
|||
zone.add_record(node_name, record_type='TXT', txtdata="\"{}\"".format(token), ttl=5)
|
||||
zone.publish()
|
||||
current_app.logger.debug("TXT record created: {0}, token: {1}".format(fqdn, token))
|
||||
except DynectCreateError as e:
|
||||
except (DynectCreateError, DynectUpdateError) as e:
|
||||
if "Cannot duplicate existing record data" in e.message:
|
||||
current_app.logger.debug(
|
||||
"Unable to add record. Domain: {}. Token: {}. "
|
||||
|
@ -138,14 +144,33 @@ def delete_txt_record(change_id, account_number, domain, token):
|
|||
all_txt_records = node.get_all_records_by_type('TXT')
|
||||
except DynectGetError:
|
||||
sentry.captureException()
|
||||
metrics.send('delete_txt_record_error', 'counter', 1)
|
||||
metrics.send('delete_txt_record_geterror', 'counter', 1)
|
||||
# No Text Records remain or host is not in the zone anymore because all records have been deleted.
|
||||
return
|
||||
for txt_record in all_txt_records:
|
||||
if txt_record.txtdata == ("{}".format(token)):
|
||||
current_app.logger.debug("Deleting TXT record name: {0}".format(fqdn))
|
||||
txt_record.delete()
|
||||
zone.publish()
|
||||
try:
|
||||
txt_record.delete()
|
||||
except DynectDeleteError:
|
||||
sentry.captureException(
|
||||
extra={
|
||||
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
|
||||
"txt_record": txt_record.txtdata}
|
||||
)
|
||||
metrics.send('delete_txt_record_deleteerror', 'counter', 1,
|
||||
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
|
||||
|
||||
try:
|
||||
zone.publish()
|
||||
except DynectUpdateError:
|
||||
sentry.captureException(
|
||||
extra={
|
||||
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
|
||||
"txt_record": txt_record.txtdata}
|
||||
)
|
||||
metrics.send('delete_txt_record_publish_error', 'counter', 1,
|
||||
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
|
||||
|
||||
|
||||
def delete_acme_txt_records(domain):
|
||||
|
@ -171,7 +196,16 @@ def delete_acme_txt_records(domain):
|
|||
all_txt_records = node.get_all_records_by_type('TXT')
|
||||
for txt_record in all_txt_records:
|
||||
current_app.logger.debug("Deleting TXT record name: {0}".format(fqdn))
|
||||
txt_record.delete()
|
||||
try:
|
||||
txt_record.delete()
|
||||
except DynectDeleteError:
|
||||
sentry.captureException(
|
||||
extra={
|
||||
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
|
||||
"txt_record": txt_record.txtdata}
|
||||
)
|
||||
metrics.send('delete_txt_record_deleteerror', 'counter', 1,
|
||||
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
|
||||
zone.publish()
|
||||
|
||||
|
||||
|
|
|
@ -142,9 +142,9 @@ class AcmeHandler(object):
|
|||
try:
|
||||
orderr = acme_client.finalize_order(order, deadline)
|
||||
except AcmeError:
|
||||
sentry.captureException()
|
||||
sentry.captureException(extra={"order_url": order.uri})
|
||||
metrics.send('request_certificate_error', 'counter', 1)
|
||||
current_app.logger.error(f"Unable to resolve Acme order: {order}", exc_info=True)
|
||||
current_app.logger.error(f"Unable to resolve Acme order: {order.uri}", exc_info=True)
|
||||
raise
|
||||
|
||||
pem_certificate = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
|
||||
|
@ -289,9 +289,10 @@ class AcmeHandler(object):
|
|||
dns_challenges = authz_record.dns_challenge
|
||||
host_to_validate = self.maybe_remove_wildcard(authz_record.host)
|
||||
host_to_validate = self.maybe_add_extension(host_to_validate, dns_provider_options)
|
||||
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
|
||||
for dns_challenge in dns_challenges:
|
||||
try:
|
||||
dns_provider.delete_txt_record(
|
||||
dns_provider_plugin.delete_txt_record(
|
||||
authz_record.change_id,
|
||||
account_number,
|
||||
dns_challenge.validation_domain_name(host_to_validate),
|
||||
|
|
|
@ -10,7 +10,7 @@ from flask import current_app
|
|||
|
||||
from retrying import retry
|
||||
|
||||
from lemur.extensions import metrics
|
||||
from lemur.extensions import metrics, sentry
|
||||
from lemur.exceptions import InvalidListener
|
||||
from lemur.plugins.lemur_aws.sts import sts_client
|
||||
|
||||
|
@ -149,7 +149,7 @@ def describe_listeners_v2(**kwargs):
|
|||
|
||||
|
||||
@sts_client('elb')
|
||||
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
|
||||
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
|
||||
def describe_load_balancer_policies(load_balancer_name, policy_names, **kwargs):
|
||||
"""
|
||||
Fetching all policies currently associated with an ELB.
|
||||
|
@ -157,11 +157,18 @@ def describe_load_balancer_policies(load_balancer_name, policy_names, **kwargs):
|
|||
:param load_balancer_name:
|
||||
:return:
|
||||
"""
|
||||
return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name, PolicyNames=policy_names)
|
||||
try:
|
||||
return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name,
|
||||
PolicyNames=policy_names)
|
||||
except Exception as e: # noqa
|
||||
metrics.send('describe_load_balancer_policies_fail', 'counter', 1,
|
||||
metric_tags={"load_balancer_name": load_balancer_name, "policy_names": policy_names, "error": e})
|
||||
sentry.captureException(extra={"load_balancer_name": load_balancer_name, "policy_names": policy_names})
|
||||
raise
|
||||
|
||||
|
||||
@sts_client('elbv2')
|
||||
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
|
||||
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
|
||||
def describe_ssl_policies_v2(policy_names, **kwargs):
|
||||
"""
|
||||
Fetching all policies currently associated with an ELB.
|
||||
|
@ -169,7 +176,13 @@ def describe_ssl_policies_v2(policy_names, **kwargs):
|
|||
:param policy_names:
|
||||
:return:
|
||||
"""
|
||||
return kwargs['client'].describe_ssl_policies(Names=policy_names)
|
||||
try:
|
||||
return kwargs['client'].describe_ssl_policies(Names=policy_names)
|
||||
except Exception as e: # noqa
|
||||
metrics.send('describe_ssl_policies_v2_fail', 'counter', 1,
|
||||
metric_tags={"policy_names": policy_names, "error": e})
|
||||
sentry.captureException(extra={"policy_names": policy_names})
|
||||
raise
|
||||
|
||||
|
||||
@sts_client('elb')
|
||||
|
|
Loading…
Reference in New Issue