Strip out self-polling logic and rely on ACME; Enhance ELB logging and retries

This commit is contained in:
Curtis Castrapel 2019-04-26 10:16:18 -07:00
parent b300a21948
commit 1e64851d79
3 changed files with 104 additions and 78 deletions

View File

@@ -64,11 +64,10 @@ def wait_for_dns_change(change_id, account_number=None):
metrics.send('wait_for_dns_change_fail', 'counter', 1) metrics.send('wait_for_dns_change_fail', 'counter', 1)
sentry.captureException( sentry.captureException(
extra={ extra={
"fqdn": fqdn, "txt_record": token} "fqdn": str(fqdn), "txt_record": str(token)}
) )
metrics.send('wait_for_dns_change_error', 'counter', 1, metrics.send('wait_for_dns_change_error', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': token}) metric_tags={'fqdn': fqdn, 'txt_record': token})
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
return return
@@ -155,8 +154,8 @@ def delete_txt_record(change_id, account_number, domain, token):
except DynectDeleteError: except DynectDeleteError:
sentry.captureException( sentry.captureException(
extra={ extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name, "fqdn": str(fqdn), "zone_name": str(zone_name), "node_name": str(node_name),
"txt_record": txt_record.txtdata} "txt_record": str(txt_record.txtdata)}
) )
metrics.send('delete_txt_record_deleteerror', 'counter', 1, metrics.send('delete_txt_record_deleteerror', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata}) metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
@@ -166,11 +165,11 @@ def delete_txt_record(change_id, account_number, domain, token):
except DynectUpdateError: except DynectUpdateError:
sentry.captureException( sentry.captureException(
extra={ extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name, "fqdn": str(fqdn), "zone_name": str(zone_name), "node_name": str(node_name),
"txt_record": txt_record.txtdata} "txt_record": str(txt_record.txtdata)}
) )
metrics.send('delete_txt_record_publish_error', 'counter', 1, metrics.send('delete_txt_record_publish_error', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata}) metric_tags={'fqdn': str(fqdn), 'txt_record': str(txt_record.txtdata)})
def delete_acme_txt_records(domain): def delete_acme_txt_records(domain):
@@ -201,8 +200,8 @@ def delete_acme_txt_records(domain):
except DynectDeleteError: except DynectDeleteError:
sentry.captureException( sentry.captureException(
extra={ extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name, "fqdn": str(fqdn), "zone_name": str(zone_name), "node_name": str(node_name),
"txt_record": txt_record.txtdata} "txt_record": str(txt_record.txtdata)}
) )
metrics.send('delete_txt_record_deleteerror', 'counter', 1, metrics.send('delete_txt_record_deleteerror', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata}) metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})

View File

@@ -102,22 +102,6 @@ class AcmeHandler(object):
metrics.send('complete_dns_challenge_error_no_dnsproviders', 'counter', 1) metrics.send('complete_dns_challenge_error_no_dnsproviders', 'counter', 1)
raise Exception("No DNS providers found for domain: {}".format(authz_record.host)) raise Exception("No DNS providers found for domain: {}".format(authz_record.host))
for dns_provider in dns_providers:
# Grab account number (For Route53)
dns_provider_options = json.loads(dns_provider.credentials)
account_number = dns_provider_options.get("account_id")
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
for change_id in authz_record.change_id:
try:
dns_provider_plugin.wait_for_dns_change(change_id, account_number=account_number)
except Exception:
metrics.send('complete_dns_challenge_error', 'counter', 1)
sentry.captureException()
current_app.logger.debug(
f"Unable to resolve DNS challenge for change_id: {change_id}, account_id: "
f"{account_number}", exc_info=True)
raise
for dns_challenge in authz_record.dns_challenge: for dns_challenge in authz_record.dns_challenge:
response = dns_challenge.response(acme_client.client.net.key) response = dns_challenge.response(acme_client.client.net.key)
@@ -139,12 +123,12 @@ class AcmeHandler(object):
for authz in authorization.authz: for authz in authorization.authz:
authorization_resource, _ = acme_client.poll(authz) authorization_resource, _ = acme_client.poll(authz)
deadline = datetime.datetime.now() + datetime.timedelta(seconds=90) deadline = datetime.datetime.now() + datetime.timedelta(seconds=360)
try: try:
orderr = acme_client.poll_and_finalize(order, deadline) orderr = acme_client.poll_and_finalize(order, deadline)
except AcmeError: except AcmeError:
sentry.captureException(extra={"order_url": order.uri}) sentry.captureException(extra={"order_url": str(order.uri)})
metrics.send('request_certificate_error', 'counter', 1) metrics.send('request_certificate_error', 'counter', 1)
current_app.logger.error(f"Unable to resolve Acme order: {order.uri}", exc_info=True) current_app.logger.error(f"Unable to resolve Acme order: {order.uri}", exc_info=True)
raise raise

View File

@@ -21,14 +21,22 @@ def retry_throttled(exception):
:param exception: :param exception:
:return: :return:
""" """
# Log details about the exception
try:
raise exception
except Exception as e:
current_app.logger.error("ELB retry_throttled triggered", exc_info=True)
metrics.send('elb_retry', 'counter', 1,
metric_tags={"exception": e})
sentry.captureException()
if isinstance(exception, botocore.exceptions.ClientError): if isinstance(exception, botocore.exceptions.ClientError):
if exception.response['Error']['Code'] == 'LoadBalancerNotFound': if exception.response['Error']['Code'] == 'LoadBalancerNotFound':
return False return False
if exception.response['Error']['Code'] == 'CertificateNotFound': if exception.response['Error']['Code'] == 'CertificateNotFound':
return False return False
metrics.send('elb_retry', 'counter', 1)
return True return True
@@ -63,7 +71,7 @@ def get_all_elbs(**kwargs):
:return: :return:
""" """
elbs = [] elbs = []
try:
while True: while True:
response = get_elbs(**kwargs) response = get_elbs(**kwargs)
@@ -73,6 +81,10 @@ def get_all_elbs(**kwargs):
return elbs return elbs
else: else:
kwargs.update(dict(Marker=response['NextMarker'])) kwargs.update(dict(Marker=response['NextMarker']))
except Exception as e: # noqa
metrics.send('get_all_elbs_error', 'counter', 1)
sentry.captureException()
raise
def get_all_elbs_v2(**kwargs): def get_all_elbs_v2(**kwargs):
@@ -84,6 +96,7 @@ def get_all_elbs_v2(**kwargs):
""" """
elbs = [] elbs = []
try:
while True: while True:
response = get_elbs_v2(**kwargs) response = get_elbs_v2(**kwargs)
elbs += response['LoadBalancers'] elbs += response['LoadBalancers']
@@ -92,10 +105,14 @@ def get_all_elbs_v2(**kwargs):
return elbs return elbs
else: else:
kwargs.update(dict(Marker=response['NextMarker'])) kwargs.update(dict(Marker=response['NextMarker']))
except Exception as e: # noqa
metrics.send('get_all_elbs_v2_error', 'counter', 1)
sentry.captureException()
raise
@sts_client('elbv2') @sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def get_listener_arn_from_endpoint(endpoint_name, endpoint_port, **kwargs): def get_listener_arn_from_endpoint(endpoint_name, endpoint_port, **kwargs):
""" """
Get a listener ARN from an endpoint. Get a listener ARN from an endpoint.
@@ -103,6 +120,7 @@ def get_listener_arn_from_endpoint(endpoint_name, endpoint_port, **kwargs):
:param endpoint_port: :param endpoint_port:
:return: :return:
""" """
try:
client = kwargs.pop('client') client = kwargs.pop('client')
elbs = client.describe_load_balancers(Names=[endpoint_name]) elbs = client.describe_load_balancers(Names=[endpoint_name])
for elb in elbs['LoadBalancers']: for elb in elbs['LoadBalancers']:
@@ -110,20 +128,32 @@ def get_listener_arn_from_endpoint(endpoint_name, endpoint_port, **kwargs):
for listener in listeners['Listeners']: for listener in listeners['Listeners']:
if listener['Port'] == endpoint_port: if listener['Port'] == endpoint_port:
return listener['ListenerArn'] return listener['ListenerArn']
except Exception as e: # noqa
metrics.send('get_listener_arn_from_endpoint_error', 'counter', 1,
metric_tags={"error": e, "endpoint_name": endpoint_name, "endpoint_port": endpoint_port})
sentry.captureException(extra={"endpoint_name": str(endpoint_name),
"endpoint_port": str(endpoint_port)})
raise
@sts_client('elb') @sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def get_elbs(**kwargs): def get_elbs(**kwargs):
""" """
Fetches one page elb objects for a given account and region. Fetches one page elb objects for a given account and region.
""" """
try:
client = kwargs.pop('client') client = kwargs.pop('client')
return client.describe_load_balancers(**kwargs) return client.describe_load_balancers(**kwargs)
except Exception as e: # noqa
metrics.send('get_elbs_error', 'counter', 1,
metric_tags={"error": e})
sentry.captureException()
raise
@sts_client('elbv2') @sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def get_elbs_v2(**kwargs): def get_elbs_v2(**kwargs):
""" """
Fetches one page of elb objects for a given account and region. Fetches one page of elb objects for a given account and region.
@@ -131,12 +161,18 @@ def get_elbs_v2(**kwargs):
:param kwargs: :param kwargs:
:return: :return:
""" """
try:
client = kwargs.pop('client') client = kwargs.pop('client')
return client.describe_load_balancers(**kwargs) return client.describe_load_balancers(**kwargs)
except Exception as e: # noqa
metrics.send('get_elbs_v2_error', 'counter', 1,
metric_tags={"error": e})
sentry.captureException()
raise
@sts_client('elbv2') @sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def describe_listeners_v2(**kwargs): def describe_listeners_v2(**kwargs):
""" """
Fetches one page of listener objects for a given elb arn. Fetches one page of listener objects for a given elb arn.
@@ -144,8 +180,14 @@ def describe_listeners_v2(**kwargs):
:param kwargs: :param kwargs:
:return: :return:
""" """
try:
client = kwargs.pop('client') client = kwargs.pop('client')
return client.describe_listeners(**kwargs) return client.describe_listeners(**kwargs)
except Exception as e: # noqa
metrics.send('describe_listeners_v2_error', 'counter', 1,
metric_tags={"error": e})
sentry.captureException()
raise
@sts_client('elb') @sts_client('elb')
@@ -157,11 +199,12 @@ def describe_load_balancer_policies(load_balancer_name, policy_names, **kwargs):
:param load_balancer_name: :param load_balancer_name:
:return: :return:
""" """
try: try:
return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name, return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name,
PolicyNames=policy_names) PolicyNames=policy_names)
except Exception as e: # noqa except Exception as e: # noqa
metrics.send('describe_load_balancer_policies_fail', 'counter', 1, metrics.send('describe_load_balancer_policies_error', 'counter', 1,
metric_tags={"load_balancer_name": load_balancer_name, "policy_names": policy_names, "error": e}) metric_tags={"load_balancer_name": load_balancer_name, "policy_names": policy_names, "error": e})
sentry.captureException(extra={"load_balancer_name": load_balancer_name, "policy_names": policy_names}) sentry.captureException(extra={"load_balancer_name": load_balancer_name, "policy_names": policy_names})
raise raise
@@ -179,14 +222,14 @@ def describe_ssl_policies_v2(policy_names, **kwargs):
try: try:
return kwargs['client'].describe_ssl_policies(Names=policy_names) return kwargs['client'].describe_ssl_policies(Names=policy_names)
except Exception as e: # noqa except Exception as e: # noqa
metrics.send('describe_ssl_policies_v2_fail', 'counter', 1, metrics.send('describe_ssl_policies_v2_error', 'counter', 1,
metric_tags={"policy_names": policy_names, "error": e}) metric_tags={"policy_names": policy_names, "error": e})
sentry.captureException(extra={"policy_names": policy_names}) sentry.captureException(extra={"policy_names": policy_names})
raise raise
@sts_client('elb') @sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def describe_load_balancer_types(policies, **kwargs): def describe_load_balancer_types(policies, **kwargs):
""" """
Describe the policies with policy details. Describe the policies with policy details.
@@ -198,7 +241,7 @@ def describe_load_balancer_types(policies, **kwargs):
@sts_client('elb') @sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def attach_certificate(name, port, certificate_id, **kwargs): def attach_certificate(name, port, certificate_id, **kwargs):
""" """
Attaches a certificate to a listener, throws exception Attaches a certificate to a listener, throws exception
@@ -218,7 +261,7 @@ def attach_certificate(name, port, certificate_id, **kwargs):
@sts_client('elbv2') @sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000) @retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def attach_certificate_v2(listener_arn, port, certificates, **kwargs): def attach_certificate_v2(listener_arn, port, certificates, **kwargs):
""" """
Attaches a certificate to a listener, throws exception Attaches a certificate to a listener, throws exception