Strip out self-polling logic and rely on ACME; Enhance ELB logging and retries

This commit is contained in:
Curtis Castrapel 2019-04-26 10:16:18 -07:00
parent b300a21948
commit 1e64851d79
3 changed files with 104 additions and 78 deletions

View File

@ -64,11 +64,10 @@ def wait_for_dns_change(change_id, account_number=None):
metrics.send('wait_for_dns_change_fail', 'counter', 1)
sentry.captureException(
extra={
"fqdn": fqdn, "txt_record": token}
"fqdn": str(fqdn), "txt_record": str(token)}
)
metrics.send('wait_for_dns_change_error', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': token})
raise Exception("Unable to query DNS token for fqdn {}.".format(fqdn))
return
@ -155,8 +154,8 @@ def delete_txt_record(change_id, account_number, domain, token):
except DynectDeleteError:
sentry.captureException(
extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
"txt_record": txt_record.txtdata}
"fqdn": str(fqdn), "zone_name": str(zone_name), "node_name": str(node_name),
"txt_record": str(txt_record.txtdata)}
)
metrics.send('delete_txt_record_deleteerror', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
@ -166,11 +165,11 @@ def delete_txt_record(change_id, account_number, domain, token):
except DynectUpdateError:
sentry.captureException(
extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
"txt_record": txt_record.txtdata}
"fqdn": str(fqdn), "zone_name": str(zone_name), "node_name": str(node_name),
"txt_record": str(txt_record.txtdata)}
)
metrics.send('delete_txt_record_publish_error', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})
metric_tags={'fqdn': str(fqdn), 'txt_record': str(txt_record.txtdata)})
def delete_acme_txt_records(domain):
@ -201,8 +200,8 @@ def delete_acme_txt_records(domain):
except DynectDeleteError:
sentry.captureException(
extra={
"fqdn": fqdn, "zone_name": zone_name, "node_name": node_name,
"txt_record": txt_record.txtdata}
"fqdn": str(fqdn), "zone_name": str(zone_name), "node_name": str(node_name),
"txt_record": str(txt_record.txtdata)}
)
metrics.send('delete_txt_record_deleteerror', 'counter', 1,
metric_tags={'fqdn': fqdn, 'txt_record': txt_record.txtdata})

View File

@ -102,49 +102,33 @@ class AcmeHandler(object):
metrics.send('complete_dns_challenge_error_no_dnsproviders', 'counter', 1)
raise Exception("No DNS providers found for domain: {}".format(authz_record.host))
for dns_provider in dns_providers:
# Grab account number (For Route53)
dns_provider_options = json.loads(dns_provider.credentials)
account_number = dns_provider_options.get("account_id")
dns_provider_plugin = self.get_dns_provider(dns_provider.provider_type)
for change_id in authz_record.change_id:
try:
dns_provider_plugin.wait_for_dns_change(change_id, account_number=account_number)
except Exception:
metrics.send('complete_dns_challenge_error', 'counter', 1)
sentry.captureException()
current_app.logger.debug(
f"Unable to resolve DNS challenge for change_id: {change_id}, account_id: "
f"{account_number}", exc_info=True)
raise
for dns_challenge in authz_record.dns_challenge:
response = dns_challenge.response(acme_client.client.net.key)
for dns_challenge in authz_record.dns_challenge:
response = dns_challenge.response(acme_client.client.net.key)
verified = response.simple_verify(
dns_challenge.chall,
authz_record.host,
acme_client.client.net.key.public_key()
)
verified = response.simple_verify(
dns_challenge.chall,
authz_record.host,
acme_client.client.net.key.public_key()
)
if not verified:
metrics.send('complete_dns_challenge_verification_error', 'counter', 1)
raise ValueError("Failed verification")
if not verified:
metrics.send('complete_dns_challenge_verification_error', 'counter', 1)
raise ValueError("Failed verification")
time.sleep(5)
acme_client.answer_challenge(dns_challenge, response)
time.sleep(5)
acme_client.answer_challenge(dns_challenge, response)
def request_certificate(self, acme_client, authorizations, order):
for authorization in authorizations:
for authz in authorization.authz:
authorization_resource, _ = acme_client.poll(authz)
deadline = datetime.datetime.now() + datetime.timedelta(seconds=90)
deadline = datetime.datetime.now() + datetime.timedelta(seconds=360)
try:
orderr = acme_client.poll_and_finalize(order, deadline)
except AcmeError:
sentry.captureException(extra={"order_url": order.uri})
sentry.captureException(extra={"order_url": str(order.uri)})
metrics.send('request_certificate_error', 'counter', 1)
current_app.logger.error(f"Unable to resolve Acme order: {order.uri}", exc_info=True)
raise

View File

@ -21,14 +21,22 @@ def retry_throttled(exception):
:param exception:
:return:
"""
# Log details about the exception
try:
raise exception
except Exception as e:
current_app.logger.error("ELB retry_throttled triggered", exc_info=True)
metrics.send('elb_retry', 'counter', 1,
metric_tags={"exception": e})
sentry.captureException()
if isinstance(exception, botocore.exceptions.ClientError):
if exception.response['Error']['Code'] == 'LoadBalancerNotFound':
return False
if exception.response['Error']['Code'] == 'CertificateNotFound':
return False
metrics.send('elb_retry', 'counter', 1)
return True
@ -63,16 +71,20 @@ def get_all_elbs(**kwargs):
:return:
"""
elbs = []
try:
while True:
response = get_elbs(**kwargs)
while True:
response = get_elbs(**kwargs)
elbs += response['LoadBalancerDescriptions']
elbs += response['LoadBalancerDescriptions']
if not response.get('NextMarker'):
return elbs
else:
kwargs.update(dict(Marker=response['NextMarker']))
if not response.get('NextMarker'):
return elbs
else:
kwargs.update(dict(Marker=response['NextMarker']))
except Exception as e: # noqa
metrics.send('get_all_elbs_error', 'counter', 1)
sentry.captureException()
raise
def get_all_elbs_v2(**kwargs):
@ -84,18 +96,23 @@ def get_all_elbs_v2(**kwargs):
"""
elbs = []
while True:
response = get_elbs_v2(**kwargs)
elbs += response['LoadBalancers']
try:
while True:
response = get_elbs_v2(**kwargs)
elbs += response['LoadBalancers']
if not response.get('NextMarker'):
return elbs
else:
kwargs.update(dict(Marker=response['NextMarker']))
if not response.get('NextMarker'):
return elbs
else:
kwargs.update(dict(Marker=response['NextMarker']))
except Exception as e: # noqa
metrics.send('get_all_elbs_v2_error', 'counter', 1)
sentry.captureException()
raise
@sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def get_listener_arn_from_endpoint(endpoint_name, endpoint_port, **kwargs):
"""
Get a listener ARN from an endpoint.
@ -103,27 +120,40 @@ def get_listener_arn_from_endpoint(endpoint_name, endpoint_port, **kwargs):
:param endpoint_port:
:return:
"""
client = kwargs.pop('client')
elbs = client.describe_load_balancers(Names=[endpoint_name])
for elb in elbs['LoadBalancers']:
listeners = client.describe_listeners(LoadBalancerArn=elb['LoadBalancerArn'])
for listener in listeners['Listeners']:
if listener['Port'] == endpoint_port:
return listener['ListenerArn']
try:
client = kwargs.pop('client')
elbs = client.describe_load_balancers(Names=[endpoint_name])
for elb in elbs['LoadBalancers']:
listeners = client.describe_listeners(LoadBalancerArn=elb['LoadBalancerArn'])
for listener in listeners['Listeners']:
if listener['Port'] == endpoint_port:
return listener['ListenerArn']
except Exception as e: # noqa
metrics.send('get_listener_arn_from_endpoint_error', 'counter', 1,
metric_tags={"error": e, "endpoint_name": endpoint_name, "endpoint_port": endpoint_port})
sentry.captureException(extra={"endpoint_name": str(endpoint_name),
"endpoint_port": str(endpoint_port)})
raise
@sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def get_elbs(**kwargs):
"""
Fetches one page elb objects for a given account and region.
"""
client = kwargs.pop('client')
return client.describe_load_balancers(**kwargs)
try:
client = kwargs.pop('client')
return client.describe_load_balancers(**kwargs)
except Exception as e: # noqa
metrics.send('get_elbs_error', 'counter', 1,
metric_tags={"error": e})
sentry.captureException()
raise
@sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def get_elbs_v2(**kwargs):
"""
Fetches one page of elb objects for a given account and region.
@ -131,12 +161,18 @@ def get_elbs_v2(**kwargs):
:param kwargs:
:return:
"""
client = kwargs.pop('client')
return client.describe_load_balancers(**kwargs)
try:
client = kwargs.pop('client')
return client.describe_load_balancers(**kwargs)
except Exception as e: # noqa
metrics.send('get_elbs_v2_error', 'counter', 1,
metric_tags={"error": e})
sentry.captureException()
raise
@sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def describe_listeners_v2(**kwargs):
"""
Fetches one page of listener objects for a given elb arn.
@ -144,8 +180,14 @@ def describe_listeners_v2(**kwargs):
:param kwargs:
:return:
"""
client = kwargs.pop('client')
return client.describe_listeners(**kwargs)
try:
client = kwargs.pop('client')
return client.describe_listeners(**kwargs)
except Exception as e: # noqa
metrics.send('describe_listeners_v2_error', 'counter', 1,
metric_tags={"error": e})
sentry.captureException()
raise
@sts_client('elb')
@ -157,11 +199,12 @@ def describe_load_balancer_policies(load_balancer_name, policy_names, **kwargs):
:param load_balancer_name:
:return:
"""
try:
return kwargs['client'].describe_load_balancer_policies(LoadBalancerName=load_balancer_name,
PolicyNames=policy_names)
except Exception as e: # noqa
metrics.send('describe_load_balancer_policies_fail', 'counter', 1,
metrics.send('describe_load_balancer_policies_error', 'counter', 1,
metric_tags={"load_balancer_name": load_balancer_name, "policy_names": policy_names, "error": e})
sentry.captureException(extra={"load_balancer_name": load_balancer_name, "policy_names": policy_names})
raise
@ -179,14 +222,14 @@ def describe_ssl_policies_v2(policy_names, **kwargs):
try:
return kwargs['client'].describe_ssl_policies(Names=policy_names)
except Exception as e: # noqa
metrics.send('describe_ssl_policies_v2_fail', 'counter', 1,
metrics.send('describe_ssl_policies_v2_error', 'counter', 1,
metric_tags={"policy_names": policy_names, "error": e})
sentry.captureException(extra={"policy_names": policy_names})
raise
@sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def describe_load_balancer_types(policies, **kwargs):
"""
Describe the policies with policy details.
@ -198,7 +241,7 @@ def describe_load_balancer_types(policies, **kwargs):
@sts_client('elb')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def attach_certificate(name, port, certificate_id, **kwargs):
"""
Attaches a certificate to a listener, throws exception
@ -218,7 +261,7 @@ def attach_certificate(name, port, certificate_id, **kwargs):
@sts_client('elbv2')
@retry(retry_on_exception=retry_throttled, wait_fixed=2000)
@retry(retry_on_exception=retry_throttled, wait_fixed=2000, stop_max_attempt_number=20)
def attach_certificate_v2(listener_arn, port, certificates, **kwargs):
"""
Attaches a certificate to a listener, throws exception