From 9a02230d63419285ef4836784462de3538ce4847 Mon Sep 17 00:00:00 2001 From: Hossein Shafagh Date: Wed, 7 Aug 2019 17:48:06 -0700 Subject: [PATCH 1/5] adding soft time outs for celery --- lemur/common/celery.py | 91 +++++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/lemur/common/celery.py b/lemur/common/celery.py index f5edb9ab..e868585a 100644 --- a/lemur/common/celery.py +++ b/lemur/common/celery.py @@ -282,7 +282,7 @@ def clean_all_sources(): metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +@celery.task(soft_time_limit=600) def clean_source(source): """ This celery task will clean the specified source. This is a destructive operation that will delete unused @@ -298,7 +298,13 @@ def clean_source(source): "source": source, } current_app.logger.debug(log_data) - clean([source], True) + try: + clean([source], True) + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("clean_source_timeout", "counter", 1) @celery.task() @@ -391,7 +397,7 @@ def sync_source_destination(): metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +@celery.task(soft_time_limit=3600) def certificate_reissue(): """ This celery task reissues certificates which are pending reissue @@ -403,14 +409,21 @@ def certificate_reissue(): "message": "reissuing certificates", } current_app.logger.debug(log_data) - cli_certificate.reissue(None, True) + try: + cli_certificate.reissue(None, True) + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("certificate_reissue_timeout", "counter", 1) + return log_data["message"] = "reissuance completed" current_app.logger.debug(log_data) red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +@celery.task(soft_time_limit=3600) def certificate_rotate(): """ This celery task rotates certificates which are reissued but having endpoints attached to the replaced cert @@ -422,14 +435,21 @@ def certificate_rotate(): "message": "rotating certificates", } current_app.logger.debug(log_data) - cli_certificate.rotate(None, None, None, None, True) + try: + cli_certificate.rotate(None, None, None, None, True) + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("certificate_rotate_timeout", "counter", 1) + return log_data["message"] = "rotation completed" current_app.logger.debug(log_data) red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +@celery.task(soft_time_limit=3600) def endpoints_expire(): """ This celery task removes all endpoints that have not been recently updated @@ -441,12 +461,19 @@ def endpoints_expire(): "message": "endpoints expire", } current_app.logger.debug(log_data) - cli_endpoints.expire(2) # Time in hours + try: + cli_endpoints.expire(2) # Time in hours + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("endpoints_expire_timeout", "counter", 1) + return red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +@celery.task(soft_time_limit=600) def get_all_zones(): """ This celery syncs all zones from the available dns providers @@ -458,29 +485,58 @@ def get_all_zones(): "message": "refresh all zones from available DNS providers", } current_app.logger.debug(log_data) - cli_dns_providers.get_all_zones() + try: + cli_dns_providers.get_all_zones() + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("get_all_zones_timeout", "counter", 1) + return red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +@celery.task(soft_time_limit=3600) def check_revoked(): """ This celery task attempts to check if any certs are expired :return: """ + + task_id = None + if celery.current_task: + task_id = celery.current_task.request.id + function = f"{__name__}.{sys._getframe().f_code.co_name}" log_data = { "function": function, "message": "check if any certificates are revoked revoked", } + + if task_id and is_task_active(function, task_id, (id,)): + log_data["message"] = "Skipping task: Task is already active" + current_app.logger.debug(log_data) + return + current_app.logger.debug(log_data) - cli_certificate.check_revoked() + try: + cli_certificate.check_revoked() + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("check_revoked_timeout", "counter", 1) + return + red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) -@celery.task() +check_revoked() + + +@celery.task(soft_time_limit=3600) def notify_expirations(): """ This celery task notifies about expiring certs @@ -492,6 +548,13 @@ def notify_expirations(): "message": "notify for cert expiration", } current_app.logger.debug(log_data) - cli_notification.expirations(current_app.config.get("EXCLUDE_CN_FROM_NOTIFICATION", [])) + try: + cli_notification.expirations(current_app.config.get("EXCLUDE_CN_FROM_NOTIFICATION", [])) + except SoftTimeLimitExceeded: + log_data["message"] = "Checking revoked: Time limit exceeded." + current_app.logger.error(log_data) + sentry.captureException() + metrics.send("notify_expirations_timeout", "counter", 1) + return red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) From 3b9b94623fc0571e3777232906049b4c857e058f Mon Sep 17 00:00:00 2001 From: Hossein Shafagh Date: Wed, 7 Aug 2019 18:06:59 -0700 Subject: [PATCH 2/5] cleaning up --- lemur/common/celery.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lemur/common/celery.py b/lemur/common/celery.py index e868585a..dfeb7017 100644 --- a/lemur/common/celery.py +++ b/lemur/common/celery.py @@ -533,9 +533,6 @@ def check_revoked(): metrics.send(f"{function}.success", 'counter', 1) -check_revoked() - - @celery.task(soft_time_limit=3600) def notify_expirations(): """ From a3dfc3ef0ad9c43fab1862a6c5b4361096b452ff Mon Sep 17 00:00:00 2001 From: Hossein Shafagh Date: Tue, 13 Aug 2019 11:58:58 -0700 Subject: [PATCH 3/5] consistency --- lemur/common/celery.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lemur/common/celery.py b/lemur/common/celery.py index dfeb7017..acb20081 100644 --- a/lemur/common/celery.py +++ b/lemur/common/celery.py @@ -503,18 +503,18 @@ def check_revoked(): This celery task attempts to check if any certs are expired :return: """ - + function = f"{__name__}.{sys._getframe().f_code.co_name}" task_id = None if celery.current_task: task_id = celery.current_task.request.id - function = f"{__name__}.{sys._getframe().f_code.co_name}" log_data = { "function": function, "message": "check if any certificates are revoked revoked", + "task_id": task_id, } - if task_id and is_task_active(function, task_id, (id,)): + if task_id and is_task_active(function, task_id, None): log_data["message"] = "Skipping task: Task is already active" current_app.logger.debug(log_data) return From 6e17d36d76b228d878ecab1c2f983de25237080f Mon Sep 17 00:00:00 2001 From: Hossein Shafagh Date: Tue, 13 Aug 2019 12:16:23 -0700 Subject: [PATCH 4/5] typos --- lemur/common/celery.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lemur/common/celery.py b/lemur/common/celery.py index acb20081..e36d8b35 100644 --- a/lemur/common/celery.py +++ b/lemur/common/celery.py @@ -301,7 +301,7 @@ def clean_source(source): try: clean([source], True) except SoftTimeLimitExceeded: - log_data["message"] = "Checking revoked: Time limit exceeded." + log_data["message"] = "Clean source: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() metrics.send("clean_source_timeout", "counter", 1) @@ -412,7 +412,7 @@ def certificate_reissue(): try: cli_certificate.reissue(None, True) except SoftTimeLimitExceeded: - log_data["message"] = "Checking revoked: Time limit exceeded." + log_data["message"] = "Certificate reissue: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() metrics.send("certificate_reissue_timeout", "counter", 1) @@ -438,7 +438,7 @@ def certificate_rotate(): try: cli_certificate.rotate(None, None, None, None, True) except SoftTimeLimitExceeded: - log_data["message"] = "Checking revoked: Time limit exceeded." + log_data["message"] = "Certificate rotate: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() metrics.send("certificate_rotate_timeout", "counter", 1) @@ -464,7 +464,7 @@ def endpoints_expire(): try: cli_endpoints.expire(2) # Time in hours except SoftTimeLimitExceeded: - log_data["message"] = "Checking revoked: Time limit exceeded." + log_data["message"] = "endpoint expire: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() metrics.send("endpoints_expire_timeout", "counter", 1) @@ -488,7 +488,7 @@ def get_all_zones(): try: cli_dns_providers.get_all_zones() except SoftTimeLimitExceeded: - log_data["message"] = "Checking revoked: Time limit exceeded." + log_data["message"] = "get all zones: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() metrics.send("get_all_zones_timeout", "counter", 1) @@ -548,7 +548,7 @@ def notify_expirations(): try: cli_notification.expirations(current_app.config.get("EXCLUDE_CN_FROM_NOTIFICATION", [])) except SoftTimeLimitExceeded: - log_data["message"] = "Checking revoked: Time limit exceeded." + log_data["message"] = "Notify expiring Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() metrics.send("notify_expirations_timeout", "counter", 1) From 2de3f287ab3ace80e6fb750e12d6f4b990bb02c5 Mon Sep 17 00:00:00 2001 From: Hossein Shafagh Date: Tue, 13 Aug 2019 12:21:27 -0700 Subject: [PATCH 5/5] standardizing the timeouts to easier monitor any timeouts --- lemur/common/celery.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lemur/common/celery.py b/lemur/common/celery.py index e36d8b35..a3f9cc5f 100644 --- a/lemur/common/celery.py +++ b/lemur/common/celery.py @@ -304,7 +304,7 @@ def clean_source(source): log_data["message"] = "Clean source: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("clean_source_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) @celery.task() @@ -359,9 +359,8 @@ def sync_source(source): log_data["message"] = "Error syncing source: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send( - "sync_source_timeout", "counter", 1, metric_tags={"source": source} - ) + metrics.send("sync_source_timeout", "counter", 1, metric_tags={"source": source}) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return log_data["message"] = "Done syncing source" @@ -415,8 +414,9 @@ def certificate_reissue(): log_data["message"] = "Certificate reissue: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("certificate_reissue_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return + log_data["message"] = "reissuance completed" current_app.logger.debug(log_data) red.set(f'{function}.last_success', int(time.time())) @@ -441,8 +441,9 @@ def certificate_rotate(): log_data["message"] = "Certificate rotate: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("certificate_rotate_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return + log_data["message"] = "rotation completed" current_app.logger.debug(log_data) red.set(f'{function}.last_success', int(time.time())) @@ -467,8 +468,9 @@ def endpoints_expire(): log_data["message"] = "endpoint expire: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("endpoints_expire_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return + red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) @@ -491,8 +493,9 @@ def get_all_zones(): log_data["message"] = "get all zones: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("get_all_zones_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return + red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1) @@ -526,7 +529,7 @@ def check_revoked(): log_data["message"] = "Checking revoked: Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("check_revoked_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return red.set(f'{function}.last_success', int(time.time())) @@ -551,7 +554,8 @@ def notify_expirations(): log_data["message"] = "Notify expiring Time limit exceeded." current_app.logger.error(log_data) sentry.captureException() - metrics.send("notify_expirations_timeout", "counter", 1) + metrics.send("celery.timeout", "counter", 1, metric_tags={"function": function}) return + red.set(f'{function}.last_success', int(time.time())) metrics.send(f"{function}.success", 'counter', 1)