Modifying the way we report metrics. Relying on metric tags instead of the metric name for additional dimensions. (#1036)

This commit is contained in:
kevgliss 2018-01-02 15:26:31 -08:00 committed by GitHub
parent 8cad2f9f56
commit eea413a90f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 124 additions and 75 deletions

View File

@ -14,6 +14,7 @@ from flask import Blueprint, current_app
from flask_restful import reqparse, Resource, Api from flask_restful import reqparse, Resource, Api
from flask_principal import Identity, identity_changed from flask_principal import Identity, identity_changed
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.extensions import metrics from lemur.extensions import metrics
from lemur.common.utils import get_psuedo_random_string from lemur.common.utils import get_psuedo_random_string
@ -116,7 +117,6 @@ def retrieve_user(user_api_url, access_token):
profile = r.json() profile = r.json()
user = user_service.get_by_email(profile['email']) user = user_service.get_by_email(profile['email'])
metrics.send('successful_login', 'counter', 1)
return user, profile return user, profile
@ -267,7 +267,7 @@ class Login(Resource):
identity_changed.send(current_app._get_current_object(), identity_changed.send(current_app._get_current_object(),
identity=Identity(user.id)) identity=Identity(user.id))
metrics.send('successful_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user)) return dict(token=create_token(user))
# try ldap login # try ldap login
@ -279,16 +279,16 @@ class Login(Resource):
# Tell Flask-Principal the identity changed # Tell Flask-Principal the identity changed
identity_changed.send(current_app._get_current_object(), identity_changed.send(current_app._get_current_object(),
identity=Identity(user.id)) identity=Identity(user.id))
metrics.send('successful_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user)) return dict(token=create_token(user))
except Exception as e: except Exception as e:
current_app.logger.error("ldap error: {0}".format(e)) current_app.logger.error("ldap error: {0}".format(e))
ldap_message = 'ldap error: %s' % e ldap_message = 'ldap error: %s' % e
metrics.send('invalid_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message=ldap_message), 403 return dict(message=ldap_message), 403
# if not valid user - no certificates for you # if not valid user - no certificates for you
metrics.send('invalid_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid'), 403 return dict(message='The supplied credentials are invalid'), 403
@ -338,13 +338,13 @@ class Ping(Resource):
update_user(user, profile, roles) update_user(user, profile, roles)
if not user.active: if not user.active:
metrics.send('invalid_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid'), 403 return dict(message='The supplied credentials are invalid'), 403
# Tell Flask-Principal the identity changed # Tell Flask-Principal the identity changed
identity_changed.send(current_app._get_current_object(), identity=Identity(user.id)) identity_changed.send(current_app._get_current_object(), identity=Identity(user.id))
metrics.send('successful_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user)) return dict(token=create_token(user))
@ -387,12 +387,14 @@ class OAuth2(Resource):
update_user(user, profile, roles) update_user(user, profile, roles)
if not user.active: if not user.active:
metrics.send('invalid_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid'), 403 return dict(message='The supplied credentials are invalid'), 403
# Tell Flask-Principal the identity changed # Tell Flask-Principal the identity changed
identity_changed.send(current_app._get_current_object(), identity=Identity(user.id)) identity_changed.send(current_app._get_current_object(), identity=Identity(user.id))
metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user)) return dict(token=create_token(user))
@ -432,14 +434,14 @@ class Google(Resource):
user = user_service.get_by_email(profile['email']) user = user_service.get_by_email(profile['email'])
if not user.active: if not user.active:
metrics.send('invalid_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid.'), 403 return dict(message='The supplied credentials are invalid.'), 403
if user: if user:
metrics.send('successful_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user)) return dict(token=create_token(user))
metrics.send('invalid_login', 'counter', 1) metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
class Providers(Resource): class Providers(Resource):

View File

@ -20,6 +20,7 @@ from lemur import database
from lemur.extensions import sentry from lemur.extensions import sentry
from lemur.extensions import metrics from lemur.extensions import metrics
from lemur.plugins.base import plugins from lemur.plugins.base import plugins
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.deployment import service as deployment_service from lemur.deployment import service as deployment_service
from lemur.endpoints import service as endpoint_service from lemur.endpoints import service as endpoint_service
from lemur.notifications.messaging import send_rotation_notification from lemur.notifications.messaging import send_rotation_notification
@ -106,16 +107,17 @@ def request_rotation(endpoint, certificate, message, commit):
:param commit: :param commit:
:return: :return:
""" """
status = FAILURE_METRIC_STATUS
if commit: if commit:
try: try:
deployment_service.rotate_certificate(endpoint, certificate) deployment_service.rotate_certificate(endpoint, certificate)
metrics.send('endpoint_rotation_success', 'counter', 1)
if message: if message:
send_rotation_notification(certificate) send_rotation_notification(certificate)
status = SUCCESS_METRIC_STATUS
except Exception as e: except Exception as e:
metrics.send('endpoint_rotation_failure', 'counter', 1)
print( print(
"[!] Failed to rotate endpoint {0} to certificate {1} reason: {2}".format( "[!] Failed to rotate endpoint {0} to certificate {1} reason: {2}".format(
endpoint.name, endpoint.name,
@ -124,6 +126,8 @@ def request_rotation(endpoint, certificate, message, commit):
) )
) )
metrics.send('endpoint_rotation', 'counter', 1, metric_tags={'status': status})
def request_reissue(certificate, commit): def request_reissue(certificate, commit):
""" """
@ -132,16 +136,31 @@ def request_reissue(certificate, commit):
:param commit: :param commit:
:return: :return:
""" """
# set the lemur identity for all cli commands status = FAILURE_METRIC_STATUS
identity_changed.send(current_app._get_current_object(), identity=Identity(1)) try:
print("[+] {0} is eligible for re-issuance".format(certificate.name))
details = get_certificate_primitives(certificate) # set the lemur identity for all cli commands
print_certificate_details(details) identity_changed.send(current_app._get_current_object(), identity=Identity(1))
if commit: details = get_certificate_primitives(certificate)
new_cert = reissue_certificate(certificate, replace=True) print_certificate_details(details)
metrics.send('certificate_reissue_success', 'counter', 1)
print("[+] New certificate named: {0}".format(new_cert.name)) if commit:
new_cert = reissue_certificate(certificate, replace=True)
print("[+] New certificate named: {0}".format(new_cert.name))
status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
print(
"[!] Failed to reissue certificates. Reason: {}".format(
e
)
)
metrics.send('certificate_reissue', 'counter', 1, metric_tags={'status': status})
@manager.option('-e', '--endpoint', dest='endpoint_name', help='Name of the endpoint you wish to rotate.') @manager.option('-e', '--endpoint', dest='endpoint_name', help='Name of the endpoint you wish to rotate.')
@ -159,6 +178,8 @@ def rotate(endpoint_name, new_certificate_name, old_certificate_name, message, c
print("[+] Starting endpoint rotation.") print("[+] Starting endpoint rotation.")
status = FAILURE_METRIC_STATUS
try: try:
old_cert = validate_certificate(old_certificate_name) old_cert = validate_certificate(old_certificate_name)
new_cert = validate_certificate(new_certificate_name) new_cert = validate_certificate(new_certificate_name)
@ -182,14 +203,19 @@ def rotate(endpoint_name, new_certificate_name, old_certificate_name, message, c
print("[+] Rotating {0} to {1}".format(endpoint.name, endpoint.certificate.replaced[0].name)) print("[+] Rotating {0} to {1}".format(endpoint.name, endpoint.certificate.replaced[0].name))
request_rotation(endpoint, endpoint.certificate.replaced[0], message, commit) request_rotation(endpoint, endpoint.certificate.replaced[0], message, commit)
else: else:
metrics.send('endpoint_rotation_failure', 'counter', 1) metrics.send('endpoint_rotation', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
print("[!] Failed to rotate endpoint {0} reason: Multiple replacement certificates found.".format( print("[!] Failed to rotate endpoint {0} reason: Multiple replacement certificates found.".format(
endpoint.name endpoint.name
)) ))
status = SUCCESS_METRIC_STATUS
print("[+] Done!") print("[+] Done!")
except Exception as e: except Exception as e:
sentry.captureException() sentry.captureException()
metrics.send('endpoint_rotation_job', 'counter', 1, metric_tags={'status': status})
@manager.option('-o', '--old-certificate', dest='old_certificate_name', help='Name of the certificate you wish to reissue.') @manager.option('-o', '--old-certificate', dest='old_certificate_name', help='Name of the certificate you wish to reissue.')
@manager.option('-c', '--commit', dest='commit', action='store_true', default=False, help='Persist changes.') @manager.option('-c', '--commit', dest='commit', action='store_true', default=False, help='Persist changes.')
@ -204,26 +230,29 @@ def reissue(old_certificate_name, commit):
print("[+] Starting certificate re-issuance.") print("[+] Starting certificate re-issuance.")
status = FAILURE_METRIC_STATUS
try: try:
old_cert = validate_certificate(old_certificate_name) old_cert = validate_certificate(old_certificate_name)
if not old_cert: if not old_cert:
for certificate in get_all_pending_reissue(): for certificate in get_all_pending_reissue():
print("[+] {0} is eligible for re-issuance".format(certificate.name))
request_reissue(certificate, commit) request_reissue(certificate, commit)
else: else:
request_reissue(old_cert, commit) request_reissue(old_cert, commit)
status = SUCCESS_METRIC_STATUS
print("[+] Done!") print("[+] Done!")
except Exception as e: except Exception as e:
sentry.captureException() sentry.captureException()
metrics.send('certificate_reissue_failure', 'counter', 1)
print( print(
"[!] Failed to reissue certificates. Reason: {}".format( "[!] Failed to reissue certificates. Reason: {}".format(
e e
) )
) )
metrics.send('certificate_reissue_job', 'counter', 1, metric_tags={'status': status})
@manager.option('-f', '--fqdns', dest='fqdns', help='FQDNs to query. Multiple fqdns specified via comma.') @manager.option('-f', '--fqdns', dest='fqdns', help='FQDNs to query. Multiple fqdns specified via comma.')
@manager.option('-i', '--issuer', dest='issuer', help='Issuer to query for.') @manager.option('-i', '--issuer', dest='issuer', help='Issuer to query for.')
@ -275,9 +304,11 @@ def worker(data, commit, reason):
if commit: if commit:
plugin.revoke_certificate(cert, reason) plugin.revoke_certificate(cert, reason)
metrics.send('certificate_revoke', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
except Exception as e: except Exception as e:
sentry.captureException() sentry.captureException()
metrics.send('certificate_revoke_failure', 'counter', 1) metrics.send('certificate_revoke', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
print( print(
"[!] Failed to revoke certificates. Reason: {}".format( "[!] Failed to revoke certificates. Reason: {}".format(
e e

View File

@ -33,6 +33,7 @@ from lemur.common import defaults
from lemur.plugins.base import plugins from lemur.plugins.base import plugins
from lemur.extensions import metrics from lemur.extensions import metrics
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.models import certificate_associations, certificate_source_associations, \ from lemur.models import certificate_associations, certificate_source_associations, \
certificate_destination_associations, certificate_notification_associations, \ certificate_destination_associations, certificate_notification_associations, \
@ -358,15 +359,16 @@ def update_destinations(target, value, initiator):
:return: :return:
""" """
destination_plugin = plugins.get(value.plugin_name) destination_plugin = plugins.get(value.plugin_name)
status = FAILURE_METRIC_STATUS
try: try:
if target.private_key: if target.private_key:
destination_plugin.upload(target.name, target.body, target.private_key, target.chain, value.options) destination_plugin.upload(target.name, target.body, target.private_key, target.chain, value.options)
status = SUCCESS_METRIC_STATUS
except Exception as e: except Exception as e:
sentry.captureException() sentry.captureException()
current_app.logger.exception(e)
metrics.send('destination_upload_failure', 'counter', 1, metrics.send('destination_upload', 'counter', 1,
metric_tags={'certificate': target.name, 'destination': value.label}) metric_tags={'status': status, 'certificate': target.name, 'destination': value.label})
@event.listens_for(Certificate.replaces, 'append') @event.listens_for(Certificate.replaces, 'append')

View File

@ -6,3 +6,6 @@
SAN_NAMING_TEMPLATE = "SAN-{subject}-{issuer}-{not_before}-{not_after}" SAN_NAMING_TEMPLATE = "SAN-{subject}-{issuer}-{not_before}-{not_after}"
DEFAULT_NAMING_TEMPLATE = "{subject}-{issuer}-{not_before}-{not_after}" DEFAULT_NAMING_TEMPLATE = "{subject}-{issuer}-{not_before}-{not_after}"
NONSTANDARD_NAMING_TEMPLATE = "{issuer}-{not_before}-{not_after}" NONSTANDARD_NAMING_TEMPLATE = "{issuer}-{not_before}-{not_after}"
SUCCESS_METRIC_STATUS = 'success'
FAILURE_METRIC_STATUS = 'failure'

View File

@ -10,8 +10,6 @@
""" """
import arrow import arrow
from flask import current_app
from sqlalchemy import func from sqlalchemy import func
from lemur import database from lemur import database
@ -132,19 +130,6 @@ def update(endpoint_id, **kwargs):
return endpoint return endpoint
def rotate_certificate(endpoint, new_cert):
"""Rotates a certificate on a given endpoint."""
try:
endpoint.source.plugin.update_endpoint(endpoint, new_cert)
endpoint.certificate = new_cert
database.update(endpoint)
metrics.send('certificate_rotate_success', 'counter', 1, metric_tags={'endpoint': endpoint.name, 'source': endpoint.source.label})
except Exception as e:
metrics.send('certificate_rotate_failure', 'counter', 1, metric_tags={'endpoint': endpoint.name})
current_app.logger.exception(e)
raise e
def render(args): def render(args):
""" """
Helper that helps us render the REST Api responses. Helper that helps us render the REST Api responses.

View File

@ -7,6 +7,8 @@
""" """
from flask_script import Manager from flask_script import Manager
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.extensions import sentry, metrics
from lemur.notifications.messaging import send_expiration_notifications from lemur.notifications.messaging import send_expiration_notifications
manager = Manager(usage="Handles notification related tasks.") manager = Manager(usage="Handles notification related tasks.")
@ -25,11 +27,18 @@ def expirations(exclude):
:return: :return:
""" """
print("Starting to notify subscribers about expiring certificates!") status = FAILURE_METRIC_STATUS
success, failed = send_expiration_notifications(exclude) try:
print( print("Starting to notify subscribers about expiring certificates!")
"Finished notifying subscribers about expiring certificates! Sent: {success} Failed: {failed}".format( success, failed = send_expiration_notifications(exclude)
success=success, print(
failed=failed "Finished notifying subscribers about expiring certificates! Sent: {success} Failed: {failed}".format(
success=success,
failed=failed
)
) )
) status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
metrics.send('expiration_notification_job', 'counter', 1, metric_tags={'status': status})

View File

@ -18,6 +18,7 @@ from flask import current_app
from sqlalchemy import and_ from sqlalchemy import and_
from lemur import database, metrics from lemur import database, metrics
from lemur.constants import FAILURE_METRIC_STATUS, SUCCESS_METRIC_STATUS
from lemur.extensions import sentry from lemur.extensions import sentry
from lemur.common.utils import windowed_query from lemur.common.utils import windowed_query
@ -94,14 +95,17 @@ def send_notification(event_type, data, targets, notification):
:param notification: :param notification:
:return: :return:
""" """
status = FAILURE_METRIC_STATUS
try: try:
notification.plugin.send(event_type, data, targets, notification.options) notification.plugin.send(event_type, data, targets, notification.options)
metrics.send('{0}_notification_sent'.format(event_type), 'counter', 1) status = SUCCESS_METRIC_STATUS
return True
except Exception as e: except Exception as e:
sentry.captureException() sentry.captureException()
metrics.send('{0}_notification_failure'.format(event_type), 'counter', 1)
current_app.logger.exception(e) metrics.send('notification', 'counter', 1, metric_tags={'status': status, 'event_type': event_type})
if status == SUCCESS_METRIC_STATUS:
return True
def send_expiration_notifications(exclude): def send_expiration_notifications(exclude):
@ -147,8 +151,10 @@ def send_rotation_notification(certificate, notification_plugin=None):
rotated. rotated.
:param certificate: :param certificate:
:param notification_plugin:
:return: :return:
""" """
status = FAILURE_METRIC_STATUS
if not notification_plugin: if not notification_plugin:
notification_plugin = plugins.get(current_app.config.get('LEMUR_DEFAULT_NOTIFICATION_PLUGIN')) notification_plugin = plugins.get(current_app.config.get('LEMUR_DEFAULT_NOTIFICATION_PLUGIN'))
@ -156,12 +162,14 @@ def send_rotation_notification(certificate, notification_plugin=None):
try: try:
notification_plugin.send('rotation', data, [data['owner']]) notification_plugin.send('rotation', data, [data['owner']])
metrics.send('rotation_notification_sent', 'counter', 1) status = SUCCESS_METRIC_STATUS
return True
except Exception as e: except Exception as e:
sentry.captureException() sentry.captureException()
metrics.send('rotation_notification_failure', 'counter', 1)
current_app.logger.exception(e) metrics.send('notification', 'counter', 1, metric_tags={'status': status, 'event_type': 'rotation'})
if status == SUCCESS_METRIC_STATUS:
return True
def needs_notification(certificate): def needs_notification(certificate):

View File

@ -14,6 +14,8 @@ from flask_script import Manager
from flask import current_app from flask import current_app
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.extensions import metrics, sentry from lemur.extensions import metrics, sentry
from lemur.plugins.base import plugins from lemur.plugins.base import plugins
@ -54,6 +56,8 @@ def validate_sources(source_strings):
def sync(source_strings): def sync(source_strings):
sources = validate_sources(source_strings) sources = validate_sources(source_strings)
for source in sources: for source in sources:
status = FAILURE_METRIC_STATUS
start_time = time.time() start_time = time.time()
print("[+] Staring to sync source: {label}!\n".format(label=source.label)) print("[+] Staring to sync source: {label}!\n".format(label=source.label))
@ -79,6 +83,8 @@ def sync(source_strings):
time=(time.time() - start_time) time=(time.time() - start_time)
) )
) )
status = SUCCESS_METRIC_STATUS
except Exception as e: except Exception as e:
current_app.logger.exception(e) current_app.logger.exception(e)
@ -86,9 +92,10 @@ def sync(source_strings):
"[X] Failed syncing source {label}!\n".format(label=source.label) "[X] Failed syncing source {label}!\n".format(label=source.label)
) )
metrics.send('sync_failed', 'counter', 1, metric_tags={'source': source.label})
sentry.captureException() sentry.captureException()
metrics.send('source_sync', 'counter', 1, metric_tags={'source': source.label, 'status': status})
@manager.option('-s', '--sources', dest='source_strings', action='append', help='Sources to operate on.') @manager.option('-s', '--sources', dest='source_strings', action='append', help='Sources to operate on.')
@manager.option('-c', '--commit', dest='commit', action='store_true', default=False, help='Persist changes.') @manager.option('-c', '--commit', dest='commit', action='store_true', default=False, help='Persist changes.')
@ -109,23 +116,25 @@ def clean(source_strings, commit):
cleaned = 0 cleaned = 0
for certificate in certificate_service.get_all_pending_cleaning(source): for certificate in certificate_service.get_all_pending_cleaning(source):
if commit: status = FAILURE_METRIC_STATUS
try: if commit:
s.clean(certificate, source.options) try:
certificate.sources.remove(source) s.clean(certificate, source.options)
certificate_service.database.update(certificate) certificate.sources.remove(source)
metrics.send('clean_success', 'counter', 1, metric_tags={'source': source.label}) certificate_service.database.update(certificate)
except Exception as e: status = SUCCESS_METRIC_STATUS
current_app.logger.exception(e) except Exception as e:
metrics.send('clean_failed', 'counter', 1, metric_tags={'source': source.label}) current_app.logger.exception(e)
sentry.captureException() sentry.captureException()
current_app.logger.warning("Removed {0} from source {1} during cleaning".format( metrics.send('clean', 'counter', 1, metric_tags={'source': source.label, 'status': status})
certificate.name,
source.label
))
cleaned += 1 current_app.logger.warning("Removed {0} from source {1} during cleaning".format(
certificate.name,
source.label
))
cleaned += 1
print( print(
"[+] Finished cleaning source: {label}. Removed {cleaned} certificates from source. Run Time: {time}\n".format( "[+] Finished cleaning source: {label}. Removed {cleaned} certificates from source. Run Time: {time}\n".format(