Modifying the way we report metrics. Relying on metric tags instead of the the metric name for additional dimensions. (#1036)

This commit is contained in:
kevgliss 2018-01-02 15:26:31 -08:00 committed by GitHub
parent 8cad2f9f56
commit eea413a90f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 124 additions and 75 deletions

View File

@ -14,6 +14,7 @@ from flask import Blueprint, current_app
from flask_restful import reqparse, Resource, Api
from flask_principal import Identity, identity_changed
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.extensions import metrics
from lemur.common.utils import get_psuedo_random_string
@ -116,7 +117,6 @@ def retrieve_user(user_api_url, access_token):
profile = r.json()
user = user_service.get_by_email(profile['email'])
metrics.send('successful_login', 'counter', 1)
return user, profile
@ -267,7 +267,7 @@ class Login(Resource):
identity_changed.send(current_app._get_current_object(),
identity=Identity(user.id))
metrics.send('successful_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user))
# try ldap login
@ -279,16 +279,16 @@ class Login(Resource):
# Tell Flask-Principal the identity changed
identity_changed.send(current_app._get_current_object(),
identity=Identity(user.id))
metrics.send('successful_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user))
except Exception as e:
current_app.logger.error("ldap error: {0}".format(e))
ldap_message = 'ldap error: %s' % e
metrics.send('invalid_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message=ldap_message), 403
# if not valid user - no certificates for you
metrics.send('invalid_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid'), 403
@ -338,13 +338,13 @@ class Ping(Resource):
update_user(user, profile, roles)
if not user.active:
metrics.send('invalid_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid'), 403
# Tell Flask-Principal the identity changed
identity_changed.send(current_app._get_current_object(), identity=Identity(user.id))
metrics.send('successful_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user))
@ -387,12 +387,14 @@ class OAuth2(Resource):
update_user(user, profile, roles)
if not user.active:
metrics.send('invalid_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid'), 403
# Tell Flask-Principal the identity changed
identity_changed.send(current_app._get_current_object(), identity=Identity(user.id))
metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user))
@ -432,14 +434,14 @@ class Google(Resource):
user = user_service.get_by_email(profile['email'])
if not user.active:
metrics.send('invalid_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
return dict(message='The supplied credentials are invalid.'), 403
if user:
metrics.send('successful_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
return dict(token=create_token(user))
metrics.send('invalid_login', 'counter', 1)
metrics.send('login', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
class Providers(Resource):

View File

@ -20,6 +20,7 @@ from lemur import database
from lemur.extensions import sentry
from lemur.extensions import metrics
from lemur.plugins.base import plugins
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.deployment import service as deployment_service
from lemur.endpoints import service as endpoint_service
from lemur.notifications.messaging import send_rotation_notification
@ -106,16 +107,17 @@ def request_rotation(endpoint, certificate, message, commit):
:param commit:
:return:
"""
status = FAILURE_METRIC_STATUS
if commit:
try:
deployment_service.rotate_certificate(endpoint, certificate)
metrics.send('endpoint_rotation_success', 'counter', 1)
if message:
send_rotation_notification(certificate)
status = SUCCESS_METRIC_STATUS
except Exception as e:
metrics.send('endpoint_rotation_failure', 'counter', 1)
print(
"[!] Failed to rotate endpoint {0} to certificate {1} reason: {2}".format(
endpoint.name,
@ -124,6 +126,8 @@ def request_rotation(endpoint, certificate, message, commit):
)
)
metrics.send('endpoint_rotation', 'counter', 1, metric_tags={'status': status})
def request_reissue(certificate, commit):
"""
@ -132,16 +136,31 @@ def request_reissue(certificate, commit):
:param commit:
:return:
"""
# set the lemur identity for all cli commands
identity_changed.send(current_app._get_current_object(), identity=Identity(1))
status = FAILURE_METRIC_STATUS
try:
print("[+] {0} is eligible for re-issuance".format(certificate.name))
details = get_certificate_primitives(certificate)
print_certificate_details(details)
# set the lemur identity for all cli commands
identity_changed.send(current_app._get_current_object(), identity=Identity(1))
if commit:
new_cert = reissue_certificate(certificate, replace=True)
metrics.send('certificate_reissue_success', 'counter', 1)
print("[+] New certificate named: {0}".format(new_cert.name))
details = get_certificate_primitives(certificate)
print_certificate_details(details)
if commit:
new_cert = reissue_certificate(certificate, replace=True)
print("[+] New certificate named: {0}".format(new_cert.name))
status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
print(
"[!] Failed to reissue certificates. Reason: {}".format(
e
)
)
metrics.send('certificate_reissue', 'counter', 1, metric_tags={'status': status})
@manager.option('-e', '--endpoint', dest='endpoint_name', help='Name of the endpoint you wish to rotate.')
@ -159,6 +178,8 @@ def rotate(endpoint_name, new_certificate_name, old_certificate_name, message, c
print("[+] Starting endpoint rotation.")
status = FAILURE_METRIC_STATUS
try:
old_cert = validate_certificate(old_certificate_name)
new_cert = validate_certificate(new_certificate_name)
@ -182,14 +203,19 @@ def rotate(endpoint_name, new_certificate_name, old_certificate_name, message, c
print("[+] Rotating {0} to {1}".format(endpoint.name, endpoint.certificate.replaced[0].name))
request_rotation(endpoint, endpoint.certificate.replaced[0], message, commit)
else:
metrics.send('endpoint_rotation_failure', 'counter', 1)
metrics.send('endpoint_rotation', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
print("[!] Failed to rotate endpoint {0} reason: Multiple replacement certificates found.".format(
endpoint.name
))
status = SUCCESS_METRIC_STATUS
print("[+] Done!")
except Exception as e:
sentry.captureException()
metrics.send('endpoint_rotation_job', 'counter', 1, metric_tags={'status': status})
@manager.option('-o', '--old-certificate', dest='old_certificate_name', help='Name of the certificate you wish to reissue.')
@manager.option('-c', '--commit', dest='commit', action='store_true', default=False, help='Persist changes.')
@ -204,26 +230,29 @@ def reissue(old_certificate_name, commit):
print("[+] Starting certificate re-issuance.")
status = FAILURE_METRIC_STATUS
try:
old_cert = validate_certificate(old_certificate_name)
if not old_cert:
for certificate in get_all_pending_reissue():
print("[+] {0} is eligible for re-issuance".format(certificate.name))
request_reissue(certificate, commit)
else:
request_reissue(old_cert, commit)
status = SUCCESS_METRIC_STATUS
print("[+] Done!")
except Exception as e:
sentry.captureException()
metrics.send('certificate_reissue_failure', 'counter', 1)
print(
"[!] Failed to reissue certificates. Reason: {}".format(
e
)
)
metrics.send('certificate_reissue_job', 'counter', 1, metric_tags={'status': status})
@manager.option('-f', '--fqdns', dest='fqdns', help='FQDNs to query. Multiple fqdns specified via comma.')
@manager.option('-i', '--issuer', dest='issuer', help='Issuer to query for.')
@ -275,9 +304,11 @@ def worker(data, commit, reason):
if commit:
plugin.revoke_certificate(cert, reason)
metrics.send('certificate_revoke', 'counter', 1, metric_tags={'status': SUCCESS_METRIC_STATUS})
except Exception as e:
sentry.captureException()
metrics.send('certificate_revoke_failure', 'counter', 1)
metrics.send('certificate_revoke', 'counter', 1, metric_tags={'status': FAILURE_METRIC_STATUS})
print(
"[!] Failed to revoke certificates. Reason: {}".format(
e

View File

@ -33,6 +33,7 @@ from lemur.common import defaults
from lemur.plugins.base import plugins
from lemur.extensions import metrics
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.models import certificate_associations, certificate_source_associations, \
certificate_destination_associations, certificate_notification_associations, \
@ -358,15 +359,16 @@ def update_destinations(target, value, initiator):
:return:
"""
destination_plugin = plugins.get(value.plugin_name)
status = FAILURE_METRIC_STATUS
try:
if target.private_key:
destination_plugin.upload(target.name, target.body, target.private_key, target.chain, value.options)
status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
current_app.logger.exception(e)
metrics.send('destination_upload_failure', 'counter', 1,
metric_tags={'certificate': target.name, 'destination': value.label})
metrics.send('destination_upload', 'counter', 1,
metric_tags={'status': status, 'certificate': target.name, 'destination': value.label})
@event.listens_for(Certificate.replaces, 'append')

View File

@ -6,3 +6,6 @@
SAN_NAMING_TEMPLATE = "SAN-{subject}-{issuer}-{not_before}-{not_after}"
DEFAULT_NAMING_TEMPLATE = "{subject}-{issuer}-{not_before}-{not_after}"
NONSTANDARD_NAMING_TEMPLATE = "{issuer}-{not_before}-{not_after}"
SUCCESS_METRIC_STATUS = 'success'
FAILURE_METRIC_STATUS = 'failure'

View File

@ -10,8 +10,6 @@
"""
import arrow
from flask import current_app
from sqlalchemy import func
from lemur import database
@ -132,19 +130,6 @@ def update(endpoint_id, **kwargs):
return endpoint
def rotate_certificate(endpoint, new_cert):
"""Rotates a certificate on a given endpoint."""
try:
endpoint.source.plugin.update_endpoint(endpoint, new_cert)
endpoint.certificate = new_cert
database.update(endpoint)
metrics.send('certificate_rotate_success', 'counter', 1, metric_tags={'endpoint': endpoint.name, 'source': endpoint.source.label})
except Exception as e:
metrics.send('certificate_rotate_failure', 'counter', 1, metric_tags={'endpoint': endpoint.name})
current_app.logger.exception(e)
raise e
def render(args):
"""
Helper that helps us render the REST Api responses.

View File

@ -7,6 +7,8 @@
"""
from flask_script import Manager
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.extensions import sentry, metrics
from lemur.notifications.messaging import send_expiration_notifications
manager = Manager(usage="Handles notification related tasks.")
@ -25,11 +27,18 @@ def expirations(exclude):
:return:
"""
print("Starting to notify subscribers about expiring certificates!")
success, failed = send_expiration_notifications(exclude)
print(
"Finished notifying subscribers about expiring certificates! Sent: {success} Failed: {failed}".format(
success=success,
failed=failed
status = FAILURE_METRIC_STATUS
try:
print("Starting to notify subscribers about expiring certificates!")
success, failed = send_expiration_notifications(exclude)
print(
"Finished notifying subscribers about expiring certificates! Sent: {success} Failed: {failed}".format(
success=success,
failed=failed
)
)
)
status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
metrics.send('expiration_notification_job', 'counter', 1, metric_tags={'status': status})

View File

@ -18,6 +18,7 @@ from flask import current_app
from sqlalchemy import and_
from lemur import database, metrics
from lemur.constants import FAILURE_METRIC_STATUS, SUCCESS_METRIC_STATUS
from lemur.extensions import sentry
from lemur.common.utils import windowed_query
@ -94,14 +95,17 @@ def send_notification(event_type, data, targets, notification):
:param notification:
:return:
"""
status = FAILURE_METRIC_STATUS
try:
notification.plugin.send(event_type, data, targets, notification.options)
metrics.send('{0}_notification_sent'.format(event_type), 'counter', 1)
return True
status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
metrics.send('{0}_notification_failure'.format(event_type), 'counter', 1)
current_app.logger.exception(e)
metrics.send('notification', 'counter', 1, metric_tags={'status': status, 'event_type': event_type})
if status == SUCCESS_METRIC_STATUS:
return True
def send_expiration_notifications(exclude):
@ -147,8 +151,10 @@ def send_rotation_notification(certificate, notification_plugin=None):
rotated.
:param certificate:
:param notification_plugin:
:return:
"""
status = FAILURE_METRIC_STATUS
if not notification_plugin:
notification_plugin = plugins.get(current_app.config.get('LEMUR_DEFAULT_NOTIFICATION_PLUGIN'))
@ -156,12 +162,14 @@ def send_rotation_notification(certificate, notification_plugin=None):
try:
notification_plugin.send('rotation', data, [data['owner']])
metrics.send('rotation_notification_sent', 'counter', 1)
return True
status = SUCCESS_METRIC_STATUS
except Exception as e:
sentry.captureException()
metrics.send('rotation_notification_failure', 'counter', 1)
current_app.logger.exception(e)
metrics.send('notification', 'counter', 1, metric_tags={'status': status, 'event_type': 'rotation'})
if status == SUCCESS_METRIC_STATUS:
return True
def needs_notification(certificate):

View File

@ -14,6 +14,8 @@ from flask_script import Manager
from flask import current_app
from lemur.constants import SUCCESS_METRIC_STATUS, FAILURE_METRIC_STATUS
from lemur.extensions import metrics, sentry
from lemur.plugins.base import plugins
@ -54,6 +56,8 @@ def validate_sources(source_strings):
def sync(source_strings):
sources = validate_sources(source_strings)
for source in sources:
status = FAILURE_METRIC_STATUS
start_time = time.time()
print("[+] Staring to sync source: {label}!\n".format(label=source.label))
@ -79,6 +83,8 @@ def sync(source_strings):
time=(time.time() - start_time)
)
)
status = SUCCESS_METRIC_STATUS
except Exception as e:
current_app.logger.exception(e)
@ -86,9 +92,10 @@ def sync(source_strings):
"[X] Failed syncing source {label}!\n".format(label=source.label)
)
metrics.send('sync_failed', 'counter', 1, metric_tags={'source': source.label})
sentry.captureException()
metrics.send('source_sync', 'counter', 1, metric_tags={'source': source.label, 'status': status})
@manager.option('-s', '--sources', dest='source_strings', action='append', help='Sources to operate on.')
@manager.option('-c', '--commit', dest='commit', action='store_true', default=False, help='Persist changes.')
@ -109,23 +116,25 @@ def clean(source_strings, commit):
cleaned = 0
for certificate in certificate_service.get_all_pending_cleaning(source):
if commit:
try:
s.clean(certificate, source.options)
certificate.sources.remove(source)
certificate_service.database.update(certificate)
metrics.send('clean_success', 'counter', 1, metric_tags={'source': source.label})
except Exception as e:
current_app.logger.exception(e)
metrics.send('clean_failed', 'counter', 1, metric_tags={'source': source.label})
sentry.captureException()
status = FAILURE_METRIC_STATUS
if commit:
try:
s.clean(certificate, source.options)
certificate.sources.remove(source)
certificate_service.database.update(certificate)
status = SUCCESS_METRIC_STATUS
except Exception as e:
current_app.logger.exception(e)
sentry.captureException()
current_app.logger.warning("Removed {0} from source {1} during cleaning".format(
certificate.name,
source.label
))
metrics.send('clean', 'counter', 1, metric_tags={'source': source.label, 'status': status})
cleaned += 1
current_app.logger.warning("Removed {0} from source {1} during cleaning".format(
certificate.name,
source.label
))
cleaned += 1
print(
"[+] Finished cleaning source: {label}. Removed {cleaned} certificates from source. Run Time: {time}\n".format(