feat: add basic prometheus metrics integration
Some checks reported errors
Cadoles/bouncer/pipeline/head Something is wrong with the build of this commit

This commit is contained in:
2023-06-30 10:26:27 -06:00
parent 5bf391b6bf
commit 56609ec316
16 changed files with 344 additions and 47 deletions

View File

@ -10,6 +10,7 @@ import (
"forge.cadoles.com/Cadoles/go-proxy/wildcard"
"forge.cadoles.com/cadoles/bouncer/internal/store"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"gitlab.com/wpetit/goweb/logger"
)
@ -59,6 +60,8 @@ MAIN:
logger.F("remoteAddr", r.RemoteAddr),
)
metricProxyRequestsTotal.With(prometheus.Labels{metricLabelProxy: string(match.Name)}).Add(1)
ctx = withProxy(ctx, match)
layers, err := d.getLayers(ctx, match.Name)

View File

@ -0,0 +1,73 @@
package queue
import (
"sync"
"time"
"github.com/pkg/errors"
)
type DebouncerMap struct {
debouncers sync.Map
}
func NewDebouncerMap() *DebouncerMap {
return &DebouncerMap{
debouncers: sync.Map{},
}
}
func (m *DebouncerMap) Do(key string, after time.Duration, fn func()) {
newDebouncer := NewDebouncer(after)
rawDebouncer, loaded := m.debouncers.LoadOrStore(key, newDebouncer)
debouncer, ok := rawDebouncer.(*Debouncer)
if !ok {
panic(errors.Errorf("unexpected debouncer value, expected '%T', got '%T'", newDebouncer, rawDebouncer))
}
if loaded {
debouncer.Update(after)
}
debouncer.Do(fn)
}
func NewDebouncer(after time.Duration) *Debouncer {
return &Debouncer{after: after}
}
type Debouncer struct {
mu sync.Mutex
after time.Duration
timer *time.Timer
fn func()
}
func (d *Debouncer) Do(fn func()) {
d.mu.Lock()
defer d.mu.Unlock()
if d.timer != nil {
d.timer.Stop()
}
d.fn = fn
d.timer = time.AfterFunc(d.after, d.fn)
}
func (d *Debouncer) Update(after time.Duration) {
d.mu.Lock()
defer d.mu.Unlock()
if after == d.after {
return
}
if d.timer != nil {
d.timer.Stop()
}
d.after = after
d.timer = time.AfterFunc(d.after, d.fn)
}

View File

@ -0,0 +1,31 @@
package queue
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
const (
metricNamespace = "bouncer_layer_queue"
metricLabelProxy = "proxy"
metricLabelLayer = "layer"
)
var (
metricQueueSessions = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "sessions",
Help: "Bouncer's queue layer current sessions",
Namespace: metricNamespace,
},
[]string{metricLabelProxy, metricLabelLayer},
)
metricQueueCapacity = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "capacity",
Help: "Bouncer's queue layer capacity",
Namespace: metricNamespace,
},
[]string{metricLabelProxy, metricLabelLayer},
)
)

View File

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"html/template"
"math/rand"
"net/http"
"path/filepath"
"sync"
@ -16,6 +17,7 @@ import (
"github.com/Masterminds/sprig/v3"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"gitlab.com/wpetit/goweb/logger"
)
@ -30,7 +32,9 @@ type Queue struct {
loadOnce sync.Once
tmpl *template.Template
refreshJobRunning uint32
refreshJobRunning uint32
updateMetricsJobRunning uint32
postKeepAliveDebouncer *DebouncerMap
}
// LayerType implements director.MiddlewareLayer
@ -52,6 +56,8 @@ func (q *Queue) Middleware(layer *store.Layer) proxy.Middleware {
return
}
defer q.updateMetrics(ctx, layer.Proxy, layer.Name, options)
cookieName := q.getCookieName(layer.Name)
cookie, err := r.Cookie(cookieName)
@ -72,8 +78,6 @@ func (q *Queue) Middleware(layer *store.Layer) proxy.Middleware {
sessionID := cookie.Value
queueName := string(layer.Name)
q.refreshQueue(queueName, options.KeepAlive)
rank, err := q.adapter.Touch(ctx, queueName, sessionID)
if err != nil {
logger.Error(ctx, "could not retrieve session rank", logger.E(errors.WithStack(err)))
@ -102,6 +106,30 @@ func (q *Queue) Middleware(layer *store.Layer) proxy.Middleware {
}
}
func (q *Queue) updateSessionsMetric(ctx context.Context, proxyName store.ProxyName, layerName store.LayerName) {
if !atomic.CompareAndSwapUint32(&q.updateMetricsJobRunning, 0, 1) {
return
}
defer atomic.StoreUint32(&q.updateMetricsJobRunning, 0)
queueName := string(layerName)
status, err := q.adapter.Status(ctx, queueName)
if err != nil {
logger.Error(ctx, "could not retrieve queue status", logger.E(errors.WithStack(err)))
return
}
metricQueueSessions.With(
prometheus.Labels{
metricLabelLayer: string(layerName),
metricLabelProxy: string(proxyName),
},
).Set(float64(status.Sessions))
}
func (q *Queue) renderQueuePage(w http.ResponseWriter, r *http.Request, queueName string, options *LayerOptions, rank int64) {
ctx := r.Context()
@ -135,20 +163,22 @@ func (q *Queue) renderQueuePage(w http.ResponseWriter, r *http.Request, queueNam
return
}
refreshRate := time.Duration(int64(options.KeepAlive.Seconds()/2)) * time.Second
templateData := struct {
QueueName string
LayerOptions *LayerOptions
Rank int64
CurrentSessions int64
MaxSessions int64
RefreshRate int64
RefreshRate time.Duration
}{
QueueName: queueName,
LayerOptions: options,
Rank: rank + 1,
CurrentSessions: status.Sessions,
MaxSessions: options.Capacity,
RefreshRate: 5,
RefreshRate: refreshRate,
}
w.WriteHeader(http.StatusServiceUnavailable)
@ -161,24 +191,55 @@ func (q *Queue) renderQueuePage(w http.ResponseWriter, r *http.Request, queueNam
}
}
func (q *Queue) refreshQueue(queueName string, keepAlive time.Duration) {
func (q *Queue) refreshQueue(ctx context.Context, layerName store.LayerName, keepAlive time.Duration) {
if !atomic.CompareAndSwapUint32(&q.refreshJobRunning, 0, 1) {
return
}
go func() {
defer atomic.StoreUint32(&q.refreshJobRunning, 0)
defer atomic.StoreUint32(&q.refreshJobRunning, 0)
ctx, cancel := context.WithTimeout(context.Background(), keepAlive*2)
defer cancel()
if err := q.adapter.Refresh(ctx, string(layerName), keepAlive); err != nil {
logger.Error(ctx, "could not refresh queue",
logger.E(errors.WithStack(err)),
logger.F("queue", layerName),
)
}
}
if err := q.adapter.Refresh(ctx, queueName, keepAlive); err != nil {
logger.Error(ctx, "could not refresh queue",
logger.E(errors.WithStack(err)),
logger.F("queue", queueName),
)
}
}()
func (q *Queue) updateMetrics(ctx context.Context, proxyName store.ProxyName, layerName store.LayerName, options *LayerOptions) {
// Update queue capacity metric
metricQueueCapacity.With(
prometheus.Labels{
metricLabelLayer: string(layerName),
metricLabelProxy: string(proxyName),
},
).Set(float64(options.Capacity))
// Refresh queue data and metrics
q.refreshQueue(ctx, layerName, options.KeepAlive)
q.updateSessionsMetric(ctx, proxyName, layerName)
// (Re)schedule an update job after session ttl + semi-random time padding
// to update metrics after last session expiration
randDuration := rand.Int63n(int64(options.KeepAlive))
timePadding := options.KeepAlive/2 + time.Duration(randDuration)
after := options.KeepAlive + timePadding
debouncingKey := fmt.Sprintf("%s/%s", proxyName, layerName)
q.postKeepAliveDebouncer.Do(debouncingKey, after, func() {
ctx := logger.With(
context.Background(),
logger.F("proxy", proxyName),
logger.F("layer", layerName),
logger.F("after", after),
)
logger.Info(ctx, "running post keep alive refresh job")
q.refreshQueue(ctx, layerName, options.KeepAlive)
q.updateSessionsMetric(ctx, proxyName, layerName)
})
}
func (q *Queue) getCookieName(layerName store.LayerName) string {
@ -192,9 +253,10 @@ func New(adapter Adapter, funcs ...OptionFunc) *Queue {
}
return &Queue{
adapter: adapter,
templateDir: opts.TemplateDir,
defaultKeepAlive: opts.DefaultKeepAlive,
adapter: adapter,
templateDir: opts.TemplateDir,
defaultKeepAlive: opts.DefaultKeepAlive,
postKeepAliveDebouncer: NewDebouncerMap(),
}
}

View File

@ -0,0 +1,20 @@
package director
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
const (
metricNamespace = "bouncer_proxy_director"
metricLabelProxy = "proxy"
)
var metricProxyRequestsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "proxy_requests_total",
Help: "Bouncer proxy total requests",
Namespace: metricNamespace,
},
[]string{metricLabelProxy},
)

View File

@ -15,6 +15,7 @@ import (
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"gitlab.com/wpetit/goweb/logger"
)
@ -84,18 +85,40 @@ func (s *Server) run(parentCtx context.Context, addrs chan net.Addr, errs chan e
)
router.Use(middleware.RequestLogger(bouncerChi.NewLogFormatter()))
router.Use(director.Middleware())
handler := proxy.New(
proxy.WithRequestTransformers(
director.RequestTransformer(),
),
proxy.WithResponseTransformers(
director.ResponseTransformer(),
),
)
if s.serverConfig.Metrics.Enabled {
metrics := s.serverConfig.Metrics
router.Handle("/*", handler)
logger.Info(ctx, "enabling metrics", logger.F("endpoint", metrics.Endpoint))
router.Group(func(r chi.Router) {
if metrics.BasicAuth != nil {
logger.Info(ctx, "enabling authentication on metrics endpoint")
r.Use(middleware.BasicAuth(
"metrics",
metrics.BasicAuth.CredentialsMap(),
))
}
r.Handle(string(metrics.Endpoint), promhttp.Handler())
})
}
router.Group(func(r chi.Router) {
r.Use(director.Middleware())
handler := proxy.New(
proxy.WithRequestTransformers(
director.RequestTransformer(),
),
proxy.WithResponseTransformers(
director.ResponseTransformer(),
),
)
r.Handle("/*", handler)
})
if err := http.Serve(listener, router); err != nil && !errors.Is(err, net.ErrClosed) {
errs <- errors.WithStack(err)