feat(hydra-cleaner): add component #61

Merged
wpetit merged 3 commits from f/hydra_cleaner into unstable 2025-03-06 11:55:19 +01:00
6 changed files with 210 additions and 0 deletions

View File

@ -0,0 +1,116 @@
#!/bin/sh
#
# hydra-cleaner: removes old rows from the Hydra PostgreSQL database.
#
# Strict mode: stop on the first failing command and on any unset variable.
set -eu

# Background notes
# ----------------
# At least four tables need emptying: oidc, code, flow, authentication_session.
#
# Output of `\d hydra_oauth2_flow` shows these tables reference
# hydra_oauth2_flow(consent_challenge_id) through their challenge_id column,
# all with ON DELETE CASCADE:
#   hydra_oauth2_access   (hydra_oauth2_access_challenge_id_fk)
#   hydra_oauth2_code     (hydra_oauth2_code_challenge_id_fk)
#   hydra_oauth2_oidc     (hydra_oauth2_oidc_challenge_id_fk)
#   hydra_oauth2_pkce     (hydra_oauth2_pkce_challenge_id_fk)
#   hydra_oauth2_refresh  (hydra_oauth2_refresh_challenge_id_fk)
# => deleting from "flow" also cleans access, code, oidc, pkce and refresh.

# Connection string: an explicit DSN wins; otherwise it is assembled from the
# HYDRA_DATABASE_* variables (the port falls back to 5432).
: "${DSN:=postgresql://${HYDRA_DATABASE_USER}:${HYDRA_DATABASE_PASSWORD}@${HYDRA_DATABASE_SERVICE_NAME}:${HYDRA_DATABASE_SERVICE_PORT:-5432}/hydra?sslmode=disable}"
wpetit marked this conversation as resolved Outdated

Port configurable ?

Port configurable ?
# Runtime settings, all overridable through the environment:
#   RETENTION_HOURS  age threshold in hours; older rows are eligible (default 48)
#   BATCH_SIZE       maximum number of rows removed per DELETE (default 50)
#   LIMIT            maximum number of rows removed per run (default 1000)
RETENTION_HOURS="${RETENTION_HOURS:-48}"
BATCH_SIZE="${BATCH_SIZE:-50}"
LIMIT="${LIMIT:-1000}"

# log MESSAGE
# Prints MESSAGE on stdout, prefixed with a "dd-mm-yy HH:MM:SS+zzzz| " timestamp.
log() {
  echo "$(date +'%d-%m-%y %H:%M:%S%z')| $1"
}

# perror
# Failure handler: logs a generic message, disarms the EXIT trap so it does
# not run a second time, then exits with status 1.
perror() {
  log "Something went wrong, exiting."
  trap - EXIT
  exit 1
}

# Until the trap is cleared, every exit is reported as a failure.
trap perror EXIT

# _is_uint VALUE
# Returns 0 when VALUE is a non-empty string of decimal digits, 1 otherwise.
# Uses a POSIX `case` pattern: `[[ ... =~ ... ]]` is not available in every
# /bin/sh, and bash matches a quoted right-hand side literally.
_is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

if ! _is_uint "${RETENTION_HOURS}"; then
  log "Error: variable RETENTION_HOURS is not a positive integer."
  perror
fi
if ! _is_uint "${LIMIT}"; then
  log "Error: variable LIMIT is not a positive integer."
  perror
fi
# A batch size of 0 can never make progress, so it is rejected as well.
if ! _is_uint "${BATCH_SIZE}" || [ "${BATCH_SIZE}" -eq 0 ]; then
  log "Error: variable BATCH_SIZE is not a positive integer."
  perror
fi

# Cut-off timestamp (now - RETENTION_HOURS). Computed only after validation so
# that a malformed RETENTION_HOURS never reaches the arithmetic expansion.
# NOTE(review): the value is rendered in the container's local time zone;
# confirm it matches the time zone of the requested_at column.
BEFORE_DATE="$(date +'%Y-%m-%d %H:%M:%S' --date=@$(($(date +%s) - RETENTION_HOURS * 3600)))"
# Announce the run together with its effective parameters.
log "Starting hydra cleaner"
log "Removing up to ${LIMIT} elements before ${BEFORE_DATE} by batch of ${BATCH_SIZE}"

# Baseline report: estimated row count and total on-disk size of every table
# in the public schema, largest first.
log "Beginning estimated size:"
psql "$DSN" <<'SQL'
select
table_name, reltuples as estimate,
pg_size_pretty(pg_total_relation_size(quote_ident(table_name))),
pg_total_relation_size(quote_ident(table_name))
from information_schema.tables left join pg_class on information_schema.tables.table_name=pg_class.relname
where table_schema = 'public'
order by 4 desc;
SQL
# Batched purge of hydra_oauth2_flow rows older than BEFORE_DATE.
#
# At most LIMIT rows are removed per run, BATCH_SIZE rows per DELETE statement.
# The loop stops when the budget is used up, when a batch removes fewer rows
# than requested (nothing old is left), or when psql returns an unexpected
# answer.
#
# Notes carried over from the review discussion on this change:
#   - no explicit transaction is opened: a single DELETE with its ON DELETE
#     CASCADE is already atomic in PostgreSQL, and a wider transaction could
#     keep the cascaded tables locked for longer;
#   - a SELECT with the same filter could be run beforehand to log how many
#     rows are candidates for deletion (not implemented here).
REMAINING_ELMTS="${LIMIT}"

# Never request more rows than the overall budget, first batch included.
if [ "${BATCH_SIZE}" -gt "${REMAINING_ELMTS}" ]; then
  BATCH_SIZE="${REMAINING_ELMTS}"
fi

# The BATCH_SIZE guard prevents an endless loop when the batch size is 0.
while [ "${REMAINING_ELMTS}" -gt 0 ] && [ "${BATCH_SIZE}" -gt 0 ]; do
  OUTPUT=$(psql "${DSN}" <<EOF
DELETE
FROM hydra_oauth2_flow
WHERE login_challenge = ANY (
  array(
    SELECT login_challenge
    FROM hydra_oauth2_flow
    WHERE requested_at < '${BEFORE_DATE}'
    LIMIT ${BATCH_SIZE}
  )
);
EOF
  )
  log "${OUTPUT}"

  # psql answers "DELETE <count>"; anything else is treated as suspicious.
  # POSIX patterns are used because `[[ ... =~ ... ]]` is not portable to
  # every /bin/sh and bash matches a quoted regex literally.
  case "${OUTPUT}" in
    "DELETE "*) OUTPUT_NB="${OUTPUT#DELETE }" ;;
    *) OUTPUT_NB="" ;;
  esac
  case "${OUTPUT_NB}" in
    '' | *[!0-9]*)
      log "Output doesn't seems OK..."
      break
      ;;
  esac

  # A short batch means there is nothing old enough left to delete.
  if [ "${OUTPUT_NB}" -lt "${BATCH_SIZE}" ]; then
    break
  fi

  REMAINING_ELMTS=$((REMAINING_ELMTS - BATCH_SIZE))
  # Shrink the last batch so the run never exceeds LIMIT.
  if [ "${REMAINING_ELMTS}" -lt "${BATCH_SIZE}" ]; then
    BATCH_SIZE="${REMAINING_ELMTS}"
  fi
done
# Closing report: same per-table estimate as at start-up, to compare sizes
# before and after the purge.
log "Final estimated size:"
psql "$DSN" <<'SQL'
select
table_name, reltuples as estimate,
pg_size_pretty(pg_total_relation_size(quote_ident(table_name))),
pg_total_relation_size(quote_ident(table_name))
from information_schema.tables left join pg_class on information_schema.tables.table_name=pg_class.relname
where table_schema = 'public'
order by 4 desc;
SQL

# Normal completion: remove the EXIT trap so the script ends without running
# the failure handler.
trap - EXIT

View File

@ -0,0 +1,17 @@
# Kustomize component that bundles the hydra-cleaner CronJob with the two
# ConfigMaps it consumes (settings and script).
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component
resources:
- ./resources/hydra-cleaner-cronjob.yaml
configMapGenerator:
# Default cleaner settings, loaded by the CronJob through envFrom.
# Note: BATCH_SIZE defaults to 100 here, while the script's own fallback is 50.
- name: hydra-cleaner-env
behavior: create
literals:
- RETENTION_HOURS="48"
- BATCH_SIZE="100"
- LIMIT="1000"
# The cleanup script itself, mounted into the CronJob container as a file.
- name: hydra-cleaner-script
behavior: create
files:
- ./files/hydra-cleaner.sh

View File

@ -0,0 +1,54 @@
# CronJob that periodically runs /hydra-cleaner.sh against the Hydra
# PostgreSQL database.
apiVersion: batch/v1
kind: CronJob
metadata:
name: hydra-cleaner
labels:
app.kubernetes.io/name: hydra-cleaner
spec:
# Forbid: a new run is skipped while the previous one is still running.
concurrencyPolicy: Forbid
# Minute 30 of every hour.
schedule: "30 */1 * * *"
jobTemplate:
spec:
template:
metadata:
labels:
app.kubernetes.io/name: hydra-cleaner
spec:
restartPolicy: OnFailure
serviceAccountName: hydra-sa
containers:
- name: hydra-cleaner
image: reg.cadoles.com/proxy_cache/alpine/psql:17.4
# hydra-cleaner-env carries RETENTION_HOURS, BATCH_SIZE and LIMIT.
# hydra-env is not defined in this view — presumably shared Hydra settings; confirm.
envFrom:
- configMapRef:
name: hydra-env
- configMapRef:
name: hydra-cleaner-env
imagePullPolicy: IfNotPresent
command: ["/hydra-cleaner.sh"]
# Database credentials and host, read from the hydra-postgres-app secret; the
# script assembles its DSN from these when DSN itself is not set.
# HYDRA_DATABASE_SERVICE_PORT is not set here, so the script falls back to 5432.
env:
- name: HYDRA_DATABASE_USER
valueFrom:
secretKeyRef:
name: hydra-postgres-app
key: username
- name: HYDRA_DATABASE_PASSWORD
valueFrom:
secretKeyRef:
name: hydra-postgres-app
key: password
- name: HYDRA_DATABASE_SERVICE_NAME
valueFrom:
secretKeyRef:
name: hydra-postgres-app
key: host
args: []
# The script is mounted as a single file (subPath) at the container root.
volumeMounts:
- name: hydra-cleaner-script
mountPath: "/hydra-cleaner.sh"
subPath: "hydra-cleaner.sh"
volumes:
- name: hydra-cleaner-script
configMap:
name: hydra-cleaner-script
# Octal 0544: read+execute for the owner, read-only for group and others,
# so the mounted script can be executed directly.
defaultMode: 0544

View File

@ -14,6 +14,7 @@ components:
- ../../components/hydra-ldap
- ../../components/oidc-test
- ../../components/redis
- ../../components/hydra-cleaner
patchesJson6902:
- target:
@ -51,6 +52,16 @@ patchesJson6902:
kind: OAuth2Client
name: oidc-test-oauth2-client
path: patches/oidc-test-oauth2-client.yaml
- target:
version: v1
kind: ConfigMap
name: hydra-cleaner-env
path: patches/hydra-cleaner-env.yaml
- target:
version: v1
kind: CronJob
name: hydra-cleaner
path: patches/hydra-cleaner.yaml
configMapGenerator:
- name: hydra-dispatcher-apps

View File

@ -0,0 +1,9 @@
# JSON patch for the hydra-cleaner-env ConfigMap: overrides the component
# defaults for this overlay (retention shortened to one hour).
- op: replace
path: "/data/RETENTION_HOURS"
value: "1" # 1 HOUR
# Same values as the component defaults, restated explicitly.
- op: replace
path: "/data/BATCH_SIZE"
value: "100"
- op: replace
path: "/data/LIMIT"
value: "1000"

View File

@ -0,0 +1,3 @@
# JSON patch for the hydra-cleaner CronJob: run every minute in this overlay
# instead of the component's hourly schedule.
- op: replace
path: "/spec/schedule"
value: "* * * * *"