feat: prevent reddit rss endpoint blacklisting

wpetit 2025-03-24 09:13:21 +01:00
parent 2fe16ffa2f
commit f30dfd02dd
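
Context, not part of the commit: Reddit's feed endpoints tend to throttle or block clients that show up with a default tooling user agent, which is presumably what started hitting this pipeline. Sending the request with a full set of browser-like headers avoids that. A rough way to see the difference (sketch only; the subreddit is a stand-in and the exact response codes are decided on Reddit's side):

    # default curl identity: Reddit often answers 403 or 429 here
    curl -s -o /dev/null -w '%{http_code}\n' "https://www.reddit.com/r/linux/top/.rss?sort=top&t=week"
    # same request with a browser user-agent header normally gets a 200
    curl -s -o /dev/null -w '%{http_code}\n' \
        -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36' \
        "https://www.reddit.com/r/linux/top/.rss?sort=top&t=week"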


@@ -4,6 +4,25 @@ set -eo pipefail
 NEWSLETTER=""
+function scrape {
+    curl \
+        -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7' \
+        -H 'accept-language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7' \
+        -H 'cache-control: no-cache' \
+        -H 'pragma: no-cache' \
+        -H 'priority: u=0, i' \
+        -H 'sec-ch-ua: "Not:A-Brand";v="24", "Chromium";v="134"' \
+        -H 'sec-ch-ua-mobile: ?0' \
+        -H 'sec-ch-ua-platform: "Linux"' \
+        -H 'sec-fetch-dest: document' \
+        -H 'sec-fetch-mode: navigate' \
+        -H 'sec-fetch-site: none' \
+        -H 'sec-fetch-user: ?1' \
+        -H 'upgrade-insecure-requests: 1' \
+        -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36' \
+        $@
+}
 function write {
     local content=$@
     NEWSLETTER="${NEWSLETTER}${content}"
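
The new scrape helper simply forwards its arguments to curl after the header set above, so existing call sites only need to swap the command name. A minimal usage sketch (the subreddit is illustrative, not taken from the script):

    # same flags as the call sites below; scrape only adds the browser headers
    scrape -sk --retry 5 "https://www.reddit.com/r/linux/top/.rss?sort=top&t=week" | head -c 200

Because the arguments are expanded through an unquoted $@, URLs containing & stay inside quotes at the call site, exactly as the Reddit and newsdatahub calls below already do.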
@@ -28,7 +47,7 @@ function include_subreddit_top_of_the_week {
     local title=$1
     local subreddit=$2
     local total=$3
-    local top_of_the_week=$(curl -sk --retry 5 "https://www.reddit.com/r/$subreddit/top/.rss?sort=top&t=week" | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
+    local top_of_the_week=$(scrape -sk --retry 5 "https://www.reddit.com/r/$subreddit/top/.rss?sort=top&t=week" | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
     if [ -z "$top_of_the_week" ]; then
         return
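
Only the curl-to-scrape swap changes in this hunk; the rest of the pipeline (xml2json, then a jq slice of the first $total feed entries) is untouched. The slice can be checked in isolation with stand-in data (plain numbers instead of real entries):

    echo '{"feed":{"entry":[1,2,3,4,5,6,7]}}' | jq --arg TOTAL "3" '.feed.entry[0:($TOTAL|tonumber)]'
    # prints the array [1,2,3] (pretty-printed by jq)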
@@ -54,7 +73,7 @@ function include_subreddit_top_of_the_week {
 function include_linuxfr_latest {
     local total=5
-    local linuxfr_latest=$(curl -sk --retry 5 https://linuxfr.org/news.atom | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
+    local linuxfr_latest=$(scrape -sk --retry 5 https://linuxfr.org/news.atom | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
     if [ -z "$linuxfr_latest" ]; then
         return
@@ -79,7 +98,7 @@ function include_linuxfr_latest {
 }
 function include_hackernews_top5 {
-    local hackernews_top5=$(curl -sk --retry 5 https://hacker-news.firebaseio.com/v0/topstories.json | jq -r '.[0:5] | .[]')
+    local hackernews_top5=$(scrape -sk --retry 5 https://hacker-news.firebaseio.com/v0/topstories.json | jq -r '.[0:5] | .[]')
     if [ -z "$hackernews_top5" ]; then
         return
@@ -89,7 +108,7 @@ function include_hackernews_top5 {
     writeln "#### Hackernews"
     for story_id in ${hackernews_top5}; do
-        local hackernews_story=$(curl -sk --retry 5 https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty)
+        local hackernews_story=$(scrape -sk --retry 5 https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty)
         local story_title=$(echo $hackernews_story | jq -r '.title')
         local story_url=$(echo $hackernews_story | jq -r '.url')
         local story_author=$(echo $hackernews_story | jq -r '.by')
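
The Hacker News section makes one request for the top-story ids and then one request per story, all now routed through scrape. A single story can be fetched the same way by hand; the id below is the example item from the public HN API documentation, used purely as a stand-in:

    story_id=8863
    scrape -sk --retry 5 "https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty" \
        | jq -r '[.by, .title, .url] | @tsv'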
@@ -112,7 +131,7 @@ function include_news_api_latest_week {
     local query=$2
     local total=$3
     local since=$(date -d '- 7 days' +%Y-%m-%d)
-    local news_of_the_week=$(curl -sk --retry 5 -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' "https://api.newsdatahub.com/v1/news?language=fr&topic=technology&topic=business&topic=politics&topic=education&topic=innovation&topic=internet&q=${query}&start_date=${since}" | jq '.data')
+    local news_of_the_week=$(scrape -sk --retry 5 -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' "https://api.newsdatahub.com/v1/news?language=fr&topic=technology&topic=business&topic=politics&topic=education&topic=innovation&topic=internet&q=${query}&start_date=${since}" | jq '.data')
     if [ -z "$news_of_the_week" ]; then
         return
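
Here the per-request headers (the X-Api-Key and the Cazette User-Agent) are passed as extra arguments, and scrape already sets a user-agent of its own, so curl's verbose output is a quick way to check which header lines actually go out on the wire. A sketch (NEWS_API_KEY, query and since are assumed to be set as in the function above; the query string is shortened here):

    scrape -sk -o /dev/null -v \
        -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' \
        "https://api.newsdatahub.com/v1/news?language=fr&q=${query}&start_date=${since}" 2>&1 \
        | grep '^> '    # request headers are prefixed with "> " in curl -v output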