feat: prevent reddit rss endpoint blacklisting
All checks were successful
kipp-news/pipeline/head This commit looks good
All checks were successful
kipp-news/pipeline/head This commit looks good
This commit is contained in:
parent
2fe16ffa2f
commit
f30dfd02dd
@ -4,6 +4,25 @@ set -eo pipefail
|
||||
|
||||
# Accumulator for the generated newsletter body; sections are appended
# to it by the write/writeln helpers below.
NEWSLETTER=""
|
||||
|
||||
#######################################
# curl wrapper that sends browser-like headers so scraped endpoints
# (notably Reddit's RSS feeds) do not blacklist us as a bot.
# Arguments: any extra curl options followed by the target URL;
#            everything is forwarded to curl verbatim.
# Outputs:   the HTTP response body on stdout (whatever curl emits).
# Returns:   curl's exit status.
#######################################
function scrape {
  # NOTE: "$@" must stay quoted — the URLs passed in contain '?' and '&'
  # (e.g. ".rss?sort=top&t=week"); unquoted, '?' is a glob character and
  # whitespace would split arguments.
  curl \
    -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7' \
    -H 'accept-language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7' \
    -H 'cache-control: no-cache' \
    -H 'pragma: no-cache' \
    -H 'priority: u=0, i' \
    -H 'sec-ch-ua: "Not:A-Brand";v="24", "Chromium";v="134"' \
    -H 'sec-ch-ua-mobile: ?0' \
    -H 'sec-ch-ua-platform: "Linux"' \
    -H 'sec-fetch-dest: document' \
    -H 'sec-fetch-mode: navigate' \
    -H 'sec-fetch-site: none' \
    -H 'sec-fetch-user: ?1' \
    -H 'upgrade-insecure-requests: 1' \
    -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36' \
    "$@"
}
|
||||
|
||||
function write {
|
||||
local content=$@
|
||||
NEWSLETTER="${NEWSLETTER}${content}"
|
||||
@ -28,7 +47,7 @@ function include_subreddit_top_of_the_week {
|
||||
local title=$1
|
||||
local subreddit=$2
|
||||
local total=$3
|
||||
local top_of_the_week=$(curl -sk --retry 5 "https://www.reddit.com/r/$subreddit/top/.rss?sort=top&t=week" | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
|
||||
local top_of_the_week=$(scrape -sk --retry 5 "https://www.reddit.com/r/$subreddit/top/.rss?sort=top&t=week" | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
|
||||
|
||||
if [ -z "$top_of_the_week" ]; then
|
||||
return
|
||||
@ -54,7 +73,7 @@ function include_subreddit_top_of_the_week {
|
||||
|
||||
function include_linuxfr_latest {
|
||||
local total=5
|
||||
local linuxfr_latest=$(curl -sk --retry 5 https://linuxfr.org/news.atom | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
|
||||
local linuxfr_latest=$(scrape -sk --retry 5 https://linuxfr.org/news.atom | npx xml2json | jq --arg TOTAL "$total" '.feed.entry[0:($TOTAL|tonumber)]')
|
||||
|
||||
if [ -z "$linuxfr_latest" ]; then
|
||||
return
|
||||
@ -79,7 +98,7 @@ function include_linuxfr_latest {
|
||||
}
|
||||
|
||||
function include_hackernews_top5 {
|
||||
local hackernews_top5=$(curl -sk --retry 5 https://hacker-news.firebaseio.com/v0/topstories.json | jq -r '.[0:5] | .[]')
|
||||
local hackernews_top5=$(scrape -sk --retry 5 https://hacker-news.firebaseio.com/v0/topstories.json | jq -r '.[0:5] | .[]')
|
||||
|
||||
if [ -z "$hackernews_top5" ]; then
|
||||
return
|
||||
@ -89,7 +108,7 @@ function include_hackernews_top5 {
|
||||
writeln "#### Hackernews"
|
||||
|
||||
for story_id in ${hackernews_top5}; do
|
||||
local hackernews_story=$(curl -sk --retry 5 https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty)
|
||||
local hackernews_story=$(scrape -sk --retry 5 https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty)
|
||||
local story_title=$(echo $hackernews_story | jq -r '.title')
|
||||
local story_url=$(echo $hackernews_story | jq -r '.url')
|
||||
local story_author=$(echo $hackernews_story | jq -r '.by')
|
||||
@ -112,7 +131,7 @@ function include_news_api_latest_week {
|
||||
local query=$2
|
||||
local total=$3
|
||||
local since=$(date -d '- 7 days' +%Y-%m-%d)
|
||||
local news_of_the_week=$(curl -sk --retry 5 -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' "https://api.newsdatahub.com/v1/news?language=fr&topic=technology&topic=business&topic=politics&topic=education&topic=innovation&topic=internet&q=${query}&start_date=${since}" | jq '.data')
|
||||
local news_of_the_week=$(scrape -sk --retry 5 -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' "https://api.newsdatahub.com/v1/news?language=fr&topic=technology&topic=business&topic=politics&topic=education&topic=innovation&topic=internet&q=${query}&start_date=${since}" | jq '.data')
|
||||
|
||||
if [ -z "$news_of_the_week" ]; then
|
||||
return
|
||||
|
Loading…
x
Reference in New Issue
Block a user