#######################################
# Run curl with a fixed set of browser-like request headers.
#
# The headers describe a Chrome 134 on Linux navigation request (accept,
# accept-language, cache-control, pragma, priority, sec-ch-ua*, sec-fetch-*,
# upgrade-insecure-requests, user-agent). Every argument given to this
# function is appended after those headers and handed to curl unchanged, so
# callers pass their own curl options (e.g. -sk --retry 5, extra -H headers)
# followed by the URL.
#
# Arguments: $@ - additional curl options and the URL to fetch
# Outputs:   whatever curl writes to stdout / stderr
# Returns:   curl's exit status
#######################################
function scrape {
  local -a browser_headers=(
    -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
    -H 'accept-language: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7'
    -H 'cache-control: no-cache'
    -H 'pragma: no-cache'
    -H 'priority: u=0, i'
    -H 'sec-ch-ua: "Not:A-Brand";v="24", "Chromium";v="134"'
    -H 'sec-ch-ua-mobile: ?0'
    -H 'sec-ch-ua-platform: "Linux"'
    -H 'sec-fetch-dest: document'
    -H 'sec-fetch-mode: navigate'
    -H 'sec-fetch-site: none'
    -H 'sec-fetch-user: ?1'
    -H 'upgrade-insecure-requests: 1'
    -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'
  )

  # "$@" must stay quoted: an unquoted $@ re-splits every caller argument on
  # whitespace and glob-expands it, which breaks header values containing a
  # space (e.g. -H "X-Api-Key: <key>") and URLs containing '?' or '*'.
  curl "${browser_headers[@]}" "$@"
}
return @@ -79,7 +98,7 @@ function include_linuxfr_latest { } function include_hackernews_top5 { - local hackernews_top5=$(curl -sk --retry 5 https://hacker-news.firebaseio.com/v0/topstories.json | jq -r '.[0:5] | .[]') + local hackernews_top5=$(scrape -sk --retry 5 https://hacker-news.firebaseio.com/v0/topstories.json | jq -r '.[0:5] | .[]') if [ -z "$hackernews_top5" ]; then return @@ -89,7 +108,7 @@ function include_hackernews_top5 { writeln "#### Hackernews" for story_id in ${hackernews_top5}; do - local hackernews_story=$(curl -sk --retry 5 https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty) + local hackernews_story=$(scrape -sk --retry 5 https://hacker-news.firebaseio.com/v0/item/$story_id.json?print=pretty) local story_title=$(echo $hackernews_story | jq -r '.title') local story_url=$(echo $hackernews_story | jq -r '.url') local story_author=$(echo $hackernews_story | jq -r '.by') @@ -112,7 +131,7 @@ function include_news_api_latest_week { local query=$2 local total=$3 local since=$(date -d '- 7 days' +%Y-%m-%d) - local news_of_the_week=$(curl -sk --retry 5 -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' "https://api.newsdatahub.com/v1/news?language=fr&topic=technology&topic=business&topic=politics&topic=education&topic=innovation&topic=internet&q=${query}&start_date=${since}" | jq '.data') + local news_of_the_week=$(scrape -sk --retry 5 -H "X-Api-Key: ${NEWS_API_KEY}" -H 'User-Agent: Cazette/1.0' "https://api.newsdatahub.com/v1/news?language=fr&topic=technology&topic=business&topic=politics&topic=education&topic=innovation&topic=internet&q=${query}&start_date=${since}" | jq '.data') if [ -z "$news_of_the_week" ]; then return