Reddit-Image-Downloader-RID/reddit_image_downloader

65 lines
1.9 KiB
Plaintext
Raw Normal View History

2014-06-07 06:06:23 -04:00
#!/bin/bash
2020-02-14 17:32:14 -05:00
#
# Download images from a subreddit listing using Reddit's JSON API (needs wget and jq).
2014-06-07 06:06:23 -04:00
2019-06-15 13:18:11 -04:00
# Timeout value handed to every wget invocation in this script through its -T option.
timeout=60
2014-06-07 06:06:23 -04:00
2020-02-14 17:32:14 -05:00
# Print the command-line synopsis with two example invocations on stdout,
# then terminate the script with exit status 1.
usage() {
  printf '%s\n' \
    "Usage: ./download-subreddit-images.sh 'subreddit_name' [hot|new|rising|top|controversial] [number] [all|year|month|week|day]" \
    "Examples: ./download-subreddit-images.sh starterpacks new 10" \
    "./download-subreddit-images.sh funny top 50 month"
  exit 1
}
2014-06-07 06:06:23 -04:00
# Positional arguments. Only the subreddit is mandatory; the others fall back
# to the defaults below when missing or empty.
subreddit=${1:-}
sort=${2:-hot}      # listing to read; inserted into the request URL path
number=${3:-200}    # stop once this many images have been started
top_time=${4:-}     # sent as the t= query parameter; may stay empty

# Quoted test: an unquoted empty or multi-word value would break '[ -z ... ]'.
if [[ -z "$subreddit" ]]; then
  usage
fi

# The download loop compares its counter against $number with -gt, which
# needs a plain non-negative integer; reject anything else up front.
if [[ ! "$number" =~ ^[0-9]+$ ]]; then
  usage
fi
2020-01-19 21:29:19 -05:00
# Build the URL of the first listing page and fetch its JSON into $content.
url="https://www.reddit.com/r/$subreddit/$sort/.json?raw_json=1&t=$top_time"
# The URL is quoted because it contains '?', which an unquoted expansion
# would offer to the shell as a filename pattern.
content=$(wget -T "$timeout" -q -O - "$url")
# Downloaded files are stored in a directory named after the subreddit.
mkdir -p -- "$subreddit"
# 1-based counter of images handled so far; used for file numbering and for
# the stop condition against $number.
i=1
2014-06-07 06:06:23 -04:00
# Walk the listing page by page. For every post whose post_hint matches
# "image", start a background wget that saves the preview source image as
#   <subreddit>/<subreddit>_<sort><top_time>_<NNNN>_<title>_<id>.<ext>
# The script ends with status 0 once $number images have been started, or
# falls out of the loop when the listing reports no further page.
while :; do
  wait # prevent spawning too many processes: previous page must finish first

  # One jq pass emits "url<TAB>id<TAB>title" per image post, so the three
  # fields cannot drift out of step and no per-item sed lookup is needed.
  posts=$(printf '%s' "$content" | jq -r '.data.children[]|select(.data.post_hint|test("image")?) | "\(.data.preview.images[0].source.url)\t\(.data.id)\t\(.data.title)"')

  while IFS=$'\t' read -r img_url id name; do
    # The here-string always supplies one line, even when $posts is empty.
    [[ -n "$img_url" ]] || continue

    # Extension: text after the last '.', with any '?query' suffix removed.
    ext=${img_url##*.}
    ext=${ext%%\?*}
    # A '/' in the title would be taken as a directory separator by -O.
    name=${name//\//_}
    printf -v index '%04d' "$i"
    newname="${subreddit}_${sort}${top_time}_${index}_${name}_${id}.${ext}"
    # Fixed format string: titles may contain '%' or backslashes.
    printf '%s/%s : %s\n' "$i" "$number" "$newname"
    # NOTE(review): --no-check-certificate disables TLS verification; kept
    # from the original, confirm it is still required.
    wget -T "$timeout" --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" "$img_url" </dev/null &>/dev/null &
    ((i = i + 1))
    if [ "$i" -gt "$number" ]; then
      wait # let the downloads already started complete before exiting
      exit 0
    fi
  done <<<"$posts"

  # 'after' is the cursor for the next page; empty means the listing ended.
  after=$(printf '%s' "$content" | jq -r '.data.after//empty')
  if [[ -z "$after" ]]; then
    break
  fi
  url="https://www.reddit.com/r/$subreddit/$sort/.json?count=200&after=$after&raw_json=1&t=$top_time"
  content=$(wget -T "$timeout" --no-check-certificate -q -O - "$url")
done
wait # do not return while downloads from the last page are still running