Add number option.

This commit is contained in:
dbeley 2020-02-14 23:32:14 +01:00
parent 885219acd1
commit ed5cbeb988
4 changed files with 43 additions and 29 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
tags

0
LICENSE Normal file → Executable file
View File

18
README.md Normal file → Executable file
View File

@ -1,26 +1,18 @@
Simple Subreddit Image Downloader
==========================
Tired of all of those reddit downloaders which want you to install tons of dependencies and then don't work anyway? Me too.
*Simple Subreddit Image Downloader* is a bash script which:
- downloads ALL images from specified subreddit in full size
- Linux/MacOS/Windows
- Parallel download
This script just downloads all directly linked images in a subreddit. It can also download with a specific sort order. For more complex usage, use another reddit image downloader.
A simple script that downloads images from a subreddit.
Requirements
============
- bash (cygwin is OK)
- bash
- wget
- jq
Usage
=====
```
./download-subreddit-images.sh <subreddit_name>
./download-subreddit-images.sh <subreddit_name> <hot|new|rising|top>
./download-subreddit-images.sh <subreddit_name> top <all|year|month|week|day>
Usage: ./download-subreddit-images.sh 'subreddit_name' [hot|new|rising|top] [number] [all|year|month|week|day]
Examples: ./download-subreddit-images.sh starterpacks new 10
./download-subreddit-images.sh funny top 50 month
```
The script downloads images to the `<subreddit_name>` folder in the current directory. If you want to change that, you need to edit the destination in rdit.sh for now.

53
download-subreddit-images.sh Normal file → Executable file
View File

@ -1,43 +1,64 @@
#!/bin/bash
#
# Download images from a subreddit.
#
# Configuration.
#cfg
# User-Agent string; passed to wget via -U by the invocations that reference it.
useragent="Love by u/gadelat"
# Network timeout in seconds; passed to wget via -T by the wget invocations below.
timeout=60
# Print the command-line synopsis and two example invocations to stdout,
# then terminate the script with exit status 1.
usage() {
  # One argument per output line; the '%s\n' format is reused for each of them.
  printf '%s\n' \
    "Usage: ./download-subreddit-images.sh 'subreddit_name' [hot|new|rising|top|controversial] [number] [all|year|month|week|day]" \
    "Examples: ./download-subreddit-images.sh starterpacks new 10" \
    "./download-subreddit-images.sh funny top 50 month"
  exit 1
}
# NOTE(review): this section looks like a rendered diff whose +/- markers were
# stripped, so pre-change and post-change lines sit next to each other (e.g.
# the two top_time assignments and the two wget fetches). Confirm which lines
# are live before running this as a script.

# Positional arguments: $1 subreddit name, $2 sort order. $3 is assigned to
# both top_time and number; top_time is then reassigned from $4.
subreddit=$1
sort=$2
top_time=$3
number=$3
top_time=$4
# No subreddit given: show the usage text (usage exits with status 1).
if [ -z $subreddit ]; then
usage
fi
# Default sort order when none was given.
if [ -z $sort ]; then
sort="hot"
fi
# Two spellings of the same emptiness check on top_time follow, closed by a
# single `fi` -- presumably one of them is the removed diff line; confirm.
if [ -z $top_time ];then
if [ -z $top_time ]; then
top_time=""
fi
# Default for number, which the download loop uses as its stop threshold.
if [ -z $number ]; then
number=200
fi
# URL of the first listing page; t= carries the top_time value.
url="https://www.reddit.com/r/$subreddit/$sort/.json?raw_json=1&t=$top_time"
# Fetch the listing JSON quietly (-q) to stdout (-O -) and keep it in content.
# The backtick form also sends the User-Agent; the $(...) form does not.
content=`wget -T $timeout -U "$useragent" -q -O - $url`
content=$(wget -T $timeout -q -O - $url)
# Downloads are written into a directory named after the subreddit.
mkdir -p $subreddit
# NOTE(review): this loop looks like a rendered diff whose +/- markers were
# stripped; several statements appear twice in old/new form. Confirm which
# lines are live before running this as a script.

# i counts images across all pages; it feeds the zero-padded index in the
# second filename scheme and the stop check against number.
i=1
# Each pass of the loop processes the listing page currently held in content.
while : ; do
# For every post whose post_hint matches "image", extract the preview source
# URL, the title and the post id (one value per line). Each extraction appears
# twice, differing only in the spacing before the pipe.
urls=$(echo -n "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.preview.images[0].source.url')
names=$(echo -n "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.title')
ids=$(echo -n "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.id')
urls=$(echo -n "$content" | jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.preview.images[0].source.url')
names=$(echo -n "$content" | jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.title')
ids=$(echo -n "$content" | jq -r '.data.children[]|select(.data.post_hint|test("image")?) | .data.id')
# a is the 1-based line number used to pick the title/id matching the current URL.
a=1
wait # prevent spawning too many processes
# Unquoted $urls is word-split, so each URL becomes one iteration.
for url in $urls; do
# Backtick variant: a-th title, a-th id, and the extension (text after the
# last '.' of the URL, cut at the first '?').
name=`echo -n "$names"|sed -n "$a"p`
id=`echo -n "$ids"|sed -n "$a"p`
ext=`echo -n "${url##*.}"|cut -d '?' -f 1`
# Filename scheme <title>_<subreddit>_<id>.<ext>; the title is echoed as progress.
newname="$name"_"$subreddit"_$id.$ext
echo $name
# Download to "$subreddit/$newname" in the background, discarding wget's output.
wget -T $timeout -U "$useragent" --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" $url &>/dev/null &
a=$(($a+1))
# $(...) variant of the same three lookups.
name=$(echo -n "$names" | sed -n "$a"p)
id=$(echo -n "$ids" | sed -n "$a"p)
ext=$(echo -n "${url##*.}" | cut -d '?' -f 1)
# Filename scheme <subreddit>_<sort><timeframe>_<4-digit index>_<title>_<id>.<ext>.
# NOTE(review): timeframe is not assigned anywhere in the visible code (the
# time window is stored in top_time), so it presumably expands to nothing -- confirm.
newname="$subreddit"_"$sort""$timeframe"_"$(printf "%04d" $i)"_"$name"_$id.$ext
# Progress line "i/number : filename".
# NOTE(review): newname is part of the printf format string here, so a '%' or
# backslash in a title would be interpreted by printf.
printf "$i/$number : $newname\n"
# Same background download as above, without the -U User-Agent option.
wget -T $timeout --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" $url &>/dev/null &
# Advance the per-page line index and the global image counter.
((a=a+1))
((i=i+1))
# Exit the script with status 0 once i exceeds number.
if [ $i -gt $number ] ; then
exit 0
fi
done
# .data.after is the token for the next page; `//empty` yields no output when
# it is null or missing.
after=$(echo -n "$content"| jq -r '.data.after//empty')
# No token means there is no further page: leave the loop.
if [ -z $after ]; then
break
fi
# URL of the next listing page, continuing after the token.
url="https://www.reddit.com/r/$subreddit/$sort/.json?count=200&after=$after&raw_json=1&t=$top_time"
# Fetch the next page into content; the two forms differ only in the -U User-Agent option.
content=`wget -T $timeout -U "$useragent" --no-check-certificate -q -O - $url`
content=`wget -T $timeout --no-check-certificate -q -O - $url`
done