From 56120bbe7806a6cc4f6656639730b4c61bc7d5cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Ostroluck=C3=BD?=
Date: Sun, 17 Feb 2019 17:55:07 +0100
Subject: [PATCH] Adapt to reddit changes, trade grep for jq, parallelism

---
 README.md                    | 11 +++++------
 download-subreddit-images.sh | 24 ++++++++++++------------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 9c6cfc7..3c17dd5 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,9 @@ Simple Subreddit Image Downloader
 Tired of all of those reddit downloaders which want you to install tons of dependencies and then don't work anyway? Me too.
 
 *Simple Subreddit Image Downloader* is bash script which:
-- has minimal external dependencies
-- downloads full-size images from subreddits
-- is crossplatform (tested on windows with cygwin)
-- uses SSL connection
+- downloads ALL images from specified subreddit in full size
+- Linux/MacOS/Windows
+- Parallel download
 
 This script just downloads all directly linked images in subreddit. For more complex usage, use other reddit image downloader.
 
@@ -14,10 +13,10 @@ Requirements
 ============
 - bash (cygwin is OK)
 - wget
-- GNU grep (on MacOS install with `brew install grep --with-default-names`)
+- jq
 
 Usage
 =====
-`./rdit.sh `
+`./download-subreddit-images.sh `
 
 Script downloads images to folder named "down" in current directory. If you want to change that, you need to edit destination in rdit.sh for now.
\ No newline at end of file
diff --git a/download-subreddit-images.sh b/download-subreddit-images.sh
index bc70a04..d5b5fae 100644
--- a/download-subreddit-images.sh
+++ b/download-subreddit-images.sh
@@ -8,21 +8,21 @@ url="https://www.reddit.com/r/$subreddit/.json?raw_json=1"
 content=`wget -U "$useragent" -q -O - $url`
 mkdir -p $subreddit
 while : ; do
-    urls=$(echo -e "$content"|grep -Po '"source": {"url":.*?[^\\]",'|cut -f 6 -d '"')
-    names=$(echo -e "$content"|grep -Po '"title":.*?[^\\]",'|cut -f 4 -d '"')
-    ids=$(echo -e "$content"|grep -Po '"id":.*?[^\\]",'|cut -f 4 -d '"')
+    urls=$(echo -e "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")) | .data.preview.images[0].source.url')
+    names=$(echo -e "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")) | .data.title')
+    ids=$(echo -e "$content"| jq -r '.data.children[]|select(.data.post_hint|test("image")) | .data.id')
     a=1
-    for url in $(echo -e "$urls"); do
-        if [ -n "`echo "$url"|egrep \".gif|.jpg\"`" ]; then
-            name=`echo -e "$names"|sed -n "$a"p`
-            id=`echo -e "$ids"|sed -n "$a"p`
-            echo $name
-            newname="$name"_"$subreddit"_$id.${url##*.}
-            wget -U "$useragent" --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" $url
-        fi
+    wait # prevent spawning too many processes
+    for url in $urls; do
+        name=`echo -e "$names"|sed -n "$a"p`
+        id=`echo -e "$ids"|sed -n "$a"p`
+        ext=`echo -e "${url##*.}"|cut -d '?' -f 1`
+        newname="$name"_"$subreddit"_$id.$ext
+        echo $name
+        wget -U "$useragent" --no-check-certificate -nv -nc -P down -O "$subreddit/$newname" $url &>/dev/null &
         a=$(($a+1))
     done
-    after=$(echo -e "$content"|grep -Po '"after":.*?[^\\]",'|cut -f 4 -d '"'|tail -n 1)
+    after=$(echo -e "$content"| jq -r '.data.after')
     if [ -z $after ]; then
         break
     fi