From b23b438e68131060fd0045a8f9a27dd990e5d30d Mon Sep 17 00:00:00 2001
From: bronze
Date: Tue, 26 Dec 2023 19:09:22 -0500
Subject: [PATCH] new scripts + resume functionality

---
Reviewer notes (this section is ignored by git am):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The indentation of the gelbdl-tag.sh context lines and
  the spacing inside the usage strings are assumed (2-space indent); verify
  them against the tree or apply with `git apply --ignore-whitespace`.
- The `index` header lines of the three scripts were dropped because the
  blob IDs recorded in the original patch no longer match the revised content.
- dandl-tag.sh: the post ID list is now reset for every tag; previously IDs
  of earlier tags were downloaded again into later tag folders.
- all scripts: `mkdir -pv` (the folder always exists on a -r run) and `break`
  instead of `exit` for -r (same effect inside the piped loop, clearer intent).
- new scripts: blank IDs, posts without file_url and failed downloads are
  skipped, "Invalid option" goes to stderr, the usage text describes the tag
  downloader, and the tor/non-tor duplication is folded into fetch().
- moedl-tag.sh: file name taken from the last URL segment; mv is skipped when
  the name contains no %20.

 CHANGELOG.md  |   6 ++
 dandl-tag.sh  | 132 ++++++++++++++++++++++++++++++++++++++++++++++++
 gelbdl-tag.sh |  23 ++++++--
 moedl-tag.sh  | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 293 insertions(+), 6 deletions(-)
 create mode 100755 dandl-tag.sh
 create mode 100755 moedl-tag.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f2ceb7a..1b20781 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # CHANGELOG
 
+## 12/26/2023
+
+### ADDED
+- added `-r` resume option under `*-tag.sh` scripts
+- added `dandl-tag.sh` and `moedl-tag.sh`
+
 ## 12/25/2023 (Merry Christmas!)
 
 ### FIXED
diff --git a/dandl-tag.sh b/dandl-tag.sh
new file mode 100755
--- /dev/null
+++ b/dandl-tag.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# Mass downloader for Danbooru: for every tag given with -a, creates a folder
+# named after the tag and downloads each post found under that tag into it.
+# Requires curl, jq, setfattr and, with -t, torsocks.
+
+USE_TOR=false
+DELAY=1
+RESUME=false
+TAGS=()
+
+function usage {
+  echo "./$(basename "$0") [-t] [-s] [-r] -a tag -a tag2"
+  echo "Mass downloader for Danbooru"
+  echo "Creates one folder per tag and downloads every post under that tag into it."
+  echo "    -h shows this help message"
+  echo "    -t downloads using tor (requires torsocks)"
+  echo "    -s sets the delay after each request, defaults to 1"
+  echo "    -a tag or artist name"
+  echo "    -r will download until it hits a file that already exists"
+}
+
+# Runs curl with the given arguments, through torsocks when -t was passed.
+function fetch {
+  if $USE_TOR; then
+    torsocks curl "$@"
+  else
+    curl "$@"
+  fi
+}
+
+# list of arguments expected in the input
+optstring=":hts:a:r"
+
+while getopts "${optstring}" arg; do
+  case ${arg} in
+    h)
+      usage
+      exit
+      ;;
+    t)
+      USE_TOR=true
+      echo -n "Using Tor with IP: "
+      torsocks curl ip.me
+      ;;
+    s)
+      DELAY="${OPTARG}"
+      ;;
+    a)
+      TAGS+=("$OPTARG")
+      ;;
+    r)
+      RESUME=true
+      ;;
+    :)
+      echo "$0: Must supply an argument to -$OPTARG." >&2
+      exit 1
+      ;;
+    \?)
+      echo "Invalid option: -${OPTARG}." >&2
+      exit 2
+      ;;
+  esac
+done
+
+# Without at least one -a there is nothing to download.
+if ((${#TAGS[@]} == 0)); then
+  usage >&2
+  exit 1
+fi
+
+for TAG in "${TAGS[@]}"; do
+  echo "$TAG"
+  # CREATE FOLDER AND CD INTO IT (-p: the folder already exists when resuming)
+  mkdir -pv "$TAG"
+  cd "$TAG" || exit 1
+  # GET TAG TOTAL COUNT
+  TAG_COUNT=$(fetch -s "https://danbooru.donmai.us/tags.json?only=id,name,post_count&search\[name_matches\]=$TAG" | jq -r '.[].post_count')
+  # A failed or ambiguous lookup must not break the arithmetic below; fall back
+  # to fetching only the first page.
+  [[ "$TAG_COUNT" =~ ^[0-9]+$ ]] || TAG_COUNT=0
+  # NESTED LOOP TO GET ALL POSTS UNDER TAG
+  TAG_PAGES=$((TAG_COUNT / 200))
+  # Start from an empty list so IDs collected for a previous tag are not
+  # downloaded again into this tag's folder.
+  POST_IDS=""
+  for ((PAGE = 0; PAGE <= TAG_PAGES; PAGE++)); do
+    POST_IDS+=$(fetch -s "https://danbooru.donmai.us/posts.json?page=$PAGE&limit=200&tags=$TAG" | jq -r '.[].id')
+    POST_IDS+=$'\n'
+    sleep "$DELAY"
+  done
+  # NESTED LOOP FOR IMAGES IN THIS TAG
+  echo "$POST_IDS" | while read -r POST_ID; do
+    # Empty pages leave blank lines in the list.
+    [[ -n "$POST_ID" ]] || continue
+    JSON=$(fetch -s "https://danbooru.donmai.us/posts/$POST_ID.json")
+    # STORE FILE URL AND TAGS INTO VARIABLES
+    FILE_DATE=$(echo "$JSON" | jq -r '."created_at"')
+    FILE_URL=$(echo "$JSON" | jq -r '."file_url"')
+    FILE_TAGS=$(echo "$JSON" | jq -r '."tag_string"' | sed 's/\ /,/g')
+    FILE_MD5=$(echo "$JSON" | jq -r '.md5')
+    FILE_EXT=$(echo "$JSON" | jq -r '.file_ext')
+    FILE="$FILE_MD5.$FILE_EXT"
+    # jq prints "null" for a missing field; such a post cannot be downloaded.
+    if [[ -z "$FILE_URL" || "$FILE_URL" == "null" ]]; then
+      echo "Post $POST_ID has no file_url, skipping." >&2
+      sleep "$DELAY"
+      continue
+    fi
+    # RESUME: stop this tag at the first file that is already on disk
+    if $RESUME && [[ -f "$FILE" ]]; then
+      echo "$FILE exists."
+      break
+    fi
+    # DOWNLOAD FILE
+    fetch -O -J "$FILE_URL"
+    # touch below would create an empty file and fool a later -r run.
+    if [[ ! -f "$FILE" ]]; then
+      echo "Download of $FILE_URL did not produce $FILE, skipping." >&2
+      sleep "$DELAY"
+      continue
+    fi
+    # ADD TAGS TO NEW IMAGE
+    setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE"
+    setfattr --name=user.checksum --value="$FILE_MD5" "$FILE"
+    # SET TIME TO TIME UPLOADED
+    touch -d "$FILE_DATE" "$FILE"
+    # DELAY BEFORE NEXT FETCH
+    sleep "$DELAY"
+  done
+  # BACK OUT OF FOLDER
+  cd ..
+done
diff --git a/gelbdl-tag.sh b/gelbdl-tag.sh
--- a/gelbdl-tag.sh
+++ b/gelbdl-tag.sh
@@ -2,9 +2,10 @@
 
 USE_TOR=false
 DELAY=1
+RESUME=false
 
 function usage {
-  echo "./$(basename "$0") [-t] [-s] [-l] -a tag -a tag2"
+  echo "./$(basename "$0") [-t] [-s] [-r] -a tag -a tag2"
   echo "Mass downloader for Gelbooru"
   echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!"
   echo "    -h shows this help message"
@@ -14,7 +15,7 @@ function usage {
 }
 
 # list of arguments expected in the input
-optstring=":hts:a:l:"
+optstring=":hts:a:r"
 
 while getopts ${optstring} arg; do
   case ${arg} in
@@ -33,6 +34,9 @@ while getopts ${optstring} arg; do
     a)
       TAGS+=("$OPTARG")
       ;;
+    r)
+      RESUME=true
+      ;;
     :)
       echo "$0: Must supply an argument to -$OPTARG." >&2
       exit 1
@@ -47,8 +51,8 @@ done
 for TAG in "${TAGS[@]}"; do
   echo "$TAG"
   # CREATE FOLDER AND CD INTO IT
-  mkdir -v $TAG
-  cd $TAG
+  mkdir -pv "$TAG"
+  cd "$TAG" || exit
   # GET TAG TOTAL COUNT
   if $USE_TOR; then
     TAG_COUNT=$(torsocks curl -s "https://gelbooru.com/index.php?page=dapi&s=tag&q=index&json=1&name=$TAG" | jq -r '."tag" | .[] | ."count"')
@@ -58,7 +62,7 @@ for TAG in "${TAGS[@]}"; do
 
   # NESTED LOOP TO GET ALL POSTS UNDER TAG
   TAG_PAGES=$((TAG_COUNT / 100))
-  for (( PAGE = 0; PAGE <= $TAG_PAGES; PAGE++ ))
+  for (( PAGE = 0; PAGE <= TAG_PAGES; PAGE++ ))
   do
     if $USE_TOR; then
       JSON+=$(torsocks curl -s "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&pid=$PAGE&tags=$TAG" | jq -r '.post')
@@ -69,7 +73,7 @@ for TAG in "${TAGS[@]}"; do
   done
 
   # NESTED LOOP FOR IMAGES IN THIS TAG
-  echo "$JSON" | jq -c '.[]' | while read i; do
+  echo "$JSON" | jq -c '.[]' | while read -r i; do
     #echo $i | jq -r '."id"'
     FILE_DATE=$(echo "$i" | jq -r '."created_at"')
     FILE_URL=$(echo "$i" | jq -r '."file_url"')
@@ -77,6 +81,13 @@ for TAG in "${TAGS[@]}"; do
     FILE_TAGS=$(echo "$i" | jq -r '."tags"' | sed 's/\ /,/g')
     FILE=$(echo "$FILE_URL" | sed 's/\// /g' | awk '{print $NF}')
 
+    if $RESUME; then
+      if [[ -f "$FILE" ]]; then
+        echo "$FILE exists."
+        break
+      fi
+    fi
+
     # DOWNLOAD FILE
     if $USE_TOR; then
       torsocks curl -O -J "$FILE_URL"
diff --git a/moedl-tag.sh b/moedl-tag.sh
new file mode 100755
--- /dev/null
+++ b/moedl-tag.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+# Mass downloader for moebooru imageboards: for every tag given with -a,
+# creates a folder named after the tag and downloads each post found under
+# that tag into it. Requires curl, jq, setfattr and, with -t, torsocks.
+
+USE_TOR=false
+DELAY=1
+RESUME=false
+URL="konachan.com"
+TAGS=()
+
+function usage {
+  echo "./$(basename "$0") [-t] [-s] [-r] [-c site.com] -a tag -a tag2"
+  echo "Mass downloader for moebooru imageboards (think konachan and yande.re)"
+  echo "Creates one folder per tag and downloads every post under that tag into it."
+  echo "    -h shows this help message"
+  echo "    -t downloads using tor (requires torsocks)"
+  echo "    -s sets the delay after each request, defaults to 1"
+  echo "    -a tag or artist name"
+  echo "    -r will download until it hits a file that already exists"
+  echo "    -c custom url (defaults to konachan if unset)"
+}
+
+# Runs curl with the given arguments, through torsocks when -t was passed.
+function fetch {
+  if $USE_TOR; then
+    torsocks curl "$@"
+  else
+    curl "$@"
+  fi
+}
+
+# list of arguments expected in the input
+optstring=":hts:a:rc:"
+
+while getopts "${optstring}" arg; do
+  case ${arg} in
+    h)
+      usage
+      exit
+      ;;
+    t)
+      USE_TOR=true
+      echo -n "Using Tor with IP: "
+      torsocks curl ip.me
+      ;;
+    s)
+      DELAY="${OPTARG}"
+      ;;
+    a)
+      TAGS+=("$OPTARG")
+      ;;
+    r)
+      RESUME=true
+      ;;
+    c)
+      URL="${OPTARG}"
+      ;;
+    :)
+      echo "$0: Must supply an argument to -$OPTARG." >&2
+      exit 1
+      ;;
+    \?)
+      echo "Invalid option: -${OPTARG}." >&2
+      exit 2
+      ;;
+  esac
+done
+
+# Without at least one -a there is nothing to download.
+if ((${#TAGS[@]} == 0)); then
+  usage >&2
+  exit 1
+fi
+
+for TAG in "${TAGS[@]}"; do
+  echo "$TAG"
+  # CREATE FOLDER AND CD INTO IT (-p: the folder already exists when resuming)
+  mkdir -pv "$TAG"
+  cd "$TAG" || exit 1
+  # GET TAG TOTAL COUNT
+  TAG_COUNT=$(fetch -s "https://$URL/tag.json?name=$TAG" | jq -r '.[]."count"')
+  # A failed or ambiguous lookup must not break the arithmetic below; fall back
+  # to fetching only the first page.
+  [[ "$TAG_COUNT" =~ ^[0-9]+$ ]] || TAG_COUNT=0
+  # NESTED LOOP TO GET ALL POSTS UNDER TAG
+  TAG_PAGES=$((TAG_COUNT / 100))
+  ID_LIST=""
+  for ((PAGE = 0; PAGE <= TAG_PAGES; PAGE++)); do
+    ID_LIST+=$(fetch -s "https://$URL/post.json?page=$PAGE&limit=100&tags=$TAG" | jq -r '.[]."id"')
+    ID_LIST+=$'\n'
+    sleep "$DELAY"
+  done
+  # NESTED LOOP FOR IMAGES IN THIS TAG
+  echo "$ID_LIST" | while read -r IMAGE_ID; do
+    # Empty pages leave blank lines in the list.
+    [[ -n "$IMAGE_ID" ]] || continue
+    # DOWNLOAD JSON
+    JSON=$(fetch -s "https://$URL/post.json?tags=id:$IMAGE_ID")
+    # STORE FILE URL AND TAGS INTO VARIABLES
+    FILE_DATE=$(echo "$JSON" | jq -r '.[]."created_at"')
+    FILE_URL=$(echo "$JSON" | jq -r '.[] | ."file_url"')
+    FILE_TAGS=$(echo "$JSON" | jq -r '.[] | ."tags"' | sed 's/\ /,/g')
+    # jq prints "null" for a missing field; such a post cannot be downloaded.
+    if [[ -z "$FILE_URL" || "$FILE_URL" == "null" ]]; then
+      echo "Post $IMAGE_ID has no file_url, skipping." >&2
+      sleep "$DELAY"
+      continue
+    fi
+    # FILE is the last URL segment, still percent-encoded (the name the
+    # download is expected under); FILE_WITHSPACE has %20 decoded to spaces.
+    FILE="${FILE_URL##*/}"
+    FILE_WITHSPACE="${FILE//\%20/ }"
+    # RESUME: stop this tag at the first file that is already on disk
+    if $RESUME && [[ -f "$FILE_WITHSPACE" ]]; then
+      echo "$FILE_WITHSPACE exists."
+      break
+    fi
+    # DOWNLOAD FILE
+    fetch -O -J "$FILE_URL"
+    # touch below would create an empty file and fool a later -r run.
+    if [[ ! -f "$FILE" ]]; then
+      echo "Download of $FILE_URL did not produce $FILE, skipping." >&2
+      sleep "$DELAY"
+      continue
+    fi
+    # ADD TAGS TO NEW IMAGE
+    setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE"
+    # RENAME TO THE DECODED NAME (mv refuses identical source and target)
+    [[ "$FILE" == "$FILE_WITHSPACE" ]] || mv -- "$FILE" "$FILE_WITHSPACE"
+    # SET TIME TO TIME UPLOADED (created_at is passed to touch as epoch seconds)
+    touch -d "@$FILE_DATE" "$FILE_WITHSPACE"
+    # DELAY BEFORE NEXT FETCH
+    sleep "$DELAY"
+  done
+  # BACK OUT OF FOLDER
+  cd ..
+done