From 8e7b75f81729dac6510af339e561d040fa21f747 Mon Sep 17 00:00:00 2001 From: bronze Date: Mon, 25 Dec 2023 03:20:20 -0500 Subject: [PATCH] 12/25/2023 fixed video bug added e621 support added gelbooru script to dl all images under given tags see new CHANGELOG.md --- CHANGELOG.md | 19 +++++++++ README.md | 10 +++-- dandl.sh | 3 +- dantag.sh | 0 e621dl.sh | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++ e621tag.sh | 72 ++++++++++++++++++++++++++++++++++ gelbdl-tag.sh | 100 +++++++++++++++++++++++++++++++++++++++++++++++ gelbdl.sh | 18 +++++++-- gelbtag.sh | 14 ++++++- moedl.sh | 0 10 files changed, 332 insertions(+), 9 deletions(-) create mode 100644 CHANGELOG.md mode change 100644 => 100755 dantag.sh create mode 100755 e621dl.sh create mode 100755 e621tag.sh create mode 100755 gelbdl-tag.sh mode change 100644 => 100755 moedl.sh diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f2ceb7a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# CHANGELOG + +## 12/25/2023 (Merry Christmas!) + +### FIXED +- bug where downloaded webm / mp4 files would not get tagged (gelbooru) +### ADDED +- e621.net support with `e621dl.sh` and `e621tag.sh` +- `gelbdl-tag.sh` for downloading all posts under given tags +- added "-d" option to `gelbdl.sh` and `gelbtag.sh` +- - sets the date of the file to the date it was uploaded to Gelbooru + + +## 9/29/2023 + +### ADDED +- Created repo with initial Gelbooru support +- Danbooru support +- Moebooru (konachan and yande.re) support \ No newline at end of file diff --git a/README.md b/README.md index c2867ae..84bf231 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,6 @@ scripts for downloading and tagging images from booru websites - see https://wiki.archlinux.org/title/Extended_attributes -- These took me a whole morning to write. Its very much WIP... - ## Depends on: `jq` for json parsing @@ -28,4 +26,10 @@ scripts for downloading and tagging images from booru websites `moedl.sh` downloads and tags files from moebooru imageboard (think konachan and yande.re) links in files.txt -`moetag.sh` hashes and looks up every image in a folder (provided you specify the booru) \ No newline at end of file +`moetag.sh` hashes and looks up every image in a folder (provided you specify the booru) + +## BUGS + +Until federation is added to gitea, I can't do issues here :( + +Please feel free to reach out to me on fedi at [@bronze@pl.kitsunemimi.club](https://pl.kitsunemimi.club/users/bronze) , [@bronze@wolfgirl.bar](https://wolfgirl.bar/users/bronze) or by email at [ bronze@kitsunemimi.club](mailto:bronze@kitsunemimi.club) \ No newline at end of file diff --git a/dandl.sh b/dandl.sh index fc2897b..6d4d62a 100755 --- a/dandl.sh +++ b/dandl.sh @@ -63,7 +63,8 @@ while read f; do curl -O -J "$FILE_URL" fi # ADD TAGS TO NEW IMAGE - setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" + setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE_MD5"* + setfattr --name=user.checksum --value="$FILE_MD5" "$FILE_MD5"* # DELAY BEFORE NEXT FETCH sleep $DELAY done < files.txt \ No newline at end of file diff --git a/dantag.sh b/dantag.sh old mode 100644 new mode 100755 diff --git a/e621dl.sh b/e621dl.sh new file mode 100755 index 0000000..9884c74 --- /dev/null +++ b/e621dl.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +USE_TOR=false +DELAY=1 +UA="Booru-Tools/0.1" +HAS_USERNAME=false +HAS_API_KEY=false + + +function usage { + echo "./$(basename $0) [-t] [-s]" + echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!" + echo " -h shows this help message" + echo " -t e621 BLOCKS TOR WITH CLOUDFLARE, DOES NOT WORK" + echo " -p downloads using proxy" + echo " -s sets the delay after each request, defaults to 1" + echo " -u e621.net username" + echo " -k e621.net API key" +} + +# list of arguments expected in the input +optstring=":hts:u:k:" + +while getopts ${optstring} arg; do + case ${arg} in + h) + usage + exit + ;; + t) + USE_TOR=true + echo "e621 BLOCKS TOR WITH CLOUDFLARE" + echo "EXITING SINCE SCRIPT WILL NOT WORK" + exit + ;; + s) + DELAY="${OPTARG}" + ;; + u) + HAS_USERNAME=true + USERNAME="${OPTARG}" + ;; + k) + HAS_API_KEY=true + API_KEY="${OPTARG}" + ;; + :) + echo "$0: Must supply an argument to -$OPTARG." >&2 + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 2 + ;; + esac +done + +while read f; do + echo "$f" + # MODIFY URL TO API CALL + JSON_URL=`echo $f | sed "s/\?q.*//g"` + JSON_URL="$JSON_URL.json" + # DOWNLOAD JSON + if $HAS_USERNAME && $HAS_API_KEY ; then + JSON_URL="$JSON_URL?login=$USERNAME&api_key=$API_KEY" + fi + echo $JSON_URL + if $USE_TOR; then + #JSON=`torsocks curl -A "$UA" -s "$JSON_URL"` + echo "e621 BLOCKS TOR WITH CLOUDFLARE" + echo "EXITING SINCE SCRIPT WILL NOT WORK" + exit + else + JSON=`curl -A "$UA" -s "$JSON_URL"` + fi + # STORE TAGS INTO VARIABLES (god what the hell) + FILE_URL=`echo $JSON | jq -r '.post | .file."url"'` + FILE_TAGS_GENERAL=`echo $JSON | jq -r '.post."tags"."general"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_ARTIST=`echo $JSON | jq -r '.post."tags"."artist"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_CHARACTER=`echo $JSON | jq -r '.post."tags"."character"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_SPECIES=`echo $JSON | jq -r '.post."tags"."species"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_META=`echo $JSON | jq -r '.post."tags"."meta"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_DESCRIPTION=`echo $JSON | jq -r '.post'.'description'` + FILE_MD5=`echo $JSON | jq -r '.post'.'file'.'md5'` + FILE_EXT=`echo $JSON | jq -r '.post'.'file'.'ext'` + FILE_TAGS=`echo "$FILE_TAGS_ARTIST,$FILE_TAGS_CHARACTER,$FILE_TAGS_GENERAL,$FILE_TAGS_SPECIES,$FILE_TAGS_META"` + #echo $FILE_TAGS + # DOWNLOAD FILE + if $USE_TOR; then + #torsocks curl -O -J $FILE_URL + echo "e621 BLOCKS TOR WITH CLOUDFLARE" + echo "EXITING SINCE SCRIPT WILL NOT WORK" + exit + else + curl -O -J $FILE_URL + fi + # ADD TAGS TO NEW IMAGE + FILE="$FILE_MD5.$FILE_EXT" + setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" + setfattr -n user.xdg.creator -v "$FILE_TAGS_ARTIST" "$FILE" + setfattr -n user.xdg.comment -v "$FILE_DESCRIPTION" "$FILE" + setfattr --name=user.checksum --value="$FILE_MD5" "$FILE" + # DELAY BEFORE NEXT FETCH + sleep $DELAY +done < files.txt \ No newline at end of file diff --git a/e621tag.sh b/e621tag.sh new file mode 100755 index 0000000..d3cd0c3 --- /dev/null +++ b/e621tag.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +USE_TOR=false +DELAY=1 +UA="Booru-Tools/0.1" + +function usage { + echo "./$(basename $0) [-t] [-s]" + echo "Tags existing pictures inside a folder" + echo " -h shows this help message" + echo " -t e621 BLOCKS TOR WITH CLOUDFLARE, DOES NOT WORK" + echo " -s sets the delay after each request, defaults to 1" +} + +# list of arguments expected in the input +optstring=":hts:" + +while getopts ${optstring} arg; do + case ${arg} in + h) + usage + exit + ;; + t) + USE_TOR=true + echo "e621 BLOCKS TOR WITH CLOUDFLARE" + echo "EXITING SINCE SCRIPT WILL NOT WORK" + exit + ;; + s) + DELAY="${OPTARG}" + ;; + :) + echo "$0: Must supply an argument to -$OPTARG." >&2 + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 2 + ;; + esac +done + +for FILE in *; do + echo "$FILE" + # GET MD5 HASH + FILE_MD5=`md5sum "$FILE" | awk '{print $1}'` + echo $FILE_MD5 + # DOWNLOAD JSON + URL=https://e621.net/posts.json?tags=md5: + if $USE_TOR; then + #JSON=`torsocks curl -A "$UA" -s "https://e621.net/posts.json?tags=md5:$FILE_MD5"` + echo "e621 BLOCKS TOR WITH CLOUDFLARE" + echo "EXITING SINCE SCRIPT WILL NOT WORK" + exit + else + JSON=`curl -s -A "$UA" "https://e621.net/posts.json?tags=md5:$FILE_MD5"` + fi + # STORE TAGS INTO VARIABLES (god what the hell) + FILE_TAGS_GENERAL=`echo $JSON | jq -r '.posts | .[] | ."tags"."general"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_ARTIST=`echo $JSON| jq -r '.posts | .[] | ."tags"."artist"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_CHARACTER=`echo $JSON| jq -r '.posts | .[] | ."tags"."character"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_SPECIES=`echo $JSON| jq -r '.posts | .[] | ."tags"."species"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS_META=`echo $JSON| jq -r '.posts | .[] | ."tags"."meta"' | sed 's/\"//g' | sed 's/\[//g' | sed 's/\]//g' | tr -d '[:space:]'` + FILE_TAGS=`echo "$FILE_TAGS_ARTIST,$FILE_TAGS_CHARACTER,$FILE_TAGS_GENERAL,$FILE_TAGS_SPECIES,$FILE_TAGS_META"` + echo $FILE_TAGS + # ADD TAGS TO IMAGE + setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" + setfattr -n user.xdg.creator -v "$FILE_TAGS_ARTIST" "$FILE" + # DELAY BEFORE NEXT FETCH + sleep $DELAY +done \ No newline at end of file diff --git a/gelbdl-tag.sh b/gelbdl-tag.sh new file mode 100755 index 0000000..b2016b2 --- /dev/null +++ b/gelbdl-tag.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +USE_TOR=false +DELAY=1 +LIMIT=200 + +function usage { + echo "./$(basename $0) [-t] [-s] [-l] -a tag -a tag2" + echo "Mass downloader for Gelbooru" + echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!" + echo " -h shows this help message" + echo " -t downloads using tor (requires torsocks)" + echo " -s sets the delay after each request, defaults to 1" + echo " -a tag or artist name" + echo " -l limit of single json request (defaults to 100)" +} + +# list of arguments expected in the input +optstring=":hts:a:l:" + +while getopts ${optstring} arg; do + case ${arg} in + h) + usage + exit + ;; + t) + USE_TOR=true + echo -n "Using Tor with IP: " + torsocks curl ip.me + ;; + s) + DELAY="${OPTARG}" + ;; + a) + TAGS+=("$OPTARG") + ;; + l) + LIMIT="${OPTARG}" + ;; + :) + echo "$0: Must supply an argument to -$OPTARG." >&2 + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 2 + ;; + esac +done + +for TAG in "${TAGS[@]}"; do + echo $TAG + # CREATE FOLDER AND CD INTO IT + mkdir -v $TAG + cd $TAG + # GET TAG TOTAL COUNT + if $USE_TOR; then + TAG_COUNT=`torsocks curl -s "https://gelbooru.com/index.php?page=dapi&s=tag&q=index&json=1&name=$TAG" | jq -r '."tag" | .[] | ."count"'` + else + TAG_COUNT=`curl -s "https://gelbooru.com/index.php?page=dapi&s=tag&q=index&json=1&name=$TAG" | jq -r '."tag" | .[] | ."count"'` + fi + + # NESTED LOOP TO GET ALL POSTS UNDER TAG + TAG_PAGES=$((TAG_COUNT / 100)) + for (( PAGE = 0; PAGE <= $TAG_PAGES; PAGE++ )) + do + if $USE_TOR; then + JSON+=`torsocks curl -s "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&pid=$PAGE&tags=$TAG" | jq -r '.post'` + else + JSON+=`curl -s "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&pid=$PAGE&tags=$TAG" | jq -r '.post'` + fi + sleep $DELAY + done + + # NESTED LOOP FOR IMAGES IN THIS TAG + echo $JSON | jq -c '.[]' | while read i; do + #echo $i | jq -r '."id"' + FILE_DATE=`echo $i | jq -r '."created_at"'` + FILE_URL=`echo $i | jq -r '."file_url"'` + FILE_MD5=`echo $i | jq -r '."md5"'` + FILE_TAGS=`echo $i | jq -r '."tags"' | sed 's/\ /,/g'` + FILE=`echo $FILE_URL | sed 's/\// /g' | awk '{print $NF}'` + + # DOWNLOAD FILE + if $USE_TOR; then + torsocks curl -O -J $FILE_URL + else + curl -O -J $FILE_URL + fi + # ADD TAGS TO NEW IMAGE + setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" + setfattr --name=user.checksum --value="$FILE_MD5" "$FILE" + touch -d "$FILE_DATE" "$FILE" + # DELAY BEFORE NEXT FETCH + sleep $DELAY + done + # BACK OUT OF FOLDER + cd .. +done \ No newline at end of file diff --git a/gelbdl.sh b/gelbdl.sh index 9d8344c..688b243 100755 --- a/gelbdl.sh +++ b/gelbdl.sh @@ -2,18 +2,21 @@ USE_TOR=false DELAY=1 +USE_DATE=false function usage { - echo "./$(basename $0) [-t] [-s]" + echo "./$(basename $0) [-t] [-s 1] [-d]" echo "Mass downloader for Gelbooru" echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!" echo " -h shows this help message" echo " -t downloads using tor (requires torsocks)" echo " -s sets the delay after each request, defaults to 1" + echo " -d sets the date of the file downloaded to the date it was uploaded to Gelbooru" + } # list of arguments expected in the input -optstring=":hts:" +optstring=":hts:d" while getopts ${optstring} arg; do case ${arg} in @@ -29,6 +32,9 @@ while getopts ${optstring} arg; do s) DELAY="${OPTARG}" ;; + d) + USE_DATE=true + ;; :) echo "$0: Must supply an argument to -$OPTARG." >&2 exit 1 @@ -53,7 +59,9 @@ while read f; do # STORE FILE URL AND TAGS INTO VARIABLES FILE_URL=`echo $JSON | jq -r '.post | .[] | ."file_url"'` FILE_TAGS=`echo $JSON | jq -r '.post | .[] | ."tags"' | sed 's/\ /,/g'` - FILE=`echo $JSON | jq -r '.post | .[] | ."image"'` + FILE_MD5=`echo $JSON | jq -r '.post | .[] | ."md5"'` + FILE_DATE=`echo $JSON | jq -r '.post | .[] | ."created_at"'` + FILE=`echo $FILE_URL | sed 's/\// /g' | awk '{print $NF}'` # DOWNLOAD FILE if $USE_TOR; then torsocks curl -O -J $FILE_URL @@ -62,6 +70,10 @@ while read f; do fi # ADD TAGS TO NEW IMAGE setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" + setfattr --name=user.checksum --value="$FILE_MD5" "$FILE" + if $USE_DATE; then + touch -d "$FILE_DATE" "$FILE" + fi # DELAY BEFORE NEXT FETCH sleep $DELAY done < files.txt \ No newline at end of file diff --git a/gelbtag.sh b/gelbtag.sh index 2cfdbcb..dc58d0b 100755 --- a/gelbtag.sh +++ b/gelbtag.sh @@ -2,6 +2,7 @@ USE_TOR=false DELAY=1 +USE_DATE=false function usage { echo "./$(basename $0) [-t] [-s]" @@ -10,10 +11,12 @@ function usage { echo " -h shows this help message" echo " -t downloads using tor (requires torsocks)" echo " -s sets the delay after each request, defaults to 1" + echo " -d sets the date of the file downloaded to the date it was uploaded to Gelbooru" + } # list of arguments expected in the input -optstring=":hts:" +optstring=":hts:d" while getopts ${optstring} arg; do case ${arg} in @@ -29,6 +32,9 @@ while getopts ${optstring} arg; do s) DELAY="${OPTARG}" ;; + d) + USE_DATE=true + ;; :) echo "$0: Must supply an argument to -$OPTARG." >&2 exit 1 @@ -52,8 +58,12 @@ for FILE in *; do fi # STORE TAGS INTO VARIABLES FILE_TAGS=`echo $JSON | jq -r '.post | .[] | ."tags"' | sed 's/\ /,/g'` + FILE_DATE=`echo $JSON | jq -r '.post | .[] | ."created_at"'` # ADD TAGS TO IMAGE setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" + if $USE_DATE; then + touch -d "$FILE_DATE" "$FILE" + fi # DELAY BEFORE NEXT FETCH sleep $DELAY -done +done \ No newline at end of file diff --git a/moedl.sh b/moedl.sh old mode 100644 new mode 100755