12/25/2023

fixed video bug
added e621 support
added gelbooru script to dl all images under given tags

see new CHANGELOG.md
This commit is contained in:
bronze 2023-12-25 03:20:20 -05:00
parent 11ce6fa401
commit 8e7b75f817
10 changed files with 332 additions and 9 deletions

19
CHANGELOG.md Normal file
View File

@ -0,0 +1,19 @@
# CHANGELOG
## 12/25/2023 (Merry Christmas!)
### FIXED
- bug where downloaded webm / mp4 files would not get tagged (gelbooru)
### ADDED
- e621.net support with `e621dl.sh` and `e621tag.sh`
- `gelbdl-tag.sh` for downloading all posts under given tags
- added "-d" option to `gelbdl.sh` and `gelbtag.sh`
  - sets the date of the file to the date it was uploaded to Gelbooru
## 9/29/2023
### ADDED
- Created repo with initial Gelbooru support
- Danbooru support
- Moebooru (konachan and yande.re) support

View File

@ -8,8 +8,6 @@ scripts for downloading and tagging images from booru websites
- see https://wiki.archlinux.org/title/Extended_attributes - see https://wiki.archlinux.org/title/Extended_attributes
- These took me a whole morning to write. Its very much WIP...
## Depends on: ## Depends on:
`jq` for json parsing `jq` for json parsing
@ -28,4 +26,10 @@ scripts for downloading and tagging images from booru websites
`moedl.sh` downloads and tags files from moebooru imageboard (think konachan and yande.re) links in files.txt `moedl.sh` downloads and tags files from moebooru imageboard (think konachan and yande.re) links in files.txt
`moetag.sh` hashes and looks up every image in a folder (provided you specify the booru) `moetag.sh` hashes and looks up every image in a folder (provided you specify the booru)
## BUGS
Until federation is added to gitea, I can't do issues here :(
Please feel free to reach out to me on fedi at [@bronze@pl.kitsunemimi.club](https://pl.kitsunemimi.club/users/bronze), [@bronze@wolfgirl.bar](https://wolfgirl.bar/users/bronze), or by email at [bronze@kitsunemimi.club](mailto:bronze@kitsunemimi.club).

View File

@ -63,7 +63,8 @@ while read f; do
curl -O -J "$FILE_URL" curl -O -J "$FILE_URL"
fi fi
# ADD TAGS TO NEW IMAGE # ADD TAGS TO NEW IMAGE
setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE_MD5"*
setfattr --name=user.checksum --value="$FILE_MD5" "$FILE_MD5"*
# DELAY BEFORE NEXT FETCH # DELAY BEFORE NEXT FETCH
sleep $DELAY sleep $DELAY
done < files.txt done < files.txt

0
dantag.sh Normal file → Executable file
View File

105
e621dl.sh Executable file
View File

@ -0,0 +1,105 @@
#!/bin/bash
# e621dl.sh - downloads every e621.net post listed in ./files.txt and stores
# its tags, artist, description and MD5 as extended attributes on the file.

# Defaults; the command-line options override them.
USE_TOR=false        # -t: always rejected, e621 blocks Tor
DELAY=1              # -s: seconds to sleep after each post
UA="Booru-Tools/0.1" # User-Agent sent with the API requests
HAS_USERNAME=false   # becomes true once -u was given
HAS_API_KEY=false    # becomes true once -k was given

# Print the help text to stdout.
# Only options that the option parser really accepts are listed here.
function usage {
  # "$0" is quoted so a script path containing spaces does not break basename
  echo "./$(basename -- "$0") [-t] [-s delay] [-u username] [-k api_key]"
  echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!"
  echo " -h shows this help message"
  echo " -t e621 BLOCKS TOR WITH CLOUDFLARE, DOES NOT WORK"
  echo " -s sets the delay after each request, defaults to 1"
  echo " -u e621.net username"
  echo " -k e621.net API key"
}
# list of arguments expected in the input
# (the leading ':' selects getopts' silent mode: a missing argument arrives
# as ':' and an unknown option as '?', both with the option letter in OPTARG)
optstring=":hts:u:k:"

# Parse the command-line options into the global settings
# USE_TOR, DELAY, HAS_USERNAME/USERNAME and HAS_API_KEY/API_KEY.
# Arguments: the script's own arguments ("$@")
# Exits:     0 after -h, 1 for -t or a missing option argument,
#            2 for an unknown option
function parse_args {
  # a local OPTIND lets the function be called more than once
  local arg OPTIND=1
  while getopts "${optstring}" arg; do
    case "${arg}" in
      h)
        usage
        exit 0
        ;;
      t)
        USE_TOR=true
        # this is a failure, so report it on stderr with a non-zero status
        echo "e621 BLOCKS TOR WITH CLOUDFLARE" >&2
        echo "EXITING SINCE SCRIPT WILL NOT WORK" >&2
        exit 1
        ;;
      s)
        DELAY="${OPTARG}"
        ;;
      u)
        HAS_USERNAME=true
        USERNAME="${OPTARG}"
        ;;
      k)
        HAS_API_KEY=true
        API_KEY="${OPTARG}"
        ;;
      :)
        echo "$0: Must supply an argument to -$OPTARG." >&2
        exit 1
        ;;
      \?)
        # escaped so it matches a literal '?' instead of any single character
        echo "Invalid option: -${OPTARG}." >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# Defensive guard: the option parser already rejects -t, because the
# download cannot work through Tor (e621 blocks it with Cloudflare).
if [[ "$USE_TOR" == true ]]; then
  echo "e621 BLOCKS TOR WITH CLOUDFLARE" >&2
  echo "EXITING SINCE SCRIPT WILL NOT WORK" >&2
  exit 1
fi

# Download one e621 post and tag the downloaded file.
# Globals:   UA, HAS_USERNAME, USERNAME, HAS_API_KEY, API_KEY (read)
# Arguments: $1 - post URL as pasted into files.txt
# Returns:   0 when the file was tagged, 1 when the post had to be skipped
function download_post {
  local post_url=$1
  local json_url json file file_url file_md5 file_ext
  local file_description file_tags file_tags_artist

  echo "$post_url"

  # MODIFY URL TO API CALL
  # Drop the whole query string (not only "?q=..."), then ask for JSON.
  json_url="${post_url%%[?]*}.json"
  # Print the request before the credentials are appended so the API key
  # is never written to the terminal.
  echo "$json_url"
  if [[ "$HAS_USERNAME" == true && "$HAS_API_KEY" == true ]]; then
    json_url="$json_url?login=$USERNAME&api_key=$API_KEY"
  fi

  # DOWNLOAD JSON
  json=$(curl -A "$UA" -s "$json_url")

  # STORE TAGS INTO VARIABLES
  # jq builds the comma-separated lists itself; "// empty" and "// []" turn
  # missing fields into empty strings instead of the literal text "null".
  file_url=$(jq -r '.post.file.url // empty' <<<"$json")
  file_md5=$(jq -r '.post.file.md5 // empty' <<<"$json")
  file_ext=$(jq -r '.post.file.ext // empty' <<<"$json")
  file_description=$(jq -r '.post.description // empty' <<<"$json")
  file_tags_artist=$(jq -r '.post.tags.artist // [] | join(",")' <<<"$json")
  # Same category order as before (artist, character, general, species, meta);
  # empty categories no longer leave stray commas behind.
  file_tags=$(jq -r '.post.tags
    | [.artist, .character, .general, .species, .meta]
    | map(. // []) | add | join(",")' <<<"$json")

  if [[ -z "$file_url" || -z "$file_md5" || -z "$file_ext" ]]; then
    echo "No downloadable file in the API response for $post_url, skipping" >&2
    return 1
  fi

  # DOWNLOAD FILE
  # A curl error is not fatal: curl -J refuses to overwrite a file from an
  # earlier run, and that file should still be (re)tagged below.
  curl -O -J "$file_url" || echo "curl reported an error for $file_url" >&2

  # ADD TAGS TO NEW IMAGE
  file="$file_md5.$file_ext"
  if [[ ! -f "$file" ]]; then
    echo "$file was not downloaded, nothing to tag" >&2
    return 1
  fi
  setfattr -n user.xdg.tags -v "$file_tags" "$file"
  setfattr -n user.xdg.creator -v "$file_tags_artist" "$file"
  setfattr -n user.xdg.comment -v "$file_description" "$file"
  setfattr --name=user.checksum --value="$file_md5" "$file"
}

# One post URL per line; "|| [[ -n ... ]]" keeps a last line that has no
# trailing newline, -r keeps backslashes intact.
while IFS= read -r f || [[ -n "$f" ]]; do
  f=${f%$'\r'} # tolerate files.txt saved with Windows line endings
  [[ -n "$f" ]] || continue
  download_post "$f"
  # DELAY BEFORE NEXT FETCH
  sleep "$DELAY"
done < files.txt

72
e621tag.sh Executable file
View File

@ -0,0 +1,72 @@
#!/bin/bash
# e621tag.sh - looks up every file of the current folder on e621.net by its
# MD5 and stores the post's tags and artist as extended attributes.

# Defaults; the command-line options override them.
USE_TOR=false        # -t: always rejected, e621 blocks Tor
DELAY=1              # -s: seconds to sleep after each request
UA="Booru-Tools/0.1" # User-Agent sent with the API requests

# Print the help text to stdout.
function usage {
  # "$0" is quoted so a script path containing spaces does not break basename
  echo "./$(basename -- "$0") [-t] [-s delay]"
  echo "Tags existing pictures inside a folder"
  echo " -h shows this help message"
  echo " -t e621 BLOCKS TOR WITH CLOUDFLARE, DOES NOT WORK"
  echo " -s sets the delay after each request, defaults to 1"
}
# list of arguments expected in the input
# (the leading ':' selects getopts' silent mode: a missing argument arrives
# as ':' and an unknown option as '?', both with the option letter in OPTARG)
optstring=":hts:"

# Parse the command-line options into the global settings USE_TOR and DELAY.
# Arguments: the script's own arguments ("$@")
# Exits:     0 after -h, 1 for -t or a missing option argument,
#            2 for an unknown option
function parse_args {
  # a local OPTIND lets the function be called more than once
  local arg OPTIND=1
  while getopts "${optstring}" arg; do
    case "${arg}" in
      h)
        usage
        exit 0
        ;;
      t)
        USE_TOR=true
        # this is a failure, so report it on stderr with a non-zero status
        echo "e621 BLOCKS TOR WITH CLOUDFLARE" >&2
        echo "EXITING SINCE SCRIPT WILL NOT WORK" >&2
        exit 1
        ;;
      s)
        DELAY="${OPTARG}"
        ;;
      :)
        echo "$0: Must supply an argument to -$OPTARG." >&2
        exit 1
        ;;
      \?)
        # escaped so it matches a literal '?' instead of any single character
        echo "Invalid option: -${OPTARG}." >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# Lookup endpoint; the MD5 of a file is appended to search for its post.
URL="https://e621.net/posts.json?tags=md5:"

# Defensive guard: the option parser already rejects -t, because the
# lookup cannot work through Tor (e621 blocks it with Cloudflare).
if [[ "$USE_TOR" == true ]]; then
  echo "e621 BLOCKS TOR WITH CLOUDFLARE" >&2
  echo "EXITING SINCE SCRIPT WILL NOT WORK" >&2
  exit 1
fi

# Tag every regular file in the current folder.
for FILE in *; do
  # directories (and an unmatched "*") cannot be hashed
  [[ -f "$FILE" ]] || continue
  echo "$FILE"

  # GET MD5 HASH
  # Hashing stdin keeps odd file names (leading '-', backslashes) out of
  # md5sum's argument list and output; the hash is the first word.
  FILE_MD5=$(md5sum <"$FILE")
  FILE_MD5=${FILE_MD5%% *}
  echo "$FILE_MD5"

  # DOWNLOAD JSON
  JSON=$(curl -s -A "$UA" "${URL}${FILE_MD5}")

  # Only tag when the search returned a post; otherwise the file would get
  # the junk tag list ",,,," written over whatever it had before.
  if [[ "$(jq -r '.posts | length' <<<"$JSON" 2>/dev/null)" =~ ^[1-9][0-9]*$ ]]; then
    # STORE TAGS INTO VARIABLES
    # jq joins the lists itself; empty categories leave no stray commas.
    FILE_TAGS_ARTIST=$(jq -r '.posts[0].tags.artist // [] | join(",")' <<<"$JSON")
    FILE_TAGS=$(jq -r '.posts[0].tags
      | [.artist, .character, .general, .species, .meta]
      | map(. // []) | add | join(",")' <<<"$JSON")
    echo "$FILE_TAGS"

    # ADD TAGS TO IMAGE
    setfattr -n user.xdg.tags -v "$FILE_TAGS" -- "$FILE"
    setfattr -n user.xdg.creator -v "$FILE_TAGS_ARTIST" -- "$FILE"
  else
    echo "No e621 post found for $FILE, leaving it untagged" >&2
  fi

  # DELAY BEFORE NEXT FETCH
  sleep "$DELAY"
done

100
gelbdl-tag.sh Executable file
View File

@ -0,0 +1,100 @@
#!/bin/bash
# gelbdl-tag.sh - downloads every Gelbooru post found under the given tags,
# one folder per tag, and stores tags and MD5 as extended attributes.

# Defaults; the command-line options override them.
USE_TOR=false # -t: route curl through torsocks
DELAY=1       # -s: seconds to sleep after each request
LIMIT=100     # -l: posts per JSON request; matches the documented default

# Print the help text to stdout.
function usage {
  # "$0" is quoted so a script path containing spaces does not break basename
  echo "./$(basename -- "$0") [-t] [-s delay] [-l limit] -a tag [-a tag2 ...]"
  echo "Mass downloader for Gelbooru"
  echo "Creates one folder per given tag and downloads all posts under that tag into it"
  echo " -h shows this help message"
  echo " -t downloads using tor (requires torsocks)"
  echo " -s sets the delay after each request, defaults to 1"
  echo " -a tag or artist name"
  echo " -l limit of single json request (defaults to 100)"
}
# list of arguments expected in the input
# (the leading ':' selects getopts' silent mode: a missing argument arrives
# as ':' and an unknown option as '?', both with the option letter in OPTARG)
optstring=":hts:a:l:"

# Tags collected from every -a option, in command-line order.
TAGS=()

# Parse the command-line options into the global settings
# USE_TOR, DELAY, LIMIT and TAGS.
# Arguments: the script's own arguments ("$@")
# Exits:     0 after -h, 1 for a missing option argument,
#            2 for an unknown option
function parse_args {
  # a local OPTIND lets the function be called more than once
  local arg OPTIND=1
  while getopts "${optstring}" arg; do
    case "${arg}" in
      h)
        usage
        exit 0
        ;;
      t)
        USE_TOR=true
        # show the address the requests will come from
        echo -n "Using Tor with IP: "
        torsocks curl ip.me
        ;;
      s)
        DELAY="${OPTARG}"
        ;;
      a)
        TAGS+=("$OPTARG")
        ;;
      l)
        LIMIT="${OPTARG}"
        ;;
      :)
        echo "$0: Must supply an argument to -$OPTARG." >&2
        exit 1
        ;;
      \?)
        # escaped so it matches a literal '?' instead of any single character
        echo "Invalid option: -${OPTARG}." >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# Run curl with the given arguments, through torsocks when -t was given.
function gelb_curl {
  if [[ "$USE_TOR" == true ]]; then
    torsocks curl "$@"
  else
    curl "$@"
  fi
}

# Posts requested per page. -l is honoured up to 100, the page size the
# pagination was written for.
# NOTE(review): 100 is assumed to be the most Gelbooru returns per request -
# confirm against the API documentation before raising it.
MAX_PAGE_SIZE=100
PAGE_SIZE=$LIMIT
if ! [[ "$PAGE_SIZE" =~ ^[1-9][0-9]*$ ]] || (( PAGE_SIZE > MAX_PAGE_SIZE )); then
  PAGE_SIZE=$MAX_PAGE_SIZE
fi

if (( ${#TAGS[@]} == 0 )); then
  echo "$0: no tags given, nothing to download (use -a tag, see -h)" >&2
fi

# Remembered so every tag starts from the same place, even when a tag
# contains '/' and therefore maps to a nested folder.
START_DIR=$PWD

for TAG in "${TAGS[@]}"; do
  echo "$TAG"

  # GET TAG TOTAL COUNT
  TAG_COUNT=$(gelb_curl -s "https://gelbooru.com/index.php?page=dapi&s=tag&q=index&json=1&name=$TAG" \
    | jq -r '.tag[0].count // empty')
  if ! [[ "$TAG_COUNT" =~ ^[0-9]+$ ]]; then
    echo "Could not get the post count for tag '$TAG', skipping" >&2
    continue
  fi

  # CREATE FOLDER AND CD INTO IT
  # -p: an already existing folder (second run) is fine.
  # If the folder cannot be entered, skip the tag rather than downloading
  # into the wrong directory.
  mkdir -pv -- "$TAG"
  if ! cd -- "$TAG"; then
    echo "Cannot enter folder '$TAG', skipping" >&2
    continue
  fi

  # NESTED LOOP TO GET ALL POSTS UNDER TAG
  # POSTS is reset for every tag so posts of earlier tags are not fetched
  # again; it holds one compact JSON object per line.
  POSTS=""
  TAG_PAGES=$(( TAG_COUNT / PAGE_SIZE ))
  for (( PAGE = 0; PAGE <= TAG_PAGES; PAGE++ )); do
    # "// []" tolerates a page without a "post" field (e.g. the extra last
    # page requested when the count is an exact multiple of the page size)
    PAGE_POSTS=$(gelb_curl -s "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&limit=$PAGE_SIZE&pid=$PAGE&tags=$TAG" \
      | jq -c '.post // [] | .[]')
    if [[ -n "$PAGE_POSTS" ]]; then
      POSTS+="$PAGE_POSTS"$'\n'
    fi
    sleep "$DELAY"
  done

  # NESTED LOOP FOR IMAGES IN THIS TAG
  # read -r keeps the backslash escapes of the JSON text intact.
  while IFS= read -r POST; do
    [[ -n "$POST" ]] || continue
    FILE_DATE=$(jq -r '.created_at // empty' <<<"$POST")
    FILE_URL=$(jq -r '.file_url // empty' <<<"$POST")
    FILE_MD5=$(jq -r '.md5 // empty' <<<"$POST")
    FILE_TAGS=$(jq -r '.tags // empty' <<<"$POST")
    FILE_TAGS=${FILE_TAGS// /,}
    if [[ -z "$FILE_URL" ]]; then
      echo "Post without file_url, skipping" >&2
      continue
    fi
    # file name = last path component of the URL
    FILE=${FILE_URL##*/}

    # DOWNLOAD FILE
    # A curl error is not fatal: curl -J refuses to overwrite a file from an
    # earlier run, and that file should still be (re)tagged below.
    gelb_curl -O -J "$FILE_URL" || echo "curl reported an error for $FILE_URL" >&2

    # ADD TAGS TO NEW IMAGE
    if [[ -f "$FILE" ]]; then
      setfattr -n user.xdg.tags -v "$FILE_TAGS" -- "$FILE"
      setfattr --name=user.checksum --value="$FILE_MD5" -- "$FILE"
      if [[ -n "$FILE_DATE" ]]; then
        touch -d "$FILE_DATE" -- "$FILE"
      fi
    else
      echo "$FILE was not downloaded, nothing to tag" >&2
    fi

    # DELAY BEFORE NEXT FETCH
    sleep "$DELAY"
  done <<<"$POSTS"

  # BACK OUT OF FOLDER
  cd -- "$START_DIR" || exit 1
done

View File

@ -2,18 +2,21 @@
USE_TOR=false USE_TOR=false
DELAY=1 DELAY=1
USE_DATE=false
function usage { function usage {
echo "./$(basename $0) [-t] [-s]" echo "./$(basename $0) [-t] [-s 1] [-d]"
echo "Mass downloader for Gelbooru" echo "Mass downloader for Gelbooru"
echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!" echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!"
echo " -h shows this help message" echo " -h shows this help message"
echo " -t downloads using tor (requires torsocks)" echo " -t downloads using tor (requires torsocks)"
echo " -s sets the delay after each request, defaults to 1" echo " -s sets the delay after each request, defaults to 1"
echo " -d sets the date of the file downloaded to the date it was uploaded to Gelbooru"
} }
# list of arguments expected in the input # list of arguments expected in the input
optstring=":hts:" optstring=":hts:d"
while getopts ${optstring} arg; do while getopts ${optstring} arg; do
case ${arg} in case ${arg} in
@ -29,6 +32,9 @@ while getopts ${optstring} arg; do
s) s)
DELAY="${OPTARG}" DELAY="${OPTARG}"
;; ;;
d)
USE_DATE=true
;;
:) :)
echo "$0: Must supply an argument to -$OPTARG." >&2 echo "$0: Must supply an argument to -$OPTARG." >&2
exit 1 exit 1
@ -53,7 +59,9 @@ while read f; do
# STORE FILE URL AND TAGS INTO VARIABLES # STORE FILE URL AND TAGS INTO VARIABLES
FILE_URL=`echo $JSON | jq -r '.post | .[] | ."file_url"'` FILE_URL=`echo $JSON | jq -r '.post | .[] | ."file_url"'`
FILE_TAGS=`echo $JSON | jq -r '.post | .[] | ."tags"' | sed 's/\ /,/g'` FILE_TAGS=`echo $JSON | jq -r '.post | .[] | ."tags"' | sed 's/\ /,/g'`
FILE=`echo $JSON | jq -r '.post | .[] | ."image"'` FILE_MD5=`echo $JSON | jq -r '.post | .[] | ."md5"'`
FILE_DATE=`echo $JSON | jq -r '.post | .[] | ."created_at"'`
FILE=`echo $FILE_URL | sed 's/\// /g' | awk '{print $NF}'`
# DOWNLOAD FILE # DOWNLOAD FILE
if $USE_TOR; then if $USE_TOR; then
torsocks curl -O -J $FILE_URL torsocks curl -O -J $FILE_URL
@ -62,6 +70,10 @@ while read f; do
fi fi
# ADD TAGS TO NEW IMAGE # ADD TAGS TO NEW IMAGE
setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE"
setfattr --name=user.checksum --value="$FILE_MD5" "$FILE"
if $USE_DATE; then
touch -d "$FILE_DATE" "$FILE"
fi
# DELAY BEFORE NEXT FETCH # DELAY BEFORE NEXT FETCH
sleep $DELAY sleep $DELAY
done < files.txt done < files.txt

View File

@ -2,6 +2,7 @@
USE_TOR=false USE_TOR=false
DELAY=1 DELAY=1
USE_DATE=false
function usage { function usage {
echo "./$(basename $0) [-t] [-s]" echo "./$(basename $0) [-t] [-s]"
@ -10,10 +11,12 @@ function usage {
echo " -h shows this help message" echo " -h shows this help message"
echo " -t downloads using tor (requires torsocks)" echo " -t downloads using tor (requires torsocks)"
echo " -s sets the delay after each request, defaults to 1" echo " -s sets the delay after each request, defaults to 1"
echo " -d sets the date of the file downloaded to the date it was uploaded to Gelbooru"
} }
# list of arguments expected in the input # list of arguments expected in the input
optstring=":hts:" optstring=":hts:d"
while getopts ${optstring} arg; do while getopts ${optstring} arg; do
case ${arg} in case ${arg} in
@ -29,6 +32,9 @@ while getopts ${optstring} arg; do
s) s)
DELAY="${OPTARG}" DELAY="${OPTARG}"
;; ;;
d)
USE_DATE=true
;;
:) :)
echo "$0: Must supply an argument to -$OPTARG." >&2 echo "$0: Must supply an argument to -$OPTARG." >&2
exit 1 exit 1
@ -52,8 +58,12 @@ for FILE in *; do
fi fi
# STORE TAGS INTO VARIABLES # STORE TAGS INTO VARIABLES
FILE_TAGS=`echo $JSON | jq -r '.post | .[] | ."tags"' | sed 's/\ /,/g'` FILE_TAGS=`echo $JSON | jq -r '.post | .[] | ."tags"' | sed 's/\ /,/g'`
FILE_DATE=`echo $JSON | jq -r '.post | .[] | ."created_at"'`
# ADD TAGS TO IMAGE # ADD TAGS TO IMAGE
setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE" setfattr -n user.xdg.tags -v "$FILE_TAGS" "$FILE"
if $USE_DATE; then
touch -d "$FILE_DATE" "$FILE"
fi
# DELAY BEFORE NEXT FETCH # DELAY BEFORE NEXT FETCH
sleep $DELAY sleep $DELAY
done done

0
moedl.sh Normal file → Executable file
View File