new scripts + resume functionality

This commit is contained in:
bronze 2023-12-26 19:09:22 -05:00
parent 32c199af14
commit b23b438e68
4 changed files with 258 additions and 6 deletions

View File

@ -1,5 +1,11 @@
# CHANGELOG
## 12/26/2023
### ADDED
- added `-r` resume option under `*-tag.sh` scripts
- added `dandl-tag.sh` and `moedl-tag.sh`
## 12/25/2023 (Merry Christmas!)
### FIXED

116
dandl-tag.sh Executable file
View File

@ -0,0 +1,116 @@
#!/bin/bash
# Default settings; the option parser below overrides them.
RESUME="false"   # -r: stop a tag at the first file that already exists
DELAY="1"        # -s: value handed to sleep after each request
USE_TOR="false"  # -t: send requests through torsocks
#######################################
# Print the help text for the Danbooru tag downloader.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # The synopsis lists -a because nothing is downloaded without at least one
  # tag; the description states what the script does (one folder per tag).
  cat <<EOF
./${0##*/} [-t] [-s] [-r] -a tag -a tag2
Mass downloader for Danbooru
Creates a folder for each tag and downloads every post under that tag into it.
 -h shows this help message
 -t downloads using tor (requires torsocks)
 -s sets the delay after each request, defaults to 1
 -a tag or artist name
 -r will download until it hits a file that already exists
EOF
}
# Tags collected from every -a option, in the order they were given.
TAGS=()

#######################################
# Parse the command-line options into the script's settings.
# Globals:   USE_TOR, DELAY, RESUME, TAGS (written)
# Arguments: the script's own arguments ("$@")
# Outputs:   help text or the Tor exit IP on stdout; errors on stderr
# Exits:     after -h; 1 when an option lacks its argument; 2 on an
#            unknown option
#######################################
parse_args() {
  # list of arguments expected in the input
  # (leading ":" = silent mode, so the ":" and "?" arms below do the reporting)
  local optstring=":hts:a:r"
  local arg
  # Keep getopts' cursor local so the function always starts at argument 1.
  local OPTIND=1 OPTARG

  while getopts "$optstring" arg; do
    case "$arg" in
      h)
        usage
        exit
        ;;
      t)
        USE_TOR=true
        echo -n "Using Tor with IP: "
        torsocks curl ip.me
        ;;
      s)
        DELAY="${OPTARG}"
        ;;
      a)
        TAGS+=("$OPTARG")
        ;;
      r)
        RESUME=true
        ;;
      :)
        echo "$0: Must supply an argument to -$OPTARG." >&2
        exit 1
        ;;
      \?)
        # Escaped so it matches the literal "?" getopts reports, and sent to
        # stderr like the other diagnostic.
        echo "Invalid option: -${OPTARG}." >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Run curl, routed through torsocks when -t was given.
# Globals:   USE_TOR (read)
# Arguments: passed to curl unchanged
# Returns:   the exit status of curl
#######################################
fetch() {
  if [[ "${USE_TOR:-false}" == true ]]; then
    torsocks curl "$@"
  else
    curl "$@"
  fi
}

#######################################
# Download every post under one Danbooru tag into a folder named after it.
# Each file is saved as <md5>.<ext>, gets its tags and md5 stored as extended
# attributes, and has its mtime set to the post's creation time.
# Globals:   RESUME, DELAY (read)
# Arguments: $1 - tag or artist name
# Outputs:   progress on stdout, skipped/failed posts on stderr
#######################################
download_tag() {
  local tag=$1
  local start_dir=$PWD
  local tag_count tag_pages page page_ids id_list=""
  local post_id json file_date file_url file_tags file_md5 file_ext file

  echo "$tag"
  # CREATE FOLDER AND CD INTO IT
  # -p: the folder already exists on every -r run, and a tag containing "/"
  # needs nested folders.
  mkdir -pv "$tag"
  cd "$tag" || exit

  # GET TAG TOTAL COUNT
  tag_count=$(fetch -s "https://danbooru.donmai.us/tags.json?only=id,name,post_count&search\[name_matches\]=$tag" \
    | jq -r '.[].post_count')

  # Round up so the final, partly filled page is requested as well. When the
  # lookup does not yield one plain number, tag_pages stays 0, meaning "no
  # known limit": the loop below then runs until a page comes back empty.
  tag_pages=0
  if [[ "$tag_count" =~ ^[0-9]+$ ]]; then
    tag_pages=$(( (10#$tag_count + 199) / 200 ))
  fi

  # COLLECT THE IDS OF ALL POSTS UNDER TAG
  # Pages are requested starting at 1. id_list is local and starts empty, so
  # IDs never carry over from a previously processed tag.
  for (( page = 1; tag_pages == 0 || page <= tag_pages; page++ )); do
    page_ids=$(fetch -s "https://danbooru.donmai.us/posts.json?page=$page&limit=200&tags=$tag" \
      | jq -r '.[].id')
    sleep "${DELAY:-1}"
    # An empty page means there is nothing further to list.
    [[ -n "$page_ids" ]] || break
    id_list+="$page_ids"$'\n'
  done

  # LOOP OVER THE IMAGES IN THIS TAG
  while IFS= read -r post_id; do
    [[ -n "$post_id" ]] || continue
    json=$(fetch -s "https://danbooru.donmai.us/posts/$post_id.json")

    # STORE FILE URL AND TAGS INTO VARIABLES
    file_date=$(jq -r '.created_at' <<< "$json")
    file_url=$(jq -r '.file_url' <<< "$json")
    file_tags=$(jq -r '.tag_string' <<< "$json")
    file_tags=${file_tags// /,}
    file_md5=$(jq -r '.md5' <<< "$json")
    file_ext=$(jq -r '.file_ext' <<< "$json")

    # jq prints "null" for a missing field; such a post has nothing to fetch.
    if [[ -z "$file_url" || "$file_url" == null \
      || -z "$file_md5" || "$file_md5" == null ]]; then
      echo "Skipping post $post_id: no file URL available." >&2
      sleep "${DELAY:-1}"
      continue
    fi
    file="$file_md5.$file_ext"

    # With -r, the first already-downloaded file ends this tag; the outer
    # loop then carries on with the next tag.
    if [[ "${RESUME:-false}" == true && -f "$file" ]]; then
      echo "$file exists."
      break
    fi

    # DOWNLOAD FILE
    # Written to a .part name first so a failed transfer never leaves a
    # broken "$file" behind for a later -r run to stop on. -f makes curl
    # report HTTP errors instead of saving the error page.
    if fetch -f -o "$file.part" "$file_url" && mv -f -- "$file.part" "$file"; then
      # ADD TAGS TO NEW IMAGE
      setfattr -n user.xdg.tags -v "$file_tags" "$file"
      setfattr --name=user.checksum --value="$file_md5" "$file"
      # SET TIME TO TIME UPLOADED
      touch -d "$file_date" "$file"
    else
      echo "Download failed: $file_url" >&2
      rm -f -- "$file.part"
    fi

    # DELAY BEFORE NEXT FETCH
    sleep "${DELAY:-1}"
  done <<< "$id_list"

  # BACK OUT OF FOLDER
  # Return to the saved directory; "cd .." would be wrong for nested tags.
  cd "$start_dir" || exit
}

for TAG in "${TAGS[@]}"; do
  download_tag "$TAG"
done

View File

@ -2,9 +2,10 @@
USE_TOR=false
DELAY=1
RESUME=false
function usage {
echo "./$(basename "$0") [-t] [-s] [-l] -a tag -a tag2"
echo "./$(basename "$0") [-t] [-s] [-r] -a tag -a tag2"
echo "Mass downloader for Gelbooru"
echo "Simply make a files.txt inside a folder and paste all your links, then run this script to download them all!"
echo " -h shows this help message"
@ -14,7 +15,7 @@ function usage {
}
# list of arguments expected in the input
optstring=":hts:a:l:"
optstring=":hts:a:r"
while getopts ${optstring} arg; do
case ${arg} in
@ -33,6 +34,9 @@ while getopts ${optstring} arg; do
a)
TAGS+=("$OPTARG")
;;
r)
RESUME=true
;;
:)
echo "$0: Must supply an argument to -$OPTARG." >&2
exit 1
@ -47,8 +51,8 @@ done
for TAG in "${TAGS[@]}"; do
echo "$TAG"
# CREATE FOLDER AND CD INTO IT
mkdir -v $TAG
cd $TAG
mkdir -v "$TAG"
cd "$TAG" || exit
# GET TAG TOTAL COUNT
if $USE_TOR; then
TAG_COUNT=$(torsocks curl -s "https://gelbooru.com/index.php?page=dapi&s=tag&q=index&json=1&name=$TAG" | jq -r '."tag" | .[] | ."count"')
@ -58,7 +62,7 @@ for TAG in "${TAGS[@]}"; do
# NESTED LOOP TO GET ALL POSTS UNDER TAG
TAG_PAGES=$((TAG_COUNT / 100))
for (( PAGE = 0; PAGE <= $TAG_PAGES; PAGE++ ))
for (( PAGE = 0; PAGE <= TAG_PAGES; PAGE++ ))
do
if $USE_TOR; then
JSON+=$(torsocks curl -s "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&pid=$PAGE&tags=$TAG" | jq -r '.post')
@ -69,7 +73,7 @@ for TAG in "${TAGS[@]}"; do
done
# NESTED LOOP FOR IMAGES IN THIS TAG
echo "$JSON" | jq -c '.[]' | while read i; do
echo "$JSON" | jq -c '.[]' | while read -r i; do
#echo $i | jq -r '."id"'
FILE_DATE=$(echo "$i" | jq -r '."created_at"')
FILE_URL=$(echo "$i" | jq -r '."file_url"')
@ -77,6 +81,13 @@ for TAG in "${TAGS[@]}"; do
FILE_TAGS=$(echo "$i" | jq -r '."tags"' | sed 's/\ /,/g')
FILE=$(echo "$FILE_URL" | sed 's/\// /g' | awk '{print $NF}')
if $RESUME; then
if [[ -f "$FILE" ]]; then
echo "$FILE exists."
exit
fi
fi
# DOWNLOAD FILE
if $USE_TOR; then
torsocks curl -O -J "$FILE_URL"

119
moedl-tag.sh Executable file
View File

@ -0,0 +1,119 @@
#!/bin/bash
# Default settings; the option parser below overrides them.
URL="konachan.com"  # -c: host name of the imageboard to download from
RESUME="false"      # -r: stop a tag at the first file that already exists
DELAY="1"           # -s: value handed to sleep after each request
USE_TOR="false"     # -t: send requests through torsocks
#######################################
# Print the help text for the moebooru tag downloader.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # The synopsis lists -a because nothing is downloaded without at least one
  # tag; the description states what the script does (one folder per tag).
  cat <<EOF
./${0##*/} [-t] [-s] [-r] [-c site.com] -a tag -a tag2
Mass downloader for moebooru imageboards (think konachan and yande.re)
Creates a folder for each tag and downloads every post under that tag into it.
 -h shows this help message
 -t downloads using tor (requires torsocks)
 -s sets the delay after each request, defaults to 1
 -a tag or artist name
 -r will download until it hits a file that already exists
 -c custom url (defaults to konachan if unset)
EOF
}
# Tags collected from every -a option, in the order they were given.
TAGS=()

#######################################
# Parse the command-line options into the script's settings.
# Globals:   USE_TOR, DELAY, RESUME, URL, TAGS (written)
# Arguments: the script's own arguments ("$@")
# Outputs:   help text or the Tor exit IP on stdout; errors on stderr
# Exits:     after -h; 1 when an option lacks its argument; 2 on an
#            unknown option
#######################################
parse_args() {
  # list of arguments expected in the input
  # (leading ":" = silent mode, so the ":" and "?" arms below do the reporting)
  local optstring=":hts:a:rc:"
  local arg
  # Keep getopts' cursor local so the function always starts at argument 1.
  local OPTIND=1 OPTARG

  while getopts "$optstring" arg; do
    case "$arg" in
      h)
        usage
        exit
        ;;
      t)
        USE_TOR=true
        echo -n "Using Tor with IP: "
        torsocks curl ip.me
        ;;
      s)
        DELAY="${OPTARG}"
        ;;
      a)
        TAGS+=("$OPTARG")
        ;;
      r)
        RESUME=true
        ;;
      c)
        URL="${OPTARG}"
        ;;
      :)
        echo "$0: Must supply an argument to -$OPTARG." >&2
        exit 1
        ;;
      \?)
        # Escaped so it matches the literal "?" getopts reports, and sent to
        # stderr like the other diagnostic.
        echo "Invalid option: -${OPTARG}." >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
#######################################
# Run curl, routed through torsocks when -t was given.
# Globals:   USE_TOR (read)
# Arguments: passed to curl unchanged
# Returns:   the exit status of curl
#######################################
fetch() {
  if [[ "${USE_TOR:-false}" == true ]]; then
    torsocks curl "$@"
  else
    curl "$@"
  fi
}

#######################################
# Download every post under one tag from the moebooru site in URL into a
# folder named after the tag. Each file is saved under the last path
# component of its file_url with %20 turned into spaces, gets its tags stored
# as an extended attribute, and has its mtime set from the post's created_at.
# Globals:   URL, RESUME, DELAY (read)
# Arguments: $1 - tag or artist name
# Outputs:   progress on stdout, skipped/failed posts on stderr
#######################################
download_tag() {
  local tag=$1
  local start_dir=$PWD
  local tag_count tag_pages page page_ids id_list=""
  local image_id json file_date file_url file_tags file file_withspace

  echo "$tag"
  # CREATE FOLDER AND CD INTO IT
  # -p: the folder already exists on every -r run, and a tag containing "/"
  # needs nested folders.
  mkdir -pv "$tag"
  cd "$tag" || exit

  # GET TAG TOTAL COUNT
  tag_count=$(fetch -s "https://$URL/tag.json?name=$tag" | jq -r '.[].count')

  # Round up so the final, partly filled page is requested as well. When the
  # lookup does not yield one plain number (no match, or several tags
  # returned), tag_pages stays 0, meaning "no known limit": the loop below
  # then runs until a page comes back empty.
  tag_pages=0
  if [[ "$tag_count" =~ ^[0-9]+$ ]]; then
    tag_pages=$(( (10#$tag_count + 99) / 100 ))
  fi

  # COLLECT THE IDS OF ALL POSTS UNDER TAG
  # Pages are requested starting at 1.
  for (( page = 1; tag_pages == 0 || page <= tag_pages; page++ )); do
    page_ids=$(fetch -s "https://$URL/post.json?page=$page&limit=100&tags=$tag" \
      | jq -r '.[].id')
    sleep "${DELAY:-1}"
    # An empty page means there is nothing further to list.
    [[ -n "$page_ids" ]] || break
    id_list+="$page_ids"$'\n'
  done

  # LOOP OVER THE IMAGES IN THIS TAG
  while IFS= read -r image_id; do
    [[ -n "$image_id" ]] || continue
    # DOWNLOAD JSON
    json=$(fetch -s "https://$URL/post.json?tags=id:$image_id")

    # STORE FILE URL AND TAGS INTO VARIABLES
    file_date=$(jq -r '.[].created_at' <<< "$json")
    file_url=$(jq -r '.[].file_url' <<< "$json")
    file_tags=$(jq -r '.[].tags' <<< "$json")
    file_tags=${file_tags// /,}

    # jq prints "null" for a missing field; such a post has nothing to fetch.
    if [[ -z "$file_url" || "$file_url" == null ]]; then
      echo "Skipping post $image_id: no file URL available." >&2
      sleep "${DELAY:-1}"
      continue
    fi

    # Last path component of the URL, whatever the path depth of the host.
    file=${file_url##*/}
    # Only %20 is decoded, so names match files from earlier runs.
    file_withspace=${file//\%20/ }

    # With -r, the first already-downloaded file ends this tag; the outer
    # loop then carries on with the next tag.
    if [[ "${RESUME:-false}" == true && -f "$file_withspace" ]]; then
      echo "$file_withspace exists."
      break
    fi

    # DOWNLOAD FILE
    # Written to a .part name first so a failed transfer never leaves a
    # broken file behind for a later -r run to stop on, then moved straight
    # to the decoded name. -f makes curl report HTTP errors instead of
    # saving the error page.
    if fetch -f -o "$file_withspace.part" "$file_url" \
      && mv -f -- "$file_withspace.part" "$file_withspace"; then
      # ADD TAGS TO NEW IMAGE
      setfattr -n user.xdg.tags -v "$file_tags" "$file_withspace"
      # SET TIME TO TIME UPLOADED
      # "@" makes touch read created_at as seconds since the epoch.
      touch -d "@$file_date" "$file_withspace"
    else
      echo "Download failed: $file_url" >&2
      rm -f -- "$file_withspace.part"
    fi

    # DELAY BEFORE NEXT FETCH
    sleep "${DELAY:-1}"
  done <<< "$id_list"

  # BACK OUT OF FOLDER
  # Return to the saved directory; "cd .." would be wrong for nested tags.
  cd "$start_dir" || exit
}

for TAG in "${TAGS[@]}"; do
  download_tag "$TAG"
done