-
Notifications
You must be signed in to change notification settings - Fork 0
/
sporeget_wget.sh
61 lines (54 loc) · 1.57 KB
/
sporeget_wget.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
#
# sporeget_wget.sh - feed the output of sporeget.py to wget-lua and record
# the fetch as a WARC.
#
# Usage: sporeget_wget.sh <command> [<arg> ...]
#
# All arguments are passed unchanged to `python sporeget.py`. Its stdout is
# saved as the wget input file (so it is expected to be a list of URLs, one
# per line). The first two arguments are also used to build the item name:
#   saved/sporeget-<command>-<arg>-<YYYYmmddHHMMSS>.txt    (input list)
#   saved/sporeget-<command>-<arg>-<YYYYmmddHHMMSS>*       (WARC/CDX output)
#
# Requirements (resolved relative to the current directory / PATH):
#   sporeget.py, useragent.txt, python, wget-lua
#
# Exit status: the status of sporeget.py if it fails, otherwise the status
# of wget-lua (1 if the output directory or input file cannot be prepared).
version="v0.1-20231104"

main() {
  local links_output links_status
  # Declaration is separate from the assignment so $? is python's status.
  links_output=$(python sporeget.py "$@")
  links_status=$?
  if (( links_status != 0 )); then
    # Show whatever the generator printed and report its failure to callers.
    printf '%s\n' "$links_output"
    return "$links_status"
  fi

  # Use the positional parameters directly instead of re-splitting "$@"
  # through echo/cut, which broke on whitespace, globs and echo options and
  # duplicated the command when only one argument was given.
  local subcommand=${1-}
  local arg=${2-}
  # The values end up in file names: replace anything outside a conservative
  # character set so '/' or spaces cannot break the paths below.
  subcommand=${subcommand//[^A-Za-z0-9._-]/_}
  arg=${arg//[^A-Za-z0-9._-]/_}

  local workdir timestamp
  workdir=$(pwd)
  timestamp=$(date +"%Y%m%d%H%M%S")

  local itemname="sporeget-${subcommand}"
  if [[ -n "$arg" ]]; then
    itemname+="-${arg}"
  fi
  itemname+="-${timestamp}"

  local inputfile="${itemname}.txt"
  local useragent
  useragent=$(<useragent.txt)
  local warcdir="saved"
  local warcfile="$itemname"
  local wait="0.5"
  local waitretry="5"
  local timeout="60"
  local tries="3"
  local outdir="${workdir}/${warcdir}"

  mkdir -p -- "$outdir" || return 1
  printf '%s\n' "$links_output" > "${outdir}/${inputfile}" || return 1

  local wgetargs=(
    "--user-agent=$useragent"
    "--header=Connection: keep-alive"
    "--reject-reserved-subnets"
    "--warc-dedup-url-agnostic"
    "--warc-file=$warcfile"
    #"--warc-item-name=$itemname"
    "--warc-header=x-sporeget-version: $version"
    "--warc-cdx"
    "--no-check-certificate" #Spore website seems to have problems with SSL certificates sometimes.
    "--content-on-error"
    "--wait=$wait"
    "--waitretry=$waitretry"
    "--timeout=$timeout"
    "--tries=$tries"
    "--retry-connrefused"
    "--no-cookies"
    "--no-parent"
    "--no-http-keep-alive"
    "--delete-after"
    #"--page-requisites"
    "--domains=www.spore.com,static.spore.com,pollinator.spore.com"
    "--input-file=$inputfile"
  )

  # Run inside a subshell so the caller's working directory is never changed
  # and wget-lua is not started at all if the directory cannot be entered.
  # The subshell's status (wget-lua's, or cd's on failure) is main's status.
  (
    cd -- "$outdir" && wget-lua "${wgetargs[@]}" 2>&1
  )
}

main "$@"