diff --git a/functions/build_lists.sh b/functions/build_lists.sh index 7d1c112e4..9465c9cfb 100644 --- a/functions/build_lists.sh +++ b/functions/build_lists.sh @@ -51,7 +51,7 @@ format_file() { bash functions/tools.sh format "$1" } -# Build list functions are to specify the syntax of the lists. +# Build list functions are to specify the syntax of the lists for the build function. # $syntax: name of list syntax # $directory: directory to create list in # $comment: character used for comments (blank defaults to #) diff --git a/functions/check_dead.sh b/functions/check_dead.sh index 4831c2fa3..69bf16241 100644 --- a/functions/check_dead.sh +++ b/functions/check_dead.sh @@ -26,7 +26,7 @@ main() { check_alive update_light_file - # Cache dead domains (skip processing dead domains through alive check) + # Cache dead domains (done last to skip alive domains check) cat dead_in_raw.tmp >> "$DEAD_DOMAINS" format_file "$DEAD_DOMAINS" } @@ -53,8 +53,8 @@ check_subdomains() { # Remove dead root domains from raw file and root domains file comm -23 "$RAW" <(printf "%s" "$dead_root_domains") > raw.tmp - mv raw.tmp "$RAW" comm -23 "$ROOT_DOMAINS" <(printf "%s" "$dead_root_domains") > root.tmp + mv raw.tmp "$RAW" mv root.tmp "$ROOT_DOMAINS" log_event "$dead_root_domains" dead raw @@ -86,8 +86,8 @@ check_redundant() { # Remove unused wildcards from raw file and wildcards file comm -23 "$RAW" collated_dead_wildcards.tmp > raw.tmp - mv raw.tmp "$RAW" comm -23 "$WILDCARDS" collated_dead_wildcards.tmp > wildcards.tmp + mv raw.tmp "$RAW" mv wildcards.tmp "$WILDCARDS" log_event "$(> "$RAW" # Add resurrected domains to raw file + # Add resurrected domains to raw file + printf "%s\n" "$alive_domains" >> "$RAW" format_file "$RAW" log_event "$alive_domains" resurrected dead_domains_file diff --git a/functions/check_parked.sh b/functions/check_parked.sh index 320e694c7..02b3b3a60 100644 --- a/functions/check_parked.sh +++ b/functions/check_parked.sh @@ -23,11 +23,6 @@ main() { } remove_parked_domains() { - # Reset split files before run - find . -maxdepth 1 -type f -name "x??" -delete - - printf "\n[start] Analyzing %s entries for parked domains\n" "$(wc -l < "$RAW")" - # Retrieve parked domains and return if none found retrieve_parked "$RAW" || return @@ -38,11 +33,6 @@ remove_parked_domains() { } add_unparked_domains() { - # Reset split files before run - find . -maxdepth 1 -type f -name "x??" -delete - - printf "\n[start] Analyzing %s entries for unparked domains\n" "$(wc -l < "$PARKED_DOMAINS")" - # Retrieve parked domains and return if none found retrieve_parked "$PARKED_DOMAINS" || return @@ -66,7 +56,13 @@ add_unparked_domains() { # Output: # exit status 1 if no parked domains were found retrieve_parked() { - : > parked_domains.tmp # Truncate parked domains (prevents missing file error) + # Truncate parked domains (prevents missing file error) + : > parked_domains.tmp + # Truncate split files before run + find . -maxdepth 1 -type f -name "x??" -delete + + printf "\n[info] Processing file %s\n + [start] Analyzing %s entries for parked domains\n" "$1" "$(wc -l < "$1")" # Split file into 12 equal files split -d -l $(($(wc -l < "$1")/12)) "$1" @@ -160,4 +156,5 @@ cleanup() { } trap cleanup EXIT + main diff --git a/functions/tools.sh b/functions/tools.sh index 3c55c04e1..28c19b98e 100644 --- a/functions/tools.sh +++ b/functions/tools.sh @@ -2,6 +2,7 @@ # Tools.sh is a shell wrapper intended to store commonly used functions. # Function 'format' is called to standardize the format of a file. +# $1: file to be formatted format() { local -r file="$1" [[ ! -f "$file" ]] && return diff --git a/functions/update_readme.sh b/functions/update_readme.sh index 0e137f428..41dc55e46 100644 --- a/functions/update_readme.sh +++ b/functions/update_readme.sh @@ -134,18 +134,18 @@ Thanks to the following people for the help, inspiration, and support! EOF } -# Function 'print_stats' prints the various statistics for each source -# $1: source to process (leave blank to process all sources). +# Function 'print_stats' is an echo wrapper that returns the statistics for each source. +# $1: source to process (default is all sources) print_stats() { [[ -n "$1" ]] && source="$1" || source='All sources' printf "%5s |%10s |%8s%% | %s\n" "$(sum "$TODAY" "$1")" \ "$(sum "$YESTERDAY" "$1")" "$(sum_excluded "$1" )" "$source" } -# Function 'sum' is an echo wrapper that sums up the domains retrieved by -# that source for that particular day. +# Function 'sum' is an echo wrapper that returns the total sum of domains retrieved +# by that source for that particular day. # $1: day to process -# $2: source to process +# $2: source to process (default is all sources) sum() { # Print dash if no runs for that day found ! grep -qF "$1" "$SOURCE_LOG" && { printf "-"; return; } @@ -153,9 +153,9 @@ sum() { csvcut -c 5 | awk '{total += $1} END {print total}' } -# Function 'count_excluded' is an echo wrapper that counts the % of excluded domains -# of raw count retrieved from each source. -# $1: source to process +# Function 'count_excluded' is an echo wrapper that returns the % of excluded domains +# out of the raw count retrieved from each source. +# $1: source to process (default is all sources) count_excluded() { csvgrep -c 2 -m "$1" "$SOURCE_LOG" | csvgrep -c 14 -m yes > source_rows.tmp @@ -172,8 +172,8 @@ count_excluded() { printf "%s" "$((excluded_count*100/raw_count))" } -# Function 'format_file' is a shell wrapper to standardize the format of a file -# $1: file to format. +# Function 'format_file' is a shell wrapper to standardize the format of a file. +# $1: file to format format_file() { bash functions/tools.sh format "$1" } diff --git a/functions/validate_raw.sh b/functions/validate_raw.sh index 2d00afd09..a78f51531 100644 --- a/functions/validate_raw.sh +++ b/functions/validate_raw.sh @@ -114,7 +114,7 @@ validate_raw() { # Collate filtered wildcards to exclude from dead check printf "%s\n" "$wildcards" >> "$WILDCARDS" - # Collate filtered redundant domains for dead check + # Collate filtered redundant domains for dead check grep -Ff <(printf "%s" "$wildcards") redundant_domains.tmp >> "$REDUNDANT_DOMAINS" format_file "$WILDCARDS" @@ -138,10 +138,9 @@ validate_raw() { printf "\n\e[1mProblematic domains (%s):\e[0m\n" "$(wc -l < filter_log.tmp)" sort -u filter_log.tmp + # Save changes to raw file and raw light file printf "%s\n" "$domains" > "$RAW" format_file "$RAW" - - # Remove filtered domains from light file comm -12 "$RAW" "$RAW_LIGHT" > light.tmp && mv light.tmp "$RAW_LIGHT" total_whitelisted_count="$((whitelisted_count + whitelisted_tld_count))"