diff --git a/build.sh b/build.sh index 7dacc96b6..8c170976d 100644 --- a/build.sh +++ b/build.sh @@ -41,6 +41,7 @@ Statistics for each source*: Today | Yesterday | Dead | Source $(print_stats "Google Search") $(print_stats "aa419.org") +$(print_stats "dfpi.ca.gov") $(print_stats "guntab.com") $(print_stats "petscams.com") $(print_stats "scam.delivery") @@ -93,6 +94,7 @@ Additionally, wildcard domains are periodically added manually to the blocklist ## Sources - [Google's Custom Search JSON API](https://developers.google.com/custom-search/v1/introduction): Google Search API - [Artists Against 419](https://db.aa419.org/fakebankslist.php): fake sites database +- [DFPI's Crypto Scam Tracker](https://dfpi.ca.gov/crypto-scams/): crypto scams database - [GunTab](https://www.guntab.com/scam-websites): firearm scam sites database - [PetScams.com](https://petscams.com/): pet scam sites database - [Scam.Delivery](https://scam.delivery/): delivery scam sites database diff --git a/config/source_log.csv b/config/source_log.csv index dc4d0f766..fe02f9012 100644 --- a/config/source_log.csv +++ b/config/source_log.csv @@ -198,4 +198,3 @@ Time,Source,Item,Raw Count,Final Count,Whitelisted,Dead,Redundant,Toplist Count, 16:33:19 22-03-24,Google Search,"We are an online retailer company. Our company sells thousands of products with guaranteed quality....",0,0,0,0,0,0,,1,yes 17:47:44 22-03-24,dfpi.ca.gov,dfpi.ca.gov,135,133,2,0,0,1,antrush.com,0,no 18:12:04 22-03-24,dfpi.ca.gov,dfpi.ca.gov,135,133,2,0,0,0,,0,yes -18:22:23 22-03-24,chainabuse.com,chainabuse.com,10,9,1,0,0,6,binance.com bitpay.com cex.io invity.io nexo.com paybis.com,0,no diff --git a/retrieve.sh b/retrieve.sh index 0840c4d86..8be6ce7dc 100644 --- a/retrieve.sh +++ b/retrieve.sh @@ -37,16 +37,15 @@ function main { function retrieve_new { mkdir data/pending # Intialize pending directory - #source_aa419 - source_chainabuse - #source_dfpi - #source_guntab - #source_petscams - #source_scamdelivery # Has captchas - #source_scamdirectory - #source_scamadviser - #source_stopgunscams - #source_google_search + source_aa419 + source_dfpi + source_guntab + source_petscams + source_scamdelivery # Has captchas + source_scamdirectory + source_scamadviser + source_stopgunscams + source_google_search } function retrieve_existing { @@ -185,18 +184,6 @@ function source_dfpi { process_source "$source" "$source" "$domains_file" } -function source_chainabuse { - source='chainabuse.com' - domains_file="data/pending/domains_${source}.tmp" - url='https://www.chainabuse.com' - printf "\nSource: %s\n\n" "$source" - for page in {0..9}; do # Loop through pages - curl -s "${url}/reports?page=${page}sort=newest/" | grep -oE '"domain":"(https?://)?[[:alnum:].-]+\.[[:alnum:]-]{2,}' | - sed 's/"domain":"//' >> "$domains_file" - done - process_source "$source" "$source" "$domains_file" -} - function source_google_search { source='Google Search' printf "\nSource: %s\n\n" "$source"