diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 3a0942cfc..1cadd8023 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -21,14 +21,17 @@ jobs: dead-check: needs: retrieve + #if: ${{ success() || failure() }} uses: ./.github/workflows/dead.yml build: needs: dead-check + if: ${{ success() || failure() }} uses: ./.github/workflows/build.yml tidy_log: needs: build + if: ${{ success() || failure() }} runs-on: ubuntu-latest steps: - name: Checkout diff --git a/config/blacklist.txt b/config/blacklist.txt index e825bcd65..dc7a9819e 100644 --- a/config/blacklist.txt +++ b/config/blacklist.txt @@ -88,6 +88,10 @@ quintaldog.cz racecars24.fr rateglo.com raybansales.us +local-meets.com +neighborhoodsluts.com +oneclick2her.com +tapnfuck.com reifenversand-arndt.com relayreporty.com reloadsworld.com diff --git a/config/search_terms.csv b/config/search_terms.csv index ba5ef4f01..832aa65bd 100644 --- a/config/search_terms.csv +++ b/config/search_terms.csv @@ -38,16 +38,16 @@ We source designer style products. And have been able to do this by building gre "is the global leading online retailer for tops, bottoms and dresses. Our dedicated fashion buyers, who have more than 10 years' experience in this industry, use their skill and knowledge to spot the trend of the curve and bring the most stunning and fetching stuffs for our customers.",, "The pharmacy you are currently dealing with comes first in the market of delivering and distribution of high-quality medications throughout the world. The primary target of our company is to supply professionally manufactured brand and generic medications at the lowest prices you would never have come across in your local medical stores. We ship all over the world and warrant top-quality service. You save your money and time with us.",y,Low count (5) "mainly focuses on wear. Our goal is always to provide our customers with high quality fashion products at down to earth prices. We are dedicated to making you look good for less. We do our best to find and design the styles that makes you look fashion and feel good.",y,Low count (2) -"In addition to offering a wide variety of handgun rounds, rifle ammo, and shotgun shells for sale, we also frequently stock great deals on ammo cans and other accessories. So if you’re looking for a great ammunition selection, you’ve found it!",,Low count (8) +"In addition to offering a wide variety of handgun rounds, rifle ammo, and shotgun shells for sale, we also frequently stock great deals on ammo cans and other accessories. So if you’re looking for a great ammunition selection, you’ve found it!",y,Low count (8) "The shipment of your product is our highest priority and everyday we dispatch hundreds of products to satisfied customers across the world. However, because the majority of our warehouses are internationally-based, our shipping and handling times can vary and we are therefore unable to offer next day delivery.",, "Immediately, I realized there was something outstanding about Robert and his family, who’ve been breeding Goldens for over 15 years. They were sincerely passionate about the breed, and about their dogs in particular.",y,Low count (4) "We are an online retailer company. Our company sells thousands of products with guaranteed quality.",y,Low count (9) "corteiz official store",y,Low count (3) -"To provide the highest quality craftsmanship and best customer service possible every day, with every customer, and with every pair of shoes.",,Low count (10) +"To provide the highest quality craftsmanship and best customer service possible every day, with every customer, and with every pair of shoes.",,Low count (?) "We love cannabis for its wonderful properties which is exactly why we offer the services that we do. As a mail-order cannabis service, we strive to provide high-quality cannabis for customers when they need it. As the years pass on, we can appreciate the miracle effects of cannabis which is great for people who suffer from a variety of different diseases, disorders, and conditions, and we strive to provide accessibility for those in need.",, -"We are one of the world’s leading Asset Management firms with approximately $500 billion in Assets under management that creates lasting impact for our investors, teams, businesses and the communities in which we live.",, +"We are one of the world’s leading Asset Management firms with approximately $500 billion in Assets under management that creates lasting impact for our investors, teams, businesses and the communities in which we live.",,Low count (?) "is the cryptocurrency trading platform equipped with the high-tech blockchain technology. We believe this technology will prosper our lives and increase the value of assets. Our aim is to provide more customers with a better online cryptocurrency trading environment, and to create the wise investment environment.",, -"you join a community of millions of people who choose to share their opinions and complete offers in exchange for rewards.",, -"we are passionate about providing high-quality, stylish T-shirts that allow our customers to express themselves creatively. Whether you're looking for bold designs, personalized prints, or trendy graphics, we have something for everyone.",, -"Before we can show you nude pics of horny women in your area that want to fuck right now, we need to ask a few quick questions.",, -"You will see hot nudes! Please be discreet.",, +"you join a community of millions of people who choose to share their opinions and complete offers in exchange for rewards.",,Low count (7) +"we are passionate about providing high-quality, stylish T-shirts that allow our customers to express themselves creatively. Whether you're looking for bold designs, personalized prints, or trendy graphics, we have something for everyone.",,Low count (7) +"Before we can show you nude pics of horny women in your area that want to fuck right now, we need to ask a few quick questions.",,Low count (13) +"You will see hot nudes! Please be discreet.",y,Low count (2) diff --git a/config/whitelist.txt b/config/whitelist.txt index 966726903..43cb8cbe7 100644 --- a/config/whitelist.txt +++ b/config/whitelist.txt @@ -134,7 +134,6 @@ vikalpa.org.in virtualgraffiti.com virtuemap.com walkhighlands.co.uk -web.archive.org webparanoid.com westerndigital.com whitecoatinvestor.com @@ -148,3 +147,4 @@ zdnet.com zendesk.com zoomex.com zoominfo.com +archive. \ No newline at end of file diff --git a/retrieve.sh b/retrieve.sh index d01d05a98..c9c1caa19 100644 --- a/retrieve.sh +++ b/retrieve.sh @@ -153,7 +153,7 @@ function source_google_search { # Retrieve new domains while read -r search_term; do # Loop through search terms # Return if rate limited - [[ "$rate_limited" == true ]] && { printf "! Both API keys are rate limited.\n"; return; } + [[ "$rate_limited" == true ]] && { printf "! Both Google Search API keys are rate limited.\n"; return; } search_google "$search_term" done < <(csvgrep -c 2 -m 'y' -i "$search_terms_file" | csvcut -c 1 | csvformat -U 1 | tail -n +2) } @@ -174,7 +174,7 @@ function search_google { if grep -qF 'rateLimitExceeded' <<< "$page_results"; then # Break loop if second key is also rate limited [[ "$google_search_id" == "$google_search_id_2" ]] && { rate_limited=true; break; } - printf "! Rate limited. Switching API keys.\n" + printf "! Google Search rate limited. Switching API keys.\n" google_search_api_key="$google_search_api_key_2" && google_search_id="$google_search_id_2" continue # Continue to next page (current rate limited page is not repeated) fi