diff --git a/.github/workflows/opensquat.yml b/.github/workflows/opensquat.yml
new file mode 100644
index 000000000..7a30ba691
--- /dev/null
+++ b/.github/workflows/opensquat.yml
@@ -0,0 +1,38 @@
+name: Run openSquat
+run-name: Run openSquat
+# Runs manually (workflow_dispatch) or when called from another workflow
+# (workflow_call). The daily schedule below is currently disabled.
+on:
+  workflow_dispatch:
+  workflow_call:
+  #schedule:
+  #  - cron: '0 1 * * *'
+# Write access is needed so the job can push the committed results.
+permissions:
+  contents: write
+
+jobs:
+  run-opensquat:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: main
+
+      - name: opensquat.sh
+        run: bash functions/opensquat.sh
+
+      - name: Push
+        # Pass the repository variables through the environment and quote
+        # them so that values containing spaces (e.g. a full name) are not
+        # word-split by the shell.
+        env:
+          GIT_EMAIL: ${{ vars.GIT_EMAIL }}
+          GIT_USERNAME: ${{ vars.GIT_USERNAME }}
+        run: |
+          git config user.email "$GIT_EMAIL"
+          git config user.name "$GIT_USERNAME"
+          git add .
+          git diff-index --quiet HEAD || git commit -m "Retrieve domains via openSquat"
+          git push -q
diff --git a/config/opensquat_keywords.txt b/config/opensquat_keywords.txt
new file mode 100644
index 000000000..15de49604
--- /dev/null
+++ b/config/opensquat_keywords.txt
@@ -0,0 +1,229 @@
+google
+facebook
+paypal
+amazon-
+microsoft
+whatsapp
+adidas
+vogue
+yeezy
+nike
+abercrombie
+armani
+billabong
+vuitton
+hermes
+gucci
+cartier
+chanel
+prada
+swarovski
+burberry
+levis
+skechers
+puma
+foradoption
+greyparrot
+shippingagency
+alfresco-
+fnamerica
+bulkammunition
+beararchery
+rockislandarmory
+springfieldarmory
+bularmory
+australianshepherd
+barrettfirearm
+berettafirearm
+bassethound
+beaglepup
+bengalkit
+berettagun
+berettausa
+bernesemountain
+bordercollie
+bostonterrier
+breedershome
+britishshorthair
+brooks-
+browningfirearm
+browningarm
+browninggun
+browningusa
+browningweapon
+frenchbulldog
+bulldogpup
+englishbulldog
+bulldogbreed
+bulldoggen
+bulldoghome
+bulldogshome
+bullterrierpup
+shirebullterrier
+storecanada
+illuminatiworld
+worldwide
+winchesteramm
+winchesterarm
+winchesterfirearm
+winchestergun
+winchesterprimer
+winchesterusa
+euronics
+gunstore
+universal-
+unitedship
+unitedparcel
+unitedlogistic
+unitedexpress
+capitaltrust
+globaltrust
+translogistic
+cointrade
+bostonterrier +bullterrier +terrierhome +teacuppomeranian +teacupchihuahua +teacuppup +teacupyorki +teacuppoodle +teacupmalt +teacuphome +taurusfirearm +taurusarm +taurusgun +taurusjudge +taurususa +tacticalgun +tacticalshop +tacticalknife +tacticalfirearm +swiftcourier +swiftcargo +swiftlogistics +swiftsend +swiftship +swiftdel +supreme- +supply- +usstylish +style- +staffordshire +springfield- +springfieldarm +springfieldfirearm +springfieldgun +springfieldusa +ssdsolution +chemicalsolution +siberiankitt +siberianhusky +shippingcontainer +uspship +expressship +discreetship +unitedship +shippingagency +shippingexpress +shippingservice +shippingdeliver +shippinglogistic +logisticsship +shippingcompany +cargoshipp +priorityship +sheltiepup +sheltiehome +deliveryservice +courierservice +cargoservice +logisticservice +logisticsservice +shippingservice +expressservice +scottishfold +expresslogistic +onschuhe +supraschuhe +schnauzerpup +minischnauzer +kittenscattery +catscattery +samoyedpup +salomon- +salomonoutlet +russianblue +jackrussell +onrunning +rottweilerhom +germanrottweiler +rottweilerpups +rottweilerbreed +henryrifle +marlinrifle +seekinsprecisionrifle +tikkarifle +marlinrifle +goldenretriever +retrieverpup +labradorretriever +magnumresearch +researchchemical +exoticreptile +firearmsreload +handraised +reloadingbrass +familyraised +pugpup +puppieshome +puppyhome +puppiesfor +prettylitter +buypsychedelic +prioritymail +priorityship +federalprimers +blackhorn209powder +imrpowder +smokelesspowder +winchesterpowder +ammopowder +poultryfarm +poodlemalt +poodlepup +poodlehome +forsale +poodleshome +pomeranianpup +pocketbully +corteiz +pinballmachine + +expresscargo +expresslogistic +expressdelivery +lululemon +autoreifen- +expressshipping +deliverymails +discreetdelivery +discretemails +usps- +packagingexpress +trackingservices +shippingdeliveries +mailshipping +shippingexpress +petshome +puppyspot +puppyhome +deliveryservice +goldenretriever +goldendoodle 
+winchestergun
+winchesteramm
+worldwide
+discount
+deutsch
+forsale
\ No newline at end of file
diff --git a/config/search_terms.csv b/config/search_terms.csv
index f92ddd78d..fc91bc4d0 100644
--- a/config/search_terms.csv
+++ b/config/search_terms.csv
@@ -51,3 +51,4 @@ We source designer style products. And have been able to do this by building gre
 "we are passionate about providing high-quality, stylish T-shirts that allow our customers to express themselves creatively. Whether you're looking for bold designs, personalized prints, or trendy graphics, we have something for everyone.",,Low count (7)
 "Before we can show you nude pics of horny women in your area that want to fuck right now, we need to ask a few quick questions.",y,Low count (13)
 "You will see hot nudes! Please be discreet.",y,Low count (2)
+"There is no shortage of outstanding casinos in Australia. But for travelers and those who want to get away for a few days and enjoy a loaded gambling spree, an in-and-out casino may not always be enough. Thankfully, casino hotels offer the opportunity to hop out of a comfortable bed, have a delicious breakfast on-site, and take but a few steps to slot machines and table games.",,
diff --git a/functions/opensquat.sh b/functions/opensquat.sh
new file mode 100644
index 000000000..a37c8e6c4
--- /dev/null
+++ b/functions/opensquat.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Uses openSquat to find phishing domains from a list of newly
+# registered domains.
+
+readonly DEAD_DOMAINS='data/dead_domains.txt'
+readonly PARKED_DOMAINS='data/parked_domains.txt'
+readonly KEYWORDS='config/opensquat_keywords.txt'
+readonly NRD='list/wildcard_domains/nrd.txt'
+
+# Function 'opensquat' installs openSquat, collates a fresh list of newly
+# registered domains (NRDs) and runs openSquat on the domains that were not
+# in the previous list and are not known to be dead or parked.
+# Output:
+#   data/pending/domains_opensquat.tmp
+#   exit status 1 if openSquat cannot be installed or any link is broken
+opensquat() {
+    # Install openSquat
+    git clone https://github.com/atenreiro/opensquat || exit 1
+    pip install -r opensquat/requirements.txt || exit 1
+
+    # Collate fresh NRD list in a temporary file so that the previous list
+    # is left untouched if a download fails. Exit with status 1 if any link
+    # is broken.
+    {
+        wget -qO - 'https://raw.githubusercontent.com/shreshta-labs/newly-registered-domains/main/nrd-1m.csv' \
+            || exit 1
+        # Download before filtering: in a pipeline, the exit status of grep
+        # would mask a failed wget
+        wget -qO hagezi_nrd.tmp 'https://cdn.jsdelivr.net/gh/hagezi/dns-blocklists@latest/wildcard/nrds.10-onlydomains.txt' \
+            || exit 1
+        grep -vF '#' hagezi_nrd.tmp || exit 1
+        # -f makes curl fail on HTTP errors instead of outputting the
+        # server's error page
+        curl -sfH 'User-Agent: openSquat-2.1.0' 'https://feeds.opensquat.com/domain-names.txt' \
+            || exit 1
+    } > fresh_nrd.tmp
+
+    # Save previous NRD list for comparison (empty on the first run)
+    if [[ -f "$NRD" ]]; then
+        mv "$NRD" old_nrd.tmp
+    else
+        : > old_nrd.tmp
+    fi
+
+    sort -u fresh_nrd.tmp -o "$NRD"
+
+    # Filter out previously processed domains and known dead or parked domains
+    comm -23 "$NRD" <(sort old_nrd.tmp "$DEAD_DOMAINS" "$PARKED_DOMAINS") > new_nrd.tmp
+
+    mkdir -p data/pending
+
+    # Run openSquat and collect results
+    python3 opensquat/opensquat.py -k "$KEYWORDS" \
+        -o data/pending/domains_opensquat.tmp -d new_nrd.tmp
+}
+
+# Function 'format_file' calls a shell wrapper to standardize the format
+# of a file.
+# $1: file to format
+format_file() {
+    bash functions/tools.sh format "$1"
+}
+
+# Function 'cleanup' deletes the openSquat installation and the temporary
+# files in the working directory. It runs whenever the script exits.
+cleanup() {
+    # Delete openSquat (-f so that a missing directory is not an error)
+    rm -rf opensquat
+
+    find . -maxdepth 1 -type f -name "*.tmp" -delete
+}
+
+# Entry point
+
+trap cleanup EXIT
+
+for file in config/* data/*; do
+    format_file "$file"
+done
+
+opensquat
diff --git a/functions/retrieve_domains.sh b/functions/retrieve_domains.sh
index affe1ec43..d37c2933d 100644
--- a/functions/retrieve_domains.sh
+++ b/functions/retrieve_domains.sh
@@ -48,6 +48,7 @@ source() {
     source_scamadviser
     source_stopgunscams
     source_google_search
+    source_opensquat
 }
 
 # Function 'process_source' filters results retrieved from a source.
@@ -397,6 +398,22 @@ search_google() {
     process_source
 }
 
+# Function 'source_opensquat' hands the pending openSquat results file to
+# 'process_source', if that file exists.
+source_opensquat() {
+    local results_file='data/pending/domains_opensquat.tmp'
+    local ignore_from_light=true
+    local source='openSquat'
+
+    # Nothing to do when the results file is absent (the file itself is the
+    # source)
+    if [[ ! -f "$results_file" ]]; then
+        return
+    fi
+
+    process_source
+}
+
 source_manual() {
     local source='Manual'
     local results_file='data/pending/domains_manual.tmp'