From 0a6188620962c3d6f14cda3de7dae3832cf1a5a8 Mon Sep 17 00:00:00 2001 From: J <91372088+jarelllama@users.noreply.github.com> Date: Thu, 26 Dec 2024 13:29:10 +0800 Subject: [PATCH] Make minimum character cound for valid domains be 2 --- data/raw.txt | 101 ++++++++++++++++++++++++++++++++++++ data/raw_light.txt | 97 +++++++++++++++++++++++++++++++++- scripts/retrieve_domains.sh | 2 +- scripts/test_functions.sh | 10 +++- scripts/tools.sh | 2 +- scripts/validate_domains.sh | 2 +- 6 files changed, 208 insertions(+), 6 deletions(-) diff --git a/data/raw.txt b/data/raw.txt index 95ff57cc2..f3cd729e8 100644 --- a/data/raw.txt +++ b/data/raw.txt @@ -681,6 +681,7 @@ 0daa70aafd.com 0daz9m70.duckdns.org 0dz9gnj.duckdns.org +0e.si 0e038c81b034f7c5.com 0e24f9cfc5f15c23.com 0e32a04yah09oo4ei03irs0249secure83i.netlify.app @@ -741,12 +742,16 @@ 0hk9dtm.duckdns.org 0hl2rjykjp.shop 0htmbsxrm.duckdns.org +0i.pm +0i.wf 0i39n57z.duckdns.org 0i7.cc 0i84o595.duckdns.org 0i9.cc 0idmnz70x.top 0iuye6wj2ks02q8.freewebhostmost.com +0j.re +0j.wf 0j126w59q3ahz0h5.top 0j1bd4z9w3vd.yoga 0j4st4myh.duckdns.org @@ -805,6 +810,7 @@ 0osjwree57.freewebhostmost.com 0p-segdd-bhddmx-le.weebly.com 0p.mywebsites360.com +0p.rs 0p3ifm.duckdns.org 0p5o3slzj.duckdns.org 0p74ene2.duckdns.org @@ -830,6 +836,7 @@ 0siyapg.duckdns.org 0sqfjq8.duckdns.org 0storageatools0.xyz +0t.yt 0t6j9239lv.com 0t7uihb.duckdns.org 0tcv48q7u.duckdns.org @@ -846,6 +853,7 @@ 0utlook-microsoft.com 0utlookonline.webflow.io 0uw8bh48my.com +0v.wf 0v10nc.duckdns.org 0v2dl58ic.duckdns.org 0v6qej7v.skin @@ -854,6 +862,7 @@ 0vijv8w.com 0vxnmfnyy.top 0vz.cc +0w.pm 0w0b87f4dxc.buzz 0w0wucbxy3.top 0w2jx77sjbp3zjnnh467vu7p0.cyou @@ -2607,6 +2616,7 @@ 1hr34s.duckdns.org 1hvg1ad3.square.site 1hzq.jacoboon.shop +1i.pm 1i6jiu.duckdns.org 1inch-defi.vercel.app 1inch-defiwallet.webflow.io @@ -2624,6 +2634,7 @@ 1insurance.lat 1isd8w1sl.duckdns.org 1iywxln2uz382tvm.top +1j.pm 1j0.cc 1j117.net 1j48g5.duckdns.org @@ -2756,6 +2767,8 @@ 1tr7dus.duckdns.org 1ts8lhexi.duckdns.org 1tyhmbhv.duckdns.org +1u.pm +1u.wf 1u28sr1w1.duckdns.org 1uc58t8.com 1uccbal.weebly.com @@ -3794,6 +3807,9 @@ 2hnp93vqk.duckdns.org 2hx99sgp45.icu 2hzmwflbxojp.cominbaxz.xyz +2i.nu +2i.pm +2i.wf 2i1b60.duckdns.org 2i8s8p.duckdns.org 2ibc908vd66.top @@ -3897,6 +3913,8 @@ 2smx202417.cat 2smx202418.cat 2smx202419.cat +2t.pm +2t.wf 2t487.hp.peraichi.com 2t6pmeilf.top 2t93pnohq.duckdns.org @@ -4781,6 +4799,7 @@ 3dsecure-buy.com 3dwall.art 3e.alspoint.top +3e.pm 3e.simth.me 3e05d6c0-b6c2-49d1-b8f6-d9985f5bb553.com 3e4rter4.shopifay.in @@ -4815,6 +4834,7 @@ 3ghvvjke4.duckdns.org 3gjg2f84.skin 3gjjjcrhpb4h.wiki +3h.wf 3h1.xyz 3h3yyrla.duckdns.org 3h4yd01pw1g17jh.xyz @@ -4990,6 +5010,7 @@ 3ygd.pages.dev 3yh2025pd.com 3ysuzjrxr.duckdns.org +3z.nu 3z1wew.duckdns.org 3z2cfq.duckdns.org 3z4mhwsb.duckdns.org @@ -5419,6 +5440,7 @@ 4bvbkc-87g.com 4bvm62zymd7.top 4bxbhjbv.duckdns.org +4c.pm 4c29n5td6t.top 4c7z2ia.duckdns.org 4c8557717148853f.com @@ -5499,6 +5521,7 @@ 4ip-secure-bofa.serveusers.com 4it2bpz.duckdns.org 4iv1wv92.skin +4j.pm 4j1fa47.duckdns.org 4j2tqvq1q.com 4jco9.doctortrf.com @@ -5513,12 +5536,14 @@ 4lga5mmbs.duckdns.org 4lm9ry.duckdns.org 4lv76hqq.duckdns.org +4m.wf 4mae8.typee.top 4mfo2a.duckdns.org 4more.llmfun.top 4mukla.com 4mv0dy01bhwy.shop 4my1jb858pvc5.yoga +4n.wf 4nfpva8v.duckdns.org 4nh3rej.duckdns.org 4njtfgc.duckdns.org @@ -5539,6 +5564,7 @@ 4pt2yy.eth.limo 4pwymbjah.duckdns.org 4py3i0.duckdns.org +4q.pm 4q1dzv.duckdns.org 4q4x99p.duckdns.org 4qfqzc2c.duckdns.org @@ -5559,6 +5585,7 @@ 4rtzys.shop 4rxofficial.com 4ryta9i39.com +4s.pm 4s7fhmxyvnvfg.skin 4s9twx.duckdns.org 4sa98d798.blogspot.tw @@ -5601,6 +5628,8 @@ 4vwmd-g68.com 4vxdgxrx.skin 4vxx2kx4.skin +4w.pm +4w.wf 4w089m03.skin 4w3g6s9p.duckdns.org 4w9g6fl6.duckdns.org @@ -6847,6 +6876,7 @@ 5ri22tr.duckdns.org 5rligz.duckdns.org 5rpzbrn-j6.trust11.cc +5s.pm 5s3zb4hw.site 5sed1.eieaapdnox.top 5souxue.com @@ -6910,6 +6940,8 @@ 5ytjhup.duckdns.org 5yxt26it.duckdns.org 5yz1tlv.duckdns.org +5z.pm +5z.wf 5z16vp1qo5z.wiki 5z211jw.duckdns.org 5zgq2.hp.peraichi.com @@ -7721,6 +7753,9 @@ 6sao9cvr.duckdns.org 6sjm7gnr.skin 6sx-9y-5d-6g-8f-4c.cyou +6t.nz +6t.pm +6t.re 6t2pat9.duckdns.org 6t4.nl 6tdu14s.duckdns.org @@ -7742,6 +7777,7 @@ 6vcm30xm.store 6vf95df6bv4d9bv.webhop.net 6vnb86f.duckdns.org +6w.re 6w295387k.skin 6w3vqx.duckdns.org 6w6thw6u6u6w46yw46.pages.dev @@ -7759,6 +7795,7 @@ 6xf2025pd.com 6xhysr.duckdns.org 6xy619ey.duckdns.org +6y.re 6y87wqfwf.cyou 6yfvgvq46.top 6yiyh5rx.skin @@ -8758,6 +8795,7 @@ 7cric-apk.pro 7cvl071a.duckdns.org 7cvwln31.duckdns.org +7d.wf 7d0w61.duckdns.org 7d1kvsewt.duckdns.org 7d2vvzox2.duckdns.org @@ -9955,6 +9993,9 @@ 8sr77ydl.duckdns.org 8ss1u7kgo.kkyijv10.top 8starbank.com +8t.ae +8t.pm +8t.wf 8t2wsbc.duckdns.org 8tenpbdfarts.shop 8tiygjh.store @@ -10721,6 +10762,8 @@ 9q52sfs3y.duckdns.org 9q9rjhs6rqk6f.yoga 9qmj5f7bqg.sbs +9r.re +9r.sk 9r0bc5okc.top 9rfhbf65b1f5gbni.shop 9rwoiy6g.duckdns.org @@ -11029,6 +11072,7 @@ a.vip3656jun27.cc a.xxdd98.dns-dynamic.net a.xxmm98.dns-dynamic.net a.yyyy.cf +a0.pm a0012ae78.com a011f905053b0f9c.com a0156356.xsph.ru @@ -26838,6 +26882,7 @@ b8r.cc b8ruh2.duckdns.org b8x.org b8yxmetpg0uns.pages.dev +b9.pm b9254db7269d0f7d.com b92678.com b92755.com @@ -38023,6 +38068,7 @@ c.sucora.workers.dev c.vip3656240126.cc c.vip3656jun27.cc c0-0pw34r.com +c0.wf c0089be7.341756953.xyz c0093088014545.lhr.life c00lkiddrobloxid.blogspot.bg @@ -38248,6 +38294,7 @@ c6jz863yx.skin c6mj6-saaaa-aaaag-abeea-cai.raw.ic0.app c6o.cc c6vj816d.shop +c7.lc c70gg438jd.skin c7118bbaa21d429462a378145a66fb9c-dot-goff039302032323.rj.r.appspot.com c729ts.duckdns.org @@ -40773,6 +40820,7 @@ cdxbxxgcxx.com cdylxx.net cdypto.com ce-logistic.com +ce.tv ce02ee6e3641f05aed8560661b06b63cd91afa7b3a8540092cbb1cf5f9.pages.dev ce11910.tw1.ru ce17011.tw1.ru @@ -52392,6 +52440,7 @@ d.shope-my.cc d.shope-my.top d.xiaomi321.eu.org d.xyzz.cc +d0.wf d029bc69065f7c3d.com d071f87h.skin d08264-ref.webflow.io @@ -60049,6 +60098,7 @@ e.shope-my.top e.sinetw.shop e.tinebit.com e.turspost.vip +e0.wf e097.okdfhb.art e0gjh9hr.duckdns.org e0kp1z1172fc.live @@ -60206,6 +60256,7 @@ e8mw0.shop e8tp352gz.skin e8v80hqpx.top e8yr5fe.ctk9akio.eu.org +e9.wf e90qg604qj.skin e91hn419z.shop e93bx199z.shop @@ -65453,6 +65504,7 @@ f.bbuaa.top f.gtsepress.vip f.jtexpressr.top f.vpncake.workers.dev +f0.tel f0315086.xsph.ru f0359971.xsph.ru f0372926.xsph.ru @@ -72379,6 +72431,7 @@ fyylso.duckdns.org fyynhnbgqb.icu fz-crypto.com fz-telegram.org +fz.ms fz1yeisln.duckdns.org fz5wqdj3db.ru fz848f0.duckdns.org @@ -72462,6 +72515,7 @@ g.t9live3.vip g.t9live4.vip g.thepaper.cc g.wappinfo.top +g0.pm g00dm0v135.top g00dv1b35.top g00gle.info @@ -72550,6 +72604,9 @@ g3ddpj.duckdns.org g3qwgfpcdgkt.com g3slkze.duckdns.org g3zpbtfp26.sbs +g4.nu +g4.tel +g4.wf g40cd270z.shop g40cm146r.shop g40fd203d.shop @@ -77313,6 +77370,7 @@ googlc.vip google--play.com google--services.com google--sevices.com +google-.online google-123.com google-22r.com google-33r.com @@ -84719,6 +84777,8 @@ h.birds.com h.euroasin.com h.m.u.k.speho.faradid.zlc.ir h.mtradi.top +h0.pm +h0.wf h00u8w0y.asia h015hudfq7pz0.yoga h023slch.duckdns.org @@ -84966,6 +85026,7 @@ h5qz147r.shop h5uox.shop h5vr8.shop h5wmqi.duckdns.org +h6.re h60gv738d.shop h60zy862d.shop h62bx345zc.skin @@ -93033,6 +93094,7 @@ i.gtexpsers.cc i.it-admincenter.com i.jtexpressr.cc i.procedura-ln.185-42-14-109.cprapid.com +i0.wf i06c6co3.duckdns.org i0dkycw1z.duckdns.org i0dlivv.duckdns.org @@ -93041,6 +93103,7 @@ i0ukqkau.doc.checkiteasy.com i0up.com i1-usps.shop i1-usps.top +i1.pm i12-10-58h58188.shop i12b.net i13jk316d.shop @@ -97882,6 +97945,7 @@ iywsqbkjkr.shop iyye9d.duckdns.org iyyrkqzqrjpjh5hq.top iyyujhj1tmbfyitq.cyou +iz.gy iz4s8v.duckdns.org iz9l96jwbklj.top izakupowo.apilo.com @@ -97916,6 +97980,7 @@ j.cardiolis-new.com j.easyloss-new.com j.sandiu5666.co j.waptradinfo.top +j0.wf j04rz8x.duckdns.org j06cc22ju.duckdns.org j07fy4bkr.duckdns.org @@ -97935,6 +98000,7 @@ j1rj741d.shop j1tqhmav.duckdns.org j1uuvr9qev.quiizon.sbs j1x8kqnzpmqszwzf.top +j2.gy j20fj469d.shop j22dk132d.shop j22m363j3p0cdgp.xyz @@ -98071,6 +98137,7 @@ j7mxdjm2.paperform.co j7np571d.shop j7o15rugs.duckdns.org j7vq3.shop +j8.si j807gqswg6.cyou j80pm35k2pg.xyz j81g4fw2.skin @@ -101272,6 +101339,7 @@ k-usps.top k-wayoutletfrance.fr k.hondrodoc-new.com k.hsd.elementfx.com +k0.pm k00fcv0u1.com k01mx.com k0c.cc @@ -105324,6 +105392,7 @@ l-usps.top l.bahernpost.cc l.ead.me l.outlettojp.top +l0.wf l0aqclb3a.duckdns.org l0g-zimbra.webflow.io l0gin-dan4.myz.info @@ -111734,6 +111803,9 @@ m-trzur-startio.gitbook.io m-und-s-marketing.online m-xiaomivip.com m-xvideos.com +m0.nu +m0.wf +m0.yt m041t7480n.cyou m04hpqw63.ukit.me m0civq.duckdns.org @@ -111859,6 +111931,7 @@ m4dk273z.shop m4gk270d.shop m4l6vl55u.top m4z.replit.app +m5.lt m526an.duckdns.org m52kb65zs.skin m52yj907mv.skin @@ -126295,6 +126368,7 @@ n2tflix.netlify.app n2w1ytxhxwc.top n2wt473kw.skin n2wv9.shop +n3.wf n30bk971d.shop n32hp748d.shop n36pq399bt.skin @@ -126328,6 +126402,7 @@ n4h3au7v5.duckdns.org n4kfri9.duckdns.org n4wj62wt.skin n4wyj438.skin +n5.ms n50xs528d.shop n52qv671uu.skin n535bh.duckdns.org @@ -136674,6 +136749,7 @@ p.singposttrackb.top p.t9live2.vip p.t9live3.vip p0.fsoall.ir +p0.wf p00wyuhq6.top p072mx595s.top p0bettery.com @@ -136761,6 +136837,7 @@ p2ptradingspace.com p2yq8g2i.duckdns.org p3.154-216-18-83.cprapid.com p3.elvinlong.cn +p3.ms p309nw.duckdns.org p30jm230d.shop p30jt782d.shop @@ -136902,6 +136979,7 @@ p8k.smart-bnb.org p8u5sge.duckdns.org p8xk4hl2fv9wq7mv.com p9.193-222-96-128.cprapid.com +p9.tel p90gj540d.shop p90jfya.duckdns.org p90mx346jl.skin @@ -146083,6 +146161,8 @@ q.turspost.cc q.turspost.vip q.wbhal.gq q.yydsyytt.com +q0.pm +q0.wf q09wjk4n.duckdns.org q0iokc3t.duckdns.org q0ogle.com @@ -147708,6 +147788,8 @@ r.jtexpxees.cc r.malajtexpess.cc r.perusposts.cc r.postofduomi.cc +r0.pm +r0.wf r032s4d0q.duckdns.org r048dbkc.duckdns.org r08y8zdb.duckdns.org @@ -147813,6 +147895,7 @@ r5qs259r.shop r5t98jwuy.skin r5x8lm2z9.com r5xdwb8h4.top +r6.nz r61fb954qx.skin r62wj803r.shop r637cs2753df533.com @@ -153933,6 +154016,7 @@ s.waptradinfo.top s.webstibons.com s.ymunifo.com s0.fsoall.ir +s0.pm s004u.csb.app s06ww1jg.skin s0l9b8h20v7.top @@ -154190,6 +154274,7 @@ s78bp809d.shop s79bv736z.shop s7d6sjkii.duckdns.org s7r5f8n5.skin +s8.cx s80921.blogspot.com s80rk212pg.skin s83dwf8w.skin @@ -170977,6 +171062,7 @@ t.m-telegram.cc t.me-ton.one t.sads77.top t.soft.sh +t0.wf t08dmf2w.skin t08mg-vmvg.com t0dffwq85yx.top @@ -171167,6 +171253,7 @@ t6n3w8c2.work t6nwfn9j.skin t6pg754wd.skin t6pj561d.shop +t7.nz t71bt851d.shop t71mk185d.shop t72pq639d.shop @@ -187249,6 +187336,8 @@ u.jtexpressr.cc u.jtexpresx.cc u.jtexpresx.vip u.ppx6.com +u0.nz +u0.pm u0130037.cp.regruhosting.ru u0206390-center.isp.regruhosting.ru u06wtvmqgk.shop @@ -193124,6 +193213,7 @@ v.aispoint.vip v.gtsepress.vip v.majesticbank.sc v.waptradinfo.top +v0.cx v0dacom-pts.cam v0dacom-pts.one v0dacom.art @@ -196594,6 +196684,7 @@ vrtistic.shop vrvtn.top vry1fy8q.square.site vryclientamzsecure.myddns.me +vs.gy vs95off.com vsa2fu.duckdns.org vsafbg.blogspot.mk @@ -197098,6 +197189,7 @@ w.turspost.vip w.waptradinfo.top w.winnermy.com w.yydsyyrr.com +w0.pm w06mk1fh.wiki w0b9nuic.duckdns.org w0iik.shop @@ -197221,6 +197313,8 @@ w3q9odxk.duckdns.org w3s7-j0y0us-n0el-miche1in.glitch.me w3x05cgqpv19hu4h.xyz w3xw1qqb0.cyou +w4.nz +w4.wf w40pt766dw.skin w40vj651d.shop w41fk517bd.skin @@ -197273,6 +197367,7 @@ w5qh76745n.top w5xg186d.shop w5yjb2.duckdns.org w5yp6qvmz06.xyz +w6.nz w62dw445pv.skin w62zp397gc.skin w63321.com @@ -203412,6 +203507,7 @@ wk-whatsapp.com wk.448836.xyz wk.bicvip.com wk.nattw.com +wk.pl wk.xzte.eu.org wk1.xlq888.cloudns.org wk7bxl6.duckdns.org @@ -205239,6 +205335,8 @@ www-yg777.com.ph www-yh97828.com www-youtubetomp4.com www-zzcpz.com +www.4.fidelityprimes.com +www.rbs-supportdigital.com www000085.com www000478.com www001177.com @@ -209555,6 +209653,8 @@ y.jtexpresx.cc y.jtexpresx.vip y.jtexpsers.cc y.startlink.vip +y0.pm +y0.wf y02vju9.duckdns.org y04351335.duckdns.org y04uw.safwhatsapp.top @@ -214842,6 +214942,7 @@ zjzyshipping.com zk-nations.vercel.app zk-telegram.org zk-twitter.org +zk.qa zk122.net zk153t.kro.kr zk4.me diff --git a/data/raw_light.txt b/data/raw_light.txt index 1b5e1932e..17a0379bf 100644 --- a/data/raw_light.txt +++ b/data/raw_light.txt @@ -4,18 +4,27 @@ 09uu0u0.com 0a0074066c49886a39b5a3072582f5d6.net 0cf5ff34.ngrok.io +0e.si 0ffice365.nu 0ffice365.org 0ffice395.net +0i.pm +0i.wf +0j.re +0j.wf 0nedr1ve.com 0nlyfams.com 0onlyfans.com 0openai.com +0p.rs 0pdent.com 0pena1.com 0storageatools0.xyz +0t.yt 0utl00k.org +0v.wf 0vijv8w.com +0w.pm 1000mostrecommended.com 10bet938.com 10xtdg.com @@ -42,6 +51,8 @@ 1cbit.org 1clickshops.com 1firstcapitals.com +1i.pm +1j.pm 1k4.xyz 1n4.xyz 1smartdesign.com @@ -49,6 +60,8 @@ 1stpremierplc.com 1sttheworld.com 1tab.co +1u.pm +1u.wf 2022factorysale.ru 2024giftexchage.info 2024giftredem.info @@ -64,7 +77,12 @@ 24hrsroyaltrst.com 27o.nl 2fa3c2fa16c47d9b9bff8986a42b048f.com +2i.nu +2i.pm +2i.wf 2n8rd3zz1.top +2t.pm +2t.wf 2xdepp.com 2yd.eu 322ffbbc7c1b312c2f9d942f20422f8d.com @@ -77,16 +95,19 @@ 3656qq.com 365cn.vip 37513.cn +3e.pm 3ec9b600789b3bacf2c72ebae142a9c3.net 3ed.hu 3ellsfargo.com 3ezzy.finance +3h.wf 3h1.xyz 3hhr8h2hx.top 3iq-digital.com 3kdyn.com 3number.employerdbz.icu 3potify.shop +3z.nu 4-72-oc-w.top 4-72.homes 4-72com.ink @@ -99,12 +120,20 @@ 455686.c1.biz 48hourseasyproact.com 48proactivemarketing.com +4c.pm 4iktok.com +4j.pm 4k1.xyz +4m.wf +4n.wf +4q.pm 4roblox.shop +4s.pm 4uscompanyllc.com 4uvize.com 4vri.website +4w.pm +4w.wf 4wheelsfame.com 4xq.nl 51goout.com @@ -121,9 +150,12 @@ 5qe8.com 5qh.net 5qw.pw +5s.pm 5sed1.eieaapdnox.top 5starjersey.com 5v0.nl +5z.pm +5z.wf 6-5-grendel-ammo.com 60af8bc770ccf54023d620dfd5ceb7b8-customers-mufg.com 60i.nl @@ -186,14 +218,20 @@ 6836501.com 6ax.nl 6google.com +6t.nz +6t.pm +6t.re 6t4.nl +6w.re 6wr9.com +6y.re 70334p7x2zwnx4ninrrbfz02.z13.web.core.windows.net 73780fbd309561e201a4aee9914d882d.org 750review.com 756723.top 77unishop.com 79jewellers.com +7d.wf 7jj34.com 7nq48cg.njwhmg.info 7p8t44.shop @@ -210,6 +248,9 @@ 8836500.vip 8836522.vip 8885716.com +8t.ae +8t.pm +8t.wf 8tradersmarket.com 900cap.blogspot.com 93mobiles.com @@ -224,11 +265,14 @@ 9a89785fcf4ffd75b7363a31b6dddec4.serveo.net 9bet938.com 9gp.cc +9r.re +9r.sk 9xflix.bar a-fin.net a-isth.ink a-libaba.sbs a-spotify.freewebhostmost.com +a0.pm a0728173.xsph.ru a0917004.xsph.ru a1mazon.com @@ -1532,6 +1576,7 @@ b2secops.co.uk b3vv.com b8x.org b8yxmetpg0uns.pages.dev +b9.pm b9inance.com babaroga.lib babybeans.shop @@ -2613,6 +2658,7 @@ bytesbazar.com bytewhales.com bytlo.com bytrejo.com +c0.wf c0inbase.org c0lnbase.com c13a856f4a879a89e9a638207efd6c94.biz @@ -2620,6 +2666,7 @@ c3de05dcd2b74491887f.z13.web.core.windows.net c4ypto.com c4z.pl c5ypto.com +c7.lc caadapsal.top caasdagg.top cabezonshop.me @@ -4094,6 +4141,7 @@ cyrilluseashop.com cysotomotiv.com cywdhlposs.ink czfirearmstore.usa.com +d0.wf d1sneyplus.com d2luxcosmetics.com daatha.com @@ -4809,7 +4857,9 @@ dynamics.ddnsking.com dynastydigitalfx.com dzppyl.love e-cavi.com +e0.wf e3tk4.shop +e9.wf eabilityz.com eachustr.com eaglebossconcrete.com @@ -5457,6 +5507,7 @@ eyzoei.com ezicerinks.shop eznb.net f-istudio.ru +f0.tel f0559838.xsph.ru f0595867.xsph.ru f1acebook.com @@ -6105,14 +6156,19 @@ fyfyvfytvghv.workers.dev fygchcytctyctyctytytyxty.d1f6y3om6phcc6.amplifyapp.com fyingshy.com fynavi.com +fz.ms g-1st.com g-coinbase-ppsecure.com.serveo.net g-oogle.net +g0.pm g00gle.info g00gle.online g00qle.com g0oogle.site g1vm6.shop +g4.nu +g4.tel +g4.wf g4u22g.blogspot.com g5wcesdfjzne7255.onion.to g6nx2.shop @@ -7564,11 +7620,14 @@ gyzu.mozillaupdates.us gz3.nl gzdonno.com gzwx001.com +h0.pm +h0.wf h2gconsultores.cl h378576.atwebpages.com h4wn3.shop h5.ichcoin.top h5.luckycoinn.com +h6.re haaretz-news.com habfan.com habitatfortool.com @@ -8151,7 +8210,9 @@ hzace.art i-consultants.online i-dhl.top i-postacg-me.top +i0.wf i0up.com +i1.pm i6n.xyz i758769.atwebpages.com iaidevrssfeed.centralus.cloudapp.azure.com @@ -8686,9 +8747,13 @@ iwebtech.in iworldfxdigitals.com ixfkrld.heavyteen.shop iybaorcv.black +iz.gy izezggefgegfzto.ws j-ic.co.intneral-document-he-gr-me.run.place +j0.wf +j2.gy j68.info +j8.si j9roofing.com jaabalshop.com jabhp.art @@ -8982,6 +9047,7 @@ jventurey.monster jwilliamsattorneys.com jwskuw.biz jwskuw.love +k0.pm k1gkl25as.top k2spicesprayshop.com k2spicevendors.com @@ -9301,6 +9367,7 @@ kyhgyhf.com kzbxpmtdqvlyo.love kzktio.com l.ead.me +l0.wf l12.shigexc.shop l2nlp.ygrrvlvvgf.top l6.datingeo.com @@ -9882,6 +9949,9 @@ lynxx.click lyrades.com lyycs.store m-grill.com +m0.nu +m0.wf +m0.yt m1crosoft.support m1crosoft365.com m8zk1.shop @@ -10991,8 +11061,10 @@ mziep.pw n27.de3.mytemp.website n2dlk.bfrtbrkefc.top n2nstore.com +n3.wf n3tc4t.hopto.com n3tflix.shop +n5.ms n9fz.com nachary.shop nacki.store @@ -11821,6 +11893,7 @@ ozekfivdg.love oztzshop.com ozzon.co.za ozzshop.myshoplaza.com +p0.wf p0rnexternalswx.z13.web.core.windows.net p0rnhub.click p0rnhub.online @@ -11829,6 +11902,8 @@ p0rnsgetcomics.z13.web.core.windows.net p0rnsverbatim.z13.web.core.windows.net p1nterest.com p2pnations-capital.com +p3.ms +p9.tel pa6ypal.com paaypal.org pacermonitor.com @@ -13023,6 +13098,8 @@ pycchol.shop pyhdge.com pyth-network-io.firebaseapp.com pzrs.shop +q0.pm +q0.wf q6zp8.shop qaardan.org qaliexpress.com @@ -13145,6 +13222,8 @@ qwjbusdi-sd.top qzbiog.africantexas.shop qzjfyl.com qzkgp.top +r0.pm +r0.wf r0bl0x.ru r1visa.law.blog r2elajikcosf7zee.onion.to @@ -13152,6 +13231,7 @@ r3fy6.shop r4e.pl r551001.com r58ua.ygrrvlvvgf.top +r6.nz r6hp2.shop rabbitstub.com racebook.nu @@ -13620,12 +13700,14 @@ s-fortfahren.com s-neu.info s-p-o-o-f-e-d.h-o-s-t.name s-prozess.com +s0.pm s1gn1fyh0se.cyou s3-be.cloud s3-dk.cloud s3-eu-north-1.culture-quest.shop s4iq9.shop s5s9mr9tqkt1e2ggi.z20.web.core.windows.net +s8.cx s8iz8.eieaapdnox.top sa.skin.healthfindings.website saaadnesss.shop @@ -15020,11 +15102,13 @@ szteroid.com szuperpiac.hu t-iktok.com t-iktok.shop +t0.wf t0kempock2t.com t0kinpoiket.mom t0knnponket.mom t3mk7.shop t551001.com +t7.nz t78r.vip t7myr.com taabao.com @@ -16620,6 +16704,8 @@ tz-line.cc tz6.org tzdxi.com tzkcl.shop +u0.nz +u0.pm u1ib3.shop u4ks8.shop u6oo3.shop @@ -17284,6 +17370,7 @@ uyghur.25u.com uzgirlsmassaj.com uzwmlmz.shop v-b-h.us +v0.cx v0dacom.art v0dacom.one v0dacom.shop @@ -17577,6 +17664,7 @@ vrl-sgqnmd.cc vrl-y6523.top vrlltagov.ink vrtistic.shop +vs.gy vs95off.com vsliveagent.com vsudbduvrdhdh.blogspot.is @@ -17606,7 +17694,11 @@ vww-kraken-pro.com vybroh.click vzkpwtgoyxvfj.work vzoh.art +w0.pm w2mu4.shop +w4.nz +w4.wf +w6.nz w88br.com w88kub.com waalmart.info @@ -18346,6 +18438,8 @@ xzamster.com xznrkvibjqu.beauty y-dhl.top y-stop.com +y0.pm +y0.wf y0utube.click y0utube.xyz y6ss1.shop @@ -18793,6 +18887,7 @@ zjcqes.shop zjhao.dtdns.net zjsndb.xyz zjyrsolar.com +zk.qa zk4.me zksync-dapp-portal.web.app zksyncera-ec57f1.ingress-haven.ewp.live @@ -18836,4 +18931,4 @@ zynpurestoe.com zystfree-heaven.com zz3r0.com zzock.shop -zzzakaito.com +zzzakaito.com \ No newline at end of file diff --git a/scripts/retrieve_domains.sh b/scripts/retrieve_domains.sh index 8945af888..b324d64d8 100644 --- a/scripts/retrieve_domains.sh +++ b/scripts/retrieve_domains.sh @@ -27,7 +27,7 @@ readonly DOMAIN_DASH_REGEX='[[:alnum:].-]+-[[:alnum:]-]+' # Only matches domains # Note the [[:alnum:]] in the front and end of the main domain body is to # prevent matching entries that start or end with a dash or period. -readonly DOMAIN_REGEX='[[:alnum:]][[:alnum:].-]+[[:alnum:]]\.[[:alnum:]-]*[a-z]{2,}[[:alnum:]-]*' +readonly DOMAIN_REGEX='[[:alnum:]][[:alnum:].-]*[[:alnum:]]\.[[:alnum:]-]*[a-z]{2,}[[:alnum:]-]*' readonly -a SOURCES=( source_165antifraud diff --git a/scripts/test_functions.sh b/scripts/test_functions.sh index 83fde4328..2a79c6cd7 100644 --- a/scripts/test_functions.sh +++ b/scripts/test_functions.sh @@ -411,6 +411,7 @@ test_invalid_removal() { printf "invalid-test.1x\n" printf "invalid-test.com/subfolder\n" printf "invalid-test-.com\n" + printf "i.com\n" } >> data/pending/domains_scamadviser.com.tmp # EXPECTED OUTPUT @@ -423,6 +424,7 @@ test_invalid_removal() { printf "invalid-test.1x\n" printf "invalid-test.com/subfolder\n" printf "invalid-test-.com\n" + printf "i.com\n" } >> out_manual_review.txt printf "invalid-test.xn--903fds\n" >> out_raw.txt @@ -434,6 +436,7 @@ test_invalid_removal() { printf "invalid,invalid-test.1x,scamadviser.com\n" printf "invalid,invalid-test.com/subfolder,scamadviser.com\n" printf "invalid,invalid-test-.com,scamadviser.com\n" + printf "invalid,i.com,scamadviser.com\n" } >> out_log.txt return @@ -448,12 +451,14 @@ test_invalid_removal() { printf "invalid-test.xn--903fds\n" printf "invalid-test.x\n" printf "invalid-test.100\n" + printf "invalid-test.1x\n" } >> input.txt # Validation script checks for invalid entries in the dead domains file { - printf "invalid-test.1x\n" + printf "invalid-test.com/subfolder\n" printf "invalid-test-.com\n" + printf "i.com\n" printf "dead-domain.com\n" } >> "$DEAD_DOMAINS" @@ -465,9 +470,10 @@ test_invalid_removal() { printf "invalid,100.100.100.1,raw\n" printf "invalid,invalid-test.x,raw\n" printf "invalid,invalid-test.100,raw\n" - printf "invalid,invalid-test.1x,dead_domains_file\n" + printf "invalid,invalid-test.1x,raw\n" printf "invalid,invalid-test.com/subfolder,dead_domains_file\n" printf "invalid,invalid-test-.com,dead_domains_file\n" + printf "invalid,i.com,dead_domains_file\n" } >> out_log.txt } diff --git a/scripts/tools.sh b/scripts/tools.sh index 6a9064588..096483f78 100644 --- a/scripts/tools.sh +++ b/scripts/tools.sh @@ -120,7 +120,7 @@ download_nrd_feed() { # Download the feeds in parallel # Note the feeds currently have a bug where it contains invalid domains curl -sSLZ "$url1" "$url2" | mawk '!/#/' \ - | grep -oE '[[:alnum:]][[:alnum:].-]+[[:alnum:]]\.[[:alnum:]-]*[a-z]{2,}[[:alnum:]-]*' \ + | grep -oE '[[:alnum:]][[:alnum:].-]*[[:alnum:]]\.[[:alnum:]-]*[a-z]{2,}[[:alnum:]-]*' \ > nrd.tmp format_file nrd.tmp diff --git a/scripts/validate_domains.sh b/scripts/validate_domains.sh index c743b86c8..a237e0abf 100644 --- a/scripts/validate_domains.sh +++ b/scripts/validate_domains.sh @@ -76,7 +76,7 @@ validate() { filter "$whitelisted_tld" whitelisted_tld # Remove non-domain entries including IP addresses excluding Punycode - regex='^[[:alnum:]][[:alnum:].-]+[[:alnum:]]\.[[:alnum:]-]*[a-z]{2,}[[:alnum:]-]*$' + regex='^[[:alnum:]][[:alnum:].-]*[[:alnum:]]\.[[:alnum:]-]*[a-z]{2,}[[:alnum:]-]*$' invalid="$(grep -vE "$regex" "$RAW")" filter "$invalid" invalid # The dead domains file is also checked here as invalid entries may get