diff --git a/collectors/cveorg/keywords.py b/collectors/cveorg/keywords.py index 0525abe8b..60f66a8ba 100644 --- a/collectors/cveorg/keywords.py +++ b/collectors/cveorg/keywords.py @@ -1,786 +1,32 @@ import re -ALLOWLIST = [ - "GIMP", - "Spring", - "dotnet", - "kernel", -] +from celery.utils.log import get_task_logger -# r'\b\.NET\b' does not match properly because word boundary \b does not cooperate well -# with dot. -ALLOWLIST_SPECIAL_CASES = [r"(?:\W|^)\.NET\b"] +from collectors.ps_constants.constants import ( + PS_CONSTANTS_REPO_BRANCH, + PS_CONSTANTS_REPO_URL, +) +from collectors.ps_constants.core import fetch_ps_constants -BLOCKLIST = [ - r"(HPE|Hewlett Packard Enterprise).*(IceWall|FlexNetwork|FlexFabric|OneView|Nimble)", - r"(Industrial Edge Management|Nucleus NET|SINEC).*[\n]*.*siemens", - r"(Jfinal|Final)[ _]CMS", - r"(Pinniped Supervisor|VMware Cloud Foundation).*[\n]*.*vmware.*", - r"(SIMATIC|Mendix|Parasolid|Opcenter Quality|SCALANCE).*[\n]*.*siemens", - r"(Simcenter Femap|LOGO!|Solid Edge|APOGEE).*[\n]*.*siemens", - r".*plugin.*for WordPress", - "1Password", - "72crm", - "74cmsSE", - "ABB e-Design", - "ABB netCADOPS", - "ACEweb Online Portal", - "AEF CMS", - "ALPS ALPINE touchpad driver", - "APNGDis", - "ASANHAMAYESH CMS", - "Academy Learning Management System", - "Accusoft ImageGear", - "Acronis Cyber Backup", - "Acronis True Image", - "Adobe Acrobat Reader", - "Adobe Acrobat and Reader", - "Adobe Animate", - "Adobe Bridge", - "Adobe Campaign", - "Adobe Character Animator", - "Adobe Commerce", - "Adobe Dimension", - "Adobe Experience Manager", - "Adobe FrameMaker", - "Adobe Illustrator", - "Adobe InCopy", - "Adobe InDesign", - "Adobe Lightroom", - "Adobe Media Encoder", - "Adobe Photoshop", - "Adobe Premiere Elements", - "Adobe RoboHelp", - "Advanced SystemCare Ultimate", - "Advantech", - "AeroCMS", - "Afian FileRun", - "AirWave", - "Ajenti", - "AnchorCMS", - "AntSword", - "Anuko Time Tracker", - "Apache Geode", - "Apache NiFi", - "Apache 
OpenMeetings", - "Apache ShenYu", - "Apache Syncope", - "Apache Wicket", - "Apartment Visitor Management System", - "Apexis", - "AppFormix", - "ArcGIS Server", - "Arista EOS", - "ArsenoL", - "Artica Web Proxy", - "Aruba (ClearPass|EdgeConnect|Networks)", - "ArubaOS", - "Atlassian Bamboo", - "Atlassian Confluence", - "Atlassian Crucible", - "Atlassian Fisheye", - "Atlassian JIRA", - "Aurea Jive", - "Automation License Manager", - "Automotive Shop Management System", - "Avaya", - "Avira", - "Avolve Software ProjectDox", - "AxxonSoft", - "AyaCMS", - "BEESCMS", - "BMC Medical", - "BMC Remedy AR System", - "BMC Remedy Action Request", - "Backdrop CMS", - "Badminton Center Management", - "Bagecms", - "Barco Control Room Management", - "BaserCMS", - "Bento4", - "Best Student Result Management System", - "BigBlueButton", - "BigTree CMS", - "Billing System Project", - "Bitcoin Core", - "Bitdefender Antivirus", - "Bitdefender Engines", - "BlackBerry QNX Software Development Platform", - "BlackBerry UEM Management Console", - "BlackCat CMS", - "Bludit", - "BlueSpice", - "Bookme Control Panel", - "Bravo Tejari", - "Brocade Fabric OS", - "Brocade Fibre", - "Brocade SANnav", - "BtiTracker", - "CCN-lite", - "CMS Made Simple", - "CMSuno", - "CODESYS", - "CSZCMS", - "CactusVPN", - "Call for Papers", - "Campcodes Advanced Online Voting System", - "Canteen Management System", - "Car Rental Management", - "Carbon Black", - "Carel pCOWeb", - "Centum CS", - "Chamilo LMS", - "Chaoji CMS", - "ChatBot App with Suggestion", - "ChemCMS", - "Cisco", - "Citrix NetScaler", - "Clansphere CMS", - "Classcms", - "Claymore Dual Miner", - "Clinic's Patient Management System", - "CloudMe", - "CloudVision Portal", - "Clustered Data ONTAP", - "CoDeSys Runtime", - "Codoforum", - "College Management System", - "Combodo iTop", - "Complete Online Job Search", - "Composr CMS", - "Contiki-NG", - r"Converse\.js", - "CoverCMS", - "Cozy", - "Craft CMS", - "CraftCMS", - "Creditwest Bank CMS", - "Cybozu Garoon", - 
r"D-LINK DIR.*", - r"D-LINK.*(DIR|COVR|DAP).*", - r"D-LINK.*(DIR|COVR|DAP|DSL|DCS).*", - "DIAEnergie", - r"DIR.*[\n]*.*dlink.com.*", - "Dataiku DSS", - "DedeCMS", - "Dell (Client )?BIOS", - "Dell (Hybrid Client|GeoDrive)", - "Dell Container Storage", - "Dell EMC", - "Dell NetWorker", - "Dell PowerScale", - "Dell SonicWALL Scrutinizer", - "Dell Storage Manager", - "Dell Wyse Management Suite", - "Delta Electronics", - "Delta Industrial Automation", - "Desigo", - "Digital Guardian Managment Console", - "DiliCMS", - "DiligentCMS", - "Discuz", - "Disk Savvy Enterprise", - "DocuTrac QuicDoc", - "Dolibarr", - "DolphinPHP", - "DomainMOD", - "Doufoxcms", - "DrayTek", - "Dreamer CMS", - "EGavilan Media", - "EMC Data Protection Advisor", - "EPIC MyChart", - "ESPCMS", - "EasyCMS", - "Eaton's", - "Edimax", - "Emlog Pro", - "Enalean Tuleap", - "Enhancesoft osTicket", - "Epson Airprint", - "Eshtery CMS", - "EspoCRM", - "Expense Management System", - "Explzh", - "Exponent CMS", - "Exponent-CMS", - "EyouCMS", - "F-Secure Atlant", - "F5 BIG-IP", - "FATEK FvDesigner", - "FUDforum", - "FUEL-CMS", - "FactoryTalk", - "Fast Food Ordering System", - "FastAdmin", - "FastCMS", - r"FeMiner.*wms", - "Feehi CMS", - "FeehiCMS", - "FeiFeiCMS", - "FiberHome", - "FlatCore-CMS", - "Flexense DiskBoss", - "Flexense DiskPulse", - "Flexense DiskSavvy", - "Flexense DiskSorter", - "Flexense DupScout", - "Flexense SyncBreeze", - "Flexense VX Search", - "Food Ordering Management System", - "ForgeRock", - r"FortiADC|FortiMail", - "FortiAnalyzer", - "FortiClient", - "FortiNAC", - "FortiOS", - "FortiSOAR", - "Fortinet", - r"Foxit .*PDF reader", - "Frog CMS", - "Fuji Electric", - "FusionCompute", - "FusionSphere OpenStack", - "GE D60", - "GPAC ", - "GSKit", - "GXCMS", - "Galileo CMS", - "Gallagher Command Centre", - "Garage Management System", - "Geist WatchDog Console", - "Gemini-Net", - "GeniXCMS", - "GetSimple CMS", - "GetSimpleCMS", - "GilaCMS", - "Gleez CMS", - "Grandstream", - "GreenCMS", - "Gxlcms", - 
"Gym Management System", - r"H3C (Magic|H200|GR[0-9-]+|B5 Mini)", - r"HCL (iNotes|Commerce|Workload Automation|Digital Experience)", - "HP Security", - "HPE Aruba AirWave Glass", - "HPE Aruba ClearPass Policy Manager", - "HPE Business Process Monitor", - "HPE Cloud Optimizer", - "HPE Data Protector", - "HPE Diagnostics", - "HPE Helion Eucalyptus", - "HPE IceWall Federation Agent", - "HPE Insight Control", - "HPE Integrated Lights-Out", - "HPE Intelligent Management Center", - "HPE LoadRunner", - "HPE Matrix Operating Environment", - "HPE Network Automation", - "HPE Network Node Manager", - "HPE NonStop Server", - "HPE NonStop Software Essentials", - "HPE OfficeConnect Network Switches", - "HPE OpenCall Media Platform", - "HPE Operations Bridge Analytics", - "HPE Operations Orchestration Community", - "HPE Pay Per Use", - "HPE Project and Portfolio Management", - "HPE SiteScope", - "HPE Smart Storage Administrator", - "HPE StoreVirtual", - "HPE Systems Insight Manager", - "HPE UCMDB", - "HPE Version Control Repository Manager", - "HPE Vertica Analytics", - "HPE iMC PLAT", - "HashiCorp Terraform", - "Helmet Store Showroom", - "Hewlett Packard Enterprise Intelligent Management Center", - "Hewlett Packard Enterprise Moonshot Provisioning Manager", - r"Hirschmann.*[\n]*.*belden", - "Honeywell", - "HongCMS", - "Horizon Client for Windows", - "Hospital Management System", - "Hotel Management System", - "HotelDruid", - "Human Resource Management System", - "I, Librarian", - "I-librarian", - "IBM AIX", - "IBM API Connect", - "IBM App Connect Enterprise", - "IBM AppScan", - "IBM Aspera", - "IBM Aspera Web Application", - "IBM BigFix", - "IBM Business Automation Content Analyzer", - "IBM Business Automation Workflow", - "IBM Business Process Manager", - "IBM CICS", - "IBM Campaign", - "IBM Capacity Management Analytics", - "IBM Cloud Pak", - "IBM CloudPak", - "IBM Cognos", - "IBM Connections", - "IBM Content Manager", - "IBM Content Navigator", - "IBM Curam", - "IBM Daeja 
ViewONE", - "IBM Data Risk Manager", - "IBM DataPower Gateway", - "IBM Db2", - "IBM Db2U", - "IBM Domino", - "IBM Doors", - "IBM Emptoris", - "IBM Endpoint Manager", - "IBM Engineering Lifecycle Optimization", - "IBM Event Streams", - "IBM Financial Transaction Manager", - "IBM Flex System", - "IBM Forms Experience Builder", - "IBM Forms Server", - "IBM InfoSphere", - "IBM Jazz", - "IBM Jazz Foundation", - "IBM Jazz Reporting Service", - "IBM MQ", - "IBM MQ Appliance", - "IBM Maximo", - "IBM Notes", - "IBM Planning Analytics", - "IBM Power Hardware Management Console", - "IBM Publishing Engine", - "IBM QRadar", - "IBM RSA DM", - "IBM Rational", - "IBM Rhapsody", - "IBM Robotic", - "IBM Sametime", - "IBM Secure External Authentication Server", - "IBM Security Access Manager", - "IBM Security Guardium", - "IBM Security Identity Governance and Intelligence", - "IBM Security Key Lifecycle Manager", - "IBM Security QRadar", - "IBM Security Secret Server", - "IBM Security SiteProtector", - "IBM Security Trusteer Pinpoint Detect", - "IBM Security Verify Access", - "IBM Security Verify Governance", - "IBM Security Verify Information Queue", - "IBM SiteProtector Appliance", - "IBM Spectrum", - "IBM Spectrum Protect Plus", - "IBM Spectrum Scale", - "IBM Sterling B2B Integrator", - "IBM Sterling Connect:Direct", - "IBM Sterling File Gateway", - "IBM Sterling Partner Engagement Manager", - "IBM Sterling Secure Proxy", - "IBM TRIRIGA", - "IBM Tealeaf", - "IBM Tivoli", - "IBM UrbanCode Deploy", - "IBM Watson", - "IBM WebSphere", - "IBM XIV Storage", - "IBM i ", - "IBM i2 iBase", - "INTELBRAS", - "IOBit Malware Fighter", - "ImageWorsener", - "Imagely NextGEN Gallery", - "InHand Networks", - "Ingredients Stock Management System", - "InspIRCd", - "Insurance Management System", - "Intel (R) LED Manager for NUC", - "Intel Server Boards", - "Intel(R) Graphics Drivers", - "Intel(R) PAC with Arria(R)", - "Intel(R) Server Boards", - "Intelbras TELEFONE IP", - "InventoryManagementSystem", 
- "Invision Power Board", - "IonizeCMS", - "Ipswitch WhatsUp Gold", - "Ivanti Endpoint Security", - "JEXTN", - "JFrog Artifactory", - "JT2Go", - "Jeecg-boot", - "JerryScript", - "Jiangmin Antivirus", - "Jirafeau", - "Jizhicms", - "Joomla!", - "Joyent SmartOS", - "Judging Management System", - "JupyterHub OAuthenticator", - "Kaspersky Secure Mail", - "Kentico", - "Kingsoft Internet Security", - "KiteCMS", - "Kiwi TCMS", - "Kliqqi CMS", - "LAquis SCADA", - "LJCMS", - "Library Management System", - "LibreNMS", - "Liferay Portal", - "LogicalDoc", - "Loway QueueMetrics", - "M-Files Server", - "MB CONNECT LINE", - "MDaemon", - "MKCMS", - "MOXA NPort", - "MP Form Mail", - "MTS Simple Booking", - "MZ Automation", - "Magnolia CMS", - "Mahara", - "Mailbutler Shimo", - "MalwareFox AntiMalware", - "Malwarebytes Anti-Malware", - "ManageEngine OpManager", - "ManageEngine Service Desk Plus", - "March Hare WINCVS", - "McAfee Network Security Management", - "McAfee VirusScan Enterprise", - "Merchandise Online Store", - "MetInfo", - "Micro Focus ArcSight", - "Micro Focus ArcSight Management Center", - "Micro Focus Operations Bridge", - "Micro Focus Project", - "Micro Focus UCMDB", - "Micro Focus Universal CMDB", - "Micro Focus ZENworks", - "Micropoint proactive", - "Microsoft", - "Microweber", - "MikroTik's RouterOS", - "Mikrotik RouterOs", - "Ming-Soft/MCMS", - "MiniCMS", - "Mitel ST", - "Mitsubishi E-Designer", - "Mitsubishi Electric", - "Mobotix", - "Money Transfer Management System", - "MonstaFTP", - "Monstra CMS", - "Moxa OnCell", - "NETGEAR", - "NVIDIA GeForce NOW", - "Navarino Infinity", - "NetEx HyperIP", - "NetIQ Access Manager", - "NetIQ Identity Manager", - "NetIQ Identity Reporting", - "NetIQ iManager", - "Nginx NJS", - "Niagara", - "Nokia", - "NoneCms", - "NordVPN", - "Nortek Linear", - "Novel-Plus", - "NukeViet CMS", - "OPTILINK OP", - "OSIsoft PI", - "OTCMS", - "OXID eShop", - "October CMS", - "Octopus Deploy", - "Omron CX-One", - "Omron CX-Supervisor", - "Online Car 
Wash Booking System", - "Online Diagnostic Lab Management System", - "Online Examination System", - "Online Fire Reporting System", - "Online Food Ordering System", - "Online Leave Management System", - "Online Ordering System", - "Online Pet Shop We App", - "Online Railway Reservation System", - "Online Sports Complex Booking System", - "Online Student Rate System", - "Online Tours & Travels Management System", - "Open Source SACCO Management System", - "Open-AudIT Professional", - "OpenBMC", - "OpenEMR", - "OpenHarmony", - "OpenLiteSpeed", - "OpenMRS", - "OpenScape Deployment Service", - "Opencast", - "Ozeki NG SMS Gateway", - "PAN-OS", - "PHP Scripts Mall", - "PHPGurukul", - "PHPJabbers Class Scheduling System", - "POSCMS", - "Paessler PRTG Network Monitor", - "Pagekit CMS", - "Pandora FMS", - "Parallels Remote Application Server", - "PayPal", - "PbootCMS", - "Pega Platform", - "Pegasystems Pega Platform", - "Pharmacy Management System", - "Philips Intellispace Portal", - "PicturesPro Photo Cart", - "Piwigo", - "Pixar OpenUSD", - "Plixer Scrutinizer", - "Plone CMS", - "Pluck", - "PowerCMS", - "PrestaShop", - "PrivateVPN", - "Project-Pier", - "Promise Technology", - "PublicCMS", - "Pulse Connect Secure", - "Pulse Secure Desktop Client", - "PureVPN", - "PyroCMS", - "QNAP QTS", - "Quest NetVault", - "QuickTime", - "RPCMS", - "RUGGEDCOM", - "Rapid Software LLC Rapid SCADA", - "Red Discord Bot", - "Rescue Dispatch Management", - "Restaurant POS System", - "Robustel R1510", - "Rocket.Chat", - "Rockwell Automation", - "RosarioSIS", - "Ruckus Networks", - "Rukovoditel", - "SAP 3D Visual Enterprise Viewer", - "SAP Adaptive Server Enterprise", - "SAP BASIS", - "SAP Banking Services", - "SAP Business Objects Business Intelligence Platform", - "SAP Commerce versions", - "SAP Data Hub", - "SAP ERP", - "SAP Fiori Launchpad", - "SAP Marketing", - "SAP NetWeaver", - "SEMCMS", - "SICAM", - r"SIMATIC.*(PCS|CP)", - "Sagemcom", - "Sandoba CP:Shop", - "Sanitization Management 
System", - "Saperion Web Client", - "Schneider Electric", - "School Activity Updates with SMS Notification", - "SeaCms", - "Seagate Media Server", - r"Secomea (GateManager|SiteManager)", - "SeedDMS", - "Shimmie", - "Shirne CMS", - "ShopXO", - "Shopwind", - "Silverstripe", - "Simple Bus Ticket Booking System", - "Simple Client Management System", - "Simple Cold Storage Management System", - "Simple Customer Relationship Management", - "Simple E-Learning System", - "Simple Image Gallery System", - "Simple Inventory System", - "Simple Online Book Store System", - "Simple Online Public Access Catalog", - "Simple Task Scheduling System", - "Sinsiu Sinsiu Enterprise Website System", - "SmartVista", - "SnapCreek Duplicator", - "SolarView Compact", - "Solutions Atlantic Regulatory Reporting System", - "SonicWall SMA100", - "Sophos Endpoint Protection", - "Sophos Firewall", - "SourceCodester", - "SpamTitan", - "SpiderControl MicroBrowser", - "Square 9 GlobalForms", - "Stock Management System", - "Stormshield Network Security", - "Student Clearance System", - "Student Information System", - "Subrion CMS", - "SugarCRM", - "Sumatra PDF", - "Symantec", - "Synacor Zimbra", - "Synology DiskStation Manager", - "Synology Photo", - "Synology Router Manager", - "Synology Surveillance Station", - "SysAid Help Desk", - "Sysax Multi Server", - "TIBCO DataSynapse GridServer Manager", - "TOTOLINK", - r"TP-Link.*(TL|AX10v1|Tapo)", - "TRENDNet", - "Taocms", - "Telegram Desktop", - "Tenda AC15", - "Tenda AC9", - r"Tenda( |_.*)", - "Textpattern CMS", - "Train Scheduler App", - "TreasuryXpress", - "Trend Micro", - "TuziCMS", - "Twonky Server", - "UCMS ", - "UJCMS", - "Ubiquiti Networks EdgeOS", - "Unisphere for PowerMax", - "Unisys ClearPath", - "Unisys Stealth SVG", - "United Planet Intrexx Professional", - "Unitrends Backup", - "Untis WebUntis", - "Userscape HelpSpot", - r"VIDEOJET.*[\n]*.*psirt", - "VMware ESXi and vCenter Server", - "VMware Fusion", - "VMware Workstation", - "Vehicle 
Booking System", - "Verint Workforce Optimization", - "Veritas NetBackup", - "Verizon 5G Home", - "Vesta Control Panel", - "Victor CMS", - "VirtueMart", - "WBCE CMS", - "WECON LeviStudioU", - "WPS Office", - "WSO2 Enterprise Integrator", - "WTCMS", - "WUZHI CMS", - "WatchDog Anti-Malware", - "Wavlink", - "Web Based Quiz System", - "WebDynpro Java", - "Weblication CMS", - "Wedding Management System", - "Wedding Planner", - "Weeny Audio Cutter", - "Wellcms", - "Western Bridge Cobub Razor", - "Western Digital My Cloud", - "Winmail", - "Wireless IP Camera 360", - "WoWonder", - "WonderCMS", - "WordPress theme", - r"WordPress.*plugin", - "Wowza Streaming", - "XYHCMS", - "Xiaomi.*phones", - "Xiuno BBS", - "XunRuiCMS", - "Yab Quarx", - "Yahoo!", - "Yxcms", - "YxtCMF", - "YzmCMS", - "Z-BlogPHP", - "Zenario CMS", - "Zikula Application Framework", - "Zoho ManageEngine", - "ZoneAlarm", - "ZoneMinder", - "Zoo Management System", - "Zulip Desktop", - "Zyxel", - "baijiacms", - "bootstrap-table", - "chatwoot", - "cmseasy", - "comforte SWAP", - "concretecms", - "dotCMS", - "drawio", - "eDNA Enterprise Data Historian", - "ebCMS", - r"ednareporting\.asmx", - "elitecms", - "emoncms", - "enhavo CMS", - "htmly", - "https://gitee.com/oufu/ofcms/", - "https://github.com/cesanta/mjs/", - "https://github.com/kabirkhyrul/HMS/", - "https://github.com/vapor/vapor/", - "https://github.com/wp-plugins", - "https://support.zte.com.cn/support/", - "https://www.autodesk.com/", - "https://www.solarwinds.com/", - "iDashboards", - "iPayPal", - "iRedMail", - "iScripts SupportDesk", - "iScripts UberforX", - "iScripts eSwap", - "iTunes", - "iota All-In-One Security Kit", - "ismartgate PRO", - "joyplus-cms", - "lyadmin", - "madlib-object-utils", - r"mySCADA myPRO|Measuresoft ScadaPro", - "open5gs", - "perfex crm", - "phpjs", - "pimcore", - r"plugin <= [0-9\.]+ at WordPress", - r"plugins.*wordpress", - "portfolioCMS", - "prime-jwt", - "publify", - "puppyCMS", - "rap2hpoutre Laravel Log Viewer", - 
"rdiffweb", - r"siteserver (CMS|SSCMS)", - "swftools", - "totaljs", - "trudesk", - "usememos/memos", - "vBulletin", - r"win32k\.sys", - "wityCMS", - "wuzhicms", - "yetiforcecrm", - "zzcms", -] +logger = get_task_logger(__name__) -BLOCKLIST_CASE_SENSITIVE = ["iOS"] -KEYWORD_ALLOWLIST = [ - re.compile(rf"\b{keyword}\b", re.IGNORECASE) for keyword in ALLOWLIST -] + [re.compile(keyword) for keyword in ALLOWLIST_SPECIAL_CASES] +def fetch_keywords_from_ps_constants(): + url = f"{PS_CONSTANTS_REPO_URL}/-/raw/{PS_CONSTANTS_REPO_BRANCH}/data/cveorg_keywords.yml" + logger.info(f"Fetching CVEorg keywords from '{url}'") + keywords = fetch_ps_constants(url) -KEYWORD_BLOCKLIST = [ - re.compile(rf"\b{keyword}\b", re.IGNORECASE) for keyword in BLOCKLIST -] + [re.compile(rf"\b{keyword}\b") for keyword in BLOCKLIST_CASE_SENSITIVE] + try: + allowlist = keywords["allowlist"] + allowlist_special_cases = keywords["allowlist_special_cases"] + blocklist = keywords["blocklist"] + blocklist_special_cases = keywords["blocklist_special_cases"] + except KeyError: + raise KeyError( + "The ps-constants repository does not contain the expected CVEorg keyword lists." + ) + + return allowlist, allowlist_special_cases, blocklist, blocklist_special_cases def check_keywords(text): @@ -789,17 +35,32 @@ def check_keywords(text): Returns tuple of matched blocklisted and allowlisted keywords. 
""" - allowlist = [] - for word in (regex.search(text) for regex in KEYWORD_ALLOWLIST): + ( + allowlist, + allowlist_special_cases, + blocklist, + blocklist_special_cases, + ) = fetch_keywords_from_ps_constants() + + allowlisted_keywords = [ + re.compile(rf"\b{keyword}\b", re.IGNORECASE) for keyword in allowlist + ] + [re.compile(keyword) for keyword in allowlist_special_cases] + + blocklisted_keywords = [ + re.compile(rf"\b{keyword}\b", re.IGNORECASE) for keyword in blocklist + ] + [re.compile(rf"\b{keyword}\b") for keyword in blocklist_special_cases] + + in_allowlist = [] + for word in (regex.search(text) for regex in allowlisted_keywords): if word is not None: - allowlist.append(word.group().strip()) + in_allowlist.append(word.group().strip()) - blocklist = [] - for word in (regex.search(text) for regex in KEYWORD_BLOCKLIST): + in_blocklist = [] + for word in (regex.search(text) for regex in blocklisted_keywords): if word is not None: - blocklist.append(word.group()) + in_blocklist.append(word.group()) - return sorted(blocklist), sorted(allowlist) + return sorted(in_blocklist), sorted(in_allowlist) def should_create_snippet(text): diff --git a/collectors/cveorg/tests/conftest.py b/collectors/cveorg/tests/conftest.py index 6f619bf1e..638435d84 100644 --- a/collectors/cveorg/tests/conftest.py +++ b/collectors/cveorg/tests/conftest.py @@ -42,3 +42,22 @@ def get_repo_changes(self): monkeypatch.setattr(CVEorgCollector, "clone_repo", clone_repo) monkeypatch.setattr(CVEorgCollector, "update_repo", update_repo) monkeypatch.setattr(CVEorgCollector, "get_repo_changes", get_repo_changes) + + +@pytest.fixture() +def mock_keywords(monkeypatch) -> None: + """ + Set testing keywords to mock the ones from the ps-constants repository. 
@pytest.fixture()
def mock_keywords(monkeypatch) -> None:
    """
    Replace the keywords fetched from the ps-constants repository
    with a small fixed set of testing keywords.
    """
    import collectors.cveorg.keywords as cveorg_keywords

    def fake_fetch_keywords():
        # order: allowlist, allowlist special cases,
        # blocklist, blocklist special cases
        return (
            ["kernel"],
            [r"(?:\W|^)\.NET\b"],
            [r".*plugin.*for WordPress", "Cisco", "IBM Tivoli", "iTunes"],
            ["iOS"],
        )

    monkeypatch.setattr(
        cveorg_keywords, "fetch_keywords_from_ps_constants", fake_fetch_keywords
    )
""" diff --git a/collectors/cveorg/tests/test_keywords.py b/collectors/cveorg/tests/test_keywords.py index 4c261e8b2..0a182ae70 100644 --- a/collectors/cveorg/tests/test_keywords.py +++ b/collectors/cveorg/tests/test_keywords.py @@ -11,7 +11,7 @@ ("we want to allowlist kernel", ([], ["kernel"])), ], ) -def test_check_keywords(text, expected_output): +def test_check_keywords(text, expected_output, mock_keywords): assert check_keywords(text) == expected_output @@ -22,7 +22,7 @@ def test_check_keywords(text, expected_output): ("new iOS is released", (["iOS"], [])), ], ) -def test_check_keywords_case_sensitive(text, expected_output): +def test_check_keywords_case_sensitive(text, expected_output, mock_keywords): assert check_keywords(text) == expected_output @@ -39,7 +39,7 @@ def test_check_keywords_case_sensitive(text, expected_output): ("new iOS is released", (["iOS"], [])), ], ) -def test_check_keywords_word_boundary(text, expected_output): +def test_check_keywords_word_boundary(text, expected_output, mock_keywords): assert check_keywords(text) == expected_output @@ -54,7 +54,7 @@ def test_check_keywords_word_boundary(text, expected_output): ("end of sentence .NET. new sentence", ([], [".NET"])), ], ) -def test_check_keywords_dotnet_special_case(text, expected_output): +def test_check_keywords_dotnet_special_case(text, expected_output, mock_keywords): assert check_keywords(text) == expected_output @@ -78,7 +78,7 @@ def test_check_keywords_dotnet_special_case(text, expected_output): ), ], ) -def test_check_keywords_wordpress(text, expected_output): +def test_check_keywords_wordpress(text, expected_output, mock_keywords): assert check_keywords(text) == expected_output @@ -97,7 +97,7 @@ def test_check_keywords_wordpress(text, expected_output): (None, False), ], ) -def test_should_create_snippet(text, should_create): +def test_should_create_snippet(text, should_create, mock_keywords): """ Check whether a snippet should be created based on keywords in `text`. 
""" diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index eaab105fb..c990ab408 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased +### Changed +- Use CVEorg keywords from ps-constants (OSIDB-3694) + ## [4.6.0] - 2024-12-02 ### Added - Update field `updated_dt` on queryset update (OSIDB-3573)