From b8ff6c7a178ac7026cb28dfb39e1856e42ceed1d Mon Sep 17 00:00:00 2001 From: Rajat Venkatesh Date: Tue, 21 Jan 2020 15:14:45 +0530 Subject: [PATCH] fix: JSON output format lists PII types found for each column Fix #51 --- piicatcher/explorer/explorer.py | 3 ++- piicatcher/explorer/metadata.py | 2 +- tests/test_dbmetadata.py | 18 ++++++++++++++++++ tests/test_explorer.py | 31 ++++++++++++++++++++++++++++++- 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/piicatcher/explorer/explorer.py b/piicatcher/explorer/explorer.py index 924529d..82c18f1 100644 --- a/piicatcher/explorer/explorer.py +++ b/piicatcher/explorer/explorer.py @@ -7,6 +7,7 @@ from piicatcher.explorer.metadata import Schema, Table, Column from piicatcher.catalog.db import DbStore +from piicatcher.piitypes import PiiTypeEncoder class Explorer(ABC): @@ -58,7 +59,7 @@ def output(cls, ns, explorer): headers = ["schema", "table", "column", "has_pii"] tableprint.table(explorer.get_tabular(ns.list_all), headers) elif ns.output_format == "json": - print(json.dumps(explorer.get_dict(), sort_keys=True, indent=2)) + print(json.dumps(explorer.get_dict(), sort_keys=True, indent=2, cls=PiiTypeEncoder)) elif ns.output_format == "db": DbStore.save_schemas(explorer) diff --git a/piicatcher/explorer/metadata.py b/piicatcher/explorer/metadata.py index 724a3a0..61c2fbf 100644 --- a/piicatcher/explorer/metadata.py +++ b/piicatcher/explorer/metadata.py @@ -134,6 +134,6 @@ def shallow_scan(self): def get_dict(self): return { - 'has_pii': self.has_pii(), + 'pii_types': list(self.get_pii_types()), 'name': self.get_name() } diff --git a/tests/test_dbmetadata.py b/tests/test_dbmetadata.py index bd0cf04..da8fea7 100644 --- a/tests/test_dbmetadata.py +++ b/tests/test_dbmetadata.py @@ -1,5 +1,6 @@ from unittest import TestCase from piicatcher.explorer.metadata import Column, Table, Schema +from piicatcher.piitypes import PiiTypes from piicatcher.scanner import RegexScanner, NERScanner @@ -29,16 +30,19 @@ def test_negative_scan_column(self): col = Column('col') col.scan('abc', [RegexScanner(), NERScanner()]) self.assertFalse(col.has_pii()) + self.assertEqual({'pii_types': [], 'name': 'col'}, col.get_dict()) def test_positive_scan_column(self): col = Column('col') col.scan('Jonathan Smith', [RegexScanner(), NERScanner()]) self.assertTrue(col.has_pii()) + self.assertEqual({'pii_types': [PiiTypes.PERSON], 'name': 'col'}, col.get_dict()) def test_null_scan_column(self): col = Column('col') col.scan(None, [RegexScanner(), NERScanner()]) self.assertFalse(col.has_pii()) + self.assertEqual({'pii_types': [], 'name': 'col'}, col.get_dict()) def test_no_pii_table(self): schema = Schema('public') @@ -48,6 +52,10 @@ def test_no_pii_table(self): table.scan(self.data_generator) self.assertFalse(table.has_pii()) + self.assertEqual({ + 'columns': [{'name': 'a', 'pii_types': []}, {'name': 'b', 'pii_types': []}], + 'has_pii': False, + 'name': 'no_pii'}, table.get_dict()) def test_partial_pii_table(self): schema = Schema('public') @@ -60,6 +68,11 @@ def test_partial_pii_table(self): cols = table.get_columns() self.assertTrue(cols[0].has_pii()) self.assertFalse(cols[1].has_pii()) + self.assertEqual({ + 'columns': [{'name': 'a', 'pii_types': [PiiTypes.PHONE]}, + {'name': 'b', 'pii_types': []}], + 'has_pii': True, + 'name': 'partial_pii'}, table.get_dict()) def test_full_pii_table(self): schema = Schema('public') @@ -73,6 +86,11 @@ def test_full_pii_table(self): cols = table.get_columns() self.assertTrue(cols[0].has_pii()) self.assertTrue(cols[1].has_pii()) + self.assertEqual({ + 'columns': [{'name': 'name', 'pii_types': [PiiTypes.PERSON]}, + {'name': 'location', 'pii_types': [PiiTypes.LOCATION]}], + 'has_pii': True, + 'name': 'full_pii'}, table.get_dict()) class ShallowScan(TestCase): diff --git a/tests/test_explorer.py b/tests/test_explorer.py index 8914430..546f2a0 100644 --- a/tests/test_explorer.py +++ b/tests/test_explorer.py @@ -1,9 +1,10 @@ +import json from argparse import Namespace from unittest import TestCase from piicatcher.explorer.explorer import Explorer from piicatcher.explorer.metadata import Column, Schema, Table -from piicatcher.piitypes import PiiTypes +from piicatcher.piitypes import PiiTypes, PiiTypeEncoder class MockExplorer(Explorer): @@ -46,3 +47,31 @@ def test_tabular_pii(self): ['testSchema', 't1', 'c2', True] ], self.explorer.get_tabular(False)) + def test_json(self): + self.assertEqual('''[ + { + "has_pii": false, + "name": "testSchema", + "tables": [ + { + "columns": [ + { + "name": "c1", + "pii_types": [] + }, + { + "name": "c2", + "pii_types": [ + { + "__enum__": "PiiTypes.LOCATION" + } + ] + } + ], + "has_pii": false, + "name": "t1" + } + ] + } +]''', + json.dumps(self.explorer.get_dict(), sort_keys=True, indent=2, cls=PiiTypeEncoder))