Skip to content

Commit

Permalink
fix: JSON output format lists PII types found for each column
Browse files Browse the repository at this point in the history
Fix #51
  • Loading branch information
Rajat Venkatesh authored and vrajat committed Jan 21, 2020
1 parent 8b77848 commit b8ff6c7
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 3 deletions.
3 changes: 2 additions & 1 deletion piicatcher/explorer/explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from piicatcher.explorer.metadata import Schema, Table, Column
from piicatcher.catalog.db import DbStore
from piicatcher.piitypes import PiiTypeEncoder


class Explorer(ABC):
Expand Down Expand Up @@ -58,7 +59,7 @@ def output(cls, ns, explorer):
headers = ["schema", "table", "column", "has_pii"]
tableprint.table(explorer.get_tabular(ns.list_all), headers)
elif ns.output_format == "json":
print(json.dumps(explorer.get_dict(), sort_keys=True, indent=2))
print(json.dumps(explorer.get_dict(), sort_keys=True, indent=2, cls=PiiTypeEncoder))
elif ns.output_format == "db":
DbStore.save_schemas(explorer)

Expand Down
2 changes: 1 addition & 1 deletion piicatcher/explorer/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,6 @@ def shallow_scan(self):

def get_dict(self):
    """Return a JSON-friendly summary of this column.

    The result carries exactly two keys:

    * ``'pii_types'`` -- the PII types returned by ``get_pii_types()``,
      materialised as a list.
    * ``'name'`` -- the value returned by ``get_name()``.

    The column-level ``'has_pii'`` flag is intentionally not emitted; the
    column tests and the expected JSON output only contain ``pii_types``
    and ``name`` for a column.
    """
    # NOTE(review): if get_pii_types() returns a set, the list order is not
    # guaranteed to be stable between runs -- confirm before relying on it.
    return {
        'pii_types': list(self.get_pii_types()),
        'name': self.get_name(),
    }
18 changes: 18 additions & 0 deletions tests/test_dbmetadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest import TestCase
from piicatcher.explorer.metadata import Column, Table, Schema
from piicatcher.piitypes import PiiTypes
from piicatcher.scanner import RegexScanner, NERScanner


Expand Down Expand Up @@ -29,16 +30,19 @@ def test_negative_scan_column(self):
col = Column('col')
col.scan('abc', [RegexScanner(), NERScanner()])
self.assertFalse(col.has_pii())
self.assertEqual({'pii_types': [], 'name': 'col'}, col.get_dict())

def test_positive_scan_column(self):
    """A person's name is flagged as PII and reported as ``PiiTypes.PERSON``."""
    column = Column('col')
    scanners = [RegexScanner(), NERScanner()]
    column.scan('Jonathan Smith', scanners)

    self.assertTrue(column.has_pii())

    # The dictionary form lists the detected PII type next to the column name.
    expected = {'pii_types': [PiiTypes.PERSON], 'name': 'col'}
    self.assertEqual(expected, column.get_dict())

def test_null_scan_column(self):
    """Scanning ``None`` leaves the column without PII and with no PII types."""
    column = Column('col')
    scanners = [RegexScanner(), NERScanner()]
    column.scan(None, scanners)

    self.assertFalse(column.has_pii())

    # With nothing detected, the dictionary form carries an empty type list.
    expected = {'pii_types': [], 'name': 'col'}
    self.assertEqual(expected, column.get_dict())

def test_no_pii_table(self):
schema = Schema('public')
Expand All @@ -48,6 +52,10 @@ def test_no_pii_table(self):

table.scan(self.data_generator)
self.assertFalse(table.has_pii())
self.assertEqual({
'columns': [{'name': 'a', 'pii_types': []}, {'name': 'b', 'pii_types': []}],
'has_pii': False,
'name': 'no_pii'}, table.get_dict())

def test_partial_pii_table(self):
schema = Schema('public')
Expand All @@ -60,6 +68,11 @@ def test_partial_pii_table(self):
cols = table.get_columns()
self.assertTrue(cols[0].has_pii())
self.assertFalse(cols[1].has_pii())
self.assertEqual({
'columns': [{'name': 'a', 'pii_types': [PiiTypes.PHONE]},
{'name': 'b', 'pii_types': []}],
'has_pii': True,
'name': 'partial_pii'}, table.get_dict())

def test_full_pii_table(self):
schema = Schema('public')
Expand All @@ -73,6 +86,11 @@ def test_full_pii_table(self):
cols = table.get_columns()
self.assertTrue(cols[0].has_pii())
self.assertTrue(cols[1].has_pii())
self.assertEqual({
'columns': [{'name': 'name', 'pii_types': [PiiTypes.PERSON]},
{'name': 'location', 'pii_types': [PiiTypes.LOCATION]}],
'has_pii': True,
'name': 'full_pii'}, table.get_dict())


class ShallowScan(TestCase):
Expand Down
31 changes: 30 additions & 1 deletion tests/test_explorer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
from argparse import Namespace
from unittest import TestCase

from piicatcher.explorer.explorer import Explorer
from piicatcher.explorer.metadata import Column, Schema, Table
from piicatcher.piitypes import PiiTypes
from piicatcher.piitypes import PiiTypes, PiiTypeEncoder


class MockExplorer(Explorer):
Expand Down Expand Up @@ -46,3 +47,31 @@ def test_tabular_pii(self):
['testSchema', 't1', 'c2', True]
], self.explorer.get_tabular(False))

def test_json(self):
    """The explorer dictionary serialises to the expected pretty-printed JSON.

    The output is produced with ``sort_keys=True`` and ``indent=2`` and uses
    ``PiiTypeEncoder`` so that PII enum members are emitted as
    ``{"__enum__": "PiiTypes.<NAME>"}`` objects.
    """
    # The comparison is an exact string match, so the literal must carry the
    # two-space-per-level indentation that json.dumps(indent=2) produces.
    # NOTE(review): "has_pii" is false although c2 lists LOCATION -- presumably
    # the fixture assigns PII types without running a scan; confirm against
    # the explorer fixture used by this test case.
    expected = '''[
  {
    "has_pii": false,
    "name": "testSchema",
    "tables": [
      {
        "columns": [
          {
            "name": "c1",
            "pii_types": []
          },
          {
            "name": "c2",
            "pii_types": [
              {
                "__enum__": "PiiTypes.LOCATION"
              }
            ]
          }
        ],
        "has_pii": false,
        "name": "t1"
      }
    ]
  }
]'''
    actual = json.dumps(self.explorer.get_dict(), sort_keys=True, indent=2, cls=PiiTypeEncoder)
    self.assertEqual(expected, actual)

0 comments on commit b8ff6c7

Please sign in to comment.