-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
184 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import astroid | ||
from pylint.checkers import BaseChecker, BaseRawFileChecker | ||
|
||
|
||
class NotebookChecker(BaseRawFileChecker):
    """Pylint checker for Databricks notebooks exported as Python source.

    Two kinds of checks are implemented:

    * raw-file checks in `process_module`, which scan the file line by line
      because notebook structure (`# COMMAND ----------` separators and
      `# MAGIC` lines) lives in comments that the AST does not expose;
    * AST checks in the `visit_*` hooks, which flag forbidden `dbutils`
      usage and the star import from `pyspark.sql.functions`.
    """

    name = 'databricks-notebooks'
    msgs = {
        'E9999': (
            'dbutils.notebook.run() is not allowed',
            'notebooks-dbutils-run',
            'Used when dbutils.notebook.run() is used'
        ),
        'E9998': (
            'dbutils.fs is not allowed',
            'notebooks-dbutils-fs',
            'Used when dbutils.fs is used'
        ),
        'E9997': (
            'dbutils.credentials is not allowed',
            'notebooks-dbutils-credentials',
            'Used when dbutils.credentials is used'
        ),
        'E9996': (
            'Notebooks should not have more than 75 cells',
            'notebooks-too-many-cells',
            'Used when the number of cells in a notebook is greater than 75'
        ),
        'E9995': (
            'Star import is not allowed',
            'notebooks-star-import',
            'Used when there is a star import from pyspark.sql.functions'
        ),
        'E9994': (
            'Using %run is not allowed',
            'notebooks-percent-run',
            'Used when `# MAGIC %run` comment is used',
        ),
    }

    # Marker lines of the notebook source format, stored without line endings
    # so that LF, CRLF and a missing final newline are all handled the same.
    _NOTEBOOK_HEADER = b'# Databricks notebook source'
    _CELL_SEPARATOR = b'# COMMAND ----------'
    _PERCENT_RUN_PREFIX = b'# MAGIC %run'
    # Keep in sync with the number quoted in the E9996 message text.
    _MAX_CELLS = 75

    # Dotted attribute expression -> message symbol reported when it is used.
    _FORBIDDEN_ATTRIBUTES = {
        'dbutils.fs': 'notebooks-dbutils-fs',
        'dbutils.credentials': 'notebooks-dbutils-credentials',
    }

    def process_module(self, node: astroid.Module):
        """Scan the raw module source for notebook-level problems.

        Need to do some tricks, as `ast` doesn't provide access for comments.
        Alternative libraries that can parse comments along with the code:
        - https://github.com/Instagram/LibCST/ (MIT + PSF)
        - https://github.com/python/cpython/tree/3.10/Lib/lib2to3 (PSF), removed in Python 3.12
        - https://github.com/t3rn0/ast-comments (MIT)
        - https://github.com/facebookincubator/bowler (MIT), abandoned
        - https://github.com/PyCQA/redbaron (LGPLv3)

        Files whose first line is not the Databricks notebook header are
        skipped entirely. For notebooks, `notebooks-percent-run` is reported
        on every `# MAGIC %run` line, and `notebooks-too-many-cells` is
        reported once, on the separator that opens the first cell beyond the
        limit.
        """
        cells = 1
        too_many_cells_reported = False
        with node.stream() as stream:
            for lineno, raw_line in enumerate(stream, start=1):
                # The stream yields bytes; drop the line ending before comparing.
                line = raw_line.rstrip(b'\r\n')
                if lineno == 1 and line != self._NOTEBOOK_HEADER:
                    # this is not a Databricks notebook
                    return
                if line == self._CELL_SEPARATOR:
                    cells += 1
                    if cells > self._MAX_CELLS and not too_many_cells_reported:
                        self.add_message('notebooks-too-many-cells', line=lineno)
                        too_many_cells_reported = True
                    continue
                if line.startswith(self._PERCENT_RUN_PREFIX):
                    self.add_message('notebooks-percent-run', line=lineno)

    def visit_module(self, node):
        """Do nothing; kept so existing direct callers keep working.

        Module-level AST inspection cannot implement any of this checker's
        rules: a module's name is never a dotted `dbutils` expression, and
        the number of top-level statements is not the number of notebook
        cells. Cells are counted in `process_module`; `dbutils` usage is
        detected in `visit_call` and `visit_attribute`.
        """

    def visit_call(self, node: astroid.Call):
        """Report calls spelled exactly `dbutils.notebook.run(...)`."""
        if node.func.as_string() == 'dbutils.notebook.run':
            self.add_message('notebooks-dbutils-run', node=node)

    def visit_attribute(self, node: astroid.Attribute):
        """Report any use of `dbutils.fs` or `dbutils.credentials`.

        In an expression such as `dbutils.fs.ls(path)` only the inner
        `dbutils.fs` attribute matches, so each use is reported once.
        """
        symbol = self._FORBIDDEN_ATTRIBUTES.get(node.as_string())
        if symbol is not None:
            self.add_message(symbol, node=node)

    def visit_importfrom(self, node: astroid.ImportFrom):
        """Report `from pyspark.sql.functions import *`."""
        # A star import is represented as a single ('*', None) entry in names.
        if node.modname == 'pyspark.sql.functions' and node.names[0][0] == '*':
            self.add_message('notebooks-star-import', node=node)
|
||
|
||
def register(linter):
    """Create a NotebookChecker bound to `linter` and register it there."""
    checker = NotebookChecker(linter)
    linter.register_checker(checker)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Databricks notebook source | ||
# MAGIC %md # Here's markdown cell | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %run ./something | ||
|
||
# COMMAND ---------- | ||
|
||
# and here we do star import | ||
from pyspark.sql.functions import * | ||
|
||
|
||
# # COMMAND ---------- | ||
# | ||
# # but no dbutils.library.restartPython() | ||
# !pip install databricks-sdk | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md not good chaining | ||
|
||
# COMMAND ---------- | ||
|
||
df = spark \ | ||
.table('samples.nyctaxi.trips') \ | ||
.limit(10) | ||
display(df) | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md good chaining | ||
|
||
# COMMAND ---------- | ||
|
||
df = (spark | ||
.table('samples.nyctaxi.trips') | ||
.limit(10)) | ||
display(df) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import astroid | ||
import pylint.testutils | ||
|
||
from pylint.lint.pylinter import PyLinter | ||
|
||
from databricks.labs.pylint.notebooks import NotebookChecker | ||
|
||
|
||
def test_percent_run():
    """Lint the sample notebook file and expect exactly one message."""
    pylinter = PyLinter()
    notebook_checker = NotebookChecker(pylinter)
    # NOTE(review): "all" is disabled before the checker is registered,
    # presumably so that only this checker's messages remain active - confirm.
    pylinter.disable("all")
    pylinter.register_checker(notebook_checker)
    # The path is relative to the working directory the tests are run from.
    pylinter.check(["samples/TestForPylint.py"])

    reported = pylinter.reporter.messages
    assert len(reported) == 1
|
||
|
||
class TestNotebookChecker(pylint.testutils.CheckerTestCase):
    """Unit tests that call NotebookChecker hooks directly on astroid nodes."""

    CHECKER_CLASS = NotebookChecker

    def test_import_from_pyspark(self):
        """A star import from pyspark.sql.functions is reported."""
        node = astroid.extract_node("from pyspark.sql.functions import *")

        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="notebooks-star-import",
                node=node,
            ), ignore_position=True
        ):
            self.checker.visit_importfrom(node)

    def test_percent_run(self):
        """A `# MAGIC %run` line in a notebook is reported at its line number."""
        # Built line by line so the notebook markers are guaranteed to start
        # at column 0, which is what the raw-line checks compare against.
        source = "\n".join([
            "# Databricks notebook source",
            "# MAGIC %md # Here's markdown cell",
            "# COMMAND ----------",
            "# MAGIC %run ./something",
            "# COMMAND ----------",
            "print('hello')",
            "",
        ])
        # process_module reads the raw source through Module.stream(), so a
        # whole Module is required; extract_node would return only the last
        # expression, which has no stream.
        module = astroid.parse(source)

        # The checker reports this message by line (no node); `%run` is on line 4.
        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="notebooks-percent-run",
                line=4,
            )
        ):
            self.checker.process_module(module)