Skip to content

Commit

Permalink
chore(github-growth): set commitfilechange language (#55880)
Browse files Browse the repository at this point in the history
  • Loading branch information
cathteng committed Sep 12, 2023
1 parent 91584df commit 930abe1
Show file tree
Hide file tree
Showing 4 changed files with 255 additions and 15 deletions.
8 changes: 4 additions & 4 deletions fixtures/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"id": "133d60480286590a610a0eb7352ff6e02b9674c4",
"tree_id": "f9d2a07e9488b91af2641b26b9407fe22a451433",
"distinct": true,
"message": "Update README.md (àgain)",
"message": "Update hello.py",
"timestamp": "2015-05-05T19:45:15-04:00",
"url": "https://github.com/baxterthehacker/public-repo/commit/133d60480286590a610a0eb7352ff6e02b9674c4",
"author": {
Expand All @@ -36,7 +36,7 @@
],
"modified": [
"README.md"
"hello.py"
]
},
{
Expand Down Expand Up @@ -98,7 +98,7 @@
"id": "0d1a26e67d8f5eaf1f6ba5c57fc3c7d91ac0fd1c",
"tree_id": "f9d2a07e9488b91af2641b26b9407fe22a451433",
"distinct": true,
"message": "Update README.md",
"message": "Update hello.py",
"timestamp": "2015-05-05T19:40:15-04:00",
"url": "https://github.com/baxterthehacker/public-repo/commit/0d1a26e67d8f5eaf1f6ba5c57fc3c7d91ac0fd1c",
"author": {
Expand All @@ -118,7 +118,7 @@
],
"modified": [
"README.md"
"hello.py"
]
},
"repository": {
Expand Down
202 changes: 202 additions & 0 deletions src/sentry/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,3 +700,205 @@ def from_str(cls, string: str) -> Optional[int]:
"*/healthz",
"*/ping",
]

# Generated from https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/languages.yml and our list of platforms/languages
#
# Maps a file extension (written without the leading dot) to a lowercase
# language name. Every key and value in this table is lowercase.
#
# Each extension maps to exactly one language. Several entries sit among the
# entries of a different language (flagged inline below) -- presumably these are
# extensions shared by more than one language in the upstream list and resolved
# to a single winner when the table was generated; confirm before "fixing" them.
#
# A few keys are compound extensions that themselves contain a dot ("cljs.hl",
# "js.erb", "coffee.md", "rs.in"). A lookup keyed only on the text after the
# last dot of a filename will never hit these entries.
EXTENSION_LANGUAGE_MAP = {
"c": "c",
"cats": "c",
"h": "objective-c",  # NOTE(review): grouped with the C entries but maps to objective-c
"idc": "c",
"cs": "c#",
"cake": "coffeescript",  # NOTE(review): grouped with the C# entries but maps to coffeescript
"csx": "c#",
"linq": "c#",
"cpp": "c++",
"c++": "c++",
"cc": "c++",
"cp": "c++",
"cppm": "c++",
"cxx": "c++",
"h++": "c++",
"hh": "c++",
"hpp": "c++",
"hxx": "c++",
"inc": "php",  # NOTE(review): grouped with the C++ entries but maps to php
"inl": "c++",
"ino": "c++",
"ipp": "c++",
"ixx": "c++",
"re": "c++",
"tcc": "c++",
"tpp": "c++",
"txx": "c++",
"chs": "c2hs haskell",
"clj": "clojure",
"bb": "clojure",
"boot": "clojure",
"cl2": "clojure",
"cljc": "clojure",
"cljs": "clojure",
"cljs.hl": "clojure",  # compound extension (contains a dot)
"cljscm": "clojure",
"cljx": "clojure",
"hic": "clojure",
"coffee": "coffeescript",
"_coffee": "coffeescript",
"cjsx": "coffeescript",
"iced": "coffeescript",
"cfm": "coldfusion",
"cfml": "coldfusion",
"cfc": "coldfusion cfc",
"cr": "crystal",
"dart": "dart",
"ex": "elixir",
"exs": "elixir",
"fs": "f#",
"fsi": "f#",
"fsx": "f#",
"go": "go",
"groovy": "groovy",
"grt": "groovy",
"gtpl": "groovy",
"gvy": "groovy",
"gsp": "groovy server pages",
"hcl": "hcl",
"nomad": "hcl",
"tf": "hcl",
"tfvars": "hcl",
"workflow": "hcl",
"hs": "haskell",
"hs-boot": "haskell",
"hsc": "haskell",
"java": "java",
"jav": "java",
"jsh": "java",
"jsp": "java server pages",
"tag": "java server pages",
"js": "javascript",
"_js": "javascript",
"bones": "javascript",
"cjs": "javascript",
"es": "javascript",
"es6": "javascript",
"frag": "javascript",
"gs": "javascript",
"jake": "javascript",
"javascript": "javascript",
"jsb": "javascript",
"jscad": "javascript",
"jsfl": "javascript",
"jslib": "javascript",
"jsm": "javascript",
"jspre": "javascript",
"jss": "javascript",
"jsx": "javascript",
"mjs": "javascript",
"njs": "javascript",
"pac": "javascript",
"sjs": "javascript",
"ssjs": "javascript",
"xsjs": "javascript",
"xsjslib": "javascript",
"js.erb": "javascript+erb",  # compound extension (contains a dot)
"kt": "kotlin",
"ktm": "kotlin",
"kts": "kotlin",
"litcoffee": "literate coffeescript",
"coffee.md": "literate coffeescript",  # compound extension (contains a dot)
"lhs": "literate haskell",
"lua": "lua",
"fcgi": "ruby",  # NOTE(review): grouped with the Lua entries but maps to ruby
"nse": "lua",
"p8": "lua",
"pd_lua": "lua",
"rbxs": "lua",
"rockspec": "lua",
"wlua": "lua",
"numpy": "numpy",
"numpyw": "numpy",
"numsc": "numpy",
"ml": "ocaml",
"eliom": "ocaml",
"eliomi": "ocaml",
"ml4": "ocaml",
"mli": "ocaml",
"mll": "ocaml",
"mly": "ocaml",
"m": "objective-c",
"mm": "objective-c++",
"cl": "opencl",
"opencl": "opencl",
"php": "php",
"aw": "php",
"ctp": "php",
"php3": "php",
"php4": "php",
"php5": "php",
"phps": "php",
"phpt": "php",
"pl": "perl",
"al": "perl",
"cgi": "python",  # NOTE(review): grouped with the Perl entries but maps to python
"perl": "perl",
"ph": "perl",
"plx": "perl",
"pm": "perl",
"psgi": "perl",
"t": "perl",
"py": "python",
"gyp": "python",
"gypi": "python",
"lmi": "python",
"py3": "python",
"pyde": "python",
"pyi": "python",
"pyp": "python",
"pyt": "python",
"pyw": "python",
"rpy": "python",
"spec": "ruby",  # NOTE(review): grouped with the Python entries but maps to ruby
"tac": "python",
"wsgi": "python",
"xpy": "python",
"rb": "ruby",
"builder": "ruby",
"eye": "ruby",
"gemspec": "ruby",
"god": "ruby",
"jbuilder": "ruby",
"mspec": "ruby",
"pluginspec": "ruby",
"podspec": "ruby",
"prawn": "ruby",
"rabl": "ruby",
"rake": "ruby",
"rbi": "ruby",
"rbuild": "ruby",
"rbw": "ruby",
"rbx": "ruby",
"ru": "ruby",
"ruby": "ruby",
"thor": "ruby",
"watchr": "ruby",
"rs": "rust",
"rs.in": "rust",  # compound extension (contains a dot)
"scala": "scala",
"kojo": "scala",
"sbt": "scala",
"sc": "scala",
"smk": "snakemake",
"snakefile": "snakemake",
"swift": "swift",
"tsx": "tsx",
"ts": "typescript",
"cts": "typescript",
"mts": "typescript",
"upc": "unified parallel c",
"vb": "visual basic .net",
"vbhtml": "visual basic .net",
"bas": "visual basic 6.0",
"cls": "visual basic 6.0",
"ctl": "visual basic 6.0",
"dsr": "visual basic 6.0",
"frm": "visual basic 6.0",
}
35 changes: 31 additions & 4 deletions src/sentry/integrations/github/webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from sentry import analytics, features, options
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import Endpoint, region_silo_endpoint
from sentry.constants import ObjectStatus
from sentry.constants import EXTENSION_LANGUAGE_MAP, ObjectStatus
from sentry.integrations.utils.scope import clear_tags_and_context
from sentry.models import Commit, CommitAuthor, Organization, PullRequest, Repository
from sentry.models.commitfilechange import CommitFileChange
Expand Down Expand Up @@ -47,6 +47,18 @@ def get_github_external_id(event: Mapping[str, Any], host: str | None = None) ->
return f"{host}:{external_id}" if host else external_id


def get_file_language(filename: str) -> str | None:
    """Return the language name for ``filename`` based on its extension.

    The extension is looked up in ``EXTENSION_LANGUAGE_MAP``. Only the final
    path component is inspected, so a dot inside a directory name (for
    example ``"pkg.d/Makefile"``) is not mistaken for an extension.

    Compound extensions are tried before the simple one: for
    ``"view.js.erb"`` the key ``"js.erb"`` is looked up first, then ``"erb"``.

    :param filename: File path as reported for a commit, using ``/`` as the
        directory separator.
    :returns: The mapped language name, or ``None`` when the file has no
        extension or the extension is not in the map.
    """
    # Strip directories first; splitting the whole path on "." would treat
    # "pkg.d/Makefile" as having the extension "d/Makefile".
    basename = filename.rsplit("/", 1)[-1]
    parts = basename.split(".")
    if len(parts) < 2:
        # No dot in the file name at all, so there is no extension to map.
        return None

    extension = parts[-1]
    candidates = [extension]
    if len(parts) > 2:
        # The map contains a few two-part keys ("js.erb", "rs.in", ...);
        # prefer the more specific compound match when one is possible.
        candidates.insert(0, ".".join(parts[-2:]))

    for candidate in candidates:
        language = EXTENSION_LANGUAGE_MAP.get(candidate)
        if language is not None:
            return language

    # Record extensions we do not know about so the map can be extended.
    logger.info("github.unaccounted_file_lang", extra={"extension": extension})
    return None


class Webhook:
provider = "github"

Expand Down Expand Up @@ -357,16 +369,31 @@ def _handle(
date_added=parse_date(commit["timestamp"]).astimezone(timezone.utc),
)
for fname in commit["added"]:
language = get_file_language(fname)
CommitFileChange.objects.create(
organization_id=organization.id, commit=c, filename=fname, type="A"
organization_id=organization.id,
commit=c,
filename=fname,
type="A",
language=language,
)
for fname in commit["removed"]:
language = get_file_language(fname)
CommitFileChange.objects.create(
organization_id=organization.id, commit=c, filename=fname, type="D"
organization_id=organization.id,
commit=c,
filename=fname,
type="D",
language=language,
)
for fname in commit["modified"]:
language = get_file_language(fname)
CommitFileChange.objects.create(
organization_id=organization.id, commit=c, filename=fname, type="M"
organization_id=organization.id,
commit=c,
filename=fname,
type="M",
language=language,
)
except IntegrityError:
pass
Expand Down
25 changes: 18 additions & 7 deletions tests/sentry/integrations/github/test_webhooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from sentry import options
from sentry.constants import ObjectStatus
from sentry.models import Commit, CommitAuthor, GroupLink, PullRequest, Repository
from sentry.models.commitfilechange import CommitFileChange
from sentry.silo import SiloMode
from sentry.testutils.cases import APITestCase
from sentry.testutils.helpers.features import with_feature
Expand All @@ -37,7 +38,7 @@ def test_unregistered_event(self):
data=PUSH_EVENT_EXAMPLE_INSTALLATION,
content_type="application/json",
HTTP_X_GITHUB_EVENT="UnregisteredEvent",
HTTP_X_HUB_SIGNATURE="sha1=56a3df597e02adbc17fb617502c70e19d96a6136",
HTTP_X_HUB_SIGNATURE="sha1=f834c327e17e6ef77f7882f948022747b379a1e3",
HTTP_X_GITHUB_DELIVERY=str(uuid4()),
)

Expand Down Expand Up @@ -81,7 +82,7 @@ def _setup_repo_test(self, project):
data=PUSH_EVENT_EXAMPLE_INSTALLATION,
content_type="application/json",
HTTP_X_GITHUB_EVENT="push",
HTTP_X_HUB_SIGNATURE="sha1=56a3df597e02adbc17fb617502c70e19d96a6136",
HTTP_X_HUB_SIGNATURE="sha1=f834c327e17e6ef77f7882f948022747b379a1e3",
HTTP_X_GITHUB_DELIVERY=str(uuid4()),
)

Expand Down Expand Up @@ -112,7 +113,7 @@ def test_simple(self):
commit = commit_list[0]

assert commit.key == "133d60480286590a610a0eb7352ff6e02b9674c4"
assert commit.message == "Update README.md (àgain)"
assert commit.message == "Update hello.py"
assert commit.author.name == "bàxterthehacker"
assert commit.author.email == "baxterthehacker@users.noreply.github.com"
assert commit.author.external_id is None
Expand All @@ -127,6 +128,11 @@ def test_simple(self):
assert commit.author.external_id is None
assert commit.date_added == datetime(2015, 5, 5, 23, 40, 15, tzinfo=timezone.utc)

commit_filechanges = CommitFileChange.objects.all()
assert len(commit_filechanges) == 2
assert commit_filechanges[0].language == "python"
assert commit_filechanges[1].language is None

def test_auto_linking_missing_feature_flag(self):
project = self.project # force creation

Expand Down Expand Up @@ -200,7 +206,7 @@ def test_anonymous_lookup(self):
commit = commit_list[0]

assert commit.key == "133d60480286590a610a0eb7352ff6e02b9674c4"
assert commit.message == "Update README.md (àgain)"
assert commit.message == "Update hello.py"
assert commit.author.name == "bàxterthehacker"
assert commit.author.email == "baxterthehacker@example.com"
assert commit.date_added == datetime(2015, 5, 5, 23, 45, 15, tzinfo=timezone.utc)
Expand All @@ -213,6 +219,11 @@ def test_anonymous_lookup(self):
assert commit.author.email == "baxterthehacker@example.com"
assert commit.date_added == datetime(2015, 5, 5, 23, 40, 15, tzinfo=timezone.utc)

commit_filechanges = CommitFileChange.objects.all()
assert len(commit_filechanges) == 2
assert commit_filechanges[0].language == "python"
assert commit_filechanges[1].language is None

def test_multiple_orgs(self):
project = self.project # force creation

Expand Down Expand Up @@ -257,7 +268,7 @@ def test_multiple_orgs(self):
data=PUSH_EVENT_EXAMPLE_INSTALLATION,
content_type="application/json",
HTTP_X_GITHUB_EVENT="push",
HTTP_X_HUB_SIGNATURE="sha1=56a3df597e02adbc17fb617502c70e19d96a6136",
HTTP_X_HUB_SIGNATURE="sha1=f834c327e17e6ef77f7882f948022747b379a1e3",
HTTP_X_GITHUB_DELIVERY=str(uuid4()),
)

Expand Down Expand Up @@ -301,7 +312,7 @@ def test_multiple_orgs_creates_missing_repos(self, mock_metrics):
data=PUSH_EVENT_EXAMPLE_INSTALLATION,
content_type="application/json",
HTTP_X_GITHUB_EVENT="push",
HTTP_X_HUB_SIGNATURE="sha1=56a3df597e02adbc17fb617502c70e19d96a6136",
HTTP_X_HUB_SIGNATURE="sha1=f834c327e17e6ef77f7882f948022747b379a1e3",
HTTP_X_GITHUB_DELIVERY=str(uuid4()),
)

Expand Down Expand Up @@ -349,7 +360,7 @@ def test_multiple_orgs_ignores_hidden_repo(self):
data=PUSH_EVENT_EXAMPLE_INSTALLATION,
content_type="application/json",
HTTP_X_GITHUB_EVENT="push",
HTTP_X_HUB_SIGNATURE="sha1=56a3df597e02adbc17fb617502c70e19d96a6136",
HTTP_X_HUB_SIGNATURE="sha1=f834c327e17e6ef77f7882f948022747b379a1e3",
HTTP_X_GITHUB_DELIVERY=str(uuid4()),
)

Expand Down

0 comments on commit 930abe1

Please sign in to comment.