Use pyink #3216

Merged (3 commits) on Jul 21, 2023
2 changes: 2 additions & 0 deletions .git-blame-ignore-revs
@@ -0,0 +1,2 @@
# apply pyink
40a6e074e5224d733f964be00e21e0a1cb98bd2e
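(Context: .git-blame-ignore-revs lists commits that git blame should skip, so this bulk-format commit is not shown as the last change to every reformatted line. GitHub's blame view honors the file automatically; locally, each clone opts in once with:
  git config blame.ignoreRevsFile .git-blame-ignore-revs)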
128 changes: 70 additions & 58 deletions .github/analytics/get_repo_metrics.py
@@ -25,17 +25,18 @@
import matplotlib.dates as mdates


token = os.environ["GITHUB_TOKEN"]
endpoint = r"https://api.github.com/graphql"
headers = {"Authorization": f"bearer {token}"}
token = os.environ['GITHUB_TOKEN']
endpoint = r'https://api.github.com/graphql'
headers = {'Authorization': f'bearer {token}'}

#------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# GraphQL
#------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# NOTE: This GraphQL logic was ported and adapted from this script:
# https://github.com/scientific-python/devstats-data/blob/4c022961abc4ca6061f8719d9c3387e98734b90c/query.py
# It contains style differences from Google's style guide.


def load_query_from_file(fname, repo_owner, repo_name) -> str:
with open(fname) as fh:
query = fh.read()
@@ -75,9 +76,7 @@ def send_query(query, query_type, cursor=None):
# TODO: Expand this, either by parsing the query type from the query
# directly or manually adding more query_types to the set
if query_type not in {'issues', 'pullRequests'}:
raise ValueError(
'Only \'issues\' and \'pullRequests\' queries are currently supported'
)
raise ValueError("Only 'issues' and 'pullRequests' queries are currently supported")
# TODO: Generalize this
# WARNING: The cursor injection depends on the specific structure of the
# query, this is the main reason why query types are limited to issues/PRs
@@ -86,12 +85,13 @@ def send_query(query, query_type, cursor=None):
cursor_ind = query.find(cursor_insertion_key) + len(cursor_insertion_key)
query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
# Build request payload
payload = {'query' : query}
payload = {'query': query}
response = requests.post(endpoint, json=payload, headers=headers)
return json.loads(response.content)


def get_all_responses(query, query_type):
"Helper function to bypass GitHub GraphQL API node limit."
'Helper function to bypass GitHub GraphQL API node limit.'
# Get data from a single response
initial_data = send_query(query, query_type)
data, last_cursor, total_count = parse_single_query(initial_data, query_type)
@@ -105,6 +105,7 @@ def get_all_responses(query, query_type):
print('Done.')
return data


def parse_single_query(data, query_type):
"""
Parses the data returned by `send_query`
@@ -159,20 +160,21 @@ def __init__(self, query_fname, query_type, repo_owner, repo_name):
self.load_query()

def load_query(self):
self.query = load_query_from_file(
self.query_fname, self.repo_owner, self.repo_name
)
self.query = load_query_from_file(self.query_fname, self.repo_owner, self.repo_name)

def get(self):
self.raw_data = get_all_responses(self.query, self.query_type)

#------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
# metrics helpers
#------------------------------------------------------------------------------
# ------------------------------------------------------------------------------


def _to_datetime(date_str: str) -> datetime:
return datetime.fromisoformat(date_str.replace('Z', ''))


def _get_issues_features(issues):
for issue in issues:
issue = issue['node']
@@ -191,12 +193,13 @@ def _get_issues_features(issues):
time_issue_closed = _to_datetime(event['createdAt'])

yield {
'created_at': created_at,
'time_labeled_or_converted': time_labeled_or_converted,
'time_issue_closed': time_issue_closed,
'issue_closed': issue['state'] == 'CLOSED',
'created_at': created_at,
'time_labeled_or_converted': time_labeled_or_converted,
'time_issue_closed': time_issue_closed,
'issue_closed': issue['state'] == 'CLOSED',
}


def _get_pr_features(prs):
for pr in prs:
pr = pr['node']
@@ -207,24 +210,21 @@ def _get_pr_features(prs):
time_merged_or_closed = None
time_review = None

if pr["reviews"]["nodes"]:
review = pr["reviews"]["nodes"][0]
time_review = _to_datetime(review["createdAt"])
if pr['reviews']['nodes']:
review = pr['reviews']['nodes'][0]
time_review = _to_datetime(review['createdAt'])

for event in pr['timelineItems']['edges']:
event = event['node']

if (
time_labeled_or_assigned is None
and event['__typename'] == 'LabeledEvent'
and 'cla:' not in event['label']['name']
time_labeled_or_assigned is None
and event['__typename'] == 'LabeledEvent'
and 'cla:' not in event['label']['name']
):
time_labeled_or_assigned = _to_datetime(event['createdAt'])

if (
time_labeled_or_assigned is None
and event['__typename'] == 'AssignedEvent'
):
if time_labeled_or_assigned is None and event['__typename'] == 'AssignedEvent':
time_labeled_or_assigned = _to_datetime(event['createdAt'])

if event['__typename'] in {'ClosedEvent', 'MergedEvent'}:
@@ -234,17 +234,19 @@ def _get_pr_features(prs):
ready_for_review_at = _to_datetime(event['createdAt'])

yield {
'created_at': created_at,
'ready_for_review_at': ready_for_review_at,
'time_labeled_or_assigned': time_labeled_or_assigned,
'time_merged_or_closed': time_merged_or_closed,
'time_review': time_review,
'pr_closed': pr['state'] != 'OPEN',
'created_at': created_at,
'ready_for_review_at': ready_for_review_at,
'time_labeled_or_assigned': time_labeled_or_assigned,
'time_merged_or_closed': time_merged_or_closed,
'time_review': time_review,
'pr_closed': pr['state'] != 'OPEN',
}


def _start_of_month(date: datetime) -> datetime:
return date.replace(day=1, hour=0, minute=0, second=0, microsecond=0)


def _shift_n_months(date: datetime, n: int) -> datetime:
month = ((date.month + n - 1) % 12) + 1
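# (Worked example: the modular arithmetic wraps the month into 1..12, so for a
# November date and n=3, ((11 + 3 - 1) % 12) + 1 = 2, i.e. February; the matching
# year increment is presumably handled in the collapsed remainder of this function.)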

@@ -258,14 +260,14 @@ def _shift_n_months(date: datetime, n: int) -> datetime:


def _rolling_window(
df: pd.DataFrame,
f: Callable[[pd.DataFrame], pd.Series],
window_size: int = 6,
step: int = 1,
df: pd.DataFrame,
f: Callable[[pd.DataFrame], pd.Series],
window_size: int = 6,
step: int = 1,
) -> pd.DataFrame:
# start of month of the first issue
start: datetime = df.iloc[0]['created_at'].replace(
day=1, hour=0, minute=0, second=0, microsecond=0
day=1, hour=0, minute=0, second=0, microsecond=0
)
end = _shift_n_months(start, window_size)

@@ -286,56 +288,66 @@

return df
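# (With the defaults above, _rolling_window computes a six-month window advanced
# one month per step, so each row of the returned frame summarizes one half-year
# span of issues or PRs; the windowing loop itself sits in the collapsed middle
# of this hunk.)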


def _process_prs(df: pd.DataFrame) -> pd.Series:
return pd.Series({
'pr_response_time': df['pr_response_time'].dt.days.mean(),
'pr_resolution_time': df['pr_resolution_time'].dt.days.mean(),
'pr_response_time': df['pr_response_time'].dt.days.mean(),
'pr_resolution_time': df['pr_resolution_time'].dt.days.mean(),
})


def _process_issues(df: pd.DataFrame) -> pd.Series:
return pd.Series({
'issue_response_time': df['issue_response_time'].dt.days.mean(),
'issue_resolution_time': df['issue_resolution_time'].dt.days.mean(),
'issue_response_time': df['issue_response_time'].dt.days.mean(),
'issue_resolution_time': df['issue_resolution_time'].dt.days.mean(),
})

#-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# main
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
FLAGS = flags.FLAGS
flags.DEFINE_string('repo_owner', 'google', 'User name or organization')
flags.DEFINE_string('repo_name', 'flax', 'Name of the repository')


def main(_):
repo_owner: str = FLAGS.repo_owner
repo_name: str = FLAGS.repo_name

# Download issue data
issues = GithubGrabber(
'.github/analytics/issue_activity_since_date.gql',
'issues',
repo_owner=repo_owner,
repo_name=repo_name,
'.github/analytics/issue_activity_since_date.gql',
'issues',
repo_owner=repo_owner,
repo_name=repo_name,
)
issues.get()

df_issues = df_issues0 = pd.DataFrame(list(_get_issues_features(issues.raw_data)))
df_issues['issue_response_time'] = df_issues['time_labeled_or_converted'] - df_issues['created_at']
df_issues['issue_resolution_time'] = df_issues['time_issue_closed'] - df_issues['created_at']
df_issues['issue_response_time'] = (
df_issues['time_labeled_or_converted'] - df_issues['created_at']
)
df_issues['issue_resolution_time'] = (
df_issues['time_issue_closed'] - df_issues['created_at']
)

df_issues = _rolling_window(df_issues, _process_issues)

prs = GithubGrabber(
'.github/analytics/pr_data_query.gql',
'pullRequests',
repo_owner=repo_owner,
repo_name=repo_name,
'.github/analytics/pr_data_query.gql',
'pullRequests',
repo_owner=repo_owner,
repo_name=repo_name,
)
prs.get()

df_prs = df_prs0 = pd.DataFrame(list(_get_pr_features(prs.raw_data)))
time_response = df_prs[['time_labeled_or_assigned', 'time_review']].min(axis=1)
df_prs['pr_response_time'] = time_response - df_prs['ready_for_review_at']
df_prs['pr_resolution_time'] = df_prs['time_merged_or_closed'] - df_prs['ready_for_review_at']
df_prs['pr_resolution_time'] = (
df_prs['time_merged_or_closed'] - df_prs['ready_for_review_at']
)

df_prs = _rolling_window(df_prs, _process_prs)

@@ -367,7 +379,6 @@ def main(_):
plt.gca().xaxis.set_major_locator(plt.MaxNLocator(5))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))


# plot for issue_response_time
plt.figure()
plt.plot(df_issues['period_end'], df_issues['issue_response_time'])
@@ -411,5 +422,6 @@ def main(_):
# show plots
plt.show()


if __name__ == '__main__':
app.run(main)
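(For reference: the script reads its token from the GITHUB_TOKEN environment variable and is driven by the two flags defined above, so a typical invocation from a checkout is:
  GITHUB_TOKEN=<token> python .github/analytics/get_repo_metrics.py --repo_owner=google --repo_name=flax)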
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
hooks:
- id: check-toml
- id: trailing-whitespace
exclude: ^docs/.*\.md$
exclude: ^docs/.*\.md$|^dev/.*\.py$
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
hooks:
@@ -29,3 +29,7 @@ repos:
--extra-keys,
"metadata.kernelspec metadata.vscode metadata.colab cell.metadata.executionInfo.user cell.metadata.executionInfo.user_tz cell.metadata.colab",
]
- repo: https://github.com/google/pyink
rev: 23.5.0
hooks:
- id: pyink
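(With this hook added, pre-commit run --all-files reformats the whole tree with pyink 23.5.0 and later commits are checked automatically; the resulting bulk-format commit is presumably the one registered in .git-blame-ignore-revs above.)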
30 changes: 18 additions & 12 deletions dev/update_requirements.py
@@ -38,7 +38,7 @@

Alternatively, the list can also be provided from the local environment with:

python dev --versions="$(pip freeze | sed s/==/-/g) flax-0.3.6"
python dev --versions="$(pip freeze | sed s/==/-/g) flax-0.3.6"
"""

import pathlib
@@ -58,7 +58,8 @@
'`--version="$(pip freeze | sed s/==/-/g) flax-0.3.6"` '
'(note the flax version "override") '
'or from the "install dependencies" step in the github build action '
'https://github.com/google/flax/actions/workflows/build.yml')
'https://github.com/google/flax/actions/workflows/build.yml',
)
flags.mark_flag_as_required('versions')
flags.DEFINE_bool('verbose', False, 'enables verbose output.')
flags.DEFINE_list('ignore', ['jax'], 'packages not to add to requirements.')
@@ -67,22 +68,26 @@
import_re = re.compile(r'(?:from|import)\s+(\w+)')
# maps `import cv2` to `pip install opencv-python`
pkg_map = {
'absl': 'absl-py',
'atari_py': 'atari-py',
'cv2': 'opencv-python',
'ml_collections': 'ml-collections',
'PIL': 'Pillow',
'tensorflow_datasets': 'tensorflow-datasets',
'tensorflow_text': 'tensorflow-text',
'absl': 'absl-py',
'atari_py': 'atari-py',
'cv2': 'opencv-python',
'ml_collections': 'ml-collections',
'PIL': 'Pillow',
'tensorflow_datasets': 'tensorflow-datasets',
'tensorflow_text': 'tensorflow-text',
}
standard_libs = set('codecs collections dataclasses datetime enum functools math multiprocessing itertools os pathlib random re sys tempfile time typing unicodedata warnings'.split(' '))
standard_libs = set(
'codecs collections dataclasses datetime enum functools math multiprocessing itertools os pathlib random re sys tempfile time typing unicodedata warnings'.split(
' '
)
)


def main(argv):
del argv

versions = {
pkg_version[:pkg_version.rindex('-')]: pkg_version[pkg_version.rindex('-') + 1:]
pkg_version[: pkg_version.rindex('-')]: pkg_version[pkg_version.rindex('-') + 1 :]
for pkg_version in FLAGS.versions.replace('\n', ' ').split(' ')
if '-' in pkg_version
}
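(The comprehension above splits each pip-freeze-style entry on its last hyphen, so flax-0.3.6 parses to key flax with version 0.3.6, and a hyphenated name such as absl-py-1.4.0 still parses cleanly to absl-py / 1.4.0.)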
@@ -117,7 +122,8 @@ def main(argv):
print(f'{requirements} -', end=' ')
with requirements.open('w') as f:
for pkg in sorted(pkgs, key=str.casefold):
if pkg in ignore: continue
if pkg in ignore:
continue
pkg = pkg_map.get(pkg, pkg)
print(f'{pkg}-{versions[pkg]}', end=' ')
f.write(f'{pkg}=={versions[pkg]}\n')