Commit 3ab11fe

Merge pull request #3216 from google:use-pyink
PiperOrigin-RevId: 550029828
Flax Authors committed Jul 21, 2023
2 parents 39e3e32 + 97d038c commit 3ab11fe
Showing 171 changed files with 9,142 additions and 5,771 deletions.
2 changes: 2 additions & 0 deletions .git-blame-ignore-revs
@@ -0,0 +1,2 @@
# apply pyink
40a6e074e5224d733f964be00e21e0a1cb98bd2e
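
Note: `.git-blame-ignore-revs` lists commits for `git blame` to skip once the file is wired up with `git config blame.ignoreRevsFile .git-blame-ignore-revs`, so this mass reformat does not obscure line history. The rest of the diff is dominated by two mechanical changes from pyink (Google's fork of Black), sketched below with snippets echoing the first hunk; the `.get(..., 'dummy')` fallback is added here only to keep the sketch runnable.

import os

# Before pyink: double-quoted strings (the removed lines in the hunk below).
token = os.environ.get("GITHUB_TOKEN", "dummy")
headers = {"Authorization": f"bearer {token}"}

# After pyink: single quotes preferred for plain strings...
token = os.environ.get('GITHUB_TOKEN', 'dummy')
headers = {'Authorization': f'bearer {token}'}

# ...but double quotes are kept where they avoid backslash escapes:
msg = 'Only \'issues\' and \'pullRequests\' queries are currently supported'
msg = "Only 'issues' and 'pullRequests' queries are currently supported"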
127 changes: 73 additions & 54 deletions .github/analytics/get_repo_metrics.py
@@ -25,17 +25,18 @@
import matplotlib.dates as mdates


token = os.environ["GITHUB_TOKEN"]
endpoint = r"https://api.github.com/graphql"
headers = {"Authorization": f"bearer {token}"}
token = os.environ['GITHUB_TOKEN']
endpoint = r'https://api.github.com/graphql'
headers = {'Authorization': f'bearer {token}'}

#------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# GraphQL
#------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# NOTE: This GraphQL logic was ported and adapted from this script:
# https://github.com/scientific-python/devstats-data/blob/4c022961abc4ca6061f8719d9c3387e98734b90c/query.py
# It contains style differences from Google's style guide.


def load_query_from_file(fname, repo_owner, repo_name) -> str:
with open(fname) as fh:
query = fh.read()
@@ -75,9 +76,9 @@ def send_query(query, query_type, cursor=None):
# TODO: Expand this, either by parsing the query type from the query
# directly or manually adding more query_types to the set
if query_type not in {'issues', 'pullRequests'}:
raise ValueError(
'Only \'issues\' and \'pullRequests\' queries are currently supported'
)
raise ValueError(
"Only 'issues' and 'pullRequests' queries are currently supported"
)
# TODO: Generalize this
# WARNING: The cursor injection depends on the specific structure of the
# query, this is the main reason why query types are limited to issues/PRs
@@ -86,12 +87,13 @@ def send_query(query, query_type, cursor=None):
cursor_ind = query.find(cursor_insertion_key) + len(cursor_insertion_key)
query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
# Build request payload
payload = {'query' : query}
payload = {'query': query}
response = requests.post(endpoint, json=payload, headers=headers)
return json.loads(response.content)


def get_all_responses(query, query_type):
"Helper function to bypass GitHub GraphQL API node limit."
'Helper function to bypass GitHub GraphQL API node limit.'
# Get data from a single response
initial_data = send_query(query, query_type)
data, last_cursor, total_count = parse_single_query(initial_data, query_type)
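
Note: GitHub's GraphQL API returns at most 100 nodes per page, so `get_all_responses` re-issues the query with `after:"<cursor>"` spliced into the `items(...)` clause until every node has arrived. A condensed sketch of that loop, assuming the same `send_query`/`parse_single_query` helpers are in scope; `fetch_all` is a hypothetical name, and the stop condition assumes `total_count` is accurate.

def fetch_all(query: str, query_type: str) -> list:
  # First page establishes the total and the starting cursor.
  data, cursor, total = parse_single_query(
      send_query(query, query_type), query_type
  )
  while len(data) < total:
    # Each round re-sends the query resumed at the last cursor.
    page = send_query(query, query_type, cursor=cursor)
    page_data, cursor, _ = parse_single_query(page, query_type)
    data.extend(page_data)
  return data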
@@ -105,6 +107,7 @@ def get_all_responses(query, query_type):
print('Done.')
return data


def parse_single_query(data, query_type):
"""
Parses the data returned by `send_query`
@@ -160,19 +163,22 @@ def __init__(self, query_fname, query_type, repo_owner, repo_name):

def load_query(self):
self.query = load_query_from_file(
self.query_fname, self.repo_owner, self.repo_name
self.query_fname, self.repo_owner, self.repo_name
)

def get(self):
self.raw_data = get_all_responses(self.query, self.query_type)

#------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
# metrics helpers
#------------------------------------------------------------------------------
# ------------------------------------------------------------------------------


def _to_datetime(date_str: str) -> datetime:
return datetime.fromisoformat(date_str.replace('Z', ''))


def _get_issues_features(issues):
for issue in issues:
issue = issue['node']
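
Note: the `replace('Z', '')` in `_to_datetime` above exists because `datetime.fromisoformat` rejects the trailing `Z` (UTC marker) in GitHub's ISO 8601 timestamps on Python versions before 3.11. A quick illustration with a made-up timestamp:

from datetime import datetime

stamp = '2023-07-21T08:15:30Z'  # typical GitHub createdAt shape
parsed = datetime.fromisoformat(stamp.replace('Z', ''))
print(parsed)  # 2023-07-21 08:15:30, a naive datetime implicitly in UTC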
@@ -191,12 +197,13 @@ def _get_issues_features(issues):
time_issue_closed = _to_datetime(event['createdAt'])

yield {
'created_at': created_at,
'time_labeled_or_converted': time_labeled_or_converted,
'time_issue_closed': time_issue_closed,
'issue_closed': issue['state'] == 'CLOSED',
'created_at': created_at,
'time_labeled_or_converted': time_labeled_or_converted,
'time_issue_closed': time_issue_closed,
'issue_closed': issue['state'] == 'CLOSED',
}


def _get_pr_features(prs):
for pr in prs:
pr = pr['node']
@@ -207,23 +214,23 @@ def _get_pr_features(prs):
time_merged_or_closed = None
time_review = None

if pr["reviews"]["nodes"]:
review = pr["reviews"]["nodes"][0]
time_review = _to_datetime(review["createdAt"])
if pr['reviews']['nodes']:
review = pr['reviews']['nodes'][0]
time_review = _to_datetime(review['createdAt'])

for event in pr['timelineItems']['edges']:
event = event['node']

if (
time_labeled_or_assigned is None
and event['__typename'] == 'LabeledEvent'
and 'cla:' not in event['label']['name']
time_labeled_or_assigned is None
and event['__typename'] == 'LabeledEvent'
and 'cla:' not in event['label']['name']
):
time_labeled_or_assigned = _to_datetime(event['createdAt'])

if (
time_labeled_or_assigned is None
and event['__typename'] == 'AssignedEvent'
time_labeled_or_assigned is None
and event['__typename'] == 'AssignedEvent'
):
time_labeled_or_assigned = _to_datetime(event['createdAt'])

@@ -234,17 +241,19 @@ def _get_pr_features(prs):
ready_for_review_at = _to_datetime(event['createdAt'])

yield {
'created_at': created_at,
'ready_for_review_at': ready_for_review_at,
'time_labeled_or_assigned': time_labeled_or_assigned,
'time_merged_or_closed': time_merged_or_closed,
'time_review': time_review,
'pr_closed': pr['state'] != 'OPEN',
'created_at': created_at,
'ready_for_review_at': ready_for_review_at,
'time_labeled_or_assigned': time_labeled_or_assigned,
'time_merged_or_closed': time_merged_or_closed,
'time_review': time_review,
'pr_closed': pr['state'] != 'OPEN',
}


def _start_of_month(date: datetime) -> datetime:
return date.replace(day=1, hour=0, minute=0, second=0, microsecond=0)


def _shift_n_months(date: datetime, n: int) -> datetime:
month = ((date.month + n - 1) % 12) + 1
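
Note: only the month arithmetic of `_shift_n_months` is visible in this view; as a worked example, month 11 shifted by 3 gives ((11 + 3 - 1) % 12) + 1 = 2. A plausible completion, assuming the elided remainder simply carries the year forward; callers pass start-of-month dates, so day overflow is not a concern here.

from datetime import datetime

def shift_n_months(date: datetime, n: int) -> datetime:
  # Visible in the diff: wrap the month into the 1..12 range.
  month = ((date.month + n - 1) % 12) + 1
  # Assumed here: advance the year once per wrap past December.
  year = date.year + (date.month + n - 1) // 12
  return date.replace(year=year, month=month)

print(shift_n_months(datetime(2021, 11, 1), 3))  # 2022-02-01 00:00:00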

@@ -258,14 +267,14 @@ def _shift_n_months(date: datetime, n: int) -> datetime:


def _rolling_window(
df: pd.DataFrame,
f: Callable[[pd.DataFrame], pd.Series],
window_size: int = 6,
step: int = 1,
df: pd.DataFrame,
f: Callable[[pd.DataFrame], pd.Series],
window_size: int = 6,
step: int = 1,
) -> pd.DataFrame:
# start of month of the first issue
start: datetime = df.iloc[0]['created_at'].replace(
day=1, hour=0, minute=0, second=0, microsecond=0
day=1, hour=0, minute=0, second=0, microsecond=0
)
end = _shift_n_months(start, window_size)

@@ -286,56 +295,66 @@

return df


def _process_prs(df: pd.DataFrame) -> pd.Series:
return pd.Series({
'pr_response_time': df['pr_response_time'].dt.days.mean(),
'pr_resolution_time': df['pr_resolution_time'].dt.days.mean(),
'pr_response_time': df['pr_response_time'].dt.days.mean(),
'pr_resolution_time': df['pr_resolution_time'].dt.days.mean(),
})


def _process_issues(df: pd.DataFrame) -> pd.Series:
return pd.Series({
'issue_response_time': df['issue_response_time'].dt.days.mean(),
'issue_resolution_time': df['issue_resolution_time'].dt.days.mean(),
'issue_response_time': df['issue_response_time'].dt.days.mean(),
'issue_resolution_time': df['issue_resolution_time'].dt.days.mean(),
})

#-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# main
#-----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
FLAGS = flags.FLAGS
flags.DEFINE_string('repo_owner', 'google', 'User name or organization')
flags.DEFINE_string('repo_name', 'flax', 'Name of the repository')


def main(_):
repo_owner: str = FLAGS.repo_owner
repo_name: str = FLAGS.repo_name

# Download issue data
issues = GithubGrabber(
'.github/analytics/issue_activity_since_date.gql',
'issues',
repo_owner=repo_owner,
repo_name=repo_name,
'.github/analytics/issue_activity_since_date.gql',
'issues',
repo_owner=repo_owner,
repo_name=repo_name,
)
issues.get()

df_issues = df_issues0 = pd.DataFrame(list(_get_issues_features(issues.raw_data)))
df_issues['issue_response_time'] = df_issues['time_labeled_or_converted'] - df_issues['created_at']
df_issues['issue_resolution_time'] = df_issues['time_issue_closed'] - df_issues['created_at']
df_issues['issue_response_time'] = (
df_issues['time_labeled_or_converted'] - df_issues['created_at']
)
df_issues['issue_resolution_time'] = (
df_issues['time_issue_closed'] - df_issues['created_at']
)

df_issues = _rolling_window(df_issues, _process_issues)

prs = GithubGrabber(
'.github/analytics/pr_data_query.gql',
'pullRequests',
repo_owner=repo_owner,
repo_name=repo_name,
'.github/analytics/pr_data_query.gql',
'pullRequests',
repo_owner=repo_owner,
repo_name=repo_name,
)
prs.get()

df_prs = df_prs0 = pd.DataFrame(list(_get_pr_features(prs.raw_data)))
time_response = df_prs[['time_labeled_or_assigned', 'time_review']].min(axis=1)
df_prs['pr_response_time'] = time_response - df_prs['ready_for_review_at']
df_prs['pr_resolution_time'] = df_prs['time_merged_or_closed'] - df_prs['ready_for_review_at']
df_prs['pr_resolution_time'] = (
df_prs['time_merged_or_closed'] - df_prs['ready_for_review_at']
)

df_prs = _rolling_window(df_prs, _process_prs)
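
Note: `_rolling_window` slides a `window_size`-month window, stepped by `step` months, over the rows by `created_at` and applies the reducer to each slice; this is how `_process_issues` and `_process_prs` turn per-item timedeltas into monthly mean-days series. A minimal sketch with made-up data, assuming the script's helpers are in scope; column names match the script, values are illustrative only.

import pandas as pd

df = pd.DataFrame(
    {'created_at': pd.to_datetime(['2023-01-05', '2023-02-10', '2023-03-15'])}
)
df['issue_response_time'] = pd.to_timedelta([2, 4, 6], unit='D')
df['issue_resolution_time'] = pd.to_timedelta([10, 20, 30], unit='D')

monthly = _rolling_window(df, _process_issues, window_size=6, step=1)
print(monthly)  # one row per window, with mean response/resolution days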

@@ -367,7 +386,6 @@ def main(_):
plt.gca().xaxis.set_major_locator(plt.MaxNLocator(5))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))


# plot for issue_response_time
plt.figure()
plt.plot(df_issues['period_end'], df_issues['issue_response_time'])
@@ -411,5 +429,6 @@ def main(_):
# show plots
plt.show()


if __name__ == '__main__':
app.run(main)
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
hooks:
- id: check-toml
- id: trailing-whitespace
exclude: ^docs/.*\.md$
exclude: ^docs/.*\.md$|^dev/.*\.py$
- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
hooks:
@@ -29,3 +29,7 @@ repos:
--extra-keys,
"metadata.kernelspec metadata.vscode metadata.colab cell.metadata.executionInfo.user cell.metadata.executionInfo.user_tz cell.metadata.colab",
]
- repo: https://github.com/google/pyink
rev: 23.5.0
hooks:
- id: pyink
30 changes: 19 additions & 11 deletions dev/update_requirements.py
@@ -58,7 +58,8 @@
'`--version="$(pip freeze | sed s/==/-/g) flax-0.3.6"` '
'(note the flax version "override") '
'or from the "install dependencies" step in the github build action '
'https://github.com/google/flax/actions/workflows/build.yml')
'https://github.com/google/flax/actions/workflows/build.yml',
)
flags.mark_flag_as_required('versions')
flags.DEFINE_bool('verbose', False, 'enables verbose output.')
flags.DEFINE_list('ignore', ['jax'], 'packages not to add to requirements.')
@@ -67,22 +68,28 @@
import_re = re.compile(r'(?:from|import)\s+(\w+)')
# maps `import cv2` to `pip install opencv-python`
pkg_map = {
'absl': 'absl-py',
'atari_py': 'atari-py',
'cv2': 'opencv-python',
'ml_collections': 'ml-collections',
'PIL': 'Pillow',
'tensorflow_datasets': 'tensorflow-datasets',
'tensorflow_text': 'tensorflow-text',
'absl': 'absl-py',
'atari_py': 'atari-py',
'cv2': 'opencv-python',
'ml_collections': 'ml-collections',
'PIL': 'Pillow',
'tensorflow_datasets': 'tensorflow-datasets',
'tensorflow_text': 'tensorflow-text',
}
standard_libs = set('codecs collections dataclasses datetime enum functools math multiprocessing itertools os pathlib random re sys tempfile time typing unicodedata warnings'.split(' '))
standard_libs = set(
'codecs collections dataclasses datetime enum functools math'
' multiprocessing itertools os pathlib random re sys tempfile time typing'
' unicodedata warnings'.split(' ')
)


def main(argv):
del argv

versions = {
pkg_version[:pkg_version.rindex('-')]: pkg_version[pkg_version.rindex('-') + 1:]
pkg_version[: pkg_version.rindex('-')]: pkg_version[
pkg_version.rindex('-') + 1 :
]
for pkg_version in FLAGS.versions.replace('\n', ' ').split(' ')
if '-' in pkg_version
}
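
Note: the comprehension above splits `pip freeze`-style `name-version` strings on the last hyphen via `rindex`, since package names may themselves contain hyphens. A standalone sketch with made-up entries:

freeze = 'absl-py-1.4.0 flax-0.3.6 opencv-python-4.8.0.74'
versions = {
    pv[: pv.rindex('-')]: pv[pv.rindex('-') + 1 :]
    for pv in freeze.split(' ')
    if '-' in pv
}
print(versions)
# {'absl-py': '1.4.0', 'flax': '0.3.6', 'opencv-python': '4.8.0.74'}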
@@ -117,7 +124,8 @@ def main(argv):
print(f'{requirements} -', end=' ')
with requirements.open('w') as f:
for pkg in sorted(pkgs, key=str.casefold):
if pkg in ignore: continue
if pkg in ignore:
continue
pkg = pkg_map.get(pkg, pkg)
print(f'{pkg}-{versions[pkg]}', end=' ')
f.write(f'{pkg}=={versions[pkg]}\n')