-
Notifications
You must be signed in to change notification settings - Fork 1
/
commits_bids.py
executable file
·127 lines (95 loc) · 3.47 KB
/
commits_bids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
import os, io
import subprocess
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import datetime
# The git commands, copied/stolen from Tyler!
def get_commits(repo_path='/home/sebastian/forks/numpy', branch='master'):
    """Return the git history of a repository as a DataFrame.

    Runs ``git log --use-mailmap`` on ``branch`` inside ``repo_path`` and
    parses its tab-separated output.

    Parameters
    ----------
    repo_path : str
        Directory of the git checkout to inspect.
    branch : str
        Revision whose history is listed.

    Returns
    -------
    pandas.DataFrame
        One row per commit with the columns ``name`` (git ``%aN``),
        ``time`` (git ``%aI``, converted to timezone-naive UTC) and
        ``title`` (git ``%f``).

    Raises
    ------
    OSError
        If ``repo_path`` does not exist or ``git`` cannot be started.
    subprocess.CalledProcessError
        If ``git log`` exits with a non-zero status.
    """
    # built up from: https://stackoverflow.com/a/47213799/2942522
    #
    # To only use merge commits, add '--merges', '--first-parent' to the
    # command (or "--no-merges" for the opposite).  The original author
    # noted that the downstream filtering probably does not work for
    # merge commits.
    #
    # Passing ``cwd`` runs git in the repository without changing the
    # working directory of this process, so nothing needs to be restored
    # when the command fails.
    out = subprocess.check_output(
        ['git', 'log', '--use-mailmap', branch, '--format=%aN\t%aI\t%f'],
        cwd=repo_path)

    log_data = io.BytesIO(b"name\ttime\ttitle\n" + out)
    del out  # the raw log can be large; the buffer holds its own copy
    commits = pd.read_csv(log_data, sep="\t")

    # ``%aI`` carries each author's own UTC offset.  Normalise everything
    # to UTC and drop the timezone so that the column has a single
    # datetime dtype and can be compared with naive ``datetime`` objects.
    commits["time"] = pd.to_datetime(
        commits["time"], utc=True).dt.tz_localize(None)
    return commits
# Author names that were classified as "Community" by the catch-all branch
# of ``find_bids``; handy for eyeballing names that may need a rule.
_all_community = set()


def find_bids(row):
    """Classify one commit as ``"BIDS"`` or ``"Community"``.

    Parameters
    ----------
    row : pandas.Series
        A row of the commit table: ``row["name"]`` is the author name and
        ``row.name`` (the index label) is the commit time.

    Returns
    -------
    str
        ``"BIDS"`` if the author matches one of the hard-coded rules (and,
        where a cut-off date is given, the commit is later than that
        date), otherwise ``"Community"``.
    """
    name = row["name"]
    lowered = name.lower()
    time = row.name

    if "mattip" in lowered or "matti p" in lowered:
        if time > datetime.datetime(2018, 3, 31):
            return "BIDS"
        return "Community"

    if "tyler reddy" in lowered:
        if time > datetime.datetime(2018, 6, 8):
            return "BIDS"
        return "Community"

    if "stefan van der walt" in lowered or "stefanv" in lowered:
        # (Second one never happens)
        return "BIDS"

    if "sebastian berg" in lowered or lowered == "seberg":
        if time > datetime.datetime(2019, 4, 24):
            return "BIDS"
        return "Community"

    _all_community.add(name)
    return "Community"


if __name__ == "__main__":
    commits = get_commits()

    # Optional: filter by time, so autoscaling works.  Disabled because the
    # axis limits are set explicitly further down.
    filter_by_time = False
    if filter_by_time:
        recent = (commits["time"] > datetime.datetime(2013, 1, 1)).values
        commits = commits.iloc[recent]

    commits = commits.set_index("time")

    # Replace every author name by the category the commit belongs to.
    commits["name"] = commits.apply(find_bids, axis=1)

    # Commits per quarter for each category, divided by 3 to get a
    # per-month rate.
    # NOTE(review): newer pandas versions spell this alias "QE"; "Q" is
    # kept for compatibility with older versions.
    groups = {}
    for g, comm in commits.groupby("name"):
        groups[g] = comm["name"].resample("Q").count() / 3

    plt.figure(figsize=(4, 4/3*2))

    categories = ["Community", "BIDS"]
    # Put all categories on one common quarterly index.  A quarter (or a
    # whole category) without commits counts as 0, so the stacked curves
    # always have matching lengths and contain no NaN.
    per_month = pd.DataFrame(groups).reindex(columns=categories).fillna(0)

    lower = np.zeros(len(per_month))
    for color, cat in zip(["C0", "C1"], categories):
        upper = lower + per_month[cat].values
        zorder = 4 if cat == "Community" else 3

        plt.fill_between(
            per_month.index, upper, lower,
            label=cat, lw=0, zorder=zorder,
            color=color, alpha=0.8)
        plt.plot(
            per_month.index, upper, "o",
            zorder=zorder, color=color,
            mec="k")

        # The top of this band is the bottom of the next one.
        lower = upper

    plt.xlim(datetime.datetime(2013, 1, 1), datetime.datetime(2019, 6, 30))
    plt.ylim(0, 230)

    plt.ylabel("Commits per month")
    plt.xlabel("Quarter")

    plt.tight_layout()
    plt.legend()
    plt.savefig("commits_community_bids.pdf")
    plt.show()