-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(scripts): Add script to calculate additional stats from Git repository
- Loading branch information
Showing 2 changed files with 169 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
#!/usr/bin/env python3 | ||
import subprocess | ||
import collections | ||
import os | ||
import argparse | ||
from typing import List, Dict | ||
from datetime import datetime | ||
|
||
|
||
class GitRepoAnalyzer:
    """Collect contribution statistics from a local Git repository.

    All data is obtained by running the ``git`` command-line tool against
    ``repo_path``; nothing is read from the repository files directly apart
    from checking that a ``.git`` entry exists.
    """

    # ASCII "unit separator": used to delimit fields inside one log line so
    # that a single ``git log`` walk can return every field of a commit.
    _FIELD_SEP = "\x1f"
    # author name, author e-mail, committer name, committer e-mail,
    # author date, subject -- joined with git's ``%x1f`` escape.
    _LOG_FORMAT = "%x1f".join(("%an", "%ae", "%cn", "%ce", "%ad", "%s"))
    _FIELD_COUNT = 6
    # Layout of git's "default" date format, e.g. "Mon Jan 1 10:00:00 2024 +0000".
    _DATE_FORMAT = "%a %b %d %H:%M:%S %Y %z"

    def __init__(self, repo_path: str):
        """Store the absolute repository path.

        Raises:
            ValueError: if ``repo_path`` has no ``.git`` entry.
        """
        self.repo_path = os.path.abspath(repo_path)
        if not self._is_valid_git_repo():
            raise ValueError(f"{self.repo_path} is not a valid Git repository")

    def _is_valid_git_repo(self) -> bool:
        """Return True when ``repo_path`` contains a ``.git`` entry."""
        # ``exists`` rather than ``isdir``: in linked worktrees and submodules
        # ``.git`` is a regular file pointing at the real git directory.
        return os.path.exists(os.path.join(self.repo_path, ".git"))

    def _run_git_command(self, args: List[str]) -> str:
        """Run ``git -C <repo_path> <args>`` and return its stdout as text.

        Raises:
            RuntimeError: if git is not installed or exits with a non-zero
                status. The original exception is chained as the cause.
        """
        try:
            raw = subprocess.check_output(
                ["git", "-C", self.repo_path] + args, stderr=subprocess.DEVNULL
            )
        except FileNotFoundError as err:
            raise RuntimeError("git executable not found on PATH") from err
        except subprocess.CalledProcessError as err:
            raise RuntimeError(f"Error running git command in {self.repo_path}") from err
        # Commit metadata is not guaranteed to be valid UTF-8; replace
        # undecodable bytes instead of aborting the whole analysis.
        return raw.decode("utf-8", errors="replace")

    def get_commit_data(self, format_string: str, *extra_args: str) -> List[str]:
        """Return one formatted string per commit, newest first.

        Args:
            format_string: a ``git log`` pretty-format placeholder string.
            *extra_args: additional options passed to ``git log``.

        Returns:
            A list with one entry per output line; empty when git prints
            nothing.
        """
        # ``tformat`` terminates every entry with a newline, so empty output
        # unambiguously means "no commits" and never a phantom empty entry.
        output = self._run_git_command(
            ["log", *extra_args, f"--pretty=tformat:{format_string}"]
        )
        lines = output.split("\n")
        if lines and lines[-1] == "":
            lines.pop()
        return lines

    def _read_commit_records(self) -> List[List[str]]:
        """Return ``[author, author_email, committer, committer_email, date, subject]`` per commit."""
        records = []
        # ``--date=default`` pins the date layout regardless of the user's
        # ``log.date`` configuration, so ``_DATE_FORMAT`` always matches.
        for line in self.get_commit_data(self._LOG_FORMAT, "--date=default"):
            # maxsplit keeps a separator inside the subject (last field) intact.
            fields = line.split(self._FIELD_SEP, self._FIELD_COUNT - 1)
            if len(fields) == self._FIELD_COUNT:
                records.append(fields)
        return records

    @staticmethod
    def _parse_numstat(output: str) -> Dict[str, int]:
        """Sum added/deleted lines and count distinct paths in ``--numstat`` output."""
        additions, deletions = 0, 0
        files_changed = set()
        for line in output.split("\n"):
            parts = line.split("\t", 2)
            if len(parts) != 3:
                continue  # blank separator line or unexpected content
            added, deleted, filename = parts
            files_changed.add(filename)
            # Binary files are reported as "-\t-\t<path>": they count as a
            # changed file but contribute no line totals.
            if added.isdigit() and deleted.isdigit():
                additions += int(added)
                deletions += int(deleted)
        return {
            "additions": additions,
            "deletions": deletions,
            "files_changed": len(files_changed),
        }

    @staticmethod
    def _count_remote_branches(output: str) -> int:
        """Count branches in ``git branch -r`` output, ignoring symbolic refs."""
        # Lines such as "origin/HEAD -> origin/main" are aliases, not branches.
        return sum(
            1 for line in output.split("\n") if line.strip() and " -> " not in line
        )

    def analyze(self) -> Dict[str, Dict]:
        """Gather all statistics in one dictionary.

        Returns:
            A dict with the keys ``authors``, ``author_emails``,
            ``committers``, ``committer_emails`` (each mapping a value to its
            commit count), ``commit_count``, ``date_range`` (``earliest``,
            ``latest``, ``duration``), ``file_changes`` (``additions``,
            ``deletions``, ``files_changed``), ``branch_count`` (remote-tracking
            branches only), ``commits_per_day`` and ``avg_message_length``.

        Raises:
            ValueError: if the history contains no commits or a date cannot
                be parsed.
            RuntimeError: if a git command fails.
        """
        records = self._read_commit_records()
        if not records:
            raise ValueError(f"{self.repo_path} has no commits to analyze")

        authors, author_emails, committers, committer_emails, dates, messages = zip(*records)
        commit_count = len(records)

        date_objects = [datetime.strptime(date, self._DATE_FORMAT) for date in dates]
        earliest_commit = min(date_objects)
        latest_commit = max(date_objects)
        repo_age = latest_commit - earliest_commit

        file_changes = self._parse_numstat(
            self._run_git_command(["log", "--numstat", "--format="])
        )
        branch_count = self._count_remote_branches(
            self._run_git_command(["branch", "-r"])
        )

        return {
            "authors": dict(collections.Counter(authors)),
            "author_emails": dict(collections.Counter(author_emails)),
            "committers": dict(collections.Counter(committers)),
            "committer_emails": dict(collections.Counter(committer_emails)),
            "commit_count": commit_count,
            "date_range": {"earliest": earliest_commit, "latest": latest_commit, "duration": repo_age},
            "file_changes": file_changes,
            "branch_count": branch_count,
            # +1 so a repository whose commits all fall on one day divides by 1.
            "commits_per_day": commit_count / (repo_age.days + 1),
            "avg_message_length": sum(len(msg) for msg in messages) / commit_count,
        }
|
||
|
||
class GitRepoReporter:
    """Print a plain-text report from a statistics dictionary.

    ``data`` is expected to have the keys read by the ``print_*`` methods
    below (``authors``, ``author_emails``, ``committers``, ``commit_count``,
    ``date_range``, ``file_changes``, ``branch_count``, ``commits_per_day``
    and ``avg_message_length``).
    """

    def __init__(self, data: Dict[str, Dict]):
        self.data = data

    def print_contribution_stats(self):
        """Print contributor count, commit count and commits per contributor."""
        contributor_count = len(self.data["authors"])
        commit_count = self.data["commit_count"]
        # Guard against an empty author table instead of dividing by zero.
        average = commit_count / contributor_count if contributor_count else 0.0
        print("\nContribution Statistics:")
        print(f"Total Contributors: {contributor_count}")
        print(f"Total Commits: {commit_count}")
        print(f"Average Commits per Contributor: {average:.2f}")

    def print_top_contributors(self, n: int):
        """Print the ``n`` authors with the most commits, highest first."""
        print(f"\nTop {n} Contributors:")
        ranked = sorted(self.data["authors"].items(), key=lambda x: x[1], reverse=True)
        # A negative n would otherwise slice from the end of the list.
        for author, count in ranked[: max(n, 0)]:
            print(f"  {author}: {count} commits")

    def print_email_domain_stats(self):
        """Print the five author e-mail domains with the most commits."""
        print("\nEmail Domain Statistics:")
        domain_commits = collections.Counter()
        for email, count in self.data["author_emails"].items():
            # rpartition never raises; addresses without "@" (or with an
            # empty domain) are grouped under a placeholder.
            _, at_sign, domain = email.rpartition("@")
            key = domain.lower() if at_sign and domain else "(no domain)"
            # Weight by commit count so the "commits" label is accurate.
            domain_commits[key] += count
        for domain, count in domain_commits.most_common(5):
            print(f"  {domain}: {count} commits")

    def print_time_stats(self):
        """Print repository age, first/latest commit dates and commit rate."""
        date_range = self.data["date_range"]
        print("\nTime Statistics:")
        print(f"Repository Age: {date_range['duration'].days} days")
        print(f"First Commit: {date_range['earliest'].strftime('%Y-%m-%d')}")
        print(f"Latest Commit: {date_range['latest'].strftime('%Y-%m-%d')}")
        print(f"Average Commits per Day: {self.data['commits_per_day']:.2f}")

    def print_code_change_stats(self):
        """Print line and file totals plus average changed lines per commit."""
        changes = self.data["file_changes"]
        commit_count = self.data["commit_count"]
        total_lines = changes["additions"] + changes["deletions"]
        # Guard against zero commits instead of dividing by zero.
        average = total_lines / commit_count if commit_count else 0.0
        print("\nCode Change Statistics:")
        print(f"Total Lines Added: {changes['additions']}")
        print(f"Total Lines Deleted: {changes['deletions']}")
        print(f"Total Files Changed: {changes['files_changed']}")
        print(f"Average Lines per Commit: {average:.2f}")

    def print_repo_structure(self):
        """Print the branch count."""
        print("\nRepository Structure:")
        print(f"Number of Branches: {self.data['branch_count']}")

    def print_commit_message_stats(self):
        """Print the average commit message length."""
        print("\nCommit Message Statistics:")
        print(f"Average Commit Message Length: {self.data['avg_message_length']:.2f} characters")

    def print_author_committer_diff(self):
        """Print every name whose authored and committed counts differ."""
        print("\nDifferences between Authors and Committers:")
        authors = self.data["authors"]
        committers = self.data["committers"]
        differences = [
            (name, authors.get(name, 0), committers.get(name, 0))
            for name in set(authors) | set(committers)
            if authors.get(name, 0) != committers.get(name, 0)
        ]
        if not differences:
            print("  No differences found between authors and committers")
            return
        # Largest gap first; the name tiebreak makes the order independent of
        # set iteration order.
        differences.sort(key=lambda item: (-abs(item[1] - item[2]), item[0]))
        for name, author_count, committer_count in differences:
            print(f"  {name}: Authored {author_count}, Committed {committer_count}")

    def print_summary(self, top_n: int = 5):
        """Print every report section in a fixed order."""
        print("Repository Analysis Summary")
        print("===========================")
        self.print_contribution_stats()
        self.print_top_contributors(top_n)
        self.print_email_domain_stats()
        self.print_time_stats()
        self.print_code_change_stats()
        self.print_repo_structure()
        self.print_commit_message_stats()
        self.print_author_committer_diff()
|
||
|
||
def main(argv=None):
    """Command-line entry point: analyze a repository and print the report.

    Args:
        argv: optional list of command-line arguments; when ``None``
            argparse reads ``sys.argv[1:]``.

    Exits with status 2 on invalid arguments and status 1 when the analysis
    fails, after printing the error to stderr.
    """
    import sys  # local import: keeps this function self-contained

    parser = argparse.ArgumentParser(description="Analyze Git repository for detailed contribution statistics")
    parser.add_argument("repo_path", nargs="?", default=".", help="Path to the Git repository (default: current directory)")
    parser.add_argument("-n", "--top_contributors", type=int, default=5, help="Number of top contributors to display (default: 5)")
    args = parser.parse_args(argv)

    # A negative count has no meaning for a "top N" list.
    if args.top_contributors < 0:
        parser.error("--top_contributors must be a non-negative integer")

    try:
        analyzer = GitRepoAnalyzer(args.repo_path)
        data = analyzer.analyze()
        reporter = GitRepoReporter(data)
        reporter.print_summary(args.top_contributors)
    except (ValueError, RuntimeError) as e:
        # Expected failures: not a repository, no commits, git failed.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything else and still signal failure.
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        sys.exit(1)
|
||
|
||
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()