Implements output, filters CLI arguments, and period aggregation

- Implement a function aggregating commit stats by author and/or by week|month
- Deletes type hints
- Modify TempRemoteRepo so that a remote fetch can be forced even for a
  local repository (useful in tests)
This commit is contained in:
Yann Weber 2023-11-26 16:19:05 +01:00
commit 15eb1c817a
3 changed files with 242 additions and 67 deletions

View file

@ -6,7 +6,4 @@ unittest:
lint:
pylint git_oh.py test.py
mypy:
mypy git_oh.py
.PHONY: lint mypy

288
git_oh.py
View file

@ -3,61 +3,72 @@
import argparse
import calendar
import csv
import datetime
import shutil
import sys
import tempfile
import warnings
from collections.abc import Sequence
from typing import Generator
import git
from git.repo.base import Repo
from git.objects.commit import Commit
def main():
""" Main function parse args from CLI and run the program """
""" Main function parse args from CLI, count commits in given
repository, and display/output them
"""
args = parse_args()
repo = TempRemoteRepo(args.url, silent=args.silent)
commit_stats = {}
for commit in repo:
author = commit.author
if author not in commit_stats:
commit_stats[author] = {'total': 0, 'off': 0}
commit_stats[author]['total'] += 1
if not in_office_hours(commit.committed_datetime,
commit_datetime = commit.authored_datetime
if args.from_date is not None and \
args.from_date > commit_datetime.date():
continue
if args.to_date is not None and\
args.to_date < commit_datetime.date():
continue
in_oh = in_office_hours(commit_datetime,
starthour=args.daystart,
stophour=args.daystop,
weekend=args.weekend):
print(f"{commit} {commit.author.name:>30s}\
{commit.committed_datetime.strftime('%a %H:%M %Y-%m-%d')}")
commit_stats[author]['off'] += 1
weekend=args.weekend)
commit_by_author(commit.author, commit_datetime, in_oh, commit_stats,
group=args.group_by)
if args.verbose and not in_oh:
print(f"{commit} \
{commit.authored_datetime.strftime('%a %H:%M %Y-%m-%d')} \
{commit.author.name!r}")
for author, stats in commit_stats.items():
pct_off = (stats['off'] / stats['total'])*100
print(f"{author.name:20s} : {pct_off:3.1f}% off ({stats['off']}/{stats['total']})")
if len(commit_stats) == 0:
print("No commit in given repository/date range", file=sys.stderr)
return
if args.csv_output is None:
print(result_cli(commit_stats))
else:
result_csv(commit_stats, args.csv_output)
args.csv_output.close()
def in_office_hours(moment:datetime.datetime=datetime.datetime.now(),
starthour:datetime.time=datetime.time(8,0,0),
stophour:datetime.time=datetime.time(20,0,0),
weekend:Sequence[int]=(5,6))->bool:
def in_office_hours(moment=datetime.datetime.now(),
starthour=datetime.time(8,0,0),
stophour=datetime.time(20,0,0),
weekend=(5,6)):
""" Indicates if a moment is in office hours.
Office hours are localized, comparisons are done without
Office hours are localized, comparisons is done without
taking care of tzoffset : in fact, if a working day starts at
08:00, 07:59:59+0200 is off as 07:59:59+0000 is.
Arguments :
- moment : the moment to compare with office hours
- starthour : standard day start hour
- stophour : standard day stop hour
- moment : the datetime.datetime to compare with office hours
- starthour : standard day start datetime.time
- stophour : standard day stop datetime.time
- weekend : list of days off (0,1,....,6)
Returns True if in office hours else False
"""
for dow in weekend:
if dow < 0 or dow > 6:
@ -73,7 +84,55 @@ tzinfo : comparisons are done without taking tzoffset in considaration")
return starthour <= localtime <= stophour
def iter_commits(repo:Repo)->Generator[Commit, None, None]:
def commit_by_author(author, moment, in_oh, acc, group=None):
    """ Update stats with commits total and commits off office hours by
        author

        Arguments :
            - author : the commit author, used as key in acc (main() gives
                the commit's author attribute)
            - moment : the commit authored datetime.datetime
            - in_oh : boolean indicating if the commit is in office hours
            - acc : the dictionary accumulating the commits count (modified
                in place). Keys are authors and values are dicts of
                counters indexed by group.
                The group 'all' is always present.
                When grouping by month, keys formatted YYYY-MM are added.
                When grouping by week, keys formatted YYYY-Www (ISO 8601
                year and week number, e.g. 2023-W47) are added.
                Each group is a dict with keys :
                    - 'total' for the total number of commits
                    - 'off_oh' for the number of commits out of office hours
            - group : None (no grouping), 'month' or 'week'

        Raises ValueError if group is not None, 'week' or 'month'. In this
        case acc is left untouched.

        Returns None, modifications are done in acc by reference
    """
    if group == 'week':
        # Index the result instead of reading .year/.week : isocalendar()
        # only returns a named tuple since Python 3.9.
        # The ISO year is used on purpose : early January days can belong
        # to the last ISO week of the previous year.
        iso_year, iso_week = moment.isocalendar()[:2]
        group_key = f"{iso_year:04d}-W{iso_week:02d}"
    elif group == 'month':
        group_key = f"{moment.year:04d}-{moment.month:02d}"
    elif group is None:
        group_key = None
    else:
        err = f"Invalid group {group!r}. Valid groups are week and month"
        raise ValueError(err)

    counters = acc.setdefault(author, {'all': {'off_oh': 0, 'total': 0}})

    # Every commit is counted in 'all' and, when grouping, in its period
    touched = [counters['all']]
    if group_key is not None:
        touched.append(
            counters.setdefault(group_key, {'off_oh': 0, 'total': 0}))

    for counter in touched:
        counter['total'] += 1
        if not in_oh:
            counter['off_oh'] += 1
def iter_commits(repo):
""" Generator on all git commits in given repository.
Recursively iterate on each commit in each branch/ref not yielding
@ -89,53 +148,151 @@ def iter_commits(repo:Repo)->Generator[Commit, None, None]:
encountered.add(commit.binsha)
yield commit
class TempRemoteRepo(git.repo.base.Repo):
    """ A temporary repository referencing a remote repository

        Allows to iterate on all commits of the remote : an empty
        repository is initialized in a temporary directory, the remote is
        added as "origin" and fetched.
        If the url is a local file:// url, the local repository is opened
        directly and no temporary repository is created (unless
        force_remote is True).
    """

    def __init__(self, remote_url, silent=True, force_remote=False):
        """ Initialize a new empty repository referencing a remote repo in
            order to iterate on its commits

            Arguments :
                - remote_url : The url of the remote repo to reference
                - silent : if False display fetch progress on stderr
                - force_remote : if True create a temporary repo even for
                    local file:// url
        """
        # Set first, so that __del__ can always rely on the attribute
        self.temppath = None

        if (not force_remote) and remote_url.startswith("file://"):
            repo_path = remote_url[len("file://"):]
        else:
            repo_path = self.temppath = tempfile.mkdtemp(prefix="git_oh_")
            git.Repo.init(self.temppath)

        super().__init__(repo_path)

        if self.temppath is not None:
            remote_progress = None if silent else RemoteFetchProgress()
            origin = self.create_remote("origin", remote_url)
            origin.fetch(progress=remote_progress)

    def __del__(self):
        """ Clean temporary repository if needed

            Safe to call more than once : the temporary path is forgotten
            before the directory is removed, so a later call (e.g. by the
            garbage collector after an explicit call) does not try to
            remove it again.
        """
        temppath = getattr(self, "temppath", None)
        self.temppath = None
        try:
            # Let the base class release the repository before its
            # directory is deleted
            super().__del__()
        finally:
            # Remove the directory even if the base class cleanup failed
            if temppath is not None:
                shutil.rmtree(temppath)

    def __iter__(self):
        """ Iterate on commits (see iter_commits()) """
        return iter_commits(self)
class RemoteFetchProgress(git.RemoteProgress):
    """ Progress reporter given to a remote fetch """

    def update(self, op_code, cur_count, max_count=None, message=""):
        """ Write the current progression on a single line of stderr

            Nothing is displayed when max_count is missing or zero, as no
            percentage can be computed.
        """
        if max_count:
            percent = (cur_count / max_count) * 100
            line = f"{percent:5.1f}% {message}"
            # Pad to 80 columns and end with a carriage return so that
            # the next update overwrites this one
            print(line.ljust(80), end="\r", file=sys.stderr)
class TempRemoteRepo(Repo):
""" A temporary repository referencing a remote repository
Allows to iterate on all commits without cloning the remote
"""
def result_csv(commit_stats, ofd=sys.stdout):
""" Output the stats in an open file as CSV
def __init__(self, remote_url:str, silent=True):
""" Initialize a new empty repository referencing a remote repo
Output 3 columns by group :
- GRP-total : the number of commit in the group
- GRP-off_oh : the number of commit off office hour
- GRP-prop_off_oh : the ratio off_oh / total
Arguments :
- remote_url : The url of the remote repo to reference
- silent : if False display progress on stderr
- commit_stats : A dict with author as key and stats as values (
see commit_by_author() )
- ofd : Output TextIO
"""
self.temppath = tempfile.mkdtemp(prefix="git_oh_")
git.Repo.init(self.temppath)
super().__init__(self.temppath)
groups = set()
for author, stats in commit_stats.items():
groups.update(stats.keys())
remote_progress = None if silent else RemoteFetchProgress()
self.create_remote("origin", remote_url).fetch(progress=remote_progress)
for author in commit_stats:
for group in groups:
commit_stats[author].setdefault(group, {'total': 0, 'off_oh': 0})
gstat = commit_stats[author][group]
if gstat['total']:
gstat['prop_off_oh'] = gstat['off_oh']/gstat['total']
else:
gstat['prop_off_oh'] = 0.0
def __del__(self):
shutil.rmtree(self.temppath)
super().__del__()
groups = sorted(groups)
all_keys = ['author', 'author_email']
for group in groups:
all_keys += [
f"{group}-total",
f"{group}-off_oh",
f"{group}-prop_off_oh"]
writer = csv.DictWriter(ofd, all_keys)
writer.writeheader()
for author, stats in commit_stats.items():
row = {'author': author.name,
'author_email': author.email}
for kgroup, values in stats.items():
row.update({f'{kgroup}-{kval}': val
for kval, val in values.items()})
writer.writerow(row)
def __iter__(self)->Generator[Commit, None, None]:
return iter_commits(self)
def result_cli(commit_stats):
""" Format stats for cli output
Arguments :
- commit_stats : A dict with author as key and stats as values (
see commit_by_author() )
def valid_day(value:str) -> int:
Returns a string representing an ascii array with precentage of commits
off office hours.
"""
all_keys = set()
for stats in commit_stats.values():
all_keys.update(stats.keys())
all_keys = sorted(all_keys)
stats_count = len(all_keys)
stat_width = 10
author_width = max((len(author.name) for author in commit_stats))+2
hsep = '+' + ('-' * author_width) + '+'
hsep += (('-' * stat_width)+ '+') * stats_count
hsep += "\n"
header = f"|{'Author':>{author_width-1}s} |"
for key in all_keys:
header += f"{key:>{stat_width-1}s} |"
result = f"{hsep}{header}\n{hsep}"
for author, stats in commit_stats.items():
result += f"|{author.name:>{author_width-1}s} |"
for key in all_keys:
if key not in stats:
result += f"{'none':>{stat_width-1}s} |"
else:
pct = (stats[key]['off_oh'] / stats[key]['total'])*100
result += f"{pct:{stat_width-2}.0f}% |"
result += f"\n{hsep}"
return result
def valid_day(value):
""" Take a locale dayname and convert it to a day number
Valid days are calendar.day_name, calendar.day_abbr and integers
in [0..6]
Arguments :
- value : the value to convert
- value : the str to convert
Returns an int in [0..6]
"""
@ -167,28 +324,47 @@ def valid_day(value:str) -> int:
raise ValueError(f"Invalid day {value!r}. Valid days are : {valid_days}")
def parse_args(argv:list[str]=None):
def parse_args(argv=None):
""" Argument parser for CLI
Exits printing help if invalid arguments given, else returns
Arguments :
- argv : A list like sys.argv[1:]
Print help and sys.exit() if invalid arguments given, else returns
arguments in a named tuple.
"""
parser = argparse.ArgumentParser(description="""Program description
With fancy multiline explanations
""")
parser = argparse.ArgumentParser(description="Count the number of \
commits done in/out of office hours.")
parser.add_argument("url", help="Git repository URL")
parser.add_argument("-d", "--daystart", type=datetime.time.fromisoformat,
default=datetime.time(8,0,0),
help="Day start time (ISO 8601)")
parser.add_argument("-D", "--daystop", type=datetime.time.fromisoformat,
default=datetime.time(20,0,0),
help="Day stop time (ISO 8601)")
parser.add_argument("-w", "--weekend", type=str, default="5,6",
metavar='DAYSOFF',
help="Indicate days off separated by ','. Locale day names or \
days number (0..6 starting with monday) can be used. By default saturday \
and sunday are off. Use special value -w NUL to indicate no days off.")
parser.add_argument("-f", "--from", type=datetime.date.fromisoformat,
dest="from_date",
help="Exclude commits before given ISO 8601 date")
parser.add_argument("-t", "--to", type=datetime.date.fromisoformat,
dest="to_date",
help="Exclude commits after given ISO 8601 date")
parser.add_argument("-g", "--group-by", choices=['week', 'month'],
help="Generate commit stats by week or month")
parser.add_argument("-s", "--silent", action="store_true", default=False,
help="Do not display fetch progression on stderr")
parser.add_argument("-o", "--csv-output", type=argparse.FileType("w"),
default=None,
help="Store commit counts in a CSV file instead of printing it \
on stdout (use '-' for stdout)")
parser.add_argument("-v", "--verbose", action="store_true",
default=False,
help="Output all commit off office hours on stdout")
args = parser.parse_args(argv)

View file

@ -79,7 +79,8 @@ class TestGitIterations(unittest.TestCase):
""" Testing TempRemoteRepo class commit fetch __iter__ method """
commits = {self.git_commit_mod(author=self.actors[0]).hexsha
for _ in range(10)}
repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}")
repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}",
force_remote=True)
found_commits = {commit.hexsha for commit in repo}
@ -89,7 +90,8 @@ class TestGitIterations(unittest.TestCase):
def test_temp_remote_repo_cleanup(self):
""" Testing TempRemoteRepo class cleanup """
self.git_commit_mod(author=self.actors[0])
repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}")
repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}",
force_remote=True)
tmppath = repo.temppath
self.assertTrue(os.path.isdir(tmppath))
@ -114,7 +116,7 @@ class TestGitIterations(unittest.TestCase):
for _ in range(10)]
self.repo.git.checkout("HEAD", b=f"branch-{i:d}")
repo_url = f"file://{self.repo_path:s}"
repo = git_oh.TempRemoteRepo(repo_url)
repo = git_oh.TempRemoteRepo(repo_url, force_remote=True)
found_commits = {commit.hexsha for commit in repo}
commits = {commit.hexsha for commit in commits}