Browse Source

Implement output, CLI filter arguments, and period aggregation

- Implement a function aggregating commit stats by author and/or by week|month
- Delete type hints
- Modify TempRemoteRepo so that a remote fetch can be forced even for a local
  repository (useful in tests)
Yann Weber 11 months ago
parent
commit
15eb1c817a
3 changed files with 243 additions and 68 deletions
  1. 0
    3
      Makefile
  2. 238
    62
      git_oh.py
  3. 5
    3
      test.py

+ 0
- 3
Makefile View File

@@ -6,7 +6,4 @@ unittest:
6 6
 lint:
7 7
 	pylint git_oh.py test.py
8 8
 
9
-mypy:
10
-	mypy git_oh.py
11
-
12 9
 .PHONY: lint mypy

+ 238
- 62
git_oh.py View File

@@ -3,61 +3,72 @@
3 3
 
4 4
 import argparse
5 5
 import calendar
6
+import csv
6 7
 import datetime
7 8
 import shutil
8 9
 import sys
9 10
 import tempfile
10 11
 import warnings
11
-from collections.abc import Sequence
12
-from typing import Generator
13 12
 
14 13
 import git
15 14
 
16
-from git.repo.base import Repo
17
-from git.objects.commit import Commit
18 15
 
19 16
 def main():
20
-    """ Main function parse args from CLI and run the program """
17
+    """ Main function parse args from CLI, count commits in given 
18
+        repository, and display/output them
19
+    """
21 20
     args = parse_args()
22 21
 
23 22
     repo = TempRemoteRepo(args.url, silent=args.silent)
24 23
 
25 24
     commit_stats = {}
26
-
27 25
     for commit in repo:
28
-        author = commit.author
29
-        if author not in commit_stats:
30
-            commit_stats[author] = {'total': 0, 'off': 0}
31
-
32
-        commit_stats[author]['total'] += 1
33
-        if not in_office_hours(commit.committed_datetime,
34
-                starthour=args.daystart,
35
-                stophour=args.daystop,
36
-                weekend=args.weekend):
37
-            print(f"{commit} {commit.author.name:>30s}\
38
-{commit.committed_datetime.strftime('%a %H:%M %Y-%m-%d')}")
39
-            commit_stats[author]['off'] += 1
40
-
41
-    for author, stats in commit_stats.items():
42
-        pct_off = (stats['off'] / stats['total'])*100
43
-        print(f"{author.name:20s} : {pct_off:3.1f}% off ({stats['off']}/{stats['total']})")
26
+        commit_datetime = commit.authored_datetime
27
+        if args.from_date is not None and \
28
+                args.from_date > commit_datetime.date():
29
+            continue
30
+        if args.to_date is not None and\
31
+                args.to_date < commit_datetime.date():
32
+            continue
33
+        in_oh = in_office_hours(commit_datetime,
34
+                    starthour=args.daystart,
35
+                    stophour=args.daystop,
36
+                    weekend=args.weekend)
37
+        commit_by_author(commit.author, commit_datetime, in_oh, commit_stats,
38
+                group=args.group_by)
39
+        if args.verbose and not in_oh:
40
+            print(f"{commit} \
41
+{commit.authored_datetime.strftime('%a %H:%M %Y-%m-%d')} \
42
+{commit.author.name!r}")
43
+
44
+    if len(commit_stats) == 0:
45
+        print("No commit in given repository/date range", file=sys.stderr)
46
+        return
47
+
48
+    if args.csv_output is None:
49
+        print(result_cli(commit_stats))
50
+    else:
51
+        result_csv(commit_stats, args.csv_output)
52
+        args.csv_output.close()
44 53
 
45 54
 
46
-def in_office_hours(moment:datetime.datetime=datetime.datetime.now(),
47
-        starthour:datetime.time=datetime.time(8,0,0),
48
-        stophour:datetime.time=datetime.time(20,0,0),
49
-        weekend:Sequence[int]=(5,6))->bool:
55
+def in_office_hours(moment=datetime.datetime.now(),
56
+        starthour=datetime.time(8,0,0),
57
+        stophour=datetime.time(20,0,0),
58
+        weekend=(5,6)):
50 59
     """ Indicates if a moment is in office hours.
51 60
 
52
-    Office hours are localized, comparisons are done without
61
+    Office hours are localized, comparisons is done without
53 62
     taking care of tzoffset : in fact, if a working day starts at
54 63
     08:00, 07:59:59+0200 is off as 07:59:59+0000 is.
55 64
 
56 65
     Arguments :
57
-        - moment : the moment to compare with office hours
58
-        - starthour : standard day start hour
59
-        - stophour : standard day stop hour
66
+        - moment : the datetime.datetime to compare with office hours
67
+        - starthour : standard day start datetime.time
68
+        - stophour : standard day stop datetime.time
60 69
         - weekend : list of days off (0,1,....,6)
70
+
71
+    Returns True if in office hours else False
61 72
     """
62 73
     for dow in weekend:
63 74
         if dow < 0 or dow > 6:
@@ -73,7 +84,55 @@ tzinfo : comparisons are done without taking tzoffset in considaration")
73 84
     return starthour <= localtime <= stophour
74 85
 
75 86
 
76
-def iter_commits(repo:Repo)->Generator[Commit, None, None]:
87
+def commit_by_author(author, moment, in_oh, acc, group=None):
88
+    """ Update stats with commits total and commits off office hour by
89
+        authors
90
+
91
+    Arguments :
92
+    - author : The commit author (git.util.Author instance)
93
+    - moment : The commit authored datetime.datetime
94
+    - in_oh : boolean indicating if a commit is in office hours
95
+    - acc : the dictionnary accumulating the commits count (modified by
96
+        reference). Keys are author instances and values are dict with
97
+        counters values.
98
+        Counters are dict with group as key. The group 'all' is always
99
+        present.
100
+        When grouping by month group key in format YEAR-MONTH are added.
101
+        When grouping by week group key in format YEAR-WEEK are added.
102
+        Each group is a dict with keys :
103
+        - 'total' for total number of commit
104
+        - 'off_oh' for the number of commit out off office hours
105
+    - group : Commit count can be aggregated by 'month' or 'week'
106
+
107
+    Returns None, modifications are done in acc by reference
108
+    """
109
+    group_key = None
110
+    if group == 'week':
111
+        cal = moment.isocalendar()
112
+        group_key = f"{cal.year:04d}-W{cal.week:02d}"
113
+    elif group == 'month':
114
+        group_key = f"{moment.year:04d}-{moment.month:02d}"
115
+    elif group is None:
116
+        group_key = None
117
+    else:
118
+        err = f"Invalid group {group!r}. Valid groups are week and month"
119
+        raise ValueError(err)
120
+
121
+    if author not in acc:
122
+        acc[author] = {'all': {'off_oh': 0, 'total': 0}}
123
+    acc[author]['all']['total'] += 1
124
+    if group_key:
125
+        if group_key not in acc[author]:
126
+            acc[author].update({group_key: {'off_oh': 0, 'total': 0}})
127
+        acc[author][group_key]['total'] += 1
128
+
129
+    if not in_oh:
130
+        acc[author]['all']['off_oh'] += 1
131
+        if group_key:
132
+            acc[author][group_key]['off_oh'] += 1
133
+
134
+
135
+def iter_commits(repo):
77 136
     """ Generator on all git commits in given repository.
78 137
 
79 138
     Recursively iterate on each commit in each branch/ref not yielding
@@ -89,53 +148,151 @@ def iter_commits(repo:Repo)->Generator[Commit, None, None]:
89 148
                 encountered.add(commit.binsha)
90 149
                 yield commit
91 150
 
92
-class RemoteFetchProgress(git.RemoteProgress):
93
-    """ Report progress for remote fetch """
94
-
95
-    def update(self, op_code, cur_count, max_count=None, message=""):
96
-        """ Display progression on a single line of stderr """
97
-        if not max_count:
98
-            return
99
-        pct = (cur_count / max_count)*100
100
-        msg = f"{pct:5.1f}% {message}"
101
-        print(f"{msg:<80s}", end="\r", file=sys.stderr)
102
-
103
-class TempRemoteRepo(Repo):
151
+class TempRemoteRepo(git.repo.base.Repo):
104 152
     """ A temporary repository referencing a remote repository
105 153
 
106
-    Allows to iterate on all commits without cloning the remote
154
+    Allows to iterate on all commits without cloning the remote.
155
+    If repository is local iterate on local commit without using
156
+    a temporary repository
107 157
     """
108 158
 
109
-    def __init__(self, remote_url:str, silent=True):
110
-        """ Initialize a new empty repository referencing a remote repo
159
+    def __init__(self, remote_url, silent=True, force_remote=False):
160
+        """ Initialize a new empty repository referencing a remote repo in
161
+        order to iterate on its commit
111 162
 
112 163
         Arguments :
113 164
             - remote_url : The url of the remote repo to reference
114 165
             - silent : if False display progress on stderr
166
+            - force_remote : if True create a temporary repo even for
167
+                local file:// url
115 168
         """
116
-        self.temppath = tempfile.mkdtemp(prefix="git_oh_")
117
-        git.Repo.init(self.temppath)
118
-        super().__init__(self.temppath)
119
-
120
-        remote_progress = None if silent else RemoteFetchProgress()
121
-        self.create_remote("origin", remote_url).fetch(progress=remote_progress)
169
+        self.temppath = None
170
+        if (not force_remote) and remote_url.startswith("file://"):
171
+            repo_path = remote_url[len("file://"):]
172
+        else:
173
+            repo_path = self.temppath = tempfile.mkdtemp(prefix="git_oh_")
174
+            git.Repo.init(self.temppath)
175
+        super().__init__(repo_path)
176
+
177
+        if self.temppath is not None:
178
+            remote_progress = None if silent else RemoteFetchProgress()
179
+            origin = self.create_remote("origin", remote_url)
180
+            origin.fetch(progress=remote_progress)
122 181
 
123 182
     def __del__(self):
124
-        shutil.rmtree(self.temppath)
183
+        """ Clean temporary repository if needed """
184
+        if self.temppath is not None:
185
+            shutil.rmtree(self.temppath)
125 186
         super().__del__()
126 187
 
127
-    def __iter__(self)->Generator[Commit, None, None]:
188
+    def __iter__(self):
189
+        """ Iterate on commits """
128 190
         return iter_commits(self)
129 191
 
130 192
 
131
-def valid_day(value:str) -> int:
193
+class RemoteFetchProgress(git.RemoteProgress):
194
+    """ Report progress for remote fetch """
195
+
196
+    def update(self, op_code, cur_count, max_count=None, message=""):
197
+        """ Display progression on a single line of stderr """
198
+        if max_count:
199
+            pct = (cur_count / max_count)*100
200
+            msg = f"{pct:5.1f}% {message}"
201
+            print(f"{msg:<80s}", end="\r", file=sys.stderr)
202
+
203
+
204
+def result_csv(commit_stats, ofd=sys.stdout):
205
+    """ Output the stats in an open file as CSV
206
+
207
+    Output 3 columns by group :
208
+    - GRP-total : the number of commit in the group
209
+    - GRP-off_oh : the number of commit off office hour
210
+    - GRP-prop_off_oh : the ratio off_oh / total
211
+
212
+    Arguments :
213
+    - commit_stats : A dict with author as key and stats as values (
214
+        see commit_by_author() )
215
+    - ofd : Output TextIO
216
+    """
217
+    groups = set()
218
+    for author, stats in commit_stats.items():
219
+        groups.update(stats.keys())
220
+
221
+    for author in commit_stats:
222
+        for group in groups:
223
+            commit_stats[author].setdefault(group, {'total': 0, 'off_oh': 0})
224
+            gstat = commit_stats[author][group]
225
+            if gstat['total']:
226
+                gstat['prop_off_oh'] = gstat['off_oh']/gstat['total']
227
+            else:
228
+                gstat['prop_off_oh'] = 0.0
229
+
230
+    groups = sorted(groups)
231
+    all_keys  = ['author', 'author_email']
232
+    for group in groups:
233
+        all_keys += [
234
+                f"{group}-total",
235
+                f"{group}-off_oh",
236
+                f"{group}-prop_off_oh"]
237
+    writer = csv.DictWriter(ofd, all_keys)
238
+    writer.writeheader()
239
+    for author, stats in commit_stats.items():
240
+        row = {'author': author.name,
241
+               'author_email': author.email}
242
+        for kgroup, values in stats.items():
243
+            row.update({f'{kgroup}-{kval}': val
244
+                        for kval, val in values.items()})
245
+        writer.writerow(row)
246
+
247
+def result_cli(commit_stats):
248
+    """ Format stats for cli output
249
+
250
+    Arguments :
251
+    - commit_stats : A dict with author as key and stats as values (
252
+        see commit_by_author() )
253
+
254
+    Returns a string representing an ascii array with precentage of commits
255
+    off office hours.
256
+    """
257
+
258
+    all_keys = set()
259
+    for stats in commit_stats.values():
260
+        all_keys.update(stats.keys())
261
+    all_keys = sorted(all_keys)
262
+    stats_count = len(all_keys)
263
+
264
+    stat_width = 10
265
+    author_width = max((len(author.name) for author in commit_stats))+2
266
+
267
+    hsep = '+' + ('-' * author_width) + '+'
268
+    hsep += (('-' * stat_width)+ '+') * stats_count
269
+    hsep += "\n"
270
+
271
+    header = f"|{'Author':>{author_width-1}s} |"
272
+    for key in all_keys:
273
+        header += f"{key:>{stat_width-1}s} |"
274
+
275
+    result = f"{hsep}{header}\n{hsep}"
276
+    for author, stats in commit_stats.items():
277
+        result += f"|{author.name:>{author_width-1}s} |"
278
+        for key in all_keys:
279
+            if key not in stats:
280
+                result += f"{'none':>{stat_width-1}s} |"
281
+            else:
282
+                pct = (stats[key]['off_oh'] / stats[key]['total'])*100
283
+                result += f"{pct:{stat_width-2}.0f}% |"
284
+        result += f"\n{hsep}"
285
+
286
+    return result
287
+
288
+def valid_day(value):
132 289
     """ Take a locale dayname and convert it to a day number
133 290
 
134 291
     Valid days are calendar.day_name, calendar.day_abbr and integers
135 292
     in [0..6]
136 293
 
137 294
     Arguments :
138
-        - value : the value to convert
295
+        - value : the str to convert
139 296
 
140 297
     Returns an int in [0..6]
141 298
     """
@@ -167,28 +324,47 @@ def valid_day(value:str) -> int:
167 324
     raise ValueError(f"Invalid day {value!r}. Valid days are : {valid_days}")
168 325
 
169 326
 
170
-def parse_args(argv:list[str]=None):
327
+def parse_args(argv=None):
171 328
     """ Argument parser for CLI
172 329
     
173
-    Exits printing help if invalid arguments given, else returns 
330
+    Arguments :
331
+    - argv : A list like sys.argv[1:]
332
+
333
+    Print help and sys.exit() if invalid arguments given, else returns 
174 334
     arguments in a named tuple.
175 335
     """
176
-    parser = argparse.ArgumentParser(description="""Program description
177
-
178
-With fancy multiline explanations
179
-""")
336
+    parser = argparse.ArgumentParser(description="Count the number of \
337
+commits done in/out of office hours.")
180 338
 
181 339
     parser.add_argument("url", help="Git repository URL")
182 340
     parser.add_argument("-d", "--daystart", type=datetime.time.fromisoformat,
341
+            default=datetime.time(8,0,0),
183 342
             help="Day start time (ISO 8601)")
184 343
     parser.add_argument("-D", "--daystop", type=datetime.time.fromisoformat,
344
+            default=datetime.time(20,0,0),
185 345
             help="Day stop time (ISO 8601)")
186 346
     parser.add_argument("-w", "--weekend", type=str, default="5,6",
347
+            metavar='DAYSOFF',
187 348
             help="Indicate days off separated by ','. Locale day names or \
188 349
 days number (0..6 starting with monday) can be used. By default saturday \
189 350
 and sunday are off. Use special value -w NUL to indicate no days off.")
351
+    parser.add_argument("-f", "--from", type=datetime.date.fromisoformat,
352
+            dest="from_date",
353
+            help="Exclude commits before given ISO 8601 date")
354
+    parser.add_argument("-t", "--to", type=datetime.date.fromisoformat,
355
+            dest="to_date",
356
+            help="Exclude commits after given ISO 8601 date")
357
+    parser.add_argument("-g", "--group-by", choices=['week', 'month'],
358
+            help="Generate commit stats by week or month")
190 359
     parser.add_argument("-s", "--silent", action="store_true", default=False,
191 360
             help="Do not display fetch progression on stderr")
361
+    parser.add_argument("-o", "--csv-output", type=argparse.FileType("w"),
362
+            default=None,
363
+            help="Store commit counts in a CSV file instead of printing it \
364
+on stdout (use '-' for stdout)")
365
+    parser.add_argument("-v", "--verbose", action="store_true",
366
+            default=False,
367
+            help="Output all commit off office hours on stdout")
192 368
 
193 369
     args = parser.parse_args(argv)
194 370
 

+ 5
- 3
test.py View File

@@ -79,7 +79,8 @@ class TestGitIterations(unittest.TestCase):
79 79
         """ Testing TempRemoteRepo class commit fetch __iter__ method """
80 80
         commits = {self.git_commit_mod(author=self.actors[0]).hexsha
81 81
                 for _ in range(10)}
82
-        repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}")
82
+        repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}",
83
+                force_remote=True)
83 84
 
84 85
         found_commits = {commit.hexsha for commit in repo}
85 86
 
@@ -89,7 +90,8 @@ class TestGitIterations(unittest.TestCase):
89 90
     def test_temp_remote_repo_cleanup(self):
90 91
         """ Testing TempRemoteRepo class cleanup """
91 92
         self.git_commit_mod(author=self.actors[0])
92
-        repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}")
93
+        repo = git_oh.TempRemoteRepo(f"file://{self.repo_path:s}",
94
+                force_remote=True)
93 95
         tmppath = repo.temppath
94 96
 
95 97
         self.assertTrue(os.path.isdir(tmppath))
@@ -114,7 +116,7 @@ class TestGitIterations(unittest.TestCase):
114 116
                     for _ in range(10)]
115 117
             self.repo.git.checkout("HEAD", b=f"branch-{i:d}")
116 118
         repo_url = f"file://{self.repo_path:s}"
117
-        repo = git_oh.TempRemoteRepo(repo_url)
119
+        repo = git_oh.TempRemoteRepo(repo_url, force_remote=True)
118 120
         found_commits = {commit.hexsha for commit in repo}
119 121
         commits = {commit.hexsha for commit in commits}
120 122
 

Loading…
Cancel
Save