"""Parse GitHub-style unified diffs into hunks and render them side by side."""
import logging
import re
import sys
from collections.abc import Sequence

try:
    from rich.logging import RichHandler
except ImportError:
    # rich only affects how log records are rendered; the parser classes do
    # not need it, so fall back to a plain stream handler when it is missing.
    RichHandler = None

# Hunk header: "@@ -l,s +l,s @@ optional section heading".
# The ",s" part is optional in the unified format; when omitted it means 1.
HEADER_RE = re.compile(
    r'@@ -(?P<original_start_line>\d+)(?:,(?P<original_line_count>\d+))?'
    r' \+(?P<new_start_line>\d+)(?:,(?P<new_line_count>\d+))?'
    r' @@ ?(?P<context>.*)'
)

logging.basicConfig(
    level="INFO",
    format="%(message)s",
    datefmt="[%X]",
    handlers=[
        RichHandler() if RichHandler is not None else logging.StreamHandler()
    ],
)
logger = logging.getLogger(__name__)


class SideBySideDiffLine:
    """One row of a side-by-side view: a line number plus both sides' text."""

    line: int
    original: str
    new: str

    def __init__(self, line: int, original: str, new: str):
        self.line = line
        self.original = original
        self.new = new

    def __repr__(self) -> str:
        return (
            f'{type(self).__name__}(line={self.line!r}, '
            f'original={self.original!r}, new={self.new!r})'
        )


class Hunk:
    """A single hunk of a unified diff: its parsed header and body lines."""

    header: str  # raw "@@ ... @@" header line
    lines: list[str]  # raw body lines, exactly as they followed the header
    original_start_line: int
    original_line_count: int
    new_start_line: int
    new_line_count: int
    context: str  # optional section heading that follows the second "@@"
    original: list[str]  # context and '-' lines, prefix character kept
    new: list[str]  # context and '+' lines, prefix character kept

    def __init__(self, hunk_str: str):
        """Split *hunk_str* into header and body and parse both.

        Raises:
            ValueError: if *hunk_str* is empty or its first line is not a
                valid hunk header.
        """
        hunk_lines = hunk_str.splitlines()
        if not hunk_lines:
            raise ValueError('Error parsing empty hunk')
        header, *body = hunk_lines
        self.header = header
        self.lines = body
        self.parse_header(header)
        self.parse_body(body)

    def parse_header(self, header: str) -> None:
        """Parses a header.

        The format is:
            @@ -l,s +l,s @@ optional section heading
        where
            l: starting line
            s: number of lines (may be omitted, in which case it is 1)

        example:
            @@ -13,10 +13,15 @@ on:

        Raises:
            ValueError: if *header* does not match the format above.
        """
        match = HEADER_RE.match(header)
        if match is None:
            raise ValueError(f'Error parsing {header}')
        self.original_start_line = int(match.group('original_start_line'))
        # An unmatched optional group is None; the string '0' is truthy, so
        # `or 1` only kicks in when the count was really omitted.
        self.original_line_count = int(match.group('original_line_count') or 1)
        self.new_start_line = int(match.group('new_start_line'))
        self.new_line_count = int(match.group('new_line_count') or 1)
        self.context = match.group('context')

    def parse_body(self, lines: list[str]) -> None:
        """Distribute body *lines* into the ``original`` and ``new`` sides.

        '-' lines go to ``original``, '+' lines go to ``new``, and every other
        line is treated as context and goes to both. Prefix characters are
        kept so the rendered view still shows what changed.
        """
        self.original = []
        self.new = []
        for line in lines:
            if line.startswith('\\'):
                # "\ No newline at end of file" is a marker, not content; it
                # is not included in the header counts, so keeping it would
                # shift every following row.
                continue
            if line.startswith('-'):
                self.original.append(line)
            elif line.startswith('+'):
                self.new.append(line)
            else:
                self.original.append(line)
                self.new.append(line)

    def side_by_side(self) -> Sequence[SideBySideDiffLine]:
        """Return one row per line number spanned by either side of the hunk.

        Rows run from the smaller of the two start lines up to the larger of
        the two end lines. A side is left as '' on rows outside its own range
        or when the body holds fewer lines than the header declared.
        """
        original_end = self.original_start_line + self.original_line_count
        new_end = self.new_start_line + self.new_line_count
        start = min(self.original_start_line, self.new_start_line)
        end = max(original_end, new_end)

        rows: list[SideBySideDiffLine] = []
        original_index = 0
        new_index = 0
        for line in range(start, end):
            original = ''
            new = ''
            if (self.original_start_line <= line < original_end
                    and original_index < len(self.original)):
                original = self.original[original_index]
                original_index += 1
            if (self.new_start_line <= line < new_end
                    and new_index < len(self.new)):
                new = self.new[new_index]
                new_index += 1
            rows.append(SideBySideDiffLine(line, original, new))
        return rows


class GithubDiffParser:
    """Splits the text of a (possibly multi-file) diff into ``Hunk`` objects."""

    diff: str

    def __init__(self, diff: str):
        self.diff = diff

    def hunks(self) -> list[Hunk]:
        """Return every hunk found in the diff, in order of appearance.

        Lines before the first "@@" header, and the per-file header lines
        ("diff --git", "index", "---", "+++") between files, are not part of
        any hunk and are skipped.

        Raises:
            ValueError: if a line starting with "@@" is not a valid header.
        """
        retval: list[Hunk] = []
        current: list[str] = []
        in_hunk = False
        for line in self.diff.splitlines():
            if line.startswith('@@'):
                logger.info('adding %s', line)
                if in_hunk:
                    retval.append(Hunk('\n'.join(current)))
                current = [line]
                in_hunk = True
            elif line.startswith('diff --git '):
                # Body lines always start with ' ', '+', '-' or '\', so this
                # can only be the header of the next file: close the hunk.
                if in_hunk:
                    retval.append(Hunk('\n'.join(current)))
                current = []
                in_hunk = False
            elif in_hunk:
                current.append(line)
        if in_hunk:
            retval.append(Hunk('\n'.join(current)))
        return retval


def main() -> None:
    """Print every hunk of the diff file named on the command line as a table."""
    # Imported here so the parser classes stay importable without rich.
    from rich.console import Console
    from rich.markup import escape
    from rich.table import Table

    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} DIFF_FILE')
    filename = sys.argv[-1]
    # Diffs may carry bytes that are not valid UTF-8; this is a viewer, so
    # substitute them rather than abort.
    with open(filename, encoding='utf-8', errors='replace') as fp:
        parser = GithubDiffParser(fp.read())

    console = Console()
    for hunk in parser.hunks():
        # Diff text is arbitrary source code; escape it so bracketed text is
        # printed literally instead of being read as rich console markup.
        table = Table(title=escape(hunk.context))
        table.add_column('line')
        table.add_column('original')
        table.add_column('new')
        for diff_line in hunk.side_by_side():
            table.add_row(
                str(diff_line.line),
                escape(diff_line.original),
                escape(diff_line.new),
            )
        console.print(table)


if __name__ == '__main__':
    main()