diff options
Diffstat (limited to 'kodereviewer/diff_parser.py')
-rw-r--r-- | kodereviewer/diff_parser.py | 146 |
1 files changed, 146 insertions, 0 deletions
"""Parse unified diffs (as produced by git / GitHub) into hunks.

A diff is split into :class:`Hunk` objects by :class:`GithubDiffParser`; each
hunk can render itself as a list of side-by-side rows pairing the original
and the new text by line number.
"""
import logging
import re
from collections.abc import Sequence

try:
    from rich.logging import RichHandler
except ImportError:
    # The parsing classes do not need rich; fall back to logging's default
    # stream handler so the module stays importable without it.
    _LOG_HANDLERS = None
else:
    _LOG_HANDLERS = [RichHandler()]

# Hunk header: "@@ -start[,count] +start[,count] @@ optional section heading".
# The ",count" part is omitted by diff tools when the count is exactly 1.
HEADER_RE = re.compile(
    r'@@ -(?P<original_start_line>\d+)(?:,(?P<original_line_count>\d+))?'
    r' \+(?P<new_start_line>\d+)(?:,(?P<new_line_count>\d+))?'
    r' @@ ?(?P<context>.*)'
)

logging.basicConfig(
    level="INFO", format="%(message)s", datefmt="[%X]", handlers=_LOG_HANDLERS
)
logger = logging.getLogger(__name__)


class SideBySideDiffLine:
    """One row of a side-by-side view: a line number and both texts.

    ``original`` / ``new`` are empty strings when that side has no line at
    this line number.
    """

    line: int
    original: str
    new: str

    def __init__(self, line: int, original: str, new: str):
        self.line = line
        self.original = original
        self.new = new

    def __repr__(self) -> str:
        return (
            f'{type(self).__name__}(line={self.line!r}, '
            f'original={self.original!r}, new={self.new!r})'
        )


class Hunk:
    """A single hunk of a unified diff (one ``@@ ... @@`` header plus body)."""

    header: str
    lines: list[str]

    original_start_line: int
    original_line_count: int
    new_start_line: int
    new_line_count: int
    context: str

    # Body lines belonging to each side, with their diff prefix
    # (' ', '-' or '+') kept in place.
    original: list[str]
    new: list[str]

    def __init__(self, hunk_str: str):
        """Parse ``hunk_str``: the header line followed by the body lines.

        Raises:
            ValueError: if ``hunk_str`` is empty or its first line is not a
                valid hunk header.
        """
        all_lines = hunk_str.splitlines()
        if not all_lines:
            raise ValueError('Error parsing empty hunk')
        header, *body = all_lines
        self.header = header
        self.lines = body

        self.parse_header(header)
        self.parse_body(body)

    def parse_header(self, header: str) -> None:
        """Parses a header.

        The format is: @@ -l,s +l,s @@ optional section heading
        where
        l: starting line
        s: number of lines (may be omitted together with its comma, in
           which case it is 1)

        example: @@ -13,10 +13,15 @@ on:

        Raises:
            ValueError: if ``header`` does not match the format above.
        """
        match = HEADER_RE.match(header)
        if match is None:
            raise ValueError(f'Error parsing {header}')

        self.original_start_line = int(match.group('original_start_line'))
        self.original_line_count = int(match.group('original_line_count') or 1)
        self.new_start_line = int(match.group('new_start_line'))
        self.new_line_count = int(match.group('new_line_count') or 1)
        self.context = match.group('context')

    def parse_body(self, lines: list[str]) -> None:
        """Distribute body ``lines`` over ``self.original`` and ``self.new``.

        Removed lines go to the original side, added lines to the new side
        and everything else (context) to both. "No newline at end of file"
        markers are dropped because they are not file content.
        """
        self.original = []
        self.new = []

        for line in lines:
            if line.startswith('\\'):
                # "\ No newline at end of file": metadata, not a file line.
                continue
            if line.startswith('-'):
                self.original.append(line)
            elif line.startswith('+'):
                self.new.append(line)
            else:
                self.original.append(line)
                self.new.append(line)

    def side_by_side(self) -> Sequence[SideBySideDiffLine]:
        """Return one row per line number covered by either side of the hunk.

        Rows are aligned by absolute line number: a row's ``original`` is the
        next original-side line if that number lies in the original range,
        and likewise for ``new``. A side with a zero line count (new or
        deleted file) contributes no rows of its own.
        """
        original_range = range(
            self.original_start_line,
            self.original_start_line + self.original_line_count,
        )
        new_range = range(
            self.new_start_line,
            self.new_start_line + self.new_line_count,
        )

        # An empty range is falsy; ignoring it keeps the "-0,0" / "+0,0"
        # placeholder start line from producing a blank leading row.
        spans = [span for span in (original_range, new_range) if span]
        if not spans:
            return []
        start = min(span.start for span in spans)
        end = max(span.stop for span in spans)

        # next(..., '') tolerates a body shorter than the header announces.
        original_lines = iter(self.original)
        new_lines = iter(self.new)

        retval: list[SideBySideDiffLine] = []
        for line in range(start, end):
            original = next(original_lines, '') if line in original_range else ''
            new = next(new_lines, '') if line in new_range else ''
            retval.append(SideBySideDiffLine(line, original, new))

        return retval


class GithubDiffParser:
    """Splits the text of a unified diff into :class:`Hunk` objects."""

    diff: str

    def __init__(self, diff: str):
        self.diff = diff

    def hunks(self) -> list[Hunk]:
        """Return every hunk found in the diff, in order of appearance.

        Lines outside of hunks (the ``diff --git`` / ``index`` / ``---`` /
        ``+++`` file headers, or anything before the first ``@@`` line) are
        skipped, so both bare patches and full multi-file diffs are accepted.

        Raises:
            ValueError: if a line starting with ``@@`` is not a valid header.
        """
        retval: list[Hunk] = []
        current: list[str] = []  # lines of the hunk being collected
        in_hunk = False

        def flush() -> None:
            if in_hunk:
                retval.append(Hunk('\n'.join(current)))

        for line in self.diff.splitlines():
            if line.startswith('@@'):
                logger.info('adding %s', line)
                flush()
                current = [line]
                in_hunk = True
            elif line.startswith('diff --git '):
                # Body lines always start with ' ', '+', '-' or '\', so this
                # can only be the next file's header: the hunk ends here.
                flush()
                current = []
                in_hunk = False
            elif in_hunk:
                current.append(line)

        flush()
        return retval


def _main() -> None:
    """Print every hunk of the diff file named by the last CLI argument."""
    import sys

    from rich.console import Console
    from rich.table import Table

    if len(sys.argv) < 2:
        raise SystemExit(f'usage: {sys.argv[0]} DIFF_FILE')
    filename = sys.argv[-1]

    with open(filename) as fp:
        parser = GithubDiffParser(fp.read())

    console = Console()
    for hunk in parser.hunks():
        table = Table(title=hunk.context)
        table.add_column('line')
        table.add_column('original')
        table.add_column('new')

        for diff_line in hunk.side_by_side():
            table.add_row(str(diff_line.line), diff_line.original, diff_line.new)

        console.print(table)


if __name__ == '__main__':
    _main()