about | summary | refs | log | tree | commit | diff
path: root/kodereviewer/diff_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'kodereviewer/diff_parser.py')
-rw-r--r-- kodereviewer/diff_parser.py 146
1 files changed, 146 insertions, 0 deletions
diff --git a/kodereviewer/diff_parser.py b/kodereviewer/diff_parser.py
new file mode 100644
index 0000000..378c795
--- /dev/null
+++ b/kodereviewer/diff_parser.py
@@ -0,0 +1,146 @@
+import logging
+import re
+from collections.abc import Sequence
+
+from rich.logging import RichHandler
+
+# Matches a unified-diff hunk header such as ``@@ -13,10 +13,15 @@ on:``.
+# The named groups capture the start line and line count of the original and
+# the new side, plus whatever text follows the closing ``@@`` (``context``).
+# Both ``,count`` parts are mandatory in this pattern, so a header that omits
+# them (e.g. ``@@ -1 +1 @@``) does not match.
+HEADER_RE = re.compile(r'@@ -(?P<original_start_line>\d+),(?P<original_line_count>\d+)'
+ r' \+(?P<new_start_line>\d+),(?P<new_line_count>\d+)'
+ r' @@ ?(?P<context>.*)')
+
+# NOTE: this runs at import time, so importing the module configures logging
+# (INFO level, message-only format, output through rich's RichHandler).
+logging.basicConfig(
+ level="INFO", format="%(message)s", datefmt="[%X]", handlers=[RichHandler()]
+)
+logger = logging.getLogger(__name__)
+
+
+class SideBySideDiffLine:
+    """One row of a side-by-side diff view.
+
+    Holds a line number together with the text shown in the original column
+    and the text shown in the new column for that row.
+    """
+
+    line: int
+    original: str
+    new: str
+
+    def __init__(self, line: int, original: str, new: str):
+        self.line, self.original, self.new = line, original, new
+
+
+class Hunk:
+    """A single hunk of a unified diff.
+
+    A hunk is one ``@@ ... @@`` header line followed by its body lines. The
+    header supplies the start line and line count for both sides of the diff;
+    the body is split into the lines that belong to the original side and the
+    lines that belong to the new side.
+    """
+
+    header: str  # the raw ``@@ ... @@`` line
+    lines: list[str]  # every body line exactly as given, prefix included
+
+    original_start_line: int
+    original_line_count: int
+    new_start_line: int
+    new_line_count: int
+    context: str  # text following the closing ``@@`` (may be empty)
+
+    original: list[str]  # context and removed lines, prefix character kept
+    new: list[str]  # context and added lines, prefix character kept
+
+    def __init__(self, hunk_str: str):
+        """Parse ``hunk_str``: a header line followed by the body lines.
+
+        Raises:
+            ValueError: if ``hunk_str`` is empty or its first line is not a
+                hunk header accepted by ``HEADER_RE``.
+        """
+        header, *body = hunk_str.splitlines()
+        self.header = header
+        self.lines = body
+
+        self.parse_header(header)
+        self.parse_body(body)
+
+    def parse_header(self, header: str) -> None:
+        """Parse a hunk header and store its numbers on the instance.
+
+        The format is: @@ -l,s +l,s @@ optional section heading
+        where
+        l: starting line
+        s: number of lines
+
+        example: @@ -13,10 +13,15 @@ on:
+
+        Raises:
+            ValueError: if ``header`` does not match ``HEADER_RE``.
+        """
+        match = HEADER_RE.match(header)
+        if match is None:
+            raise ValueError(f'Error parsing {header}')
+
+        self.original_start_line = int(match.group('original_start_line'))
+        self.original_line_count = int(match.group('original_line_count'))
+        self.new_start_line = int(match.group('new_start_line'))
+        self.new_line_count = int(match.group('new_line_count'))
+        self.context = match.group('context')
+
+    def parse_body(self, lines: list[str]) -> None:
+        """Split body lines into ``self.original`` and ``self.new``.
+
+        Lines starting with ``-`` go to the original side, lines starting
+        with ``+`` go to the new side, and every other line is context that
+        belongs to both. Lines are stored unchanged, prefix included.
+        """
+        self.original = []
+        self.new = []
+
+        for line in lines:
+            if line.startswith('\\'):
+                # "\ No newline at end of file" is a marker about the line
+                # before it, not file content. Counting it as context would
+                # shift every following row on both sides.
+                continue
+            if line.startswith('-'):
+                self.original.append(line)
+            elif line.startswith('+'):
+                self.new.append(line)
+            else:
+                self.original.append(line)
+                self.new.append(line)
+
+    def side_by_side(self) -> Sequence[SideBySideDiffLine]:
+        """Return one row per line number covered by either side.
+
+        Rows run from the lowest start line to the highest end line of the
+        two sides. A side contributes its next stored line while the row
+        number lies inside that side's ``[start, start + count)`` span and an
+        empty string otherwise. A side whose count is 0 covers no rows and is
+        ignored when the overall span is computed.
+
+        Raises:
+            IndexError: if the body holds fewer lines for a side than the
+                header announced.
+        """
+        original_end = self.original_start_line + self.original_line_count
+        new_end = self.new_start_line + self.new_line_count
+
+        # Only sides that actually cover lines may widen the overall span.
+        # Otherwise "-0,0" (new file) or "+0,0" (deleted file) produces a
+        # spurious blank row for line 0.
+        spans = [
+            (first, last)
+            for first, last in (
+                (self.original_start_line, original_end),
+                (self.new_start_line, new_end),
+            )
+            if last > first
+        ]
+        retval: list[SideBySideDiffLine] = []
+        if not spans:
+            return retval
+
+        start = min(first for first, _ in spans)
+        # The end of the new side is start + count. Taking the start and the
+        # count as two separate candidates cut off the tail of the new side
+        # whenever it extended past the original side.
+        end = max(last for _, last in spans)
+
+        original_line_counter = 0
+        new_line_counter = 0
+        for line in range(start, end):
+            original = ''
+            new = ''
+            if self.original_start_line <= line < original_end:
+                original = self.original[original_line_counter]
+                original_line_counter += 1
+
+            if self.new_start_line <= line < new_end:
+                new = self.new[new_line_counter]
+                new_line_counter += 1
+
+            retval.append(SideBySideDiffLine(line, original, new))
+
+        return retval
+
+
+class GithubDiffParser:
+    """Splits the text of a unified diff into ``Hunk`` objects."""
+
+    diff: str  # the complete diff text handed to the constructor
+
+    def __init__(self, diff: str):
+        self.diff = diff
+
+    def hunks(self) -> list[Hunk]:
+        """Return every hunk of the diff in order of appearance.
+
+        A hunk starts at a line beginning with ``@@`` and runs until the next
+        such line, the next ``diff --git`` file header, or the end of the
+        input. Lines outside a hunk (the ``diff --git`` / ``index`` / ``---``
+        / ``+++`` file header lines) are skipped, so they neither reach
+        ``Hunk`` as a bogus header nor leak into the previous hunk's body.
+
+        Raises:
+            ValueError: propagated from ``Hunk`` when a ``@@`` line is not a
+                header it can parse.
+        """
+        retval: list[Hunk] = []
+        pending: list[str] = []  # lines of the hunk being collected
+
+        def flush() -> None:
+            if pending:
+                # Keep the trailing newline so a final empty body line is
+                # not lost when Hunk calls splitlines().
+                retval.append(Hunk('\n'.join(pending) + '\n'))
+                pending.clear()
+
+        for line in self.diff.splitlines():
+            if line.startswith('@@'):
+                logger.info('adding %s', line)
+                flush()
+                pending.append(line)
+            elif line.startswith('diff --git '):
+                # Body lines always carry a '+', '-', ' ' or '\' prefix, so
+                # an unprefixed "diff --git" can only open the next file.
+                flush()
+            elif pending:
+                pending.append(line)
+
+        flush()
+        return retval
+
+if __name__ == '__main__':
+    # Command-line demo: render every hunk of the given diff file as a rich
+    # table with one row per line number.
+    import sys
+
+    from rich.console import Console
+    from rich.table import Table
+
+    # Without an argument sys.argv[-1] is this script's own path, which
+    # contains no hunks and would be processed without printing anything.
+    if len(sys.argv) < 2:
+        sys.exit(f'usage: {sys.argv[0]} DIFF_FILE')
+
+    filename = sys.argv[-1]
+    with open(filename) as fp:
+        parser = GithubDiffParser(fp.read())
+
+    # One console is enough for all tables.
+    console = Console()
+    for hunk in parser.hunks():
+        table = Table(title=hunk.context)
+        table.add_column('line')
+        table.add_column('original')
+        table.add_column('new')
+
+        for diff_line in hunk.side_by_side():
+            table.add_row(str(diff_line.line), diff_line.original, diff_line.new)
+
+        console.print(table)