From 4687d732236d287f5cef9202a06c06124f0fec3b Mon Sep 17 00:00:00 2001
From: Phil Ewels
Date: Tue, 12 Mar 2024 23:58:07 +0100
Subject: [PATCH] WIP: First proof of concept for Nextflow logs

---
Reviewer notes (text between the "---" marker above and the diffstat below is
commentary, not part of the commit):

* This patch text had lost its line breaks, its indentation, and every regex
  group name (each "(?P<name>" had been reduced to "(?P", which re.compile
  rejects). All three were reconstructed here.
* Group names were taken from the groups.get("...") calls in the matching
  parse() methods. Two are assumptions to confirm against the original
  commit: in NextflowLogFormatActiveProcess the first group is assumed to be
  "process", and the middle group's name is not recoverable; "process_path"
  is a placeholder that no code in this patch reads.
* Runs of spaces inside string literals (for example the leading space in the
  "port", "status=" and "Script_" patterns) and the indentation of context
  lines could not be recovered. Verify them against the original commit, or
  apply with "git apply --ignore-whitespace".
* The third trailing context line of the first hunk was missing from the
  text, so that hunk header now reads -53,5 +53,128 instead of -53,6 +53,129.
* NOTE(review): LOG_LEVELS[log_level] raises KeyError for any level other
  than DEBUG/INFO/WARN/ERROR, because the log_level group accepts any \w+.
* NOTE(review): the timestamp parsed from "date" is discarded; parse() always
  returns None in the timestamp slot.

 src/toolong/format_parser.py | 168 +++++++++++++++++++++++++++++++++--
 1 file changed, 163 insertions(+), 5 deletions(-)

diff --git a/src/toolong/format_parser.py b/src/toolong/format_parser.py
index 202237e..192c8c1 100644
--- a/src/toolong/format_parser.py
+++ b/src/toolong/format_parser.py
@@ -53,5 +53,128 @@ def parse(self, line: str) -> ParseResult | None:
         return timestamp, line, text
 
 
+class NextflowRegexLogFormatOne(LogFormat):
+    REGEX = re.compile(".*?")
+    LOG_LEVELS = {
+        "DEBUG": ["dim white on black", ""],
+        "INFO": ["bold black on green", "on #042C07"],
+        "WARN": ["bold black on yellow", "on #44450E"],
+        "ERROR": ["bold black on red", "on #470005"],
+    }
+
+    highlighter = LogHighlighter()
+
+    def parse(self, line: str) -> ParseResult | None:
+        match = self.REGEX.fullmatch(line)
+        if match is None:
+            return None
+
+        text = Text.from_ansi(line)
+        groups = match.groupdict()
+#        if not text.spans:
+#            text = self.highlighter(text)
+        if date := groups.get("date", None):
+            _, timestamp = timestamps.parse(groups["date"])
+            text.highlight_words([date], "not bold magenta")
+        if thread := groups.get("thread", None):
+            text.highlight_words([thread], "blue")
+        if log_level := groups.get("log_level", None):
+            text.highlight_words([f" {log_level} "], self.LOG_LEVELS[log_level][0])
+            text.stylize_before(self.LOG_LEVELS[log_level][1])
+        if logger_name := groups.get("logger_name", None):
+            text.highlight_words([logger_name], "cyan")
+        if process_name := groups.get("process_name", None):
+            text.highlight_words([process_name], "bold cyan")
+        if message := groups.get("message", None):
+            text.highlight_words([message], 'dim' if log_level == 'DEBUG' else '')
+
+        return None, line, text
+
+
+class NextflowRegexLogFormatTwo(LogFormat):
+    REGEX = re.compile(".*?")
+    highlighter = LogHighlighter()
+
+    def parse(self, line: str) -> ParseResult | None:
+        match = self.REGEX.fullmatch(line)
+        if match is None:
+            return None
+
+        text = Text.from_ansi(line)
+        text.stylize_before("dim")
+        groups = match.groupdict()
+        if process := groups.get("process", None):
+            text.highlight_words([process], 'blue not dim')
+        if process_name := groups.get("process_name", None):
+            text.highlight_words([process_name], 'bold cyan not dim')
+
+        return None, line, text
+
+class NextflowRegexLogFormatThree(LogFormat):
+    REGEX = re.compile(".*?")
+    CHANNEL_TYPES = {
+        "(value)": "green",
+        "(cntrl)": "yellow",
+        "(queue)": "magenta",
+    }
+    highlighter = LogHighlighter()
+
+    def parse(self, line: str) -> ParseResult | None:
+        match = self.REGEX.fullmatch(line)
+        if match is None:
+            return None
+
+        text = Text.from_ansi(line)
+        groups = match.groupdict()
+        if port := groups.get("port", None):
+            text.highlight_words([port], 'blue')
+        if channel_type := groups.get("channel_type", None):
+            text.highlight_words([channel_type], self.CHANNEL_TYPES[channel_type])
+        if channel_state := groups.get("channel_state", None):
+            text.highlight_words([channel_state], 'cyan' if channel_state == 'OPEN' else 'yellow')
+        text.highlight_words(["; channel:"], 'dim')
+        if channel_name := groups.get("channel_name", None):
+            text.highlight_words([channel_name], 'cyan')
+
+        return None, line, text
+
+class NextflowRegexLogFormatFour(LogFormat):
+    REGEX = re.compile(".*?")
+    highlighter = LogHighlighter()
+
+    def parse(self, line: str) -> ParseResult | None:
+        match = self.REGEX.fullmatch(line)
+        if match is None:
+            return None
+
+        text = Text.from_ansi(line)
+        text.stylize_before("dim")
+        groups = match.groupdict()
+        text.highlight_words(["status="], 'dim')
+        if status := groups.get("status", None):
+            text.highlight_words([status], 'cyan not dim')
+
+        return None, line, text
+
+
+class NextflowRegexLogFormatFive(LogFormat):
+    REGEX = re.compile(".*?")
+    highlighter = LogHighlighter()
+
+    def parse(self, line: str) -> ParseResult | None:
+        match = self.REGEX.fullmatch(line)
+        if match is None:
+            return None
+
+        text = Text.from_ansi(line)
+        text.stylize_before("dim")
+        groups = match.groupdict()
+        if script_id := groups.get("script_id", None):
+            text.highlight_words([script_id], 'blue')
+        if script_path := groups.get("script_path", None):
+            text.highlight_words([script_path], 'magenta')
+
+        return None, line, text
+
 class CommonLogFormat(RegexLogFormat):
     REGEX = re.compile(
@@ -66,6 +189,36 @@ class CombinedLogFormat(RegexLogFormat):
     )
 
 
+class NextflowLogFormat(NextflowRegexLogFormatOne):
+    REGEX = re.compile(
+        r'(?P<date>\w+-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) (?P<thread>\[.*\]?) (?P<log_level>\w+)\s+(?P<logger_name>[\w\.]+) - (?P<message>.*?)$'
+    )
+
+
+class NextflowLogFormatActiveProcess(NextflowRegexLogFormatTwo):
+    REGEX = re.compile(
+        r'^(?P<process>\[process\]) (?P<process_path>.*?)(?P<process_name>[^:]+?)?$'
+    )
+
+
+class NextflowLogFormatActiveProcessDetails(NextflowRegexLogFormatThree):
+    REGEX = re.compile(
+        r' (?P<port>port \d+): (?P<channel_type>\((value|queue|cntrl)\)) (?P<channel_state>\S+)\s+; channel: (?P<channel_name>.*?)$'
+    )
+
+
+class NextflowLogFormatActiveProcessStatus(NextflowRegexLogFormatFour):
+    REGEX = re.compile(
+        r'^ status=(?P<status>.*?)?$'
+    )
+
+
+class NextflowLogFormatScriptParse(NextflowRegexLogFormatFive):
+    REGEX = re.compile(
+        r'^ (?P<script_id>Script_\w+:) (?P<script_path>.*?)$'
+    )
+
+
 class DefaultLogFormat(LogFormat):
     highlighter = LogHighlighter()
 
@@ -96,10 +249,15 @@ def parse(self, line: str) -> ParseResult | None:
 
 
 FORMATS = [
-    JSONLogFormat(),
-    CommonLogFormat(),
-    CombinedLogFormat(),
-    DefaultLogFormat(),
+    # JSONLogFormat(),
+    # CommonLogFormat(),
+    # CombinedLogFormat(),
+    NextflowLogFormat(),
+    NextflowLogFormatActiveProcess(),
+    NextflowLogFormatActiveProcessDetails(),
+    NextflowLogFormatActiveProcessStatus(),
+    NextflowLogFormatScriptParse(),
+    # DefaultLogFormat(),
 ]
 
 
@@ -120,4 +278,4 @@ def parse(self, line: str) -> ParseResult:
                         del self._formats[index : index + 1]
                         self._formats.insert(0, format)
                     return parse_result
-        return None, "", Text()
+        return None, line, Text.from_ansi(line)