diff --git a/open_notebook/utils.py b/open_notebook/utils.py
index 014ca9e..a32d6eb 100644
--- a/open_notebook/utils.py
+++ b/open_notebook/utils.py
@@ -220,6 +220,10 @@ def compare_versions(version1: str, version2: str) -> int:
     return 0
 
 
+# Compile regex pattern once for better performance
+THINK_PATTERN = re.compile(r'<think>(.*?)</think>', re.DOTALL)
+
+
 def parse_thinking_content(content: str) -> Tuple[str, str]:
     """
     Parse message content to extract thinking content from <think> tags.
@@ -240,11 +244,16 @@ def parse_thinking_content(content: str) -> Tuple[str, str]:
         >>> print(cleaned)
         "Here's my answer"
     """
-    # Pattern to match <think>...</think> blocks (including multiline)
-    think_pattern = r'<think>(.*?)</think>'
+    # Input validation
+    if not isinstance(content, str):
+        return "", str(content) if content is not None else ""
+
+    # Limit processing for very large content (100KB limit)
+    if len(content) > 100000:
+        return "", content
 
     # Find all thinking blocks
-    thinking_matches = re.findall(think_pattern, content, re.DOTALL)
+    thinking_matches = THINK_PATTERN.findall(content)
 
     if not thinking_matches:
         return "", content
@@ -253,7 +262,7 @@ def parse_thinking_content(content: str) -> Tuple[str, str]:
     thinking_content = "\n\n".join(match.strip() for match in thinking_matches)
 
     # Remove all <think>...</think> blocks from the original content
-    cleaned_content = re.sub(think_pattern, "", content, flags=re.DOTALL)
+    cleaned_content = THINK_PATTERN.sub("", content)
 
     # Clean up extra whitespace
     cleaned_content = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_content).strip()