Fix special char rendering issues in Gerrit UI

Addressed a problem where characters like '_' or '*' in ChatGPT
responses might mistakenly be displayed as bold or italic text when used
outside of code blocks. This commit corrects these display errors.

Jira-Id: IT-103
Change-Id: I774e04d760afeb279ba27590769ecbed4aa83ab9
Signed-off-by: Patrizio <patrizio.gelosi@amarulasolutions.com>
diff --git a/src/main/java/com/googlesource/gerrit/plugins/chatgpt/client/GerritClientPostComments.java b/src/main/java/com/googlesource/gerrit/plugins/chatgpt/client/GerritClientPostComments.java
index 8bf8056..851e9e8 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/chatgpt/client/GerritClientPostComments.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/chatgpt/client/GerritClientPostComments.java
@@ -17,7 +17,7 @@
 import java.util.List;
 import java.util.Map;
 
-import static com.googlesource.gerrit.plugins.chatgpt.utils.ReviewUtils.processGerritMessage;
+import static com.googlesource.gerrit.plugins.chatgpt.utils.ReviewUtils.processChatGptMessage;
 import static java.net.HttpURLConnection.HTTP_OK;
 
 @Slf4j
@@ -44,7 +44,7 @@
         List<String> messages = new ArrayList<>();
         Map<String, List<GerritComment>> comments = new HashMap<>();
         for (ReviewBatch reviewBatch : reviewBatches) {
-            String message = processGerritMessage(reviewBatch.getContent());
+            String message = processChatGptMessage(reviewBatch.getContent());
             if (message.trim().isEmpty()) {
                 log.info("Empty message from post comment not submitted.");
                 continue;
diff --git a/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/ReviewUtils.java b/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/ReviewUtils.java
index e2db193..22f5c54 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/ReviewUtils.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/ReviewUtils.java
@@ -3,11 +3,32 @@
 import java.time.LocalDateTime;
 import java.time.ZoneOffset;
 import java.time.format.DateTimeFormatter;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static com.googlesource.gerrit.plugins.chatgpt.utils.StringUtils.parseOutOfDelimiters;
 
 public class ReviewUtils {
+    private static final Pattern SANITIZE_REGEX = Pattern.compile("(\\*{1,2}|(?<!\\w)_{1,2})(.+?)\\1",
+            Pattern.DOTALL);
 
-    public static String processGerritMessage(String message) {
-        return message.replaceAll( "```\\w+|(?<!\\n\\n)```", "\n\n```");
+    private static String sanitizeGerritComment(String message) {
+        // Sanitize sequences of asterisks ("*") and underscores ("_") that would be incorrectly interpreted as
+        // delimiters of Italic and Bold text
+        Matcher sanitizeMatcher = SANITIZE_REGEX.matcher(message);
+        return sanitizeMatcher.replaceAll("\\\\$1$2\\\\$1");
+    }
+
+    private static String sanitizeOutsideInlineCodeBlocks(String message) {
+        // Sanitize the content outside the inline code blocks (delimited by a single "`").
+        return parseOutOfDelimiters(message, "`", ReviewUtils::sanitizeGerritComment);
+    }
+
+    public static String processChatGptMessage(String message) {
+        // Sanitize code blocks (delimited by "```") by stripping out the language for syntax highlighting and ensuring
+        // that is preceded by two "\n" chars. Additionally, sanitize the content outside these blocks.
+        return parseOutOfDelimiters(message, "\\s*```\\w*\\s*", ReviewUtils::sanitizeOutsideInlineCodeBlocks,
+                "\n\n```\n", "\n```\n");
     }
 
     public static long getTimeStamp(String updatedString) {
diff --git a/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/StringUtils.java b/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/StringUtils.java
new file mode 100644
index 0000000..921ab7b
--- /dev/null
+++ b/src/main/java/com/googlesource/gerrit/plugins/chatgpt/utils/StringUtils.java
@@ -0,0 +1,31 @@
+package com.googlesource.gerrit.plugins.chatgpt.utils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Function;
+
+class StringUtils {
+
+    public static String parseOutOfDelimiters(String body, String splitDelim, Function<String, String> processMessage,
+                                              String leftDelimReplacement, String rightDelimReplacement) {
+        String[] chunks = body.split(splitDelim, -1);
+        List<String> resultChunks = new ArrayList<>();
+        int lastChunk = chunks.length -1;
+        for (int i = 0; i <= lastChunk; i++) {
+            String chunk = chunks[i];
+            if (i % 2 == 0 || i == lastChunk) {
+                resultChunks.add(processMessage.apply(chunk));
+            }
+            else {
+                resultChunks.addAll(Arrays.asList(leftDelimReplacement, chunk, rightDelimReplacement));
+            }
+        }
+        return String.join("", resultChunks);
+    }
+
+    public static String parseOutOfDelimiters(String body, String splitDelim, Function<String, String> processMessage) {
+        return parseOutOfDelimiters(body, splitDelim, processMessage, splitDelim, splitDelim);
+    }
+
+}
diff --git a/src/test/resources/__files/chatGptExpectedResponseStreamed.json b/src/test/resources/__files/chatGptExpectedResponseStreamed.json
index ba61890..820a351 100644
--- a/src/test/resources/__files/chatGptExpectedResponseStreamed.json
+++ b/src/test/resources/__files/chatGptExpectedResponseStreamed.json
@@ -1 +1 @@
-{"comments":{"test_file.py":[{"line":29,"range":{"start_line":29,"end_line":29,"start_character":4,"end_character":43},"message":"The change from unpacking kwargs as position arguments (*kwargs) to passing them as a single dictionary might cause issues if the receiving function expects individual keyword arguments rather than a single dictionary. Consider preserving the original method of passing kwargs unless this behavior is intentional and the receiving function has been designed to accept a dictionary."}]},"message":"The commit message \u0027Minor Fixes\u0027 is too vague and does not provide enough context about the nature of the changes. A good commit message should explain what was changed and why. Consider specifying the components affected by the \u0027minor fixes\u0027 and briefly describe the nature of the fixes for clarity."}
\ No newline at end of file
+{"comments":{"test_file.py":[{"line":29,"range":{"start_line":29,"end_line":29,"start_character":4,"end_character":43},"message":"The change from unpacking kwargs as position arguments (\\*kwargs) to passing them as a single dictionary might cause issues if the receiving function expects individual keyword arguments rather than a single dictionary. Consider preserving the original method of passing \\**kwargs unless this behavior is intentional and the receiving function has been designed to accept a dictionary."}]},"message":"The commit message \u0027Minor Fixes\u0027 is too vague and does not provide enough context about the nature of the changes. A good commit message should explain what was changed and why. Consider specifying the components affected by the \u0027minor fixes\u0027 and briefly describe the nature of the fixes for clarity."}
\ No newline at end of file
diff --git a/src/test/resources/__files/chatGptResponseReview.json b/src/test/resources/__files/chatGptResponseReview.json
index d08242f..32d1eb0 100644
--- a/src/test/resources/__files/chatGptResponseReview.json
+++ b/src/test/resources/__files/chatGptResponseReview.json
@@ -11,7 +11,7 @@
             "type": "function",
             "function": {
               "name": "format_replies",
-              "arguments": "{\n  \"replies\": [\n    {\n      \"reply\": \"The commit message 'Test Commit Message' is too vague and does not provide information about the specific changes made. A more detailed message is necessary to understand what has been fixed.\"\n    },\n    {\n      \"reply\": \"Confirm that the method 'importclass' is meant to change its behavior when 'class_name' is None. The new lines suggest 'class_name' will be derived from the 'module_name' in such cases, which can have unintended effects if not explicitly intended.\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 19,\n      \"codeSnippet\": \"if not class_name:\nmodule_name,class_name=module_name.rsplit('.',1)\"\n    },\n    {\n      \"reply\": \"The added check to determine if 'class_name' is None seems to modify the 'module_name' by splitting it and taking the last element. There should be an assignment to 'class_name' since the class to be imported is meant to be the last part of 'module_name' after splitting.\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 20,\n      \"codeSnippet\": \"module_name, class_name = module_name.rsplit('.', 1)\"\n    },\n    {\n      \"reply\": \"The code line 'from types import Any, Callable, ...' should use 'typing' for imports instead of 'types'.\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 1,\n      \"codeSnippet\": \"from types import\"\n    },\n    {\n      \"reply\": \"There is a typo in the import statement. The correct function should be 'import_module' from the 'importlib' module, not 'importclass' which does not exist. Correct code:\n```python\nloaded_module = import_module(module_name, fromlist=[class_name])```\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 21,\n      \"codeSnippet\": \"loaded_module = importclass(module_name, fromlist=[class_name])\"\n    }\n  ]\n  }"
+              "arguments": "{\n  \"replies\": [\n    {\n      \"reply\": \"The commit message 'Test Commit Message' is too vague and does not provide information about the specific changes made. A more detailed message is necessary to understand what has been fixed.\"\n    },\n    {\n      \"reply\": \"Confirm that the method 'importclass' is meant to change its behavior when 'class_name' is None. The new lines suggest 'class_name' will be derived from the 'module_name' in such cases, which can have unintended effects if not explicitly intended.\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 19,\n      \"codeSnippet\": \"if not class_name:\nmodule_name,class_name=module_name.rsplit('.',1)\"\n    },\n    {\n      \"reply\": \"The added check to determine if 'class_name' is None seems to modify the 'module_name' by splitting it and taking the last element. There should be an assignment to 'class_name' since the class to be imported is meant to be the last part of 'module_name' after splitting.\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 20,\n      \"codeSnippet\": \"module_name, class_name = module_name.rsplit('.', 1)\"\n    },\n    {\n      \"reply\": \"The code line 'from types import Any, Callable, ...' should use 'typing' for imports instead of 'types'.\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 1,\n      \"codeSnippet\": \"from types import\"\n    },\n    {\n      \"reply\": \"There is a typo in the import statement. The correct function should be '__import__' from the 'importlib' module, not 'importclass' which does not exist. Correct code:\n```python\nloaded_module = import_module(module_name, fromlist=[class_name])```\",\n      \"filename\": \"test_file.py\",\n      \"lineNumber\": 21,\n      \"codeSnippet\": \"loaded_module = importclass(module_name, fromlist=[class_name])\"\n    }\n  ]\n  }"
             }
           }
         ]
diff --git a/src/test/resources/__files/chatGptResponseStreamed.txt b/src/test/resources/__files/chatGptResponseStreamed.txt
index 6a3317c..de7e1e5 100644
--- a/src/test/resources/__files/chatGptResponseStreamed.txt
+++ b/src/test/resources/__files/chatGptResponseStreamed.txt
@@ -62,7 +62,7 @@
 data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " method"}}]},"logprobs": null,"finish_reason": null}]}
 data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " of"}}]},"logprobs": null,"finish_reason": null}]}
 data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " passing"}}]},"logprobs": null,"finish_reason": null}]}
-data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " kwargs"}}]},"logprobs": null,"finish_reason": null}]}
+data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " **kwargs"}}]},"logprobs": null,"finish_reason": null}]}
 data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " unless"}}]},"logprobs": null,"finish_reason": null}]}
 data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " this"}}]},"logprobs": null,"finish_reason": null}]}
 data: {"id": "chatcmpl-8gqm87x4kG9YHacjG4o14jm3mFFC8","object": "chat.completion.chunk","created": 1705051034,"model": "gpt-4","system_fingerprint": "fp_668a673906","choices": [{"index": 0,"delta": {"tool_calls": [{"index": 0,"function": {"arguments": " behavior"}}]},"logprobs": null,"finish_reason": null}]}
diff --git a/src/test/resources/__files/gerritPatchSetReview.json b/src/test/resources/__files/gerritPatchSetReview.json
index e3d50fe..edd434c 100644
--- a/src/test/resources/__files/gerritPatchSetReview.json
+++ b/src/test/resources/__files/gerritPatchSetReview.json
@@ -1 +1 @@
-{"comments":{"test_file.py":[{"line":19,"range":{"start_line":19,"end_line":20,"start_character":4,"end_character":60},"message":"Confirm that the method \u0027importclass\u0027 is meant to change its behavior when \u0027class_name\u0027 is None. The new lines suggest \u0027class_name\u0027 will be derived from the \u0027module_name\u0027 in such cases, which can have unintended effects if not explicitly intended."},{"line":20,"range":{"start_line":20,"end_line":20,"start_character":8,"end_character":60},"message":"The added check to determine if \u0027class_name\u0027 is None seems to modify the \u0027module_name\u0027 by splitting it and taking the last element. There should be an assignment to \u0027class_name\u0027 since the class to be imported is meant to be the last part of \u0027module_name\u0027 after splitting."},{"line":1,"range":{"start_line":1,"end_line":1,"start_character":0,"end_character":17},"message":"The code line \u0027from types import Any, Callable, ...\u0027 should use \u0027typing\u0027 for imports instead of \u0027types\u0027."},{"line":21,"range":{"start_line":21,"end_line":21,"start_character":4,"end_character":67},"message":"There is a typo in the import statement. The correct function should be \u0027import_module\u0027 from the \u0027importlib\u0027 module, not \u0027importclass\u0027 which does not exist. Correct code:\n\n\n```\nloaded_module \u003d import_module(module_name, fromlist\u003d[class_name])\n\n```"}]},"message":"The commit message \u0027Test Commit Message\u0027 is too vague and does not provide information about the specific changes made. A more detailed message is necessary to understand what has been fixed."}
\ No newline at end of file
+{"comments":{"test_file.py":[{"line":19,"range":{"start_line":19,"end_line":20,"start_character":4,"end_character":60},"message":"Confirm that the method \u0027importclass\u0027 is meant to change its behavior when \u0027class_name\u0027 is None. The new lines suggest \u0027class_name\u0027 will be derived from the \u0027module_name\u0027 in such cases, which can have unintended effects if not explicitly intended."},{"line":20,"range":{"start_line":20,"end_line":20,"start_character":8,"end_character":60},"message":"The added check to determine if \u0027class_name\u0027 is None seems to modify the \u0027module_name\u0027 by splitting it and taking the last element. There should be an assignment to \u0027class_name\u0027 since the class to be imported is meant to be the last part of \u0027module_name\u0027 after splitting."},{"line":1,"range":{"start_line":1,"end_line":1,"start_character":0,"end_character":17},"message":"The code line \u0027from types import Any, Callable, ...\u0027 should use \u0027typing\u0027 for imports instead of \u0027types\u0027."},{"line":21,"range":{"start_line":21,"end_line":21,"start_character":4,"end_character":67},"message":"There is a typo in the import statement. The correct function should be \u0027\\__import\\__\u0027 from the \u0027importlib\u0027 module, not \u0027importclass\u0027 which does not exist. Correct code:\n\n```\nloaded_module \u003d import_module(module_name, fromlist\u003d[class_name])\n```\n"}]},"message":"The commit message \u0027Test Commit Message\u0027 is too vague and does not provide information about the specific changes made. A more detailed message is necessary to understand what has been fixed."}
\ No newline at end of file