diff --git a/Gemfile b/Gemfile index 6820e1d2b..655f1f1ee 100644 --- a/Gemfile +++ b/Gemfile @@ -44,6 +44,7 @@ gem "net-http-persistent" gem "kredis" gem "platform_agent" gem "thruster" +gem "redcarpet" group :development, :test do gem "debug" diff --git a/Gemfile.lock b/Gemfile.lock index 60e9fb2dc..a685bbf4e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -288,6 +288,7 @@ GEM erb psych (>= 4.0.0) tsort + redcarpet (3.6.1) redis (5.4.1) redis-client (>= 0.22.0) redis-client (0.25.2) @@ -428,6 +429,7 @@ DEPENDENCIES puma (~> 6.6) rails! rails_autolink + redcarpet redis (~> 5.4) resque (~> 2.7.0) resque-pool (~> 0.7.1) diff --git a/app/helpers/content_filters.rb b/app/helpers/content_filters.rb index 926a726af..1719f8258 100644 --- a/app/helpers/content_filters.rb +++ b/app/helpers/content_filters.rb @@ -1,3 +1,3 @@ module ContentFilters - TextMessagePresentationFilters = ActionText::Content::Filters.new(RemoveSoloUnfurledLinkText, StyleUnfurledTwitterAvatars, SanitizeTags) + TextMessagePresentationFilters = ActionText::Content::Filters.new(MarkdownFilter, RemoveSoloUnfurledLinkText, StyleUnfurledTwitterAvatars, SanitizeTags) end diff --git a/app/helpers/content_filters/markdown_filter.rb b/app/helpers/content_filters/markdown_filter.rb new file mode 100644 index 000000000..5aad59fc3 --- /dev/null +++ b/app/helpers/content_filters/markdown_filter.rb @@ -0,0 +1,127 @@ +class ContentFilters::MarkdownFilter < ActionText::Content::Filter + # Markdown pattern detection + # NOTE: Underscore-based emphasis (__bold__ and _italic_) is intentionally not supported + # to avoid false positives with code identifiers like __init__ and my_variable_name + # + # NOTE: List patterns require at least 2 consecutive items to trigger detection. + # This prevents false positives like "* walks into room *" or "- Not sure about that" + # while still catching legitimate multi-item lists. + # + # NOTE: To type literal markdown characters, users have two options: + # 1. Use backticks for inline code: `**not bold**` → renders as **not bold** + # 2. Use backslash escaping: \*\*not bold\*\* → renders as **not bold** (plain text) + MARKDOWN_PATTERNS = [ + /\*\*[^*]+\*\*/, # Bold: **text** + /(?\s/m, # Blockquotes: > quote + /^---+$/m # Horizontal rule: --- + ].freeze + + def self.has_markdown?(text) + MARKDOWN_PATTERNS.any? { |pattern| text.match?(pattern) } + end + + def applicable? + has_markdown? && !has_attachments? + end + + def apply + # Pre-process text to ensure lists are properly separated from surrounding content + preprocessed_text = normalize_lists(plain_text_content) + + # Convert markdown to HTML using Redcarpet + markdown_html = self.class.markdown_renderer.render(preprocessed_text) + + # Replace the entire fragment with the rendered markdown + fragment.update do |source| + source.inner_html = markdown_html + end + end + + private + def has_markdown? + self.class.has_markdown?(plain_text_content) + end + + def has_attachments? + # Skip markdown rendering if ActionText attachments are present + # to avoid destroying them when we replace inner_html + fragment.find_all("action-text-attachment").any? + end + + def plain_text_content + fragment.to_plain_text + end + + def normalize_lists(text) + # Add blank lines before and after lists to ensure Redcarpet renders them correctly + # even when they're surrounded by other text. + # This handles cases like: + # - "Hello\n* item 1\n* item 2" -> "Hello\n\n* item 1\n* item 2" + # - "* item 1\n* item 2\nGoodbye" -> "* item 1\n* item 2\n\nGoodbye" + + lines = text.split("\n", -1) + result = [] + in_list = false + list_pattern = /^(\*|-|\d+\.)\s/ + + lines.each_with_index do |line, i| + is_list_item = line.match?(list_pattern) + prev_line = i > 0 ? lines[i - 1] : nil + + # Starting a list: add blank line before if previous line exists and isn't blank + if is_list_item && !in_list + if prev_line && !prev_line.strip.empty? + result << "" + end + in_list = true + end + + # Ending a list: add blank line after + if !is_list_item && in_list && !line.strip.empty? + result << "" + in_list = false + end + + # Exiting list at blank line + if line.strip.empty? + in_list = false + end + + result << line + end + + result.join("\n") + end + + def self.markdown_renderer + @markdown_renderer ||= Redcarpet::Markdown.new( + Redcarpet::Render::HTML.new( + filter_html: true, # Strip HTML tags for security + safe_links_only: true, # Block javascript: and data: URLs + no_styles: true, # Remove inline styles + hard_wrap: true, # Convert single newlines to
tags + link_attributes: { target: "_blank", rel: "noopener noreferrer" } + ), + autolink: true, # Convert plain URLs to links + disable_indented_code_blocks: true, # Disable 4-space indented code blocks + fenced_code_blocks: true, # Allow ``` code blocks + footnotes: false, # Footnotes not needed in chat + highlight: false, # Syntax highlighting handled separately + quote: true, # Enable blockquotes with > + no_intra_emphasis: true, # Prevent emphasis_within_words + space_after_headers: true, # Require space after # for headers + strikethrough: true, # Enable ~~strikethrough~~ + tables: false, # Tables not supported in chat UI + underline: false # Underline not supported + ) + end +end diff --git a/app/helpers/messages_helper.rb b/app/helpers/messages_helper.rb index 0d5a35c11..9d9159cc7 100644 --- a/app/helpers/messages_helper.rb +++ b/app/helpers/messages_helper.rb @@ -57,7 +57,18 @@ def message_presentation(message) when "sound" message_sound_presentation(message) else - auto_link h(ContentFilters::TextMessagePresentationFilters.apply(message.body.body)), html: { target: "_blank" } + # Check for markdown ONCE before filtering (optimization to avoid double checking) + has_markdown = message.body.present? && ContentFilters::MarkdownFilter.has_markdown?(message.plain_text_body) + + filtered_content = ContentFilters::TextMessagePresentationFilters.apply(message.body.body) + + # Only apply auto_link if the message doesn't have markdown + # (markdown filter already processes links and sanitizes HTML) + if has_markdown + filtered_content.html_safe + else + auto_link h(filtered_content), html: { target: "_blank" } + end end rescue Exception => e Sentry.capture_exception(e, extra: { message: message }) diff --git a/test/helpers/content_filters_test.rb b/test/helpers/content_filters_test.rb index 00b270910..82bfe2ad6 100644 --- a/test/helpers/content_filters_test.rb +++ b/test/helpers/content_filters_test.rb @@ -70,7 +70,298 @@ class ContentFiltersTest < ActionView::TestCase assert_match expected, filtered.to_html end + # Markdown filter tests + + test "markdown filter is not applicable to plain text without markdown" do + refute_markdown_applicable "Just plain text here" + end + + test "markdown filter is applicable when markdown patterns are detected" do + assert_markdown_applicable "This has **bold** text" + end + + test "has_markdown? class method detects markdown patterns" do + assert ContentFilters::MarkdownFilter.has_markdown?("This is **bold**") + assert ContentFilters::MarkdownFilter.has_markdown?("This is *italic*") + assert ContentFilters::MarkdownFilter.has_markdown?("This has `code`") + assert ContentFilters::MarkdownFilter.has_markdown?("[link](url)") + assert ContentFilters::MarkdownFilter.has_markdown?("# Header") + + refute ContentFilters::MarkdownFilter.has_markdown?("Just plain text") + refute ContentFilters::MarkdownFilter.has_markdown?("Email: user@example.com") + refute ContentFilters::MarkdownFilter.has_markdown?("Python __init__ method") + refute ContentFilters::MarkdownFilter.has_markdown?("Use my_variable_name") + end + + # Tests for MARKDOWN_PATTERNS (in order of pattern definition) + + test "renders bold text with double asterisks" do + assert_markdown_rendered "This is **bold** text", /bold<\/strong>/ + end + + test "renders italic text with single asterisks" do + assert_markdown_rendered "This is *italic* text", /italic<\/em>/ + end + + test "renders inline code with backticks" do + assert_markdown_rendered "Use the `print()` function", /print\(\)<\/code>/ + end + + test "renders code blocks with triple backticks" do + code_block = "```\ndef hello():\n print('world')\n```" + filtered = apply_text_filters(code_block) + assert_match //, filtered.to_html + assert_match /def hello/, filtered.to_html + end + + test "does not detect indented code blocks as markdown" do + # 4-space indentation doesn't trigger markdown detection (good - avoids false positives) + indented = "Here is code:\n\n def hello():\n print('world')" + # Should not be detected as markdown since no explicit markdown patterns + refute_markdown_applicable indented + end + + test "renders headers" do + filtered = apply_text_filters("# Heading 1\n## Heading 2") + assert_match /

Heading 1<\/h1>/, filtered.to_html + assert_match /

Heading 2<\/h2>/, filtered.to_html + end + + test "renders unordered lists with asterisks" do + filtered = apply_text_filters("* Item 1\n* Item 2\n* Item 3") + assert_match /