From b41ed929b0ee76d8f62e7bebb538c5ec733a42d6 Mon Sep 17 00:00:00 2001
From: Zack Koppert
Date: Sun, 7 Jun 2026 21:36:57 -0700
Subject: [PATCH] Upgrade commonmarker to ~> 2.8.2

Commonmarker 2.x is a from-scratch rewrite that swaps the upstream parser
from cmark-gfm (C) to comrak (Rust). It also redesigns the Ruby API:

- Module: CommonMarker -> Commonmarker (lowercase m)
- Render entry point: CommonMarker.render_html(content, opts, exts) ->
  Commonmarker.to_html(content, options: {parse:, render:, extension:})
- Symbol arrays of opts/exts -> nested option hashes with snake_case keys
- :FOOTNOTES moved from a parse opt to an extension
- A bundled syntax highlighter plugin is enabled by default

The migration preserves the legacy public contract: callers can still pass
`commonmarker_opts:` and `commonmarker_exts:` as symbol arrays. The wrapper
translates each legacy symbol into the new nested hash structure.

Several behaviors had to be pinned explicitly to preserve cmark-gfm 0.x
output:

- hardbreaks defaults to true in 2.x; set false to match cmark.
- tagfilter, autolink, table, strikethrough, tasklist, and shortcodes are
  extensions that default to on in 2.x but were strictly opt-in in 0.x;
  explicitly disable any the caller did not request.
- header_ids is on by default in 2.x and injects an empty anchor inside
  every heading; explicitly disable it unless the caller requested it.
- The syntax_highlighter plugin is disabled
  (plugins: {syntax_highlighter: nil}) to keep `<pre><code>` blocks clean.

Closes #2059

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Zack Koppert 
---
 Gemfile                       |  4 +-
 Gemfile.lock                  | 22 ++++-----
 lib/github/markup/markdown.rb | 87 +++++++++++++++++++++++++++++++++--
 3 files changed, 96 insertions(+), 17 deletions(-)

diff --git a/Gemfile b/Gemfile
index 1ea8f2da..6bb9fcb8 100644
--- a/Gemfile
+++ b/Gemfile
@@ -4,9 +4,7 @@ gemspec
 gem "redcarpet", :platforms => :ruby
 gem "kramdown", :platforms => :jruby
 gem "RedCloth"
-# using a tag version here because 0.18.3 was not published by the author to encourage users to upgrade.
-# however we want to bump up to this version since this has a security patch
-gem "commonmarker", git: "https://github.com/gjtorikian/commonmarker.git", tag: "v0.18.3"
+gem "commonmarker", "~> 2.8.2"
 gem "rdoc", "~> 7.2.0"
 gem "org-ruby", "0.9.12"
 gem "creole", "~>0.5.0"
diff --git a/Gemfile.lock b/Gemfile.lock
index 8d0f756d..cd835695 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,11 +1,3 @@
-GIT
-  remote: https://github.com/gjtorikian/commonmarker.git
-  revision: 2838ebaa83ee0081d481c21f3bc0e4cb3e8de9da
-  tag: v0.18.3
-  specs:
-    commonmarker (0.18.3)
-      ruby-enum (~> 0.5)
-
 PATH
   remote: .
   specs:
@@ -34,6 +26,13 @@ GEM
     builder (3.3.0)
     cgi (0.5.1)
     charlock_holmes (0.7.9)
+    commonmarker (2.8.2)
+      rb_sys (~> 0.9)
+    commonmarker (2.8.2-aarch64-linux)
+    commonmarker (2.8.2-arm-linux)
+    commonmarker (2.8.2-arm64-darwin)
+    commonmarker (2.8.2-x86_64-darwin)
+    commonmarker (2.8.2-x86_64-linux)
     concurrent-ruby (1.3.6)
     connection_pool (3.0.2)
     crass (1.0.6)
@@ -85,14 +84,15 @@ GEM
       stringio
     racc (1.8.1)
     rake (13.4.2)
+    rake-compiler-dock (1.12.0)
+    rb_sys (0.9.128)
+      rake-compiler-dock (= 1.12.0)
     rdoc (7.2.0)
       erb
       psych (>= 4.0.0)
       tsort
     redcarpet (3.6.1)
     rexml (3.4.4)
-    ruby-enum (0.9.0)
-      i18n
     rubypants (0.7.1)
     rugged (1.9.0)
     sanitize (6.1.3)
@@ -135,7 +135,7 @@ DEPENDENCIES
   RedCloth
   activesupport (~> 8.1.3)
   asciidoctor (~> 2.0.26)
-  commonmarker!
+  commonmarker (~> 2.8.2)
   creole (~> 0.5.0)
   github-linguist (>= 7.1.3)
   github-markup!
diff --git a/lib/github/markup/markdown.rb b/lib/github/markup/markdown.rb
index dcf93229..5b17afb7 100644
--- a/lib/github/markup/markdown.rb
+++ b/lib/github/markup/markdown.rb
@@ -5,9 +5,90 @@ module Markup
     class Markdown < Implementation
       MARKDOWN_GEMS = {
         "commonmarker" => proc { |content, options: {}|
-          commonmarker_opts = [:GITHUB_PRE_LANG].concat(options.fetch(:commonmarker_opts, []))
-          commonmarker_exts = options.fetch(:commonmarker_exts, [:tagfilter, :autolink, :table, :strikethrough])
-          CommonMarker.render_html(content, commonmarker_opts, commonmarker_exts)
+          legacy_opts = options.fetch(:commonmarker_opts, [])
+          legacy_exts = options.fetch(
+            :commonmarker_exts,
+            [:tagfilter, :autolink, :table, :strikethrough],
+          )
+
+          parse_options = {}
+          # commonmarker 2.x changes several render defaults that diverge from cmark-gfm 0.x:
+          #   - hardbreaks defaults to true in 2.x but was false in 0.x.
+          #   - escaped_char_spans defaults to true in 2.x and wraps backslash-escaped chars in
+          #     <span data-escaped-char>; 0.x emitted bare characters.
+          #   - gfm_quirks defaults to false in 2.x; 0.x (cmark-gfm) always had the quirk on,
+          #     which collapses ****foo**** to <strong>foo</strong> instead of nesting <strong> tags.
+          #   - github_pre_lang defaults to true in 2.x; set explicitly to match the legacy contract.
+          render_options = {
+            github_pre_lang: true,
+            hardbreaks: false,
+            escaped_char_spans: false,
+            gfm_quirks: true,
+          }
+          extension_options = {}
+
+          legacy_opts.each do |opt|
+            case opt
+            when :DEFAULT then nil
+            when :SOURCEPOS then render_options[:sourcepos] = true
+            when :HARDBREAKS then render_options[:hardbreaks] = true
+            when :NOBREAKS then render_options[:hardbreaks] = false
+            when :SMART then parse_options[:smart] = true
+            when :GITHUB_PRE_LANG then render_options[:github_pre_lang] = true
+            when :UNSAFE then render_options[:unsafe] = true
+            when :FOOTNOTES then extension_options[:footnotes] = true
+            when :FULL_INFO_STRING then render_options[:full_info_string] = true
+              # The legacy options below existed in cmark-gfm 0.x but have no direct commonmarker
+              # 2.x equivalent. Accept them so existing callers don't break, but they have no effect:
+              #   :VALIDATE_UTF8 / :LIBERAL_HTML_TAG - enforced at the Rust type layer in 2.x.
+              #   :TABLE_PREFER_STYLE_ATTRIBUTES     - no 2.x render knob for inline table styles.
+              #   :STRIKETHROUGH_DOUBLE_TILDE        - 2.x always accepts both single and double tilde.
+            when :VALIDATE_UTF8, :LIBERAL_HTML_TAG,
+                 :TABLE_PREFER_STYLE_ATTRIBUTES, :STRIKETHROUGH_DOUBLE_TILDE
+              nil
+            else
+              raise ArgumentError, "unknown commonmarker option: #{opt.inspect}"
+            end
+          end
+
+          legacy_exts.each do |ext|
+            case ext
+            when :strikethrough, :tagfilter, :autolink, :table, :tasklist,
+                 :shortcodes, :footnotes, :multiline_block_quotes,
+                 :math_dollars, :math_code, :wikilinks_title_after_pipe,
+                 :wikilinks_title_before_pipe, :underline, :subscript, :spoiler,
+                 :greentext, :alerts, :description_lists
+              extension_options[ext] = true
+            when :header_ids
+              # header_ids takes a string prefix in 2.x rather than a boolean. The legacy contract
+              # only passed it as a symbol, so use an empty prefix to enable anchor generation.
+              extension_options[:header_ids] = ""
+            else
+              raise ArgumentError, "unknown commonmarker extension: #{ext.inspect}"
+            end
+          end
+
+          # Several extensions (tagfilter, autolink, table, strikethrough, tasklist, shortcodes)
+          # are enabled by default in commonmarker 2.x but were strictly opt-in in 0.x. Explicitly
+          # disable any extension the caller did not request so behavior matches the legacy contract.
+          [:strikethrough, :tagfilter, :autolink, :table, :tasklist, :shortcodes].each do |ext|
+            extension_options[ext] = false unless extension_options[ext]
+          end
+
+          # header_ids is enabled by default in commonmarker 2.x (it injects anchor tags inside
+          # every heading). The legacy 0.x wrapper never enabled it implicitly, so disable it
+          # unless the caller explicitly requested it.
+          extension_options[:header_ids] = nil unless extension_options.key?(:header_ids)
+
+          Commonmarker.to_html(
+            content,
+            options: {
+              parse: parse_options,
+              render: render_options,
+              extension: extension_options,
+            },
+            plugins: {syntax_highlighter: nil},
+          )
         },
         "github/markdown" => proc { |content, options: {}|
           GitHub::Markdown.render(content)