Skip to content

Log#cleansed_message optimization#339

Open
dks17 wants to merge 1 commit into
reidmorrison:masterfrom
dks17:cleansed-message-optimization
Open

Log#cleansed_message optimization#339
dks17 wants to merge 1 commit into
reidmorrison:masterfrom
dks17:cleansed-message-optimization

Conversation

@dks17

@dks17 dks17 commented May 23, 2026

Copy link
Copy Markdown

Flamegraph profiling reveals that String#gsub within SemanticLogger::Log#cleansed_message takes more execution time than other methods.

Proposed Optimizations:

  1. Not all log messages contain ANSI escape sequences. Checking for the presence of these substrings using String#include? is significantly faster than unconditionally running a regular expression.
  2. The current regular expression triggers excessive matching. Consequently, processing time and resource consumption scale poorly with longer strings (link (\e(\[([\d;]*[mz]?))?)? vs \e\[[\d;]*[mz]?|\e link).

Benchmark

# frozen_string_literal: true
require "benchmark/ips"
require "benchmark/memory"

def cleansed_message(message)
  message.to_s.gsub(/(\e(\[([\d;]*[mz]?))?)?/, "").strip
end

def opt_1(message)
  msg = message.to_s
  return msg.strip unless msg.include?("\e")

  msg.gsub(/(\e(\[([\d;]*[mz]?))?)?/, "").strip
end

def opt_2(message)
  msg = message.to_s
  return msg.strip unless msg.include?("\e")

  msg.gsub(/\e\[[\d;]*[mz]?|\e/, "").strip
end

TEXT = "Hello"
LONG_TEXT = "2026-05-22 13:05:04.069922 D [1:5272] {component: rails} ActiveRecord::Base --   TRANSACTION (0.9ms)  COMMIT /*source_location:config/environment.rb:5:in '<top (required)>'*/"
COLORED_TEXT = "\e[32mHello\e[0m"
LONG_COLORED_TEXT = "\e[32mHello\e[0m 2026-05-22 13:05:04.069922 D [1:5272] {component: rails} ActiveRecord::Base --   TRANSACTION (0.9ms)  COMMIT /*source_location:config/environment.rb:5:in '<top (required)>'*/"

Benchmark.ips do |x|
  x.config(warmup: 2, time: 5)

  x.report("text + origin") do
    cleansed_message(TEXT)
  end
  x.report("colored + origin") do
    cleansed_message(COLORED_TEXT)
  end
  x.report("long text + origin") do
    cleansed_message(LONG_TEXT)
  end
  x.report("long color + origin") do
    cleansed_message(LONG_COLORED_TEXT)
  end

  x.report("text + opt_1") do
    opt_1(TEXT)
  end
  x.report("colored + opt_1") do
    opt_1(COLORED_TEXT)
  end
  x.report("long text + opt_1") do
    opt_1(LONG_TEXT)
  end
  x.report("long color + opt_1") do
    opt_1(LONG_COLORED_TEXT)
  end

  x.report("text + opt_2") do
    opt_2(TEXT)
  end
  x.report("colored + opt_2") do
    opt_2(COLORED_TEXT)
  end
  x.report("long text + opt_2") do
    opt_2(LONG_TEXT)
  end
  x.report("long color + opt_2") do
    opt_2(LONG_COLORED_TEXT)
  end

  x.compare!
end

Benchmark.memory do |x|
  x.report("text + origin") do
    cleansed_message(TEXT)
  end
  x.report("colored + origin") do
    cleansed_message(COLORED_TEXT)
  end
  x.report("long text + origin") do
    cleansed_message(LONG_TEXT)
  end
  x.report("long color + origin") do
    cleansed_message(LONG_COLORED_TEXT)
  end

  x.report("text + opt_1") do
    opt_1(TEXT)
  end
  x.report("colored + opt_1") do
    opt_1(COLORED_TEXT)
  end
  x.report("long text + opt_1") do
    opt_1(LONG_TEXT)
  end
  x.report("long color + opt_1") do
    opt_1(LONG_COLORED_TEXT)
  end

  x.report("text + opt_2") do
    opt_2(TEXT)
  end
  x.report("colored + opt_2") do
    opt_2(COLORED_TEXT)
  end
  x.report("long text + opt_2") do
    opt_2(LONG_TEXT)
  end
  x.report("long color + opt_2") do
    opt_2(LONG_COLORED_TEXT)
  end

  x.compare!
end

Benchmark results

ruby 3.4.6 (2025-09-16 revision dbd83256b1) +PRISM [x86_64-linux]

Comparison:
       text + opt_2:  4082182.5 i/s
       text + opt_1:  4047313.3 i/s - same-ish: difference falls within error
  long text + opt_1:   744518.9 i/s - 5.48x  slower
  long text + opt_2:   741241.0 i/s - 5.51x  slower
    colored + opt_2:   492406.8 i/s - 8.29x  slower
      text + origin:   277279.9 i/s - 14.72x  slower
 long color + opt_2:   246351.3 i/s - 16.57x  slower
   colored + origin:   217041.6 i/s - 18.81x  slower
    colored + opt_1:   202486.2 i/s - 20.16x  slower
 long color + opt_1:    11407.2 i/s - 357.86x  slower
 long text + origin:    11094.5 i/s - 367.95x  slower
long color + origin:    10415.7 i/s - 391.92x  slower

Comparison:
        text + opt_1:         40 allocated
        text + opt_2:         40 allocated - same
   long text + opt_1:        320 allocated - 8.00x more
   long text + opt_2:        320 allocated - 8.00x more
       text + origin:        456 allocated - 11.40x more
    colored + origin:        456 allocated - 11.40x more
     colored + opt_1:        456 allocated - 11.40x more
     colored + opt_2:        456 allocated - 11.40x more
  long text + origin:        976 allocated - 24.40x more
 long color + origin:        976 allocated - 24.40x more
  long color + opt_1:        976 allocated - 24.40x more
  long color + opt_2:        976 allocated - 24.40x more

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant