From c315eebbb3bd0b8657f844f3aa87f14bb3a71869 Mon Sep 17 00:00:00 2001 From: Bart Veneman Date: Thu, 26 Feb 2026 20:28:21 +0100 Subject: [PATCH] perf: inline is_ident_char() in hot tokenizer loops via CHAR_IDENT bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CHAR_IDENT (1 << 5) to the lookup table and replace all 4 is_ident_char() callsites in tight loops with a direct bitmask check, eliminating the 3-function call chain (is_ident_char → is_ident_start → is_alpha) on every character scanned. Benchmark results vs main: - Tokenizer - Large CSS: 29,920 → 32,064 ops/sec (+7.2%) - Tokenizer - Bootstrap CSS: 356 → 401 ops/sec (+12.6%) - Tokenizer - Tailwind CSS: 28 → 31 ops/sec (+10.7%) - Parser - Large CSS: 18,801 → 19,908 ops/sec (+5.9%) - Parser - Bootstrap CSS: 231 → 244 ops/sec (+5.6%) - Parser - Tailwind CSS: 17 → 18 ops/sec (+5.9%) - Parse/walk - Bootstrap CSS: 204 → 215 ops/sec (+5.4%) - Parse/walk - Tailwind CSS: 15 → 16 ops/sec (+6.7%) Co-Authored-By: Claude Sonnet 4.6 --- src/char-types.ts | 11 +++++++++++ src/tokenize.ts | 21 +++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/char-types.ts b/src/char-types.ts index 50d69c6..7a9bb93 100644 --- a/src/char-types.ts +++ b/src/char-types.ts @@ -26,6 +26,7 @@ export let CHAR_DIGIT = 1 << 1 // 2 export let CHAR_HEX = 1 << 2 // 4 export let CHAR_WHITESPACE = 1 << 3 // 8 export let CHAR_NEWLINE = 1 << 4 // 16 +export let CHAR_IDENT = 1 << 5 // 32 // Lookup table for ASCII characters (0-127) export let char_types = new Uint8Array(128) @@ -63,6 +64,16 @@ char_types[0x0a] = CHAR_NEWLINE // \n char_types[0x0d] = CHAR_NEWLINE // \r char_types[0x0c] = CHAR_NEWLINE // \f +// Initialize ident characters: letters, digits, hyphen (-), underscore (_) +// Derived from already-populated table so it stays consistent with CHAR_ALPHA/CHAR_DIGIT +for (let i = 0; i < 128; i++) { + if (char_types[i] & (CHAR_ALPHA | CHAR_DIGIT)) { + char_types[i] |= CHAR_IDENT + } +} +char_types[0x2d] |= CHAR_IDENT // hyphen - +char_types[0x5f] |= CHAR_IDENT // underscore _ + export function is_digit(ch: number): boolean { return ch < 128 && (char_types[ch] & CHAR_DIGIT) !== 0 } diff --git a/src/tokenize.ts b/src/tokenize.ts index 162c9cd..b1d509d 100644 --- a/src/tokenize.ts +++ b/src/tokenize.ts @@ -1,12 +1,12 @@ import { is_hex_digit, is_ident_start, - is_ident_char, is_whitespace, char_types, CHAR_DIGIT, CHAR_WHITESPACE, CHAR_NEWLINE, + CHAR_IDENT, } from './char-types' // Local inline version for hot paths that still need it @@ -454,7 +454,10 @@ export class Lexer { } if (is_ident_start(ch) || (ch === CHAR_HYPHEN && is_ident_start(this.peek()))) { // Unit: px, em, rem, etc - while (this.pos < this.source.length && is_ident_char(this.source.charCodeAt(this.pos))) { + // Hot path: inline ident char check in tight loop + while (this.pos < this.source.length) { + let ch = this.source.charCodeAt(this.pos) + if (ch < 0x80 && (char_types[ch] & CHAR_IDENT) === 0) break this.advance() } return this.make_token(TOKEN_DIMENSION, start, this.pos, start_line, start_column) @@ -502,8 +505,8 @@ export class Lexer { // Escape any other character (except newline, already checked) this.advance() } - } else if (is_ident_char(ch)) { - // Normal identifier character + } else if (ch >= 0x80 || (char_types[ch] & CHAR_IDENT) !== 0) { + // Normal identifier character — Hot path: inline ident char check this.advance() } else { // Not part of identifier @@ -589,7 +592,10 @@ export class Lexer { this.advance() // Skip @ // Consume identifier - while (this.pos < this.source.length && is_ident_char(this.source.charCodeAt(this.pos))) { + // Hot path: inline ident char check in tight loop + while (this.pos < this.source.length) { + let ch = this.source.charCodeAt(this.pos) + if (ch < 0x80 && (char_types[ch] & CHAR_IDENT) === 0) break this.advance() } @@ -601,7 +607,10 @@ export class Lexer { this.advance() // Skip # // Consume identifier or hex digits - while (this.pos < this.source.length && is_ident_char(this.source.charCodeAt(this.pos))) { + // Hot path: inline ident char check in tight loop + while (this.pos < this.source.length) { + let ch = this.source.charCodeAt(this.pos) + if (ch < 0x80 && (char_types[ch] & CHAR_IDENT) === 0) break this.advance() }