Skip to content

Commit 9a302e6

Browse files
committed
format: preserve line continuations when re-quoting
Replaced the _decode_literal/_encode_literal helpers with a single-pass, escape-aware re-quote (_scan_quotes + _requote) so converting between quote styles passes through escaped backslashes, line continuations, and other backslash sequences unchanged, and only re-escapes the target delimiter quote character. The old decode/encode round-trip would mangle a single-quoted line-continuation string into a doubled backslash. Fixed fixture case q to convert a single-quoted line-continuation string to double quotes with the continuation preserved verbatim.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01U4hEZuqiuEFH2zy8wWGwyD
1 parent 98d1b5b commit 9a302e6

2 files changed

Lines changed: 59 additions & 31 deletions

File tree

src/cfengine_cli/format.py

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -95,38 +95,67 @@ def text(node: Node) -> str:
9595
return node.text.decode("utf-8")
9696

9797

98-
def _decode_literal(inner: str) -> str:
99-
"""Return the logical character content of a quoted_string's inner text.
98+
def _scan_quotes(inner: str) -> tuple:
99+
"""Report which quote characters the inner text's logical content holds.
100100
101-
CFEngine processes the same escapes for all three quote styles: an escaped
102-
backslash, double quote, or single quote is unescaped, and any other
103-
backslash is kept as-is. (A backtick string still cannot contain a literal
104-
backtick, since the delimiter itself can't be escaped.)
101+
Returns (has_double, has_single, has_backtick). An escaped double or single
102+
quote counts as that quote character; an escaped backslash, a line
103+
continuation, and other backslash sequences contribute no quote character.
105104
"""
106-
out = []
105+
has_double = has_single = has_backtick = False
107106
i = 0
108-
while i < len(inner):
107+
n = len(inner)
108+
while i < n:
109109
c = inner[i]
110-
if c == "\\" and i + 1 < len(inner) and inner[i + 1] in ("\\", '"', "'"):
111-
out.append(inner[i + 1])
110+
if c == "\\" and i + 1 < n:
111+
nxt = inner[i + 1]
112+
if nxt == '"':
113+
has_double = True
114+
elif nxt == "'":
115+
has_single = True
112116
i += 2
113117
continue
114-
out.append(c)
118+
if c == '"':
119+
has_double = True
120+
elif c == "'":
121+
has_single = True
122+
elif c == "`":
123+
has_backtick = True
115124
i += 1
116-
return "".join(out)
125+
return has_double, has_single, has_backtick
117126

118127

119-
def _encode_literal(content: str, delim: str) -> str:
120-
"""Wrap content in delim, escaping as that quote style requires.
128+
def _requote(inner: str, target: str) -> str:
129+
"""Re-emit a quoted_string's inner text using ``target`` as the quote char.
121130
122-
Backslashes are always escaped. Double- and single-quoted strings also
123-
escape their own delimiter; a backtick can't be escaped, so the caller must
124-
only choose backticks when the content contains no backtick.
131+
Escape sequences that mean the same thing in every quote style - an escaped
132+
backslash, a line continuation, or any other backslash sequence - pass
133+
through unchanged. A literal double or single quote is escaped only when it
134+
is the target delimiter, otherwise it is emitted bare.
125135
"""
126-
escaped = content.replace("\\", "\\\\")
127-
if delim != "`":
128-
escaped = escaped.replace(delim, "\\" + delim)
129-
return delim + escaped + delim
136+
out = []
137+
i = 0
138+
n = len(inner)
139+
while i < n:
140+
c = inner[i]
141+
if c == "\\" and i + 1 < n:
142+
nxt = inner[i + 1]
143+
if nxt == '"':
144+
out.append('\\"' if target == '"' else '"')
145+
elif nxt == "'":
146+
out.append("\\'" if target == "'" else "'")
147+
else:
148+
out.append(c + nxt)
149+
i += 2
150+
continue
151+
if c == '"':
152+
out.append('\\"' if target == '"' else '"')
153+
elif c == "'":
154+
out.append("\\'" if target == "'" else "'")
155+
else:
156+
out.append(c)
157+
i += 1
158+
return "".join(out)
130159

131160

132161
def _normalize_quotes(literal: str) -> str:
@@ -143,23 +172,22 @@ def _normalize_quotes(literal: str) -> str:
143172
and literal[0] in ("'", '"', "`")
144173
), f"expected a quoted string literal, got {literal!r}"
145174
delim = literal[0]
146-
content = _decode_literal(literal[1:-1])
147-
has_double = '"' in content
148-
has_single = "'" in content
175+
inner = literal[1:-1]
176+
has_double, has_single, has_backtick = _scan_quotes(inner)
149177
if not has_double:
150178
target = '"'
151179
elif not has_single:
152180
target = "'"
153181
else:
154182
target = "`"
155-
if target == "`" and "`" in content:
156-
# A string containing a double quote, a single quote, and a backtick
157-
# can't use any style without escaping, and a backtick can't be
158-
# escaped, so leave the literal as the author wrote it.
183+
if target == "`" and has_backtick:
184+
# Contains a double quote, a single quote, and a backtick; no style can
185+
# hold all three without escaping, and a backtick can't be escaped, so
186+
# leave the literal as the author wrote it.
159187
return literal
160188
if target == delim:
161189
return literal
162-
return _encode_literal(content, target)
190+
return target + _requote(inner, target) + target
163191

164192

165193
class Formatter:

tests/format/012_quotes.input.cf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ vars:
2020
"n" string => `e\\f`;
2121
"o" string => 'mix "q" it\'s `tick`';
2222
"p" string => 'a\\b "c" it\'s';
23-
"q" string => "foo\
24-
bar";
23+
"q" string => 'foo\
24+
bar';
2525

2626
reports:
2727
'a single-quoted promiser';

0 commit comments

Comments
 (0)