DevToolkit/retest.py at master · SamSi0322/DevToolkit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/usr/bin/env python3
"""
retest -- Test regex patterns from the CLI. Like regex101, but terminal. Zero deps.

Test patterns, see matches with groups, do replacements, read from files.
Uses Python's re module -- same regex flavor as your Python code.

Usage:
    py retest.py "\\d+" "abc 123 def 456"                    # Find matches
    py retest.py "(\\w+)@(\\w+)" "alice@gmail bob@yahoo"     # Capture groups
    py retest.py "(?P<y>\\d{4})-(?P<m>\\d{2})" "2024-01-15" # Named groups
    py retest.py "foo" "FOO bar foo" -i                       # Case insensitive
    py retest.py "^\\w+" "line1\\nline2" -m                   # Multiline
    py retest.py "\\d+" "abc 123 def" --replace "NUM"         # Replace matches
    py retest.py "error" -f app.log                           # Test against file
    py retest.py --ref                                        # Quick reference
    echo "test data" | py retest.py "\\w+"                    # Stdin
"""

import argparse
import os
import re
import sys
from pathlib import Path

# ANSI SGR escape sequences used to style terminal output.
# Every styled span is terminated with RESET (see c() and highlight_matches()).
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
CYAN = "\033[36m"
RED = "\033[31m"
MAGENTA = "\033[35m"
# Combined codes: background colour (43 / 46) plus black foreground (30).
BG_YELLOW = "\033[43;30m"
BG_CYAN = "\033[46;30m"

# Palette cycled through (modulo its length) when show_matches() prints
# numbered capture groups; the last two entries are SGR 34 and SGR 91.
GROUP_COLORS = [CYAN, MAGENTA, YELLOW, GREEN, "\033[34m", "\033[91m"]


def color_supported() -> bool:
    """Decide whether ANSI colour escapes should be emitted.

    Rules, checked in order:

    1. A non-empty ``NO_COLOR`` environment variable disables colour.
    2. On Windows (``sys.platform == "win32"``) colour is enabled when either
       ``TERM`` or ``WT_SESSION`` is set to a non-empty value; the tty state
       of stdout is not consulted on this platform.
    3. Everywhere else colour is enabled only when ``sys.stdout`` exposes
       ``isatty`` and reports a terminal.
    """
    env = os.environ
    if env.get("NO_COLOR"):
        return False

    if sys.platform == "win32":
        return any(env.get(name) for name in ("TERM", "WT_SESSION"))

    stream = sys.stdout
    if not hasattr(stream, "isatty"):
        return False
    return stream.isatty()


# Colour decision, evaluated once at import time; c() and highlight_matches()
# consult this flag instead of re-probing the environment on every call.
USE_COLOR = color_supported()


def c(code: str, text: str) -> str:
    """Return *text* wrapped in the ANSI sequence *code* followed by RESET.

    When colour is disabled (``USE_COLOR`` is false) *text* is returned
    unchanged.
    """
    if not USE_COLOR:
        return text
    return f"{code}{text}{RESET}"


def highlight_matches(text: str, pattern: re.Pattern) -> str:
    """Return *text* with every match of *pattern* wrapped in BG_YELLOW/RESET.

    The text is returned untouched when colour output is disabled.
    """
    if not USE_COLOR:
        return text

    pieces = []
    cursor = 0  # index just past the previous match
    for hit in pattern.finditer(text):
        begin, finish = hit.span()
        # Plain text between the previous match and this one, then the match.
        pieces += [text[cursor:begin], f"{BG_YELLOW}{hit.group()}{RESET}"]
        cursor = finish
    # Whatever follows the final match (or the whole text if nothing matched).
    pieces.append(text[cursor:])
    return "".join(pieces)


def _highlight_preview(text: str, matches, limit: int) -> str:
    """Return ``text[:limit]`` with the spans of *matches* highlighted.

    *matches* must be the match objects found on the full *text*, in order.
    Spans are clipped to *limit*, so a match straddling the cut is highlighted
    up to the cut only. Returns the plain prefix when colour is disabled.
    """
    clipped = text[:limit]
    if not USE_COLOR:
        return clipped

    pieces = []
    cursor = 0
    for m in matches:
        start, end = m.span()
        if start >= limit:
            break  # matches are ordered; the rest lie beyond the preview
        end = min(end, limit)
        pieces.append(clipped[cursor:start])
        pieces.append(f"{BG_YELLOW}{clipped[start:end]}{RESET}")
        cursor = end
    pieces.append(clipped[cursor:])
    return "".join(pieces)


def show_matches(text: str, pattern: re.Pattern, verbose: bool = False):
    """Print every match of *pattern* in *text*, with its groups.

    Output consists of a match count, a preview of the first 500 characters
    with matches highlighted, and a per-match breakdown (span, full match,
    numbered groups, named groups) for at most the first 20 matches.

    The preview is highlighted from the matches found on the complete text,
    so anchors and lookaheads are never re-evaluated against the truncated
    prefix.

    Args:
        text: The subject string.
        pattern: Compiled regular expression to apply.
        verbose: Accepted for interface compatibility; currently unused.
    """
    preview_limit = 500  # characters of text echoed back
    detail_limit = 20    # matches listed individually

    matches = list(pattern.finditer(text))

    if not matches:
        print(f"\n  {c(RED, 'No matches found.')}\n")
        return

    print(f"\n  {c(GREEN, f'{len(matches)} match(es) found')}\n")

    # Show highlighted text (first `preview_limit` chars)
    print(f"  {c(DIM, 'Text:')}")
    for line in _highlight_preview(text, matches, preview_limit).splitlines():
        print(f"    {line}")
    if len(text) > preview_limit:
        print(c(DIM, f"    ... ({len(text) - preview_limit} more chars)"))
    print()

    # Show each match
    for i, m in enumerate(matches[:detail_limit], 1):
        span = f"{m.start()}-{m.end()}"
        print(f"  {c(BOLD, f'Match {i}')} at {c(DIM, f'pos {span}')}:")
        print(f"    Full: {c(BG_YELLOW, m.group())}")

        # Numbered groups; a group that did not participate is shown as None
        for gi, g in enumerate(m.groups(), 1):
            gc = GROUP_COLORS[(gi - 1) % len(GROUP_COLORS)]
            val = g if g is not None else c(DIM, "None")
            print(f"    Group {gi}: {c(gc, str(val))}")

        # Named groups
        for name, val in m.groupdict().items():
            display = val if val is not None else c(DIM, "None")
            print(f"    {c(CYAN, name)}: {display}")

    remaining = len(matches) - detail_limit
    if remaining > 0:
        print(c(DIM, f"\n  ... and {remaining} more matches"))

    print()


def show_replace(text: str, pattern: re.Pattern, replacement: str):
    """Print *text* before and after replacing matches of *pattern*.

    The substitution and its count come from a single ``subn`` pass. Both the
    "Before" and "After" previews are limited to 300 characters, and each
    reports how many characters were left out.

    Args:
        text: The subject string.
        pattern: Compiled regular expression to apply.
        replacement: Replacement template; may use ``\\1`` / ``\\g<name>``.

    Exits with status 1 (after printing a message) when the replacement
    template is invalid, mirroring how an invalid pattern is reported.
    """
    preview_limit = 300

    try:
        result, count = pattern.subn(replacement, text)
    except (re.error, IndexError) as exc:
        # re raises re.error for bad escapes / numeric group references and
        # IndexError for an unknown group name in the template.
        print(f"\n  {c(RED, f'Invalid replacement: {exc}')}\n")
        sys.exit(1)

    print(f"\n  {c(BOLD, 'Replacement')} ({count} substitution(s))\n")
    print(f"  {c(DIM, 'Before:')}")
    for line in text[:preview_limit].splitlines():
        print(f"    {line}")
    if len(text) > preview_limit:
        print(c(DIM, f"    ... ({len(text) - preview_limit} more chars)"))
    print(f"\n  {c(DIM, 'After:')}")
    for line in result[:preview_limit].splitlines():
        print(f"    {c(GREEN, line)}")
    if len(result) > preview_limit:
        print(c(DIM, f"    ... ({len(result) - preview_limit} more chars)"))
    print()


def show_split(text: str, pattern: re.Pattern):
    """Print the pieces produced by splitting *text* on *pattern*.

    The header reports the total number of parts; only the first 30 are
    listed, each as its index followed by the ``repr`` of the piece.
    """
    pieces = pattern.split(text)
    total = len(pieces)
    print(f"\n  {c(BOLD, 'Split')} ({total} parts)\n")

    for idx, piece in enumerate(pieces):
        if idx >= 30:
            break
        label = c(DIM, f"{idx}:")
        print(f"    {label} {piece!r}")

    print()


def show_reference():
    """Print a regex quick-reference sheet to stdout.

    The sheet covers character classes, quantifiers, anchors, groups,
    alternation/lookaround and a few common patterns. Section headings are
    passed through c() and are therefore coloured only when colour is enabled.
    """
    # The sheet is a single f-string: literal braces are doubled ({{ }}) and
    # backslashes are doubled (\\) so they print as single characters.
    print(f"""
  {c(BOLD, 'Regex Quick Reference')} (Python re module)

  {c(CYAN, 'Character Classes')}
    .        Any character (except newline)
    \\d       Digit [0-9]
    \\D       Non-digit
    \\w       Word char [a-zA-Z0-9_]
    \\W       Non-word char
    \\s       Whitespace
    \\S       Non-whitespace
    [abc]    Character set
    [^abc]   Negated set
    [a-z]    Range

  {c(CYAN, 'Quantifiers')}
    *        0 or more
    +        1 or more
    ?        0 or 1
    {{n}}      Exactly n
    {{n,m}}    Between n and m
    *? +? ??  Non-greedy versions

  {c(CYAN, 'Anchors')}
    ^        Start of string/line
    $        End of string/line
    \\b       Word boundary

  {c(CYAN, 'Groups')}
    (abc)         Capture group
    (?P<name>x)   Named group
    (?:abc)       Non-capturing group
    \\1            Backreference to group 1

  {c(CYAN, 'Alternation & Lookahead')}
    a|b      Either a or b
    (?=x)    Lookahead
    (?!x)    Negative lookahead
    (?<=x)   Lookbehind
    (?<!x)   Negative lookbehind

  {c(CYAN, 'Common Patterns')}
    Email:    [\\w.-]+@[\\w.-]+\\.\\w+
    URL:      https?://[^\\s]+
    IPv4:     \\d{{1,3}}\\.\\d{{1,3}}\\.\\d{{1,3}}\\.\\d{{1,3}}
    Date:     \\d{{4}}-\\d{{2}}-\\d{{2}}
    Hex:      #[0-9a-fA-F]{{6}}
    Phone:    \\+?\\d{{1,3}}[-.\\s]?\\d{{3,4}}[-.\\s]?\\d{{4}}
""")


def main():
    """Command-line entry point: parse arguments, load the text, run one action.

    The text comes from ``--file`` if given, else the positional ``text``
    argument (with literal ``\\n`` / ``\\t`` sequences unescaped), else piped
    stdin. Exactly one action runs, chosen in this order: ``--count``,
    ``--replace``, ``--split``, otherwise the default match listing.

    Exits with status 1 on an invalid pattern, an unreadable file, or when no
    text source is available.
    """
    parser = argparse.ArgumentParser(
        description="retest -- test regex patterns from the CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("pattern", nargs="?", help="Regex pattern")
    parser.add_argument("text", nargs="?", help="Text to match against")
    parser.add_argument("-f", "--file", help="Read text from file instead")
    parser.add_argument("-i", "--ignorecase", action="store_true", help="Case insensitive")
    parser.add_argument("-m", "--multiline", action="store_true", help="Multiline mode (^ and $ match line boundaries)")
    parser.add_argument("-s", "--dotall", action="store_true", help="Dot matches newline")
    parser.add_argument("--replace", metavar="REPL", help="Replace matches with REPL")
    parser.add_argument("--split", action="store_true", help="Split text by pattern")
    parser.add_argument("--ref", action="store_true", help="Show regex quick reference")
    parser.add_argument("--count", action="store_true", help="Only show match count")

    args = parser.parse_args()

    if args.ref:
        show_reference()
        return

    # Compare against None: an empty string is a valid (always-matching) regex.
    if args.pattern is None:
        parser.print_help()
        return

    # Build the re flags and their display names from one table
    flag_table = (
        (args.ignorecase, re.IGNORECASE, "IGNORECASE"),
        (args.multiline, re.MULTILINE, "MULTILINE"),
        (args.dotall, re.DOTALL, "DOTALL"),
    )
    flags = 0
    flag_strs = []
    for enabled, flag, label in flag_table:
        if enabled:
            flags |= flag
            flag_strs.append(label)

    # Compile pattern
    try:
        pattern = re.compile(args.pattern, flags)
    except re.error as e:
        print(f"\n  {c(RED, f'Invalid pattern: {e}')}\n")
        sys.exit(1)

    # Get text
    if args.file:
        # Attempt the read and handle failure, so directories and permission
        # problems produce an error message instead of a traceback.
        try:
            text = Path(args.file).read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            print(f"Error: file not found: {args.file}", file=sys.stderr)
            sys.exit(1)
        except OSError as e:
            print(f"Error: cannot read {args.file}: {e}", file=sys.stderr)
            sys.exit(1)
    elif args.text is not None:
        # An explicit "" is legitimate input (e.g. testing "^$").
        # Unescape \n, \t for convenience
        text = args.text.replace("\\n", "\n").replace("\\t", "\t")
    elif not sys.stdin.isatty():
        text = sys.stdin.read()
    else:
        print("Error: provide text as argument, --file, or pipe stdin", file=sys.stderr)
        sys.exit(1)

    # Show pattern info
    flag_info = f" ({', '.join(flag_strs)})" if flag_strs else ""

    print(f"\n  {c(DIM, 'Pattern:')} {c(CYAN, args.pattern)}{c(DIM, flag_info)}")
    print(f"  {c(DIM, 'Text:')}    {len(text)} chars")

    # Execute
    if args.count:
        # Count lazily instead of materialising every match
        count = sum(1 for _ in pattern.finditer(text))
        print(f"\n  {count} match(es)\n")
    elif args.replace is not None:
        show_replace(text, pattern, args.replace)
    elif args.split:
        show_split(text, pattern)
    else:
        show_matches(text, pattern)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()