From 891ececae27e2c905cbe3cae94a61e5487ef231f Mon Sep 17 00:00:00 2001 From: Giorgio Premi Date: Thu, 7 May 2020 17:43:56 +0200 Subject: [PATCH] Prevent regex intensive execution which may cause DoS There are many dot-asterisk greedy matches which may cause very intensive, even indefinite, execution of the script. The worst one was the schemeRe match, which, if executed (by accident, or by honoring the MIME headers) against base64 text, will cause infinite execution. Tested against a ~1MB file, this patch drops wall-clock execution time from many hundreds of seconds to a couple of seconds. --- lib/URI/Find.pm | 4 ++-- lib/URI/Find/Schemeless.pm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/URI/Find.pm b/lib/URI/Find.pm index ad2ec75..21ae3db 100644 --- a/lib/URI/Find.pm +++ b/lib/URI/Find.pm @@ -22,9 +22,9 @@ my $mark = q(-_.!~*'()); my $unreserved = "A-Za-z0-9\Q$mark\E"; my $uric = quotemeta($reserved) . '\p{isAlpha}' . $unreserved . "%"; -# URI scheme pattern without the non-alpha numerics. +# URI scheme pattern without the non-alpha numerics or when very long. # Those are extremely uncommon and interfere with the match. -my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9\+]*/; +my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9\+]{0,16}/; my($uricSet) = $uric; # use new set # Some schemes which URI.pm does not explicitly support. diff --git a/lib/URI/Find/Schemeless.pm b/lib/URI/Find/Schemeless.pm index 11283ef..4d78983 100644 --- a/lib/URI/Find/Schemeless.pm +++ b/lib/URI/Find/Schemeless.pm @@ -61,7 +61,7 @@ sub schemeless_uri_re { # false match of "Lite.pm" via "MIME/Lite.pm". (?: ^ | (?<=[\s<>()\{\}\[\]]) ) # hostname - (?: [$dnsSet]+(?:\.[$dnsSet]+)*\.$tldRe + (?: [$dnsSet]{1,63}(?:\.[$dnsSet]{1,63}){0,126}\.$tldRe | (?:\d{1,3}\.){3}\d{1,3} ) # not inet_aton() complete (?: (?=[\s\Q$cruftSet\E]) # followed by unrelated thing