diff --git a/lib/URI/Find.pm b/lib/URI/Find.pm index ad2ec75..21ae3db 100644 --- a/lib/URI/Find.pm +++ b/lib/URI/Find.pm @@ -22,9 +22,9 @@ my $mark = q(-_.!~*'()); my $unreserved = "A-Za-z0-9\Q$mark\E"; my $uric = quotemeta($reserved) . '\p{isAlpha}' . $unreserved . "%"; -# URI scheme pattern without the non-alpha numerics. +# URI scheme pattern without non-alphanumerics (except '+') or very long schemes (over 17 characters). # Those are extremely uncommon and interfere with the match. -my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9\+]*/; +my($schemeRe) = qr/[a-zA-Z][a-zA-Z0-9\+]{0,16}/; my($uricSet) = $uric; # use new set # Some schemes which URI.pm does not explicitly support. diff --git a/lib/URI/Find/Schemeless.pm b/lib/URI/Find/Schemeless.pm index 11283ef..4d78983 100644 --- a/lib/URI/Find/Schemeless.pm +++ b/lib/URI/Find/Schemeless.pm @@ -61,7 +61,7 @@ sub schemeless_uri_re { # false match of "Lite.pm" via "MIME/Lite.pm". (?: ^ | (?<=[\s<>()\{\}\[\]]) ) # hostname - (?: [$dnsSet]+(?:\.[$dnsSet]+)*\.$tldRe + (?: [$dnsSet]{1,63}(?:\.[$dnsSet]{1,63}){0,126}\.$tldRe | (?:\d{1,3}\.){3}\d{1,3} ) # not inet_aton() complete (?: (?=[\s\Q$cruftSet\E]) # followed by unrelated thing