diff --git a/bin/urifind b/bin/urifind
index eea1c24..5994938 100644
--- a/bin/urifind
+++ b/bin/urifind
@@ -11,14 +11,11 @@ our $VERSION = 20111103;
 
 use File::Basename qw(basename);
 use Getopt::Long qw(GetOptions);
-use IO::File;
 use URI::Find;
 
 # What to do, and how
 my $help = 0;
 my $version = 0;
-my $sort = 0;
-my $reverse = 0;
 my $unique = 0;
 my $prefix = 0;
 my $noprefix = 0;
@@ -27,11 +24,9 @@ my @schemes = ();
 my $dump = 0;
 
 Getopt::Long::Configure(qw{no_ignore_case bundling});
-GetOptions('s!' => \$sort,
-           'u!' => \$unique,
+GetOptions('u!' => \$unique,
            'p!' => \$prefix,
            'n!' => \$noprefix,
-           'r!' => \$reverse,
            'h!' => \$help,
            'v!' => \$version,
            'd!' => sub { $dump = 1 },
@@ -50,8 +45,6 @@ $prog - find URIs in a document and dump them to STDOUT.
 
 Options:
 
-    -s Sort results.
-    -r Reverse sort results (implies -s).
     -u Return unique results only.
     -n Don't include filename in output.
     -p Include filename in output (0 by default, but 1 if
@@ -73,9 +66,6 @@ HELP
     exit(0);
 }
 
-my (@uris, $count);
-unshift @ARGV, \*STDIN unless @ARGV;
-
 if (($prefix + $noprefix) > 1) {
     my $prog = basename $0;
     die "Can't specify -p and -n at the same time; try $prog -h\n";
@@ -105,57 +95,29 @@ if ($dump) {
 }
 
 # Find the URIs
-for my $argv (@ARGV) {
-    my ($name, $fh, $data);
-
-    $argv = \*STDIN if ($argv eq '-');
+my %seen;
+my $finder = URI::Find->new(
+    sub {
+        my ($uri) = @_;
 
-    if (ref $argv eq 'GLOB') {
-        local $/;
-        $data = <$argv>;
-        $name = ''
-    }
-    else {
-        local $/;
-        $fh = IO::File->new($argv) or die "Can't open $argv: $!";
-        $data = <$fh>;
-        $name = $argv;
-    }
-
-    my $finder = URI::Find->new(sub { push @uris => [ $name, $_[0] ] });
-    $finder->find(\$data);
-}
+        # Apply patterns, in @pats
+        return '' if grep { $uri !~ /$_/ } @pats;
 
-# Apply patterns, in @pats
-for my $pat (@pats) {
-    @uris = grep { $_->[1] =~ /$pat/ } @uris;
-}
+        # Remove redundant links
+        return '' if $unique && $seen{$uri}++;
 
-# Remove redundant links
-if ($unique) {
-    my %unique;
-    @uris = grep { ++$unique{$_->[1]} == 1 } @uris;
-}
+        if ($prefix) {
+            $uri = ($ARGV eq '-' ? '' : $ARGV) . ": $uri";
+        }
 
-# Sort links, possibly in reverse
-if ($sort || $reverse) {
-    if ($reverse) {
-        @uris = sort { $b->[1] cmp $a->[1] } @uris;
-    }
-    else {
-        @uris = sort { $a->[1] cmp $b->[1] } @uris;
+        print $uri, "\n";
     }
+);
+$| = 1;
+while (my $line = readline) {
+    $finder->find(\$line);
 }
 
-# Flatten the arrayrefs
-if ($prefix) {
-    @uris = map { join ': ' => @$_ } @uris;
-}
-else {
-    @uris = map { $_->[1] } @uris;
-}
-
-print map { "$_\n" } @uris;
 exit 0;
 
 
@@ -206,20 +168,6 @@ prefix") switch:
     http://www.boston.com/index.html
     http://use.perl.org/
 
-By default, URIs will be displayed in the order found; to sort them
-ascii-betically, use the C<-s> ("sort") option. To reverse sort them,
-use the C<-r> ("reverse") flag (C<-r> implies C<-s>).
-
-    $ urifind -s file1 file2
-    http://use.perl.org/
-    http://www.boston.com/index.html
-    mailto:webmaster@boston.com
-
-    $ urifind -r file1 file2
-    mailto:webmaster@boston.com
-    http://www.boston.com/index.html
-    http://use.perl.org/
-
 Finally, F<urifind> supports limiting the returned URIs by scheme or
 by arbitrary pattern, using the C<-S> option (for schemes) and the
 C<-P> option. Both C<-S> and C<-P> can be specified multiple times:
@@ -254,14 +202,6 @@ switch.
 
 =over 4
 
-=item -s
-
-Sort results.
-
-=item -r
-
-Reverse sort results (implies -s).
-
 =item -u
 
 Return unique results only.