From 4fe02a3cbb8bb59b54ceaa2acfa48e8a50e5321d Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 26 Aug 2013 20:56:27 +0200 Subject: [PATCH 1/2] remove (reverse) sort options from urifind --- bin/urifind | 40 +--------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/bin/urifind b/bin/urifind index eea1c24..ce0a89a 100644 --- a/bin/urifind +++ b/bin/urifind @@ -17,8 +17,6 @@ use URI::Find; # What to do, and how my $help = 0; my $version = 0; -my $sort = 0; -my $reverse = 0; my $unique = 0; my $prefix = 0; my $noprefix = 0; @@ -27,11 +25,9 @@ my @schemes = (); my $dump = 0; Getopt::Long::Configure(qw{no_ignore_case bundling}); -GetOptions('s!' => \$sort, - 'u!' => \$unique, +GetOptions('u!' => \$unique, 'p!' => \$prefix, 'n!' => \$noprefix, - 'r!' => \$reverse, 'h!' => \$help, 'v!' => \$version, 'd!' => sub { $dump = 1 }, @@ -50,8 +46,6 @@ $prog - find URIs in a document and dump them to STDOUT. Options: - -s Sort results. - -r Reverse sort results (implies -s). -u Return unique results only. -n Don't include filename in output. -p Include filename in output (0 by default, but 1 if @@ -137,16 +131,6 @@ if ($unique) { @uris = grep { ++$unique{$_->[1]} == 1 } @uris; } -# Sort links, possibly in reverse -if ($sort || $reverse) { - if ($reverse) { - @uris = sort { $b->[1] cmp $a->[1] } @uris; - } - else { - @uris = sort { $a->[1] cmp $b->[1] } @uris; - } -} - # Flatten the arrayrefs if ($prefix) { @uris = map { join ': ' => @$_ } @uris; @@ -206,20 +190,6 @@ prefix") switch: http://www.boston.com/index.html http://use.perl.org/ -By default, URIs will be displayed in the order found; to sort them -ascii-betically, use the C<-s> ("sort") option. To reverse sort them, -use the C<-r> ("reverse") flag (C<-r> implies C<-s>). 
- - $ urifind -s file1 file2 - http://use.perl.org/ - http://www.boston.com/index.html - mailto:webmaster@boston.com - - $ urifind -r file1 file2 - mailto:webmaster@boston.com - http://www.boston.com/index.html - http://use.perl.org/ - Finally, F<urifind> supports limiting the returned URIs by scheme or by arbitrary pattern, using the C<-S> option (for schemes) and the C<-P> option. Both C<-S> and C<-P> can be specified multiple times: @@ -254,14 +224,6 @@ switch. =over 4 -=item -s - -Sort results. - -=item -r - -Reverse sort results (implies -s). - =item -u Return unique results only. From dcfc13aa7acadc89669bac61fc51be1078ae092d Mon Sep 17 00:00:00 2001 From: Lukas Mai Date: Mon, 26 Aug 2013 21:17:15 +0200 Subject: [PATCH 2/2] make urifind input/output streamable This makes it suitable for 'tail -f log | urifind', etc. --- bin/urifind | 56 ++++++++++++++++------------------------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/bin/urifind b/bin/urifind index ce0a89a..5994938 100644 --- a/bin/urifind +++ b/bin/urifind @@ -11,7 +11,6 @@ our $VERSION = 20111103; use File::Basename qw(basename); use Getopt::Long qw(GetOptions); -use IO::File; use URI::Find; # What to do, and how @@ -67,9 +66,6 @@ HELP exit(0); } -my (@uris, $count); -unshift @ARGV, \*STDIN unless @ARGV; - if (($prefix + $noprefix) > 1) { my $prog = basename $0; die "Can't specify -p and -n at the same time; try $prog -h\n"; @@ -99,47 +95,29 @@ if ($dump) { } # Find the URIs -for my $argv (@ARGV) { - my ($name, $fh, $data); - - $argv = \*STDIN if ($argv eq '-'); - - if (ref $argv eq 'GLOB') { - local $/; - $data = <$argv>; - $name = '' - } - else { - local $/; - $fh = IO::File->new($argv) or die "Can't open $argv: $!"; - $data = <$fh>; - $name = $argv; - } +my %seen; +my $finder = URI::Find->new( + sub { + my ($uri) = @_; - my $finder = URI::Find->new(sub { push @uris => [ $name, $_[0] ] }); - $finder->find(\$data); -} + # Apply patterns, in @pats + return '' if grep { $uri !~ /$_/ 
} @pats; -# Apply patterns, in @pats -for my $pat (@pats) { - @uris = grep { $_->[1] =~ /$pat/ } @uris; -} + # Remove redundant links + return '' if $unique && $seen{$uri}++; -# Remove redundant links -if ($unique) { - my %unique; - @uris = grep { ++$unique{$_->[1]} == 1 } @uris; -} + if ($prefix) { + $uri = ($ARGV eq '-' ? '' : $ARGV) . ": $uri"; + } -# Flatten the arrayrefs -if ($prefix) { - @uris = map { join ': ' => @$_ } @uris; -} -else { - @uris = map { $_->[1] } @uris; + print $uri, "\n"; + } +); +$| = 1; +while (my $line = readline) { + $finder->find(\$line); } -print map { "$_\n" } @uris; exit 0;