replaced tabs with spaces
This commit is contained in:
@@ -12,7 +12,7 @@ line or use the curl interface.
|
||||
|
||||
The latest release of Curl_easy, a Perl interface to curl is available from
|
||||
|
||||
http://curl.haxx.se/libcurl/perl/
|
||||
http://curl.haxx.se/libcurl/perl/
|
||||
|
||||
(Georg Horn's original version of Curl_easy, supporting curl versions
|
||||
before 7.7 is still available from: http://www.koblenz-net.de/~horn/export/ )
|
||||
|
@@ -88,32 +88,32 @@ sub SplitURL {
|
||||
my $inurl = $_[0];
|
||||
|
||||
if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = $4;
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = $4;
|
||||
}
|
||||
elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = "";
|
||||
|
||||
if($getpath !~ /\//) {
|
||||
$getpath ="";
|
||||
$getdocument = $3;
|
||||
}
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = "";
|
||||
|
||||
if($getpath !~ /\//) {
|
||||
$getpath ="";
|
||||
$getdocument = $3;
|
||||
}
|
||||
|
||||
}
|
||||
elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = "";
|
||||
$getdocument = "";
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = "";
|
||||
$getdocument = "";
|
||||
}
|
||||
else {
|
||||
print "Couldn't parse the specified URL, retry please!\n";
|
||||
exit;
|
||||
print "Couldn't parse the specified URL, retry please!\n";
|
||||
exit;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,52 +127,52 @@ sub SplitURL {
|
||||
|
||||
if(!$usestdin) {
|
||||
open(HEADGET, "$linkcheck $geturl|") ||
|
||||
die "Couldn't get web page for some reason";
|
||||
die "Couldn't get web page for some reason";
|
||||
headget:
|
||||
while(<HEADGET>) {
|
||||
# print $_;
|
||||
if($_ =~ /HTTP\/.*3\d\d /) {
|
||||
$pagemoved=1;
|
||||
}
|
||||
elsif($pagemoved &&
|
||||
($_ =~ /^Location: (.*)/)) {
|
||||
$geturl = $1;
|
||||
# print $_;
|
||||
if($_ =~ /HTTP\/.*3\d\d /) {
|
||||
$pagemoved=1;
|
||||
}
|
||||
elsif($pagemoved &&
|
||||
($_ =~ /^Location: (.*)/)) {
|
||||
$geturl = $1;
|
||||
|
||||
&SplitURL($geturl);
|
||||
&SplitURL($geturl);
|
||||
|
||||
$pagemoved++;
|
||||
last headget;
|
||||
}
|
||||
$pagemoved++;
|
||||
last headget;
|
||||
}
|
||||
}
|
||||
close(HEADGET);
|
||||
|
||||
if($pagemoved == 1) {
|
||||
print "Page is moved but we don't know where. Did you forget the ",
|
||||
"traling slash?\n";
|
||||
exit;
|
||||
print "Page is moved but we don't know where. Did you forget the ",
|
||||
"traling slash?\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
open(WEBGET, "$htmlget $geturl|") ||
|
||||
die "Couldn't get web page for some reason";
|
||||
die "Couldn't get web page for some reason";
|
||||
|
||||
while(<WEBGET>) {
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
# print $line."\n";
|
||||
$in=$in.$line;
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
# print $line."\n";
|
||||
$in=$in.$line;
|
||||
}
|
||||
|
||||
close(WEBGET);
|
||||
}
|
||||
else {
|
||||
while(<STDIN>) {
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
$in=$in.$line;
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
$in=$in.$line;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,21 +193,21 @@ sub LinkWorks {
|
||||
|
||||
boo:
|
||||
if( $doc[0] =~ /^HTTP[^ ]+ (\d+)/ ) {
|
||||
$error = $1;
|
||||
$error = $1;
|
||||
|
||||
if($error < 400 ) {
|
||||
return "GOOD";
|
||||
}
|
||||
else {
|
||||
if($error < 400 ) {
|
||||
return "GOOD";
|
||||
}
|
||||
else {
|
||||
|
||||
if($head && ($error >= 500)) {
|
||||
# This server doesn't like HEAD!
|
||||
@doc = `$linkcheckfull \"$check\"`;
|
||||
$head = 0;
|
||||
goto boo;
|
||||
}
|
||||
return "BAD";
|
||||
}
|
||||
if($head && ($error >= 500)) {
|
||||
# This server doesn't like HEAD!
|
||||
@doc = `$linkcheckfull \"$check\"`;
|
||||
$head = 0;
|
||||
goto boo;
|
||||
}
|
||||
return "BAD";
|
||||
}
|
||||
}
|
||||
return "BAD";
|
||||
}
|
||||
@@ -219,43 +219,43 @@ sub GetLinks {
|
||||
|
||||
getlinkloop:
|
||||
while($in =~ /[^<]*(<[^>]+>)/g ) {
|
||||
# we have a tag in $1
|
||||
$tag = $1;
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
else {
|
||||
if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
|
||||
$url=$2;
|
||||
if($url =~ /^\"(.*)\"$/) {
|
||||
# this was a "string" now $1 has removed the quotes:
|
||||
$url=$1;
|
||||
}
|
||||
# we have a tag in $1
|
||||
$tag = $1;
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
else {
|
||||
if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
|
||||
$url=$2;
|
||||
if($url =~ /^\"(.*)\"$/) {
|
||||
# this was a "string" now $1 has removed the quotes:
|
||||
$url=$1;
|
||||
}
|
||||
|
||||
|
||||
$url =~ s/([^\#]*)\#.*/$1/g;
|
||||
$url =~ s/([^\#]*)\#.*/$1/g;
|
||||
|
||||
if($url eq "") {
|
||||
# if the link was nothing than a #-link it may now have
|
||||
# been emptied completely so then we skip the rest
|
||||
next getlinkloop;
|
||||
}
|
||||
if($url eq "") {
|
||||
# if the link was nothing than a #-link it may now have
|
||||
# been emptied completely so then we skip the rest
|
||||
next getlinkloop;
|
||||
}
|
||||
|
||||
if($done{$url}) {
|
||||
# if this url already is done, do next
|
||||
$done{$url}++;
|
||||
next getlinkloop;
|
||||
}
|
||||
if($done{$url}) {
|
||||
# if this url already is done, do next
|
||||
$done{$url}++;
|
||||
next getlinkloop;
|
||||
}
|
||||
|
||||
$done{$url} = 1; # this is "done"
|
||||
$done{$url} = 1; # this is "done"
|
||||
|
||||
push @result, $url;
|
||||
if($tag =~ /< *([^ ]+)/) {
|
||||
# print "TAG: $1\n";
|
||||
$tagtype{$url}=$1;
|
||||
}
|
||||
}
|
||||
push @result, $url;
|
||||
if($tag =~ /< *([^ ]+)/) {
|
||||
# print "TAG: $1\n";
|
||||
$tagtype{$url}=$1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return @result;
|
||||
@@ -268,36 +268,36 @@ for(@links) {
|
||||
$url = $_;
|
||||
|
||||
if($url =~ /^([^:]+):/) {
|
||||
$prot = $1;
|
||||
# if($prot !~ /(http|ftp)/i) {
|
||||
if($prot !~ /http/i) {
|
||||
# this is an unsupported protocol, we ignore this
|
||||
next linkloop;
|
||||
}
|
||||
$link = $url;
|
||||
$prot = $1;
|
||||
# if($prot !~ /(http|ftp)/i) {
|
||||
if($prot !~ /http/i) {
|
||||
# this is an unsupported protocol, we ignore this
|
||||
next linkloop;
|
||||
}
|
||||
$link = $url;
|
||||
}
|
||||
else {
|
||||
if($external) {
|
||||
next linkloop;
|
||||
}
|
||||
if($external) {
|
||||
next linkloop;
|
||||
}
|
||||
|
||||
# this is a link on the save server:
|
||||
if($url =~ /^\//) {
|
||||
# from root
|
||||
$link = "$getprotocol://$getserver$url";
|
||||
}
|
||||
else {
|
||||
# from the scanned page's dir
|
||||
$nyurl=$url;
|
||||
# this is a link on the save server:
|
||||
if($url =~ /^\//) {
|
||||
# from root
|
||||
$link = "$getprotocol://$getserver$url";
|
||||
}
|
||||
else {
|
||||
# from the scanned page's dir
|
||||
$nyurl=$url;
|
||||
|
||||
if(length($getpath) &&
|
||||
($getpath !~ /\/$/) &&
|
||||
($nyurl !~ /^\//)) {
|
||||
# lacks ending slash, add one to the document part:
|
||||
$nyurl = "/".$nyurl;
|
||||
}
|
||||
$link = "$getprotocol://$getserver/$getpath$nyurl";
|
||||
}
|
||||
if(length($getpath) &&
|
||||
($getpath !~ /\/$/) &&
|
||||
($nyurl !~ /^\//)) {
|
||||
# lacks ending slash, add one to the document part:
|
||||
$nyurl = "/".$nyurl;
|
||||
}
|
||||
$link = "$getprotocol://$getserver/$getpath$nyurl";
|
||||
}
|
||||
}
|
||||
|
||||
#print "test $link\n";
|
||||
@@ -313,16 +313,16 @@ for(@links) {
|
||||
|
||||
# If bad and -l, present the line numbers of the usage
|
||||
if("BAD" eq $success) {
|
||||
$badlinks++;
|
||||
if($linenumber) {
|
||||
$line =1;
|
||||
for(@indoc) {
|
||||
if($_ =~ /$url/) {
|
||||
print " line $line\n";
|
||||
}
|
||||
$line++;
|
||||
}
|
||||
}
|
||||
$badlinks++;
|
||||
if($linenumber) {
|
||||
$line =1;
|
||||
for(@indoc) {
|
||||
if($_ =~ /$url/) {
|
||||
print " line $line\n";
|
||||
}
|
||||
$line++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -330,7 +330,7 @@ for(@links) {
|
||||
if($verbose) {
|
||||
print "$allcount links were checked";
|
||||
if($badlinks > 0) {
|
||||
print ", $badlinks were found bad";
|
||||
print ", $badlinks were found bad";
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
|
@@ -125,32 +125,32 @@ my $badlinks=0;
|
||||
sub SplitURL {
|
||||
my $inurl = $_[0];
|
||||
if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = $4;
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = $4;
|
||||
}
|
||||
elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = "";
|
||||
|
||||
if($getpath !~ /\//) {
|
||||
$getpath ="";
|
||||
$getdocument = $3;
|
||||
}
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = "";
|
||||
|
||||
if($getpath !~ /\//) {
|
||||
$getpath ="";
|
||||
$getdocument = $3;
|
||||
}
|
||||
|
||||
}
|
||||
elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = "";
|
||||
$getdocument = "";
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = "";
|
||||
$getdocument = "";
|
||||
}
|
||||
else {
|
||||
print "Couldn't parse the specified URL, retry please!\n";
|
||||
exit;
|
||||
print "Couldn't parse the specified URL, retry please!\n";
|
||||
exit;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,35 +164,35 @@ sub GetRootPage {
|
||||
|
||||
my $pagemoved=0;
|
||||
open(HEADGET, "$linkcheck $geturl|") ||
|
||||
die "Couldn't get web page for some reason";
|
||||
die "Couldn't get web page for some reason";
|
||||
|
||||
while(<HEADGET>) {
|
||||
#print STDERR $_;
|
||||
if($_ =~ /HTTP\/1\.[01] (\d\d\d) /) {
|
||||
#print STDERR $_;
|
||||
if($_ =~ /HTTP\/1\.[01] (\d\d\d) /) {
|
||||
$code=$1;
|
||||
if($code =~ /^3/) {
|
||||
$pagemoved=1;
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif($_ =~ /^Content-Type: ([\/a-zA-Z]+)/) {
|
||||
$type=$1;
|
||||
}
|
||||
elsif($pagemoved &&
|
||||
($_ =~ /^Location: (.*)/)) {
|
||||
$geturl = $1;
|
||||
elsif($pagemoved &&
|
||||
($_ =~ /^Location: (.*)/)) {
|
||||
$geturl = $1;
|
||||
|
||||
&SplitURL($geturl);
|
||||
&SplitURL($geturl);
|
||||
|
||||
$pagemoved++;
|
||||
last;
|
||||
}
|
||||
$pagemoved++;
|
||||
last;
|
||||
}
|
||||
}
|
||||
close(HEADGET);
|
||||
|
||||
if($pagemoved == 1) {
|
||||
print "Page is moved but we don't know where. Did you forget the ",
|
||||
"traling slash?\n";
|
||||
exit;
|
||||
print "Page is moved but we don't know where. Did you forget the ",
|
||||
"traling slash?\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
if($type ne "text/html") {
|
||||
@@ -229,21 +229,21 @@ sub LinkWorks {
|
||||
|
||||
boo:
|
||||
if( $doc[0] =~ /^HTTP[^ ]+ (\d+)/ ) {
|
||||
my $error = $1;
|
||||
my $error = $1;
|
||||
|
||||
if($error < 400 ) {
|
||||
return "GOOD";
|
||||
}
|
||||
else {
|
||||
if($error < 400 ) {
|
||||
return "GOOD";
|
||||
}
|
||||
else {
|
||||
|
||||
if($head && ($error >= 500)) {
|
||||
# This server doesn't like HEAD!
|
||||
@doc = `$linkcheckfull \"$check\"`;
|
||||
$head = 0;
|
||||
goto boo;
|
||||
}
|
||||
return "BAD";
|
||||
}
|
||||
if($head && ($error >= 500)) {
|
||||
# This server doesn't like HEAD!
|
||||
@doc = `$linkcheckfull \"$check\"`;
|
||||
$head = 0;
|
||||
goto boo;
|
||||
}
|
||||
return "BAD";
|
||||
}
|
||||
}
|
||||
return "BAD";
|
||||
}
|
||||
@@ -254,45 +254,45 @@ sub GetLinks {
|
||||
my @result;
|
||||
|
||||
while($in =~ /[^<]*(<[^>]+>)/g ) {
|
||||
# we have a tag in $1
|
||||
my $tag = $1;
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
else {
|
||||
if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ \)>]*)/i) {
|
||||
my $url=$2;
|
||||
if($url =~ /^\"(.*)\"$/) {
|
||||
# this was a "string" now $1 has removed the quotes:
|
||||
$url=$1;
|
||||
}
|
||||
# we have a tag in $1
|
||||
my $tag = $1;
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
else {
|
||||
if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ \)>]*)/i) {
|
||||
my $url=$2;
|
||||
if($url =~ /^\"(.*)\"$/) {
|
||||
# this was a "string" now $1 has removed the quotes:
|
||||
$url=$1;
|
||||
}
|
||||
|
||||
|
||||
$url =~ s/([^\#]*)\#.*/$1/g;
|
||||
$url =~ s/([^\#]*)\#.*/$1/g;
|
||||
|
||||
if($url eq "") {
|
||||
# if the link was nothing than a #-link it may now have
|
||||
# been emptied completely so then we skip the rest
|
||||
next;
|
||||
}
|
||||
if($url eq "") {
|
||||
# if the link was nothing than a #-link it may now have
|
||||
# been emptied completely so then we skip the rest
|
||||
next;
|
||||
}
|
||||
|
||||
if($done{$url}) {
|
||||
# if this url already is done, do next
|
||||
$done{$url}++;
|
||||
if($done{$url}) {
|
||||
# if this url already is done, do next
|
||||
$done{$url}++;
|
||||
if($verbose) {
|
||||
print " FOUND $url but that is already checked\n";
|
||||
}
|
||||
next;
|
||||
}
|
||||
next;
|
||||
}
|
||||
|
||||
$done{$url} = 1; # this is "done"
|
||||
$done{$url} = 1; # this is "done"
|
||||
|
||||
push @result, $url;
|
||||
if($tag =~ /< *([^ ]+)/) {
|
||||
$tagtype{$url}=$1;
|
||||
}
|
||||
}
|
||||
push @result, $url;
|
||||
if($tag =~ /< *([^ ]+)/) {
|
||||
$tagtype{$url}=$1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return @result;
|
||||
@@ -437,7 +437,7 @@ while(1) {
|
||||
if($verbose) {
|
||||
print "$allcount links were checked";
|
||||
if($badlinks > 0) {
|
||||
print ", $badlinks were found bad";
|
||||
print ", $badlinks were found bad";
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
|
@@ -74,7 +74,7 @@ while(<STDIN>) {
|
||||
while($in =~ /[^<]*(<[^>]+>)/g ) {
|
||||
# we have a tag in $1
|
||||
$tag = $1;
|
||||
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
@@ -99,7 +99,7 @@ while($in =~ /[^<]*(<[^>]+>)/g ) {
|
||||
|
||||
if($enctype eq "multipart/form-data") {
|
||||
$enctype="multipart form upload [use -F]"
|
||||
}
|
||||
}
|
||||
$enctype = "\n--- type: $enctype";
|
||||
}
|
||||
else {
|
||||
@@ -134,7 +134,7 @@ while($in =~ /[^<]*(<[^>]+>)/g ) {
|
||||
$mtag = $1;
|
||||
|
||||
($name, $value)=namevalue($tag);
|
||||
|
||||
|
||||
if($mtag =~ /select/i) {
|
||||
print "Select: NAME=\"$name\"\n";
|
||||
push @vars, "$name";
|
||||
|
@@ -80,32 +80,32 @@ sub SplitURL {
|
||||
my $inurl = $_[0];
|
||||
|
||||
if($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = $4;
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = $4;
|
||||
}
|
||||
elsif ($inurl=~ /^([^:]+):\/\/([^\/]*)\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = "";
|
||||
|
||||
if($getpath !~ /\//) {
|
||||
$getpath ="";
|
||||
$getdocument = $3;
|
||||
}
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = $3;
|
||||
$getdocument = "";
|
||||
|
||||
if($getpath !~ /\//) {
|
||||
$getpath ="";
|
||||
$getdocument = $3;
|
||||
}
|
||||
|
||||
}
|
||||
elsif ($inurl=~ /^([^:]+):\/\/(.*)/ ) {
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = "";
|
||||
$getdocument = "";
|
||||
$getprotocol = $1;
|
||||
$getserver = $2;
|
||||
$getpath = "";
|
||||
$getdocument = "";
|
||||
}
|
||||
else {
|
||||
print "Couldn't parse the specified URL, retry please!\n";
|
||||
exit;
|
||||
print "Couldn't parse the specified URL, retry please!\n";
|
||||
exit;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,52 +119,52 @@ sub SplitURL {
|
||||
|
||||
if(!$usestdin) {
|
||||
open(HEADGET, "$linkcheck $geturl|") ||
|
||||
die "Couldn't get web page for some reason";
|
||||
die "Couldn't get web page for some reason";
|
||||
headget:
|
||||
while(<HEADGET>) {
|
||||
# print $_;
|
||||
if($_ =~ /HTTP\/.*3\d\d /) {
|
||||
$pagemoved=1;
|
||||
}
|
||||
elsif($pagemoved &&
|
||||
($_ =~ /^Location: (.*)/)) {
|
||||
$geturl = $1;
|
||||
# print $_;
|
||||
if($_ =~ /HTTP\/.*3\d\d /) {
|
||||
$pagemoved=1;
|
||||
}
|
||||
elsif($pagemoved &&
|
||||
($_ =~ /^Location: (.*)/)) {
|
||||
$geturl = $1;
|
||||
|
||||
&SplitURL($geturl);
|
||||
&SplitURL($geturl);
|
||||
|
||||
$pagemoved++;
|
||||
last headget;
|
||||
}
|
||||
$pagemoved++;
|
||||
last headget;
|
||||
}
|
||||
}
|
||||
close(HEADGET);
|
||||
|
||||
if($pagemoved == 1) {
|
||||
print "Page is moved but we don't know where. Did you forget the ",
|
||||
"traling slash?\n";
|
||||
exit;
|
||||
print "Page is moved but we don't know where. Did you forget the ",
|
||||
"traling slash?\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
open(WEBGET, "$htmlget $geturl|") ||
|
||||
die "Couldn't get web page for some reason";
|
||||
die "Couldn't get web page for some reason";
|
||||
|
||||
while(<WEBGET>) {
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
# print $line."\n";
|
||||
$in=$in.$line;
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
# print $line."\n";
|
||||
$in=$in.$line;
|
||||
}
|
||||
|
||||
close(WEBGET);
|
||||
}
|
||||
else {
|
||||
while(<STDIN>) {
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
$in=$in.$line;
|
||||
$line = $_;
|
||||
push @indoc, $line;
|
||||
$line=~ s/\n//g;
|
||||
$line=~ s/\r//g;
|
||||
$in=$in.$line;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,43 +174,43 @@ sub GetLinks {
|
||||
|
||||
getlinkloop:
|
||||
while($in =~ /[^<]*(<[^>]+>)/g ) {
|
||||
# we have a tag in $1
|
||||
$tag = $1;
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
else {
|
||||
if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
|
||||
$url=$2;
|
||||
if($url =~ /^\"(.*)\"$/) {
|
||||
# this was a "string" now $1 has removed the quotes:
|
||||
$url=$1;
|
||||
}
|
||||
# we have a tag in $1
|
||||
$tag = $1;
|
||||
|
||||
if($tag =~ /^<!--/) {
|
||||
# this is a comment tag, ignore it
|
||||
}
|
||||
else {
|
||||
if($tag =~ /(src|href|background|archive) *= *(\"[^\"]\"|[^ )>]*)/i) {
|
||||
$url=$2;
|
||||
if($url =~ /^\"(.*)\"$/) {
|
||||
# this was a "string" now $1 has removed the quotes:
|
||||
$url=$1;
|
||||
}
|
||||
|
||||
|
||||
$url =~ s/([^\#]*)\#.*/$1/g;
|
||||
$url =~ s/([^\#]*)\#.*/$1/g;
|
||||
|
||||
if($url eq "") {
|
||||
# if the link was nothing than a #-link it may now have
|
||||
# been emptied completely so then we skip the rest
|
||||
next getlinkloop;
|
||||
}
|
||||
if($url eq "") {
|
||||
# if the link was nothing than a #-link it may now have
|
||||
# been emptied completely so then we skip the rest
|
||||
next getlinkloop;
|
||||
}
|
||||
|
||||
if($done{$url}) {
|
||||
# if this url already is done, do next
|
||||
$done{$url}++;
|
||||
next getlinkloop;
|
||||
}
|
||||
if($done{$url}) {
|
||||
# if this url already is done, do next
|
||||
$done{$url}++;
|
||||
next getlinkloop;
|
||||
}
|
||||
|
||||
$done{$url} = 1; # this is "done"
|
||||
$done{$url} = 1; # this is "done"
|
||||
|
||||
push @result, $url;
|
||||
if($tag =~ /< *([^ ]+)/) {
|
||||
# print "TAG: $1\n";
|
||||
$tagtype{$url}=$1;
|
||||
}
|
||||
}
|
||||
push @result, $url;
|
||||
if($tag =~ /< *([^ ]+)/) {
|
||||
# print "TAG: $1\n";
|
||||
$tagtype{$url}=$1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return @result;
|
||||
@@ -223,38 +223,38 @@ for(@links) {
|
||||
$url = $_;
|
||||
|
||||
if($url =~ /^([^:]+):/) {
|
||||
$link = $url;
|
||||
$link = $url;
|
||||
}
|
||||
else {
|
||||
# this is an absolute link on the same server:
|
||||
if($url =~ /^\//) {
|
||||
# from root
|
||||
$link = "$getprotocol://$getserver$url";
|
||||
}
|
||||
else {
|
||||
# from the scanned page's dir
|
||||
$nyurl=$url;
|
||||
# this is an absolute link on the same server:
|
||||
if($url =~ /^\//) {
|
||||
# from root
|
||||
$link = "$getprotocol://$getserver$url";
|
||||
}
|
||||
else {
|
||||
# from the scanned page's dir
|
||||
$nyurl=$url;
|
||||
|
||||
if(length($getpath) &&
|
||||
($getpath !~ /\/$/) &&
|
||||
($nyurl !~ /^\//)) {
|
||||
# lacks ending slash, add one to the document part:
|
||||
$nyurl = "/".$nyurl;
|
||||
}
|
||||
$link = "$getprotocol://$getserver/$getpath$nyurl";
|
||||
}
|
||||
if(length($getpath) &&
|
||||
($getpath !~ /\/$/) &&
|
||||
($nyurl !~ /^\//)) {
|
||||
# lacks ending slash, add one to the document part:
|
||||
$nyurl = "/".$nyurl;
|
||||
}
|
||||
$link = "$getprotocol://$getserver/$getpath$nyurl";
|
||||
}
|
||||
}
|
||||
|
||||
if($link =~ /$getregex/) {
|
||||
if($display) {
|
||||
print "$link\n";
|
||||
}
|
||||
else {
|
||||
if($verbose) {
|
||||
print "Gets $link\n";
|
||||
}
|
||||
print `$urlget $link`;
|
||||
}
|
||||
if($display) {
|
||||
print "$link\n";
|
||||
}
|
||||
else {
|
||||
if($verbose) {
|
||||
print "Gets $link\n";
|
||||
}
|
||||
print `$urlget $link`;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -51,7 +51,7 @@ for(@all) {
|
||||
}
|
||||
print "Recursing for dir $dir$name in target $target/$name\n";
|
||||
|
||||
$nextdepth=$maxdepth-1;
|
||||
$nextdepth=$maxdepth-1;
|
||||
print `$0 $dir$name/ $target/$name $nextdepth`;
|
||||
}
|
||||
elsif($firstletter eq "-") {
|
||||
|
Reference in New Issue
Block a user