Who needs the Google API? Google and Live spiders are here :)
8. October 2006
If you are a guestbook spammer or some blackhat SEO warrior, I am sure you have run into the problem of how to search for your targets. After overusing the Google API with strange words I got banned. The only way left was a script which parses the results directly from the website. So here are two which I created in Perl: one for Google, the second for Live.com. But be careful, Google bans for overusing the web interface too, so please use some proxies; I have read stories about a whole class C subnet being blocked for one month.
google-spider.pl
#! /usr/bin/perl -w
#
# spammer project
# --------------------------------
#
# usage: set all configs in script and run em
#
# (c) 2006 sh@isecure.cz
#
use strict;
use warnings;
use LWP;
use HTTP::Cookies;
use constant SEARCHTEXT => '"trackback.php?id="';
use constant FILE => 'trackback.urls'; # Output file
use constant GOOGLE => 'http://www.google.cz/search?q=';
use constant COOKIESFILE => 'cookies.lwp';
# Main flow of google-spider.pl: request successive result pages for
# SEARCHTEXT from the GOOGLE base URL and append every result link to FILE.
my $browser = LWP::UserAgent->new;
my $response;
my $text;
my $stranka = 0;             # page number reported by the page just fetched
my $predchozistranka = 0;    # page number reported by the previously fetched page
# SET PROXY !!!!
# $browser->proxy(['http', 'ftp'], 'http://213.176.161.200:553');
$browser->cookie_jar( HTTP::Cookies->new(
    'file'     => COOKIESFILE,    # where to read/write cookies
    'autosave' => 1,              # save it to disk when done
));
my @ns_headers = (
    'User-Agent'      => 'Mozilla/4.76 [en] (Win98; U)',
    'Accept'          => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# Three-argument open on a lexical handle, with an error check: the bareword
# DATA is also Perl's special __DATA__ handle, and an unchecked open would
# silently discard every result.
my $out_file = FILE;
open(my $out, '>>', $out_file)
    or die "Cannot open $out_file for appending: $!\n";

while (1) {
    # The start offset is derived from the last page number seen (10 per page).
    $response = $browser->get(
        GOOGLE . urlencode(SEARCHTEXT) . '&start=' . ($predchozistranka * 10),
        @ns_headers,
    );

    # Do not try to parse an error page.
    if (!$response->is_success) {
        warn "Request failed: " . $response->status_line . "\n";
        last;
    }
    $text = $response->content;

    # Page number shown in the pager. It is taken from the match's return
    # value rather than $1, so a failed match cannot leave a stale capture;
    # a missing marker counts as page 0, which ends the loop below.
    my ($page) = $text =~ /<span class=i>(\d+)/;
    $stranka = defined $page ? $page : 0;
    print "[site $stranka]\n";

    # Every result link on this page goes to the screen and to the output file.
    while ( $text =~ /<a class=l href="([^"]*)/mg ) {
        my $link = $1;
        print " -> " . $link . "\n";
        print {$out} $link . "\n";
    }

    # Stop as soon as the reported page number no longer advances.
    last if $predchozistranka >= $stranka;
    $predchozistranka = $stranka;
}

# Buffered write errors only surface at close time.
close($out) or die "Cannot close $out_file: $!\n";
#********************* FCE *****************************************************
# Percent-encode a string for use in a URL query.
#
# Every character other than A-Z, a-z and 0-9 is replaced by %XX (uppercase
# hex). Strings containing characters above U+00FF are converted to UTF-8
# bytes first, so each such character becomes a sequence of valid two-digit
# escapes instead of a malformed one such as "%20AC". Strings made only of
# characters 0x00-0xFF are encoded exactly as before.
#
# Takes:   the string to encode.
# Returns: the encoded copy; the argument itself is not modified.
sub urlencode {
    my $str = shift;
    # Only wide characters need the byte conversion; leaving byte strings
    # untouched keeps the output for Latin-1 input unchanged.
    utf8::encode($str) if $str =~ /[^\x00-\xFF]/;
    $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
    return $str;
}
# Reverse percent-encoding: each "%XX" escape (two hex digits, either case)
# is replaced by the character with that code. Anything that is not a
# well-formed escape is left untouched.
#
# Takes:   the encoded string.
# Returns: the decoded copy; the argument itself is not modified.
sub urldecode {
    my ($encoded) = @_;
    $encoded =~ s{%([0-9A-Fa-f]{2})}{chr(hex($1))}ge;
    return $encoded;
}
live-spider.pl
#! /usr/bin/perl -w
#
# perl blog spammer project
# --------------------------------
#
# usage: set all configs in script and run em
#
# (c) 2006 sh@isecure.cz
#
use strict;
use warnings;
use LWP;
use HTTP::Cookies;
use constant SEARCHTEXT => '"trackback.php?id="';
use constant FILE => 'trackback.urls';
use constant GOOGLE => 'http://search.live.com/results.aspx?q=';
use constant COOKIESFILE => 'cookies.lwp';
# Main flow of live-spider.pl: request successive result pages for SEARCHTEXT
# and append every absolute result link to FILE. Note that the constant named
# GOOGLE holds the search.live.com base URL in this script.
my $browser = LWP::UserAgent->new;
my $response;
my $text;
my $stranka = 0;             # page number reported by the page just fetched
my $predchozistranka = 0;    # page number reported by the previously fetched page
my $link;
# SET PROXY !!!!
# $browser->proxy(['http', 'ftp'], 'http://213.176.161.200:553');
$browser->cookie_jar( HTTP::Cookies->new(
    'file'     => COOKIESFILE,    # where to read/write cookies
    'autosave' => 1,              # save it to disk when done
));
my @ns_headers = (
    'User-Agent'      => 'Mozilla/4.76 [en] (Win98; U)',
    'Accept'          => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# Three-argument open on a lexical handle, with an error check: the bareword
# DATA is also Perl's special __DATA__ handle, and an unchecked open would
# silently discard every result.
my $out_file = FILE;
open(my $out, '>>', $out_file)
    or die "Cannot open $out_file for appending: $!\n";

while (1) {
    # The 1-based "first" offset is derived from the last page number seen.
    $response = $browser->get(
        GOOGLE . urlencode(SEARCHTEXT) . '&first=' . ($predchozistranka * 10 + 1),
        @ns_headers,
    );

    # Do not try to parse an error page.
    if (!$response->is_success) {
        warn "Request failed: " . $response->status_line . "\n";
        last;
    }
    $text = $response->content;

    # The pager marks the current page like this:
    #   <li class="selected">
    #   <a>
    #   5</a></li>
    # The number is taken from the match's return value rather than $1, so a
    # failed match cannot leave a stale capture. [^<\d]* keeps the search for
    # the digits inside the <a> element. A missing marker counts as page 0,
    # which ends the loop below.
    my ($page) = $text =~ /<li class="selected">[^<]*<a>[^<\d]*(\d+)[^<]*<\/a>[^<]*<\/li>/;
    $stranka = defined $page ? $page : 0;
    print "[site $stranka]\n";

    # Result anchors carry a "gping" attribute right after href; only links
    # starting with "http" are kept.
    while ( $text =~ /<a href="([^"]*)" gping/mg ) {
        $link = $1;
        if ($link =~ /^http/) {
            print " -> " . $link . "\n";
            print {$out} $link . "\n";
        }
    }

    # Stop as soon as the reported page number no longer advances.
    last if $predchozistranka >= $stranka;
    $predchozistranka = $stranka;
}

# Buffered write errors only surface at close time.
close($out) or die "Cannot close $out_file: $!\n";
#********************* FCE *****************************************************
# Percent-encode a string for use in a URL query.
#
# Every character other than A-Z, a-z and 0-9 is replaced by %XX (uppercase
# hex). Strings containing characters above U+00FF are converted to UTF-8
# bytes first, so each such character becomes a sequence of valid two-digit
# escapes instead of a malformed one such as "%20AC". Strings made only of
# characters 0x00-0xFF are encoded exactly as before.
#
# Takes:   the string to encode.
# Returns: the encoded copy; the argument itself is not modified.
sub urlencode {
    my $str = shift;
    # Only wide characters need the byte conversion; leaving byte strings
    # untouched keeps the output for Latin-1 input unchanged.
    utf8::encode($str) if $str =~ /[^\x00-\xFF]/;
    $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
    return $str;
}
# Reverse percent-encoding: each "%XX" escape (two hex digits, either case)
# is replaced by the character with that code. Anything that is not a
# well-formed escape is left untouched.
#
# Takes:   the encoded string.
# Returns: the decoded copy; the argument itself is not modified.
sub urldecode {
    my ($encoded) = @_;
    $encoded =~ s{%([0-9A-Fa-f]{2})}{chr(hex($1))}ge;
    return $encoded;
}

Vlož koment...