If you are guestbook spammers or some blackhat seo warrior, i am sure that you became to problem how to search your targets. After overusing Google API with strange words i became banned. Only way was script which parse a results directly from website. So here are two which i created in Perl, one for Google, second for Live.com. But be carefull, google bans for overusing webinterface too, so pls use some proxies, i read stories about blocking C class subnet for one month.

google-spider.pl

#! /usr/bin/perl -w

#
#                 spammer project
#       --------------------------------
#
#       usage: set all configs in script and run em
#
#        (c) 2006 sh@isecure.cz
#

use strict;
use warnings;

use LWP;
use HTTP::Cookies;

# Search settings and file locations, declared together in one constant block.
use constant {
  SEARCHTEXT  => '"trackback.php?id="',             # query text sent to the search engine
  FILE        => 'trackback.urls',                  # Output file
  GOOGLE      => 'http://www.google.cz/search?q=',  # search URL; the encoded query is appended
  COOKIESFILE => 'cookies.lwp',                     # cookie store used by the user agent
};

# State shared with the crawl loop below.
# Czech names: stranka = page, predchozistranka = previous page.
my $browser = LWP::UserAgent->new;
my ($response, $text);
my ($stranka, $predchozistranka, $i) = (0, 0, 0);

# SET PROXY !!!!
# $browser->proxy(['http', 'ftp'], 'http://213.176.161.200:553');

# Build the cookie jar first, then hand it to the user agent.
my $cookie_jar = HTTP::Cookies->new(
  'file'     => COOKIESFILE,  # where to read/write cookies
  'autosave' => 1,            # save it to disk when done
);
$browser->cookie_jar($cookie_jar);

# Extra request headers passed along with every GET.
my @ns_headers = (
  'User-Agent'      => 'Mozilla/4.76 [en] (Win98; U)',
  'Accept'          => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
  'Accept-Charset'  => 'iso-8859-1,*,utf-8',
  'Accept-Language' => 'en-US',
);

# Main loop: fetch result pages one after another and append every result
# link to FILE.
#
# A lexical handle with three-argument open is used instead of the bareword
# DATA (which Perl reserves for the __DATA__ section), and failure to open the
# output file is reported instead of being silently ignored.
open(my $out, '>>', FILE)
  or die "Cannot open '" . FILE . "' for appending: $!\n";

while (1) {
  # The start offset is the number of the last processed page times 10.
  my $request_url = GOOGLE . urlencode(SEARCHTEXT) . '&start=' . ($predchozistranka * 10);
  $response = $browser->get($request_url, @ns_headers);

  # Do not try to parse an error page; report the failure and stop.
  if (!$response->is_success) {
    warn "Request failed: " . $response->status_line . "\n";
    last;
  }

  $text = $response->content;

  # The current page number is the run of digits following <span class=i>.
  # If the marker is missing or has no digits, fall back to 0 so that this
  # page's links are still collected and the loop then terminates, without
  # the "uninitialized value" warnings a stale/undefined $1 would cause.
  $stranka = ($text =~ /<span class=i>(\d*)/ && length $1) ? $1 : 0;
  print "[site $stranka]\n";

  # Collect the target of every result link on this page.
  while ($text =~ /<a class=l href="([^"]*)/mg) {
    my $link = $1;
    print " -> " . $link . "\n";
    print {$out} $link . "\n";
  }

  # Stop once the reported page number no longer advances past the page
  # that was processed last.
  last if $predchozistranka >= $stranka;
  $predchozistranka = $stranka;
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "Cannot close '" . FILE . "': $!\n";

#********************* FCE *****************************************************

# urlencode($str)
#
# Percent-encodes a string for use inside a URL. Every character other than
# an ASCII letter or digit is replaced by %XX (two upper-case hex digits).
#
# Strings that contain code points above 0xFF are converted to their UTF-8
# byte sequence first, so each byte gets its own escape; without this step
# such characters would yield malformed three-digit escapes (e.g. %10D).
# Strings made only of characters 0x00-0xFF are escaped exactly as before.
#
# Returns the encoded copy; the caller's value is not modified.
sub urlencode {

  my $str = shift;

  # A code point above 255 cannot be written as a two-digit escape, so
  # switch to the UTF-8 byte representation before escaping.
  utf8::encode($str) if defined $str && $str =~ /[^\x00-\xFF]/;

  $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;

  return $str;
}

# urldecode($str)
#
# Reverses percent-encoding: each %XX sequence (two hex digits, either case)
# is replaced by the single character with that code. Everything else,
# including '+' and incomplete escapes, is passed through unchanged.
#
# Returns the decoded copy.
sub urldecode {
  my ($encoded) = @_;

  (my $decoded = $encoded) =~ s/\%([A-Fa-f0-9]{2})/chr(hex($1))/seg;

  return $decoded;
}

live-spider.pl

#! /usr/bin/perl -w

#
#           perl blog spammer project
#       --------------------------------
#
#       usage: set all configs in script and run em
#
#        (c) 2006 sh@isecure.cz
#

use strict;
use warnings;

use LWP;
use HTTP::Cookies;

# Search settings and file locations, declared together in one constant block.
# Note: the constant is called GOOGLE, but in this script it holds the
# Live.com search URL.
use constant {
  SEARCHTEXT  => '"trackback.php?id="',                     # query text sent to the search engine
  FILE        => 'trackback.urls',                          # file the found URLs are appended to
  GOOGLE      => 'http://search.live.com/results.aspx?q=',  # search URL; the encoded query is appended
  COOKIESFILE => 'cookies.lwp',                             # cookie store used by the user agent
};

# State shared with the crawl loop below.
# Czech names: stranka = page, predchozistranka = previous page.
my $browser = LWP::UserAgent->new;
my ($response, $text);
my ($stranka, $predchozistranka, $i) = (0, 0, 0);
my $link;

# SET PROXY !!!!
# $browser->proxy(['http', 'ftp'], 'http://213.176.161.200:553');

# Build the cookie jar first, then hand it to the user agent.
my $cookie_jar = HTTP::Cookies->new(
  'file'     => COOKIESFILE,  # where to read/write cookies
  'autosave' => 1,            # save it to disk when done
);
$browser->cookie_jar($cookie_jar);

# Extra request headers passed along with every GET.
my @ns_headers = (
  'User-Agent'      => 'Mozilla/4.76 [en] (Win98; U)',
  'Accept'          => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
  'Accept-Charset'  => 'iso-8859-1,*,utf-8',
  'Accept-Language' => 'en-US',
);

# Main loop: fetch result pages one after another and append every absolute
# result link to FILE.
#
# A lexical handle with three-argument open is used instead of the bareword
# DATA (which Perl reserves for the __DATA__ section), and failure to open the
# output file is reported instead of being silently ignored.
open(my $out, '>>', FILE)
  or die "Cannot open '" . FILE . "' for appending: $!\n";

while (1) {
  # The 'first' offset is the number of the last processed page times 10,
  # plus 1. GOOGLE holds the Live.com search URL in this script.
  my $request_url = GOOGLE . urlencode(SEARCHTEXT) . '&first=' . ($predchozistranka * 10 + 1);
  $response = $browser->get($request_url, @ns_headers);

  # Do not try to parse an error page; report the failure and stop.
  if (!$response->is_success) {
    warn "Request failed: " . $response->status_line . "\n";
    last;
  }

  $text = $response->content;

  # The current page number is taken from markup of this shape:
  #
  #            <li class="selected">
  #                <a>
  #                    5</a></li>
  #
  # If the marker is missing or has no digits, fall back to 0 so that this
  # page's links are still collected and the loop then terminates, without
  # the "uninitialized value" warnings a stale/undefined $1 would cause.
  $stranka =
    ($text =~ /<li class="selected">[^<]*<a>[^\d]*(\d*)[^<]*<\/a>[^<]*<\/li>/ && length $1)
    ? $1
    : 0;
  print "[site $stranka]\n";

  # Collect the target of every link carrying a gping attribute, keeping only
  # those that start with "http".
  while ($text =~ /<a href="([^"]*)" gping/mg) {
    $link = $1;
    next unless $link =~ /^http/;

    print " -> " . $link . "\n";
    print {$out} $link . "\n";
  }

  # Stop once the reported page number no longer advances past the page
  # that was processed last.
  last if $predchozistranka >= $stranka;
  $predchozistranka = $stranka;
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "Cannot close '" . FILE . "': $!\n";

#********************* FCE *****************************************************

# urlencode($str)
#
# Percent-encodes a string for use inside a URL. Every character other than
# an ASCII letter or digit is replaced by %XX (two upper-case hex digits).
#
# Strings that contain code points above 0xFF are converted to their UTF-8
# byte sequence first, so each byte gets its own escape; without this step
# such characters would yield malformed three-digit escapes (e.g. %10D).
# Strings made only of characters 0x00-0xFF are escaped exactly as before.
#
# Returns the encoded copy; the caller's value is not modified.
sub urlencode {

  my $str = shift;

  # A code point above 255 cannot be written as a two-digit escape, so
  # switch to the UTF-8 byte representation before escaping.
  utf8::encode($str) if defined $str && $str =~ /[^\x00-\xFF]/;

  $str =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;

  return $str;
}

# urldecode($str)
#
# Turns percent-escapes back into characters: every %XX (two hex digits,
# upper or lower case) becomes the character with that code. Text that is
# not a complete escape is left exactly as it was.
#
# Returns the decoded copy.
sub urldecode {
  my ($input) = @_;
  my $output  = $input;

  # Work on the copy so the replacement reads as one self-contained step.
  $output =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;

  return $output;
}

Vlož koment...

Jsou povoleny tyto XHTML tagy: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>





© sh, bw and hosting donated by lidos.cz