En gros, il télécharge une URL à une certaine fréquence, et peut utiliser un nombre configurable de proxys...
Et je décline toute responsabilité en cas d'utilisation abusive (et si ça sortait du cadre éthique du forum, n'hésitez pas à supprimer le post).
Code: Select all
#!/bin/bash
#
# Get a remote web page at a given average frequency.
#
# written by Ki11erwhi1e
#
# Usage : ./wget_url.sh http://www.target.com/
#
# The script is designed to be called by a crontab each minute.
# This makes 60 calls per hour, 1440 calls per day.
#
# On each invocation, the fetch is performed only when $RANDOM
# (uniform over 0..32767) is below PERIOD, so the average hit rate
# is PERIOD/32768 of the cron invocations:
#   PERIOD=547 -> 547/32768 ~= 1/60   -> about one call per hour
#   PERIOD=23  -> 23/32768  ~= 1/1424 -> about one call per day
#
PERIOD=547
#
# The page which must be fetched.
# It can also be passed as the first argument of the script.
#
URL="http://www.target.com"
#
# User agent used to fetch the page.
#
USER_AGENT="Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)"
#
# Referer sent with the request (to simulate a click leading to the page).
#
REFERER="http://www.google.ch/search?q=target"
#
# Number of proxies to use:
#   0     -> direct call, no proxy
#   n > 0 -> use at most n proxies (and perform n calls)
#
USEMAXPROXY=5
# 'command -v' is the portable replacement for the deprecated 'which';
# $(...) is preferred over backticks (nests and reads more cleanly).
WGET=$(command -v wget)
#######################################
# Fetch one URL (plus its page requisites) and discard the download.
# Globals:   WGET, USER_AGENT, REFERER (read); honors http_proxy when set
#            by the caller (together with --proxy).
# Arguments: $1 - URL to retrieve
# Outputs:   wget diagnostics (kept terse by --no-verbose)
# Returns:   wget's exit status
#######################################
getUrl() {
  local target=$1
  # Quote "$WGET" so a path containing spaces (or an empty lookup
  # result) cannot be word-split into a different command line.
  "$WGET" --user-agent="$USER_AGENT" --page-requisites --no-verbose --delete-after \
    --convert-links --referer="$REFERER" \
    --tries=1 --timeout=5 --proxy \
    "$target"
}
# The target URL may be overridden by the first script argument.
if [ "$1" != "" ]
then
  URL=$1
fi

# $RANDOM is uniform over 0..32767, so this gate fires with
# probability PERIOD/32768 on each cron invocation.
if [ "$RANDOM" -lt "$PERIOD" ]
then
  if [ "$USEMAXPROXY" -gt 0 ]
  then
    # Unpredictable temp names (mktemp) instead of fixed /tmp paths:
    # fixed names are vulnerable to symlink attacks and collisions.
    PROXYRAWFILE=$(mktemp /tmp/proxyrawfile.XXXXXX) || exit 1
    PROXYLIST=$(mktemp /tmp/proxylist.XXXXXX) || exit 1
    PROXYLIST2=$(mktemp /tmp/proxylist2.XXXXXX) || exit 1
    # Clean the temp files up on every exit path, not just the happy one.
    trap 'rm -f -- "$PROXYRAWFILE" "$PROXYLIST" "$PROXYLIST2"' EXIT

    # Scrape a public proxy list. --timestamping was dropped: it does
    # nothing in combination with -O (wget warns and ignores it).
    wget --no-verbose -O "$PROXYRAWFILE" http://www.publicproxyservers.com/page1.html

    # Strip HTML tags, then keep only lines that are either a bare IPv4
    # address or a bare port number (grep -E replaces deprecated egrep).
    sed "s/<[^>]*>//g" < "$PROXYRAWFILE" \
      | grep -E "^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$|^[0-9]{2,4}$" > "$PROXYLIST"

    # The filtered list alternates IP / port lines; join each pair as ip:port.
    # A while-read loop avoids the word-splitting of `for l in $(cat ...)`.
    ip=""
    while IFS= read -r line
    do
      if [ -z "$ip" ]
      then
        ip=$line
      else
        echo "$ip:$line" >> "$PROXYLIST2"
        ip=""
      fi
    done < "$PROXYLIST"

    # Perform one fetch through each of the first $USEMAXPROXY proxies.
    # http_proxy is set only for the duration of the getUrl call.
    while IFS= read -r proxy
    do
      echo "# "
      echo "# Use proxy $proxy"
      echo "# "
      http_proxy=$proxy getUrl "$URL"
    done < <(head -n "$USEMAXPROXY" "$PROXYLIST2")
  else
    # No proxy configured: a single direct fetch.
    getUrl "$URL"
  fi
fi






