In this article I will show you how to collect a large number of proxies from different countries.
Let's get started.
We need Python to run the scripts, so install Python 3.10.6.
After installing Python, install the libraries used to fetch the proxies and to check that they work. PySocks is included here because requests needs it to send traffic through the SOCKS proxies tested by the second script:
pip install requests lxml beautifulsoup4 cfscrape pysocks
Once the libraries are installed, create a file with the .py extension and paste the following code into it. This code scrapes proxies from several public sites.
import cfscrape
from bs4 import BeautifulSoup
from datetime import date


def main2():
    try:
        print("Start 2 step || Site - free-proxy-list.net")
        scraper = cfscrape.create_scraper()
        r = scraper.get('https://free-proxy-list.net')
        soup = BeautifulSoup(r.text, 'lxml')
        line = soup.find('table', class_='table table-striped table-bordered').find('tbody').find_all('tr')
        for tr in line:
            td = tr.find_all('td')
            ip = td[0].text
            port = td[1].text
            with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                f.write(str(ip + ":" + port + "\n"))
    except Exception as e:
        print(e)


def main3():
    try:
        print("Start 3 step || Site - free.proxy-sale.com")
        scraper = cfscrape.create_scraper()
        r = scraper.get('https://free.proxy-sale.com')
        soup = BeautifulSoup(r.text, 'lxml')
        line = soup.find('div', class_="main__table-wrap").find('table').find('tbody').find_all('tr')
        for i in line:
            td = i.find_all('td', class_="ip")
            for n in td:
                ip = n.find("a").text.replace("Нажмите ⌘-С чтобы скопировать Скопировать в буфер", " ").strip()
                with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                    f.write(str(ip + "\n"))
    except Exception as e:
        print(e)


def main4():
    try:
        print("Start 4 step || Site - proxyscrape.com and openproxylist.xyz")
        scraper = cfscrape.create_scraper()
        response5 = scraper.get("https://openproxylist.xyz/http.txt")
        response6 = scraper.get("https://openproxylist.xyz/socks4.txt")
        response7 = scraper.get("https://openproxylist.xyz/socks5.txt")
        proxies5 = response5.text.strip()
        proxies6 = response6.text.strip()
        proxies7 = response7.text.strip()
        with open("proxy_scraped.txt", "a") as txt_file:
            txt_file.write(proxies5 + "\n" + proxies6 + "\n" + proxies7 + "\n")
    except Exception as e:
        print(e)


def main5():
    try:
        print("Start 5 step || Site - hidemy.name")
        scraper = cfscrape.create_scraper()
        r = scraper.get('https://hidemy.name/ru/proxy-list/')
        soup = BeautifulSoup(r.text, 'lxml')
        line = soup.find('div', class_="table_block").find('table').find('tbody').find_all('tr')
        for a in line:
            td = a.find_all('td')
            ip = td[0].text
            port = td[1].text
            with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                f.write(str(ip + ":" + port + "\n"))
    except Exception as e:
        print(e)


def main7():
    try:
        print("Start 7 step || Site - sslproxies.org")
        scraper = cfscrape.create_scraper()
        r = scraper.get('https://www.sslproxies.org/#list')
        soup = BeautifulSoup(r.text, 'lxml')
        line = soup.find('table', class_="table table-striped table-bordered").find('tbody').find_all('tr')
        for a in line:
            td = a.find_all('td')
            ip = td[0].text
            port = td[1].text
            with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                f.write(str(ip + ":" + port + "\n"))
    except Exception as e:
        print(e)


def main8():
    try:
        print("Start 8 step || Site - spys.one")
        scraper = cfscrape.create_scraper()
        r = scraper.get('https://spys.one')
        soup = BeautifulSoup(r.text, 'lxml')
        line = soup.find('table', cellspacing="0", cellpadding="2").find('table', cellspacing="1", cellpadding="1", align="left").find_all('tr', class_="spy1x", onmouseover="this.style.background='#002424'")
        line2 = soup.find('table', cellspacing="0", cellpadding="2").find('table', cellspacing="1", cellpadding="1", align="left").find_all('tr', class_="spy1xx", onmouseover="this.style.background='#002424'")
        for tr in line:
            td = tr.find_all('td')
            ip = td[0].text
            with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                f.write(str(ip + '\n'))
        for tr2 in line2:
            td2 = tr2.find_all('td')
            ip2 = td2[0].text
            with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                f.write(str(ip2 + '\n'))
    except Exception as e:
        print(e)


def main10():
    try:
        print("Start 10 step || Site - userel.com")
        scraper = cfscrape.create_scraper()
        r = scraper.get('https://userel.com/')
        soup = BeautifulSoup(r.text, 'lxml')
        line = soup.find('div', class_="proxy").find('table').find_all('tr')
        for tr in line:
            td = tr.find_all('td')
            ip = td[0].text
            with open("proxy_scraped.txt", "a+", encoding="utf-8") as f:
                f.write(str(ip) + '\n')
    except Exception as e:
        print(e)
def main11():
    try:
        print("Start 11 step || Site - ArchiveProxy")
        scraper = cfscrape.create_scraper()
        year, month, day = (int(part) for part in str(date.today()).split('-'))
        # Walk the checkerproxy.net archive for every day of the current month up to today.
        for today in range(1, day + 1):
            try:
                scoc = scraper.get(f'https://checkerproxy.net/api/archive/{year}-{month}-{today}').json()
            except Exception:
                break
            try:
                with open('proxy_scraped.txt', 'a+') as file:
                    for entry in scoc:
                        file.write(entry['addr'] + '\n')
            except Exception:
                pass
    except Exception as e:
        print(e)
if __name__ == "__main__":
    main2()
    main3()
    main4()
    main5()
    main7()
    main8()
    main10()
    main11()
    print("Duplicates cleaning...")
    with open("proxy_scraped.txt") as input_:
        result = dict.fromkeys(input_).keys()
    with open("proxy_scraped.txt", "w") as output:
        print(*result, file=output, sep="")
    print("Duplicates successfully cleared!")
After pasting in the code, run the script. You will need to wait a while for it to collect proxies from the open sites, after which it removes the duplicate entries.
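If you want a quick sanity check before moving on, a couple of lines are enough to see how much was collected. This is not part of the original script, just a small sketch that assumes proxy_scraped.txt sits in the same folder:

# Optional sanity check: count the unique proxies the scraper collected.
# Assumes proxy_scraped.txt is in the current working directory.
with open("proxy_scraped.txt", encoding="utf-8") as f:
    proxies = {line.strip() for line in f if line.strip()}
print(f"Collected {len(proxies)} unique proxies")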
Now that we have the proxy_scraped.txt file, create a second .py file and paste the following code into it. This code checks whether the proxies actually work, splits them by protocol (SOCKS, HTTP) and sorts them by country.
import os
import threading

import requests

filename = input("FILENAME: ")
timeout = int(input("TIMEOUT: "))
x = 0
y = 0


def check_proxy(proxy):
    global y
    protocols = ['http', 'https', 'socks4', 'socks5']
    for protocol in protocols:
        try:
            # requests expects the target scheme ('http'/'https') as the key;
            # the proxy's own protocol goes into the URL value.
            proxies = {'http': f'{protocol}://{proxy}', 'https': f'{protocol}://{proxy}'}
            response = requests.get('http://ip-api.com/json', proxies=proxies, timeout=timeout)
            if response.status_code == 200:
                data = response.json()
                country = data['country']
                folder_path = os.path.join('country', country)
                file_path = os.path.join(folder_path, f'{protocol}.txt')   # per country, per protocol
                file_path2 = os.path.join(folder_path, 'ALL.txt')          # per country, all protocols
                file_path3 = os.path.join("ALL", "ALL.txt")                # every working proxy
                file_path4 = os.path.join("ALL", f'{protocol}.txt')        # per protocol, all countries
                os.makedirs(folder_path, exist_ok=True)
                os.makedirs("ALL", exist_ok=True)
                with open(file_path, 'a') as f:
                    f.write(f'{proxy}\n')
                with open(file_path2, 'a') as f:
                    f.write(f'{proxy}\n')
                with open(file_path4, 'a') as f:
                    f.write(f'{proxy}\n')
                with open(file_path3, 'a') as f:
                    f.write(f'{proxy}\n')
                y += 1
        except Exception:
            pass


with open(filename, 'r') as f:
    proxy_list = [line.strip() for line in f]

for i in proxy_list:
    threading.Thread(target=check_proxy, args=(i,)).start()
    x += 1
    print(f"\rChecked: {x}/{len(proxy_list)} Good: {y}", end='')
Run the script; once the checker has finished, the working proxies will be sorted by country. You can also use this checker to test your own proxies.
Inside each country folder the proxies are split by protocol, and the script also creates an "ALL" folder next to it: ALL.txt there holds every working proxy without any separation, and the per-protocol files hold them split by protocol but not by country.
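As an illustration, here is a minimal sketch of how one of the checked proxies could be used with requests. The folder layout follows the checker's output; "Germany" is only an example country, use whichever folder the checker actually created for you:

import random

import requests

# Pick a random working HTTP proxy from one of the country folders created by the checker.
# "Germany" is just an example; substitute any country the checker found.
with open("country/Germany/http.txt", encoding="utf-8") as f:
    proxies = [line.strip() for line in f if line.strip()]

proxy = random.choice(proxies)
response = requests.get(
    "http://ip-api.com/json",
    proxies={"http": f"http://{proxy}", "https": f"http://{proxy}"},
    timeout=10,
)
print(response.json())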
You can use these proxies however you like. The choice is yours!
Have a nice day