Proxy Grabber

Python code that grabs proxies from the geonode proxy-list API and saves each type to its own file: http.txt, https.txt, socks4.txt and socks5.txt. It pages through the API 500 proxies at a time and runs three concurrent worker tasks.
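For context, the script below expects the API to return JSON with a 'total' count and a 'data' list, and it reads three fields from each entry. A rough, assumed illustration of one such entry (the values are made up):

Python:
# Assumed shape of one element of the API's 'data' list -- only the fields
# the grabber reads are shown; the values here are illustrative.
sample_proxy = {
    'ip': '203.0.113.10',      # combined with the port into an "ip:port" line
    'port': '8080',
    'protocols': ['http'],     # the first protocol selects the output file (http.txt here)
}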

Python:
import asyncio
import sys

import aiohttp
from aiohttp import ClientSession


HEADERS = {
    'Authority': 'proxylist.geonode.com',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
}
URL_API = 'https://proxylist.geonode.com/api/proxy-list'
FILES_PROXY = {'socks4': 'socks4.txt', 'socks5': 'socks5.txt', 'https': 'https.txt', 'http': 'http.txt'}
LIMIT_IN_PAGE = 500   # proxies requested per API page
THREADS_COUNT = 3     # concurrent worker tasks

TOTAL_PROXY = 0
SAVE_PROXY_COUNTER = 0

QUEUE = asyncio.Queue()
THLOCK = asyncio.Lock()


# Append one proxy line to its file; the lock keeps concurrent workers from interleaving writes.
async def write_to_file(filename: str, data: str) -> None:
    async with THLOCK:
        with open(filename, 'a', encoding='UTF-8') as file:
            file.write(f'{data}\n')


# Fetch one page of proxies; returns the 'data' list from the JSON response, or None on failure.
async def get_proxys(session: ClientSession, current_page: int):
    params = {'limit': f'{LIMIT_IN_PAGE}', 'page': f'{current_page}', 'sort_by': 'lastChecked', 'sort_type': 'desc'}
    try:
        async with session.get(URL_API, headers=HEADERS, params=params) as response:
            proxies = await response.json()
            return proxies.get('data')
    except Exception as e:
        print(f'[ERR] Failed to get proxies on page {current_page} -- {e}')
        return None


# Ask the API how many proxies exist in total and derive the page count from it.
async def get_pages_count() -> int:
    global TOTAL_PROXY
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(URL_API, headers=HEADERS) as response:
                data = await response.json()
    except Exception as e:
        print(f'[ERR] Failed to get pages count -- {e}')
        return 0
    TOTAL_PROXY = int(data.get('total', 0))
    # Round up so a partially filled last page is still fetched.
    return TOTAL_PROXY // LIMIT_IN_PAGE if (TOTAL_PROXY % LIMIT_IN_PAGE) == 0 else TOTAL_PROXY // LIMIT_IN_PAGE + 1


# Each worker drains page numbers from the queue, downloads the page and sorts proxies into files.
async def worker():
    global QUEUE, SAVE_PROXY_COUNTER
    async with aiohttp.ClientSession() as session:
        while not QUEUE.empty():
            page_num = await QUEUE.get()
            proxys = await get_proxys(session, page_num)
            if not proxys:
                continue
            current_counter = 0
            for proxy in proxys:
                # The first listed protocol decides which file the proxy goes to.
                filename = FILES_PROXY.get(proxy['protocols'][0])
                if filename is None:
                    continue
                line = f"{proxy['ip']}:{proxy['port']}"
                await write_to_file(filename, line)
                current_counter += 1
            SAVE_PROXY_COUNTER += current_counter
            print(f'[+] Page {page_num} saved {current_counter} proxies. Total saved: {SAVE_PROXY_COUNTER}')



async def main():
    global TOTAL_PROXY, QUEUE, SAVE_PROXY_COUNTER
    pages = await get_pages_count()
    print(f'[*] Found {TOTAL_PROXY} proxies, {pages} pages')
    if TOTAL_PROXY == 0:
        return
    # Queue up every page number, then let a small pool of worker tasks process them concurrently.
    for page_num in range(1, pages + 1):
        QUEUE.put_nowait(page_num)
    threads = [asyncio.create_task(worker()) for _ in range(THREADS_COUNT)]
    await asyncio.gather(*threads)
    print(f'[*] Found {TOTAL_PROXY} proxies // Total saved {SAVE_PROXY_COUNTER}')

if __name__ == '__main__':
    # The selector event loop policy only exists (and is only needed) on Windows.
    if sys.platform == 'win32':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main())
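A minimal sketch of how one of the saved lists could be consumed afterwards, assuming the files contain one ip:port entry per line as written above; the target URL and timeout are placeholders, and SOCKS proxies would need an extra connector such as aiohttp-socks:

Python:
import asyncio
import aiohttp


async def check_first_http_proxy():
    # Take the first proxy saved by the grabber.
    with open('http.txt', encoding='UTF-8') as file:
        proxy = file.readline().strip()
    async with aiohttp.ClientSession() as session:
        try:
            # aiohttp accepts plain HTTP proxies via the proxy argument.
            async with session.get('https://httpbin.org/ip',
                                   proxy=f'http://{proxy}',
                                   timeout=aiohttp.ClientTimeout(total=10)) as response:
                print(proxy, response.status)
        except Exception as e:
            print(f'[ERR] {proxy} -- {e}')


if __name__ == '__main__':
    asyncio.run(check_first_http_proxy())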
 
