EU
eugen1j/aioscrapy
Python asynchronous library for web scraping
Python async library for web scraping
Installing
pip install aioscrapy
Usage
Plain text scraping
import asyncio
import json
from aioscrapy import Client, WebTextClient, SingleSessionPool, Dispatcher, SimpleWorker
class CustomClient(Client[str, dict]):
    """Client that fetches text for a key and decodes it as JSON."""

    def __init__(self, client: WebTextClient):
        # The wrapped text client performs the actual fetch.
        self._client = client

    async def fetch(self, key: str) -> dict:
        """Fetch ``key`` through the wrapped client and return the parsed JSON."""
        raw_text = await self._client.fetch(key)
        parsed = json.loads(raw_text)
        return parsed
async def main():
    """Run a SimpleWorker over https://httpbin.org/get and return its result."""
    session_pool = SingleSessionPool()
    url_dispatcher = Dispatcher(['https://httpbin.org/get'])
    text_client = WebTextClient(session_pool)
    json_client = CustomClient(text_client)
    # The worker pairs the dispatcher with the client; its run() result is
    # handed straight back to the caller.
    return await SimpleWorker(url_dispatcher, json_client).run()
# Create the event loop explicitly: calling asyncio.get_event_loop() when
# there is no current event loop is deprecated.
loop = asyncio.new_event_loop()
print(loop.run_until_complete(main()))

Byte content downloading
import asyncio
from aioscrapy import Client, WebByteClient, SingleSessionPool, Dispatcher, SimpleWorker
class CustomClient(Client[str, bytes]):
    """Client that returns the wrapped byte client's result unchanged."""

    def __init__(self, client: WebByteClient):
        # The wrapped byte client performs the actual fetch.
        self._client = client

    async def fetch(self, key: str) -> bytes:
        """Fetch ``key`` through the wrapped client and return its result as-is."""
        return await self._client.fetch(key)
async def main():
    """Run a SimpleWorker over https://httpbin.org/image and return its result."""
    session_pool = SingleSessionPool()
    url_dispatcher = Dispatcher(['https://httpbin.org/image'])
    byte_client = WebByteClient(session_pool)
    wrapped_client = CustomClient(byte_client)
    # The worker pairs the dispatcher with the client; its run() result is
    # handed straight back to the caller.
    return await SimpleWorker(url_dispatcher, wrapped_client).run()
# asyncio.run() creates a fresh event loop, runs main() to completion and
# closes the loop afterwards. It replaces the get_event_loop() /
# run_until_complete() pair, which is deprecated when there is no current
# event loop.
data: dict = asyncio.run(main())
for url, byte_content in data.items():
    print(url + ": " + str(len(byte_content)) + " bytes")

On this page
Languages
Python 99.6%, Makefile 0.4%
Contributors
MIT License
Created May 12, 2019
Updated March 16, 2024