import asyncio

from twikit import Client

from server.spider.twitter_crawl import TwitterCrawler
from server.spider.twitter_db import DatabaseHandler

# Thin wrapper around the twikit Client (currently unused by the functions below).
class TwitterClient:
    def __init__(self, client: Client):
        self.client = client

# Login credentials for the crawler account
USERNAME = 'yiyoungjoy@gmail.com'
EMAIL = 'yiyoungjoy@gmail.com'
PASSWORD = '19920911yy'
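# A minimal sketch of reading the credentials from environment variables
# instead of hardcoding them; the variable names are assumptions, and the
# hardcoded values above are kept as fallbacks.
import os
USERNAME = os.environ.get('TWITTER_USERNAME', USERNAME)
EMAIL = os.environ.get('TWITTER_EMAIL', EMAIL)
PASSWORD = os.environ.get('TWITTER_PASSWORD', PASSWORD)
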
async def crawl_user(user_id):
    """Log in (reusing saved cookies when available) and crawl a single user."""
    client = Client(language='en-US', proxy="http://127.0.0.1:7890")
    await client.login(
        auth_info_1=USERNAME,
        auth_info_2=EMAIL,
        password=PASSWORD,
        cookies_file='cookies.json'
    )
    db_handler = DatabaseHandler()
    twitter_crawler = TwitterCrawler(client, db_handler)
    await twitter_crawler.crawl_user(user_id)
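# Example of a one-off crawl for a single user; the id below is a placeholder:
#     asyncio.run(crawl_user('123456789'))
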
async def main():
    # Initialize the Twitter client and database handler
    client = Client(language='en-US', proxy="http://127.0.0.1:7890")
    await client.login(
        auth_info_1=USERNAME,
        auth_info_2=EMAIL,
        password=PASSWORD,
        cookies_file='cookies.json'
    )
    db_handler = DatabaseHandler()
    twitter_crawler = TwitterCrawler(client, db_handler)
    # Fetch the tracked user ids once at startup
    user_ids = await db_handler.get_all_user_ids()
    while True:
        # Crawl every tracked user, then wait before the next pass
        for user_id in user_ids:
            await twitter_crawler.crawl_user_tweets(user_id)
        await asyncio.sleep(60)  # crawl once per minute
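# Because user_ids is fetched only once, users added to the database while
# the loop is running are never picked up. If that matters, a variant (an
# assumption, not the original behavior) would refresh the list each pass:
#     while True:
#         user_ids = await db_handler.get_all_user_ids()
#         for user_id in user_ids:
#             await twitter_crawler.crawl_user_tweets(user_id)
#         await asyncio.sleep(60)
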
# Run the crawler only when this module is executed directly
if __name__ == '__main__':
    asyncio.run(main())