twitter_spider.py 1.4 KB

import asyncio
from twikit import Client
from server.spider.twitter_crawl import TwitterCrawler
from server.spider.twitter_db import DatabaseHandler


class TwitterClient:
    def __init__(self, client: Client):
        self.client = client


USERNAME = 'yiyoungjoy@gmail.com'
EMAIL = 'yiyoungjoy@gmail.com'
PASSWORD = '19920911yy'


async def crawl_user(user_id):
    # Log in via twikit, reusing cached cookies when available
    client = Client(language='en-US', proxy="http://127.0.0.1:7890")
    await client.login(
        auth_info_1=USERNAME,
        auth_info_2=EMAIL,
        password=PASSWORD,
        cookies_file='cookies.json'
    )
    db_handler = DatabaseHandler()
    twitter_crawler = TwitterCrawler(client, db_handler)
    await twitter_crawler.crawl_user(user_id)


async def main():
    # Initialize the Twitter client and database handler
    client = Client(language='en-US', proxy="http://127.0.0.1:7890")
    await client.login(
        auth_info_1=USERNAME,
        auth_info_2=EMAIL,
        password=PASSWORD,
        cookies_file='cookies.json'
    )
    db_handler = DatabaseHandler()
    twitter_crawler = TwitterCrawler(client, db_handler)
    user_ids = await db_handler.get_all_user_ids()
    while True:
        # Crawl every tracked user, then wait before the next pass
        for user_id in user_ids:
            await twitter_crawler.crawl_user_tweets(user_id)
        await asyncio.sleep(60)  # crawl once per minute


# Run the crawler
# asyncio.run(main())
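
TwitterCrawler and DatabaseHandler live in project-internal modules that are not shown here. Below is a minimal sketch of the interface this script assumes from them: only the method names (crawl_user, crawl_user_tweets, get_all_user_ids) come from the calls above, while the bodies, the save_tweets helper, and the get_user_tweets usage are illustrative assumptions rather than the project's actual implementation.

# Hypothetical stubs only: method names taken from twitter_spider.py above;
# bodies and the save_tweets helper are assumptions for illustration.
from twikit import Client


class DatabaseHandler:
    async def get_all_user_ids(self) -> list[str]:
        # Assumed to return the user IDs the spider should poll,
        # presumably read from the project's database.
        return []

    async def save_tweets(self, user_id: str, tweets: list) -> None:
        # Hypothetical persistence hook; the real storage layer is not shown.
        pass


class TwitterCrawler:
    def __init__(self, client: Client, db_handler: DatabaseHandler):
        self.client = client
        self.db_handler = db_handler

    async def crawl_user(self, user_id: str) -> None:
        # Assumed to fetch and store one user's tweets in a single pass.
        await self.crawl_user_tweets(user_id)

    async def crawl_user_tweets(self, user_id: str) -> None:
        # Assumed flow: pull the user's latest tweets through twikit and
        # hand them to the database handler for storage.
        tweets = await self.client.get_user_tweets(user_id, 'Tweets')
        await self.db_handler.save_tweets(user_id, list(tweets))

With stubs like these in place, asyncio.run(main()) from the script above would log in once and then re-poll every tracked user roughly once per minute.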