12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- from twikit.client.client import Client
- from server.spider.twitter_db import DatabaseHandler
class TwitterCrawler:
    """Fetch Twitter users and their tweets through a client and persist them.

    The crawler itself holds no state beyond its two collaborators: ``client``
    performs the remote requests and ``db_handler`` stores what comes back.
    """

    # Timeline categories requested for every user, in crawl order.
    TWEET_TYPES = ('Tweets', 'Replies', 'Media')

    # Number of tweets requested per page.
    PAGE_SIZE = 40

    # Attributes copied one-to-one from the fetched user object into the
    # record handed to ``db_handler.save_user`` (key name == attribute name).
    _USER_FIELDS = (
        'id',
        'name',
        'screen_name',
        'profile_image_url',
        'profile_banner_url',
        'url',
        'location',
        'description',
        'is_blue_verified',
        'verified',
        'possibly_sensitive',
        'can_dm',
        'can_media_tag',
        'want_retweets',
        'default_profile',
        'default_profile_image',
        'followers_count',
        'fast_followers_count',
        'normal_followers_count',
        'following_count',
        'favourites_count',
        'listed_count',
        'media_count',
        'statuses_count',
        'is_translator',
        'translator_type',
    )

    def __init__(self, client: Client, db_handler: DatabaseHandler):
        """Store the API client and the database handler used by the crawl methods."""
        self.client = client
        self.db_handler = db_handler

    async def crawl_user_tweets(self, user_id: str):
        """Crawl and store every page of each timeline type for ``user_id``.

        For each entry of ``TWEET_TYPES`` the starting cursor is read from
        ``db_handler.get_latest_twitter_id`` and pages of ``PAGE_SIZE`` tweets
        are requested until the timeline is exhausted. Every tweet is passed
        to ``db_handler.save_tweet`` together with its type and the cursor
        that was used to request the page containing it.

        Args:
            user_id: Identifier of the user whose tweets are crawled.
        """
        for tweet_type in self.TWEET_TYPES:
            cursor = self.db_handler.get_latest_twitter_id(user_id, tweet_type)
            while True:
                # Request the next page of tweets of the current type.
                page = await self.client.get_user_tweets(
                    user_id, tweet_type, count=self.PAGE_SIZE, cursor=cursor
                )
                print(page)

                # Persist every tweet of this page.
                saved = 0
                for tweet in page:
                    await self.db_handler.save_tweet(tweet, tweet_type, cursor)
                    saved += 1

                next_cursor = page.next_cursor
                # Stop when there is no further page. An empty page or a
                # cursor that did not advance is also treated as the end:
                # relying on a missing cursor alone would loop forever if a
                # cursor keeps being returned after the last tweet.
                if not saved or not next_cursor or next_cursor == cursor:
                    break
                cursor = next_cursor

    async def crawl_user(self, user_name: str):
        """Fetch the profile of ``user_name`` and store it via ``db_handler``.

        Nothing is stored or printed when the lookup returns a falsy result.

        Args:
            user_name: Screen name passed to ``client.get_user_by_screen_name``.
        """
        result = await self.client.get_user_by_screen_name(user_name)
        if not result:
            return

        # Extract the key profile attributes into a plain dict.
        user_data = {field: getattr(result, field) for field in self._USER_FIELDS}
        # These two keys are always stored as None rather than read from
        # the fetched user object.
        user_data['profile_interstitial_type'] = None
        user_data['withheld_in_countries'] = None

        # Persist the record.
        await self.db_handler.save_user(user_data)
        print(f"User data for {user_name} saved successfully.")
|