from twikit.client.client import Client

from server.spider.twitter_db import DatabaseHandler


class TwitterCrawler:
    """Fetch a user's profile and tweets through ``client`` and store them via ``db_handler``."""

    # Timeline tabs requested for each user, in crawl order.
    TWEET_TYPES = ('Tweets', 'Replies', 'Media')
    # Number of tweets requested per page.
    PAGE_SIZE = 40

    def __init__(self, client: Client, db_handler: DatabaseHandler):
        """Keep references to the API client and the database handler."""
        self.client = client
        self.db_handler = db_handler

    async def crawl_user_tweets(self, user_id: str) -> None:
        """Page through every tweet type of ``user_id`` and save each tweet.

        For each tweet type the crawl starts from the cursor returned by
        ``db_handler.get_latest_twitter_id`` and follows ``next_cursor``
        until the pages run out.

        Args:
            user_id: Identifier of the user whose tweets are crawled.
        """
        for tweet_type in self.TWEET_TYPES:
            latest_cursor = self.db_handler.get_latest_twitter_id(user_id, tweet_type)

            while True:
                # Request one page of tweets of the current type.
                result = await self.client.get_user_tweets(
                    user_id, tweet_type, count=self.PAGE_SIZE, cursor=latest_cursor
                )
                print(result)

                # Save every tweet of this page together with the cursor
                # that was used to fetch it.
                saved = 0
                for tweet in result:
                    await self.db_handler.save_tweet(tweet, tweet_type, latest_cursor)
                    saved += 1

                # The response may be a plain empty sequence without a
                # ``next_cursor`` attribute, so read it defensively.
                next_cursor = getattr(result, 'next_cursor', None)

                # Stop when there is no further page, when the page was
                # empty, or when the cursor did not advance. Without the
                # last two checks, a backend that keeps handing out a
                # cursor after the final page would make this loop spin
                # forever.
                if not saved or not next_cursor or next_cursor == latest_cursor:
                    break
                latest_cursor = next_cursor

    async def crawl_user(self, user_name: str) -> None:
        """Look up ``user_name`` and save the key profile fields.

        Nothing is saved or printed when the lookup returns a falsy result.

        Args:
            user_name: Screen name passed to ``client.get_user_by_screen_name``.
        """
        # Fetch the user object for this screen name.
        result = await self.client.get_user_by_screen_name(user_name)
        if not result:
            return

        # Extract the profile fields that are persisted.
        user_data = {
            'id': result.id,
            'name': result.name,
            'screen_name': result.screen_name,
            'profile_image_url': result.profile_image_url,
            'profile_banner_url': result.profile_banner_url,
            'url': result.url,
            'location': result.location,
            'description': result.description,
            'is_blue_verified': result.is_blue_verified,
            'verified': result.verified,
            'possibly_sensitive': result.possibly_sensitive,
            'can_dm': result.can_dm,
            'can_media_tag': result.can_media_tag,
            'want_retweets': result.want_retweets,
            'default_profile': result.default_profile,
            'default_profile_image': result.default_profile_image,
            'followers_count': result.followers_count,
            'fast_followers_count': result.fast_followers_count,
            'normal_followers_count': result.normal_followers_count,
            'following_count': result.following_count,
            'favourites_count': result.favourites_count,
            'listed_count': result.listed_count,
            'media_count': result.media_count,
            'statuses_count': result.statuses_count,
            'is_translator': result.is_translator,
            'translator_type': result.translator_type,
            # These two fields are not read from the user object; they are
            # always stored as None.
            'profile_interstitial_type': None,
            'withheld_in_countries': None,
        }

        # Persist the collected profile data.
        await self.db_handler.save_user(user_data)
        print(f"User data for {user_name} saved successfully.")