twitter_crawl.py

from twikit.client.client import Client

from server.spider.twitter_db import DatabaseHandler


class TwitterCrawler:
    def __init__(self, client: Client, db_handler: DatabaseHandler):
        self.client = client
        self.db_handler = db_handler

    async def crawl_user_tweets(self, user_id: str):
        for tweet_type in ['Tweets', 'Replies', 'Media']:
            latest_cursor = self.db_handler.get_latest_twitter_id(user_id, tweet_type)
            while True:
                # Request the tweets of the current type
                result = await self.client.get_user_tweets(user_id, tweet_type, count=40, cursor=latest_cursor)
                print(result)
                # Process the tweets returned by this request
                for tweet in result:
                    # Save the tweet content to the database
                    await self.db_handler.save_tweet(tweet, tweet_type, latest_cursor)
                # If there is a next page, update the cursor and keep fetching
                if result.next_cursor:
                    latest_cursor = result.next_cursor
                else:
                    break  # No more pages: stop crawling

    async def crawl_user(self, user_name: str):
        # Fetch the user's profile data
        result = await self.client.get_user_by_screen_name(user_name)
        if result:
            # Extract the user's key fields
            user_data = {
                'id': result.id,
                'name': result.name,
                'screen_name': result.screen_name,
                'profile_image_url': result.profile_image_url,
                'profile_banner_url': result.profile_banner_url,
                'url': result.url,
                'location': result.location,
                'description': result.description,
                'is_blue_verified': result.is_blue_verified,
                'verified': result.verified,
                'possibly_sensitive': result.possibly_sensitive,
                'can_dm': result.can_dm,
                'can_media_tag': result.can_media_tag,
                'want_retweets': result.want_retweets,
                'default_profile': result.default_profile,
                'default_profile_image': result.default_profile_image,
                'followers_count': result.followers_count,
                'fast_followers_count': result.fast_followers_count,
                'normal_followers_count': result.normal_followers_count,
                'following_count': result.following_count,
                'favourites_count': result.favourites_count,
                'listed_count': result.listed_count,
                'media_count': result.media_count,
                'statuses_count': result.statuses_count,
                'is_translator': result.is_translator,
                'translator_type': result.translator_type,
                'profile_interstitial_type': None,
                'withheld_in_countries': None
            }
            # Save the data to the database
            await self.db_handler.save_user(user_data)
            print(f"User data for {user_name} saved successfully.")