zhangsan vor 1 Monat
Commit
4117982f88

+ 18 - 0
.gitignore

@@ -0,0 +1,18 @@
+.DS_Store
+*.pyc
+.vscode
+__pycache__/
+.idea/**/*
+/venv/
+*.iml
+/output/**
+/server/utils/images2/
+/server/utils/images/
+/server/utils/*.html
+/server/utils/*.md
+/server/utils/*.docx
+/server/service/*.md
+/server/example
+output
+input

+ 0 - 0
README.md


+ 13 - 0
app.py

@@ -0,0 +1,13 @@
from fastapi import FastAPI
import server.router.user_router as user_router
import server.router.spider_router as spider_router

app = FastAPI()

# Mount the routers.
# NOTE(review): the spider router is also mounted under the "/user" prefix,
# so its endpoint is served at /user/spider. Its tag says "spider" — confirm
# whether "/spider" was intended before changing it, since existing callers
# may already depend on the current path.
app.include_router(user_router.router, prefix="/user", tags=["user"])
app.include_router(spider_router.router, prefix="/user", tags=["spider"])

if __name__ == "__main__":
    import uvicorn
    # Serve on all interfaces, port 8088.
    uvicorn.run(app, host="0.0.0.0", port=8088)

+ 1 - 0
cookies.json

@@ -0,0 +1 @@
+{"_twitter_sess": "BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCOw6YCKVAToMY3NyZl9p%250AZCIlM2U0YTY5YTQ3MWQwZWVjNjk0NWQ4NTMyOTE2OWU1ODg6B2lkIiU5MWJh%250AOTc1MWYzYmRhNmJlZDk5ZjJiMjc5NzZlM2I2Mg%253D%253D--bf831b5d9a1e27706ad9992aa43219ed13003942", "att": "1-MZObN3DQZb7140GqSypqvCIzIGaJxDnULyXMcEnS", "auth_token": "8e55c5953c02e4429e43d8180c42a74e21653452", "ct0": "253e89c25d9417400bcfe513ef7eba09", "guest_id": "v1%3A174003848613249841", "guest_id_ads": "v1%3A174003848613249841", "guest_id_marketing": "v1%3A174003848613249841", "kdt": "ghVMP1LGwSIM58RhxlDFLcINvYQhR30Q1H7ZbgZ5", "personalization_id": "\"v1_Nb5yP1BCCUFYSCeNJ7oBxA==\"", "twid": "\"u=1263399882\""}

+ 4 - 0
requirements.txt

@@ -0,0 +1,4 @@
+fastapi
+uvicorn
+pydantic
+mysql-connector-python
+twikit
+

+ 31 - 0
server/module/TweetModel.py

@@ -0,0 +1,31 @@
+from pydantic import BaseModel
+from typing import Optional
+
class TweetModel(BaseModel):
    """Pydantic schema for one row of the `tweets` table.

    Mirrors the columns written by DatabaseHandler.save_tweet; everything
    except the identifying fields is optional.
    """
    id: str
    created_at: str
    user_id: str
    text: Optional[str] = None
    lang: Optional[str] = None
    in_reply_to: Optional[str] = None
    is_quote_status: Optional[bool] = None
    quote_id: Optional[str] = None
    retweeted_tweet_id: Optional[str] = None
    possibly_sensitive: Optional[bool] = None
    quote_count: Optional[int] = None
    reply_count: Optional[int] = None
    favorite_count: Optional[int] = None
    favorited: Optional[bool] = None
    view_count: Optional[int] = None
    retweet_count: Optional[int] = None
    bookmark_count: Optional[int] = None
    bookmarked: Optional[bool] = None
    place: Optional[str] = None
    is_translatable: Optional[bool] = None
    is_edit_eligible: Optional[bool] = None
    edits_remaining: Optional[int] = None
    # One of 'Tweets' / 'Replies' / 'Media' (see TwitterCrawler.crawl_user_tweets).
    tweet_type: Optional[str] = None
    # Pagination cursor of the page this tweet was fetched with; used to resume crawls.
    next_cursor: Optional[str] = None

    class Config:
        # Allow Pydantic to read data directly from ORM/database objects
        # (pydantic v1 setting; renamed `from_attributes` in v2).
        orm_mode = True

+ 34 - 0
server/module/UserModel.py

@@ -0,0 +1,34 @@
from typing import Optional, List

from pydantic import BaseModel


class User(BaseModel):
    """Pydantic schema for one row of the `users` table.

    Mirrors the columns written by DatabaseHandler.save_user; only the
    Twitter user id is mandatory.
    """
    id: str
    name: Optional[str] = None
    screen_name: Optional[str] = None
    profile_image_url: Optional[str] = None
    profile_banner_url: Optional[str] = None
    url: Optional[str] = None
    location: Optional[str] = None
    description: Optional[str] = None
    is_blue_verified: Optional[bool] = None
    verified: Optional[bool] = None
    possibly_sensitive: Optional[bool] = None
    can_dm: Optional[bool] = None
    can_media_tag: Optional[bool] = None
    want_retweets: Optional[bool] = None
    default_profile: Optional[bool] = None
    default_profile_image: Optional[bool] = None
    followers_count: Optional[int] = None
    fast_followers_count: Optional[int] = None
    normal_followers_count: Optional[int] = None
    following_count: Optional[int] = None
    favourites_count: Optional[int] = None
    listed_count: Optional[int] = None
    media_count: Optional[int] = None
    statuses_count: Optional[int] = None
    is_translator: Optional[bool] = None
    translator_type: Optional[str] = None
    profile_interstitial_type: Optional[str] = None
    withheld_in_countries: Optional[List[str]] = None

+ 21 - 0
server/router/spider_router.py

@@ -0,0 +1,21 @@
+import asyncio
+
+from fastapi import APIRouter
+from pydantic import BaseModel
+
+from server.spider.twitter_spider import crawl_user
+
router = APIRouter()

# asyncio only keeps weak references to tasks: a fire-and-forget task whose
# handle is dropped can be garbage-collected before it finishes. Keep strong
# references here until each crawl task completes.
_background_tasks = set()


class SpiderInput(BaseModel):
    """Request body for POST /spider: the Twitter screen name to crawl."""
    username: str


@router.post("/spider")
async def create_spider_data(spider_input: SpiderInput):
    """Schedule a background crawl of the given user and return immediately.

    The crawl runs as an asyncio task; this endpoint does not wait for it.
    """
    username = spider_input.username
    task = asyncio.create_task(crawl_user(username))
    # Hold a reference so the task is not GC'd mid-crawl; drop it on completion.
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return {
        'code': 0
    }

+ 44 - 0
server/router/user_router.py

@@ -0,0 +1,44 @@
+from fastapi import APIRouter, Query
+
+from server.spider.result_model import PaginationResponse
+from server.spider.twitter_db import DatabaseHandler
+
router = APIRouter()


def _paged_response(items, total_records: int, page: int, page_size: int) -> dict:
    """Assemble a PaginationResponse payload; total_pages is ceiling division."""
    total_pages = (total_records + page_size - 1) // page_size
    return {
        "items": items,
        "total_records": total_records,
        "current_page": page,
        "total_pages": total_pages
    }


@router.get("/list", response_model=PaginationResponse)
async def read_users(page: int = Query(1, ge=1, description="当前页码"),
                     page_size: int = Query(10, ge=1, description="每页显示的记录数")):
    """Return one page of crawled users.

    `ge=1` rejects page/page_size values that would produce a negative
    SQL OFFSET downstream.
    """
    db_handler = DatabaseHandler()
    user_list, total_records = await db_handler.get_all_user(page, page_size)
    return _paged_response(user_list, total_records, page, page_size)


@router.get("/twitter", response_model=PaginationResponse)
async def get_twitter_list(page: int = Query(1, ge=1, description="当前页码"),
                           page_size: int = Query(10, ge=1, description="每页显示的记录数")):
    """Return one page of crawled tweets (same pagination contract as /list)."""
    db_handler = DatabaseHandler()
    twitter_list, total_records = await db_handler.get_all_twitter(page, page_size)
    return _paged_response(twitter_list, total_records, page, page_size)

+ 8 - 0
server/spider/result_model.py

@@ -0,0 +1,8 @@
+from pydantic import BaseModel
+
+
class PaginationResponse(BaseModel):
    """Generic paginated payload returned by the /user endpoints."""
    # One page of results (users or tweets).
    items: list
    # Total matching rows across all pages.
    total_records: int
    # 1-based page number that was requested.
    current_page: int
    # Ceiling of total_records / page_size.
    total_pages: int

+ 67 - 0
server/spider/twitter_crawl.py

@@ -0,0 +1,67 @@
+from twikit.client.client import Client
+
+from server.spider.twitter_db import DatabaseHandler
+
+
class TwitterCrawler:
    """Crawls a Twitter user's profile and timelines via a twikit Client
    and persists the results through a DatabaseHandler."""

    def __init__(self, client: Client, db_handler: DatabaseHandler):
        self.client = client
        self.db_handler = db_handler

    async def crawl_user_tweets(self, user_id: str):
        """Fetch every page of the user's 'Tweets', 'Replies' and 'Media'
        timelines, resuming each type from the last cursor stored in the DB.

        Each tweet is saved together with the cursor that fetched its page,
        so a later run can pick up where this one stopped.
        """
        for tweet_type in ['Tweets', 'Replies', 'Media']:
            latest_cursor = self.db_handler.get_latest_twitter_id(user_id, tweet_type)
            while True:
                # Fetch one page (up to 40 tweets) of the current type.
                result = await self.client.get_user_tweets(user_id, tweet_type, count=40, cursor=latest_cursor)
                print(result)
                page = list(result)
                for tweet in page:
                    await self.db_handler.save_tweet(tweet, tweet_type, latest_cursor)

                # Stop on an empty page even if a cursor is still returned —
                # the API can keep handing back cursors with no new data,
                # which previously risked looping forever.
                if not page or not result.next_cursor:
                    break
                latest_cursor = result.next_cursor

    async def crawl_user(self, user_name: str):
        """Fetch one user's profile by screen name and upsert it into the DB.

        NOTE: the argument is a screen name, not a numeric user id.
        """
        result = await self.client.get_user_by_screen_name(user_name)

        if result:
            # Flatten the twikit user object into plain column values.
            user_data = {
                'id': result.id,
                'name': result.name,
                'screen_name': result.screen_name,
                'profile_image_url': result.profile_image_url,
                'profile_banner_url': result.profile_banner_url,
                'url': result.url,
                'location': result.location,
                'description': result.description,
                'is_blue_verified': result.is_blue_verified,
                'verified': result.verified,
                'possibly_sensitive': result.possibly_sensitive,
                'can_dm': result.can_dm,
                'can_media_tag': result.can_media_tag,
                'want_retweets': result.want_retweets,
                'default_profile': result.default_profile,
                'default_profile_image': result.default_profile_image,
                'followers_count': result.followers_count,
                'fast_followers_count': result.fast_followers_count,
                'normal_followers_count': result.normal_followers_count,
                'following_count': result.following_count,
                'favourites_count': result.favourites_count,
                'listed_count': result.listed_count,
                'media_count': result.media_count,
                'statuses_count': result.statuses_count,
                'is_translator': result.is_translator,
                'translator_type': result.translator_type,
                # Not taken from this API response; stored as NULL.
                'profile_interstitial_type': None,
                'withheld_in_countries': None
            }
            await self.db_handler.save_user(user_data)
            print(f"User data for {user_name} saved successfully.")

+ 435 - 0
server/spider/twitter_db.py

@@ -0,0 +1,435 @@
+from typing import Optional
+from datetime import datetime
+
+import mysql.connector
+from mysql.connector import pooling
+
+from server.module.TweetModel import TweetModel
+from server.module.UserModel import User
+
import os

# MySQL connection settings.
# SECURITY(review): real credentials were hard-coded (and committed) here.
# They now default to the original values only for backward compatibility —
# rotate the leaked password and supply these via environment variables
# instead of source control.
DB_CONFIG = {
    'host': os.getenv('TWITTER_DB_HOST', '117.78.31.244'),
    'user': os.getenv('TWITTER_DB_USER', 'root'),
    'password': os.getenv('TWITTER_DB_PASSWORD', 'zh123456'),
    'database': os.getenv('TWITTER_DB_NAME', 'twitter_spider'),
    'charset': 'utf8mb4'
}
+
+
def convert_to_mysql_datetime(date_str: str) -> Optional[str]:
    """Convert Twitter's created_at format to a MySQL DATETIME string.

    Example: 'Thu Feb 20 00:38:20 +0000 2025' -> '2025-02-20 00:38:20'.

    Returns None (after logging) when the input does not match. Note the
    pattern only accepts a literal '+0000' offset, as produced by the
    Twitter API.
    """
    try:
        tweet_datetime = datetime.strptime(date_str, '%a %b %d %H:%M:%S +0000 %Y')
        return tweet_datetime.strftime('%Y-%m-%d %H:%M:%S')
    except ValueError as e:
        # Fixed annotation: the function always returned None on failure
        # but was annotated as returning a plain str.
        print(f"Error converting time: {e}")
        return None
+
+
class DatabaseHandler:
    """MySQL persistence layer for crawled Twitter users and tweets.

    Each public method borrows a connection from a small shared pool and
    releases it before returning (``close()`` on a pooled connection puts
    it back in the pool rather than closing it).
    """

    def __init__(self):
        # Keep the raw config for reference and build the shared pool.
        self.db_config = DB_CONFIG
        self.pool = pooling.MySQLConnectionPool(
            pool_name="mypool",
            pool_size=5,  # up to 5 concurrent connections
            **DB_CONFIG
        )

    def get_connection(self):
        """Borrow a connection from the pool."""
        return self.pool.get_connection()

    def _execute_write(self, query, params, what):
        """Run a single INSERT: commit on success, log + rollback on error.

        `what` names the entity for the error message. Shared by all the
        save_* helpers (which previously duplicated this boilerplate and
        had copy-pasted, sometimes wrong, error labels).
        """
        conn = self.get_connection()
        cursor = conn.cursor()
        try:
            cursor.execute(query, params)
            conn.commit()
        except mysql.connector.Error as err:
            print(f"Error inserting {what}: {err}")
            conn.rollback()
        finally:
            cursor.close()
            conn.close()

    async def save_tweet(self, tweet, tweet_type, latest_cursor):
        """Persist one tweet plus its media, hashtags, URLs, replies and
        related tweets.

        `latest_cursor` is the pagination cursor used to fetch this tweet's
        page; it is stored in `next_cursor` so a later crawl can resume.
        """
        query = """
            INSERT INTO tweets (id, created_at, user_id, text, lang, in_reply_to, 
            is_quote_status, quote_id, retweeted_tweet_id, possibly_sensitive, quote_count, 
            reply_count, favorite_count, favorited, view_count, retweet_count, bookmark_count, 
            bookmarked, place, is_translatable, is_edit_eligible, edits_remaining,tweet_type,next_cursor)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,%s,%s)
        """
        params = (
            tweet.id,
            convert_to_mysql_datetime(tweet.created_at),
            tweet.user.id,
            tweet.text,
            tweet.lang,
            tweet.in_reply_to,
            tweet.is_quote_status,
            tweet.quote.id if tweet.quote else None,
            tweet.retweeted_tweet.id if tweet.retweeted_tweet else None,
            tweet.possibly_sensitive,
            tweet.quote_count,
            tweet.reply_count,
            tweet.favorite_count,
            tweet.favorited,
            tweet.view_count,
            tweet.retweet_count,
            tweet.bookmark_count,
            tweet.bookmarked,
            tweet.place if tweet.place else None,
            tweet.is_translatable,
            tweet.is_edit_eligible,
            tweet.edits_remaining,
            tweet_type,
            latest_cursor
        )
        self._execute_write(query, params, "tweet")

        # Child records; each helper borrows its own pooled connection.
        for media in tweet.media:
            self.save_media(tweet.id, media)

        for hashtag in tweet.hashtags:
            self.save_hashtag(tweet.id, hashtag)

        for url_obj in tweet.urls:
            # Store the expanded (full) URL rather than the t.co short form.
            url = url_obj.get('expanded_url')
            if url:
                self.save_url(tweet.id, url)

        if tweet.replies:
            for reply in tweet.replies:
                self.save_reply(tweet.id, reply.id)

        if tweet.related_tweets:
            for related in tweet.related_tweets:
                self.save_related_tweet(tweet.id, related.id)

    def save_media(self, tweet_id, media):
        """Insert one media attachment row for a tweet."""
        self._execute_write(
            "INSERT INTO media (tweet_id, media_url, media_type) VALUES (%s, %s, %s)",
            (tweet_id, media.url, media.type),
            "media",
        )

    def save_hashtag(self, tweet_id, hashtag):
        """Insert one hashtag row for a tweet."""
        self._execute_write(
            "INSERT INTO hashtags (tweet_id, hashtag) VALUES (%s, %s)",
            (tweet_id, hashtag),
            "hashtag",
        )

    def save_url(self, tweet_id, url):
        """Insert one URL row for a tweet."""
        self._execute_write(
            "INSERT INTO urls (tweet_id, url) VALUES (%s, %s)",
            (tweet_id, url),
            "URL",
        )

    def save_reply(self, tweet_id, reply_tweet_id):
        """Link a tweet to the id of one of its replies."""
        self._execute_write(
            "INSERT INTO replies (tweet_id, reply_tweet_id) VALUES (%s, %s)",
            (tweet_id, reply_tweet_id),
            "reply",
        )

    def save_related_tweet(self, tweet_id, related_tweet_id):
        """Link a tweet to a related (quoted/retweeted) tweet id."""
        # Error label was a copy-pasted "reply"; now reports the right entity.
        self._execute_write(
            "INSERT INTO related_tweets (tweet_id, related_tweet_id) VALUES (%s, %s)",
            (tweet_id, related_tweet_id),
            "related tweet",
        )

    def get_latest_twitter_id(self, user_id: str, tweet_type: Optional[str] = None) -> Optional[str]:
        """Return the most recently stored pagination cursor (`next_cursor`)
        for a user — optionally restricted to one tweet_type — or None."""
        conn = self.get_connection()
        cursor = conn.cursor()

        query = "SELECT next_cursor FROM tweets WHERE user_id = %s"
        params = [user_id]

        if tweet_type:
            query += " AND tweet_type = %s"
            params.append(tweet_type)

        query += " ORDER BY created_at DESC LIMIT 1"
        try:
            cursor.execute(query, tuple(params))
            result = cursor.fetchone()
            return result[0] if result else None
        except mysql.connector.Error as err:
            # Read-only query: nothing to roll back. (The old message was a
            # copy-pasted "Error inserting reply".)
            print(f"Error fetching latest cursor: {err}")
            return None
        finally:
            cursor.close()
            conn.close()

    async def save_user(self, user_data):
        """Upsert one user row: UPDATE when the id already exists, INSERT
        otherwise.

        The execute calls are now inside the try block — previously a
        failing execute propagated and leaked the cursor/connection.
        """
        conn = self.get_connection()
        cursor = conn.cursor()
        print(user_data)
        try:
            check_query = "SELECT COUNT(*) FROM users WHERE id = %s"
            cursor.execute(check_query, (user_data['id'],))
            result = cursor.fetchone()

            if result[0] > 0:  # user already exists -> update
                update_query = """
                    UPDATE users SET
                        name = %s,
                        screen_name = %s,
                        profile_image_url = %s,
                        profile_banner_url = %s,
                        url = %s,
                        location = %s,
                        description = %s,
                        is_blue_verified = %s,
                        verified = %s,
                        possibly_sensitive = %s,
                        can_dm = %s,
                        can_media_tag = %s,
                        want_retweets = %s,
                        default_profile = %s,
                        default_profile_image = %s,
                        followers_count = %s,
                        fast_followers_count = %s,
                        normal_followers_count = %s,
                        following_count = %s,
                        favourites_count = %s,
                        listed_count = %s,
                        media_count = %s,
                        statuses_count = %s,
                        is_translator = %s,
                        translator_type = %s,
                        profile_interstitial_type = %s,
                        withheld_in_countries = %s
                    WHERE id = %s
                """
                cursor.execute(update_query, (
                    user_data['name'], user_data['screen_name'], user_data['profile_image_url'],
                    user_data['profile_banner_url'], user_data['url'], user_data['location'],
                    user_data['description'], user_data['is_blue_verified'], user_data['verified'],
                    user_data['possibly_sensitive'], user_data['can_dm'], user_data['can_media_tag'],
                    user_data['want_retweets'], user_data['default_profile'], user_data['default_profile_image'],
                    user_data['followers_count'], user_data['fast_followers_count'], user_data['normal_followers_count'],
                    user_data['following_count'], user_data['favourites_count'], user_data['listed_count'],
                    user_data['media_count'], user_data['statuses_count'], user_data['is_translator'],
                    user_data['translator_type'], user_data['profile_interstitial_type'],
                    user_data['withheld_in_countries'], user_data['id']
                ))
            else:  # user does not exist -> insert
                insert_query = """
                    INSERT INTO users (
                        id, name, screen_name, profile_image_url, profile_banner_url, url,
                        location, description, is_blue_verified, verified, possibly_sensitive,
                        can_dm, can_media_tag, want_retweets, default_profile, default_profile_image,
                        followers_count, fast_followers_count, normal_followers_count, following_count,
                        favourites_count, listed_count, media_count, statuses_count, is_translator,
                        translator_type, profile_interstitial_type, withheld_in_countries
                    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
                cursor.execute(insert_query, (
                    user_data['id'], user_data['name'], user_data['screen_name'], user_data['profile_image_url'],
                    user_data['profile_banner_url'], user_data['url'], user_data['location'], user_data['description'],
                    user_data['is_blue_verified'], user_data['verified'], user_data['possibly_sensitive'],
                    user_data['can_dm'],
                    user_data['can_media_tag'], user_data['want_retweets'], user_data['default_profile'],
                    user_data['default_profile_image'], user_data['followers_count'], user_data['fast_followers_count'],
                    user_data['normal_followers_count'], user_data['following_count'], user_data['favourites_count'],
                    user_data['listed_count'], user_data['media_count'], user_data['statuses_count'],
                    user_data['is_translator'],
                    user_data['translator_type'], user_data['profile_interstitial_type'], user_data['withheld_in_countries']
                ))
            conn.commit()
        except mysql.connector.Error as err:
            print(f"Error inserting user: {err}")
            conn.rollback()
        finally:
            cursor.close()
            conn.close()

    async def get_all_user(self, page: int = 1, page_size: int = 10):
        """Return one page of users plus the total row count.

        :return: (list[User], total_records)
        """
        offset = (page - 1) * page_size
        conn = self.get_connection()
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT * FROM users LIMIT %s OFFSET %s", (page_size, offset))
            user_list = cursor.fetchall()

            cursor.execute("SELECT COUNT(*) FROM users")
            total_records = cursor.fetchone()[0]
        finally:
            # Always release the pooled connection, even if a query fails.
            cursor.close()
            conn.close()

        # NOTE(review): positional mapping assumes the `users` column order
        # matches the INSERT in save_user — keep the two in sync.
        users = [
            User(
                id=row[0],
                name=row[1],
                screen_name=row[2],
                profile_image_url=row[3],
                profile_banner_url=row[4],
                url=row[5],
                location=row[6],
                description=row[7],
                is_blue_verified=row[8],
                verified=row[9],
                possibly_sensitive=row[10],
                can_dm=row[11],
                can_media_tag=row[12],
                want_retweets=row[13],
                default_profile=row[14],
                default_profile_image=row[15],
                followers_count=row[16],
                fast_followers_count=row[17],
                normal_followers_count=row[18],
                following_count=row[19],
                favourites_count=row[20],
                listed_count=row[21],
                media_count=row[22],
                statuses_count=row[23],
                is_translator=row[24],
                translator_type=row[25],
                profile_interstitial_type=row[26],
                withheld_in_countries=row[27]
            ) for row in user_list
        ]
        return users, total_records

    async def get_all_twitter(self, page: int = 1, page_size: int = 10):
        """Return one page of tweets plus the total row count.

        :param page: 1-based page number
        :param page_size: rows per page
        :return: (list[TweetModel], total_records)
        """
        offset = (page - 1) * page_size
        conn = self.get_connection()
        # BUG FIX: the default cursor yields tuples, so TweetModel(**tweet)
        # raised a TypeError. A dictionary cursor yields column-name keyed
        # dicts that match the model's fields.
        cursor = conn.cursor(dictionary=True)
        try:
            cursor.execute("SELECT * FROM tweets LIMIT %s OFFSET %s", (page_size, offset))
            tweets_list = cursor.fetchall()

            cursor.execute("SELECT COUNT(*) AS total FROM tweets")
            total_records = cursor.fetchone()['total']
        finally:
            cursor.close()
            conn.close()

        tweets = []
        for row in tweets_list:
            # The DATETIME column comes back as a datetime object, but the
            # model declares created_at as str — normalize it first.
            if isinstance(row.get('created_at'), datetime):
                row['created_at'] = row['created_at'].strftime('%Y-%m-%d %H:%M:%S')
            tweets.append(TweetModel(**row))

        return tweets, total_records

    async def get_all_user_ids(self):
        """Return the ids of every stored user.

        Now borrows from the pool instead of opening an ad-hoc connection
        with mysql.connector.connect (the only method that bypassed the pool).
        """
        conn = self.get_connection()
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT id FROM users")
            user_ids = [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
            conn.close()

        return user_ids

+ 54 - 0
server/spider/twitter_spider.py

@@ -0,0 +1,54 @@
+import asyncio
+
+from twikit import Client
+
+from server.spider.twitter_crawl import TwitterCrawler
+from server.spider.twitter_db import DatabaseHandler
+
+
class TwitterClient:
    """Thin wrapper holding a twikit Client instance.

    NOTE(review): not referenced anywhere in this commit — looks like a
    leftover; candidate for removal.
    """
    def __init__(self, client: Client):
        self.client = client
+
+
import os

# SECURITY(review): real Twitter account credentials were hard-coded and
# committed here (and live session cookies are committed in cookies.json).
# The values below default to the originals only for backward compatibility —
# rotate this password, revoke the cookies, and supply these via environment
# variables rather than source control.
USERNAME = os.getenv('TWITTER_USERNAME', 'yiyoungjoy@gmail.com')
EMAIL = os.getenv('TWITTER_EMAIL', 'yiyoungjoy@gmail.com')
PASSWORD = os.getenv('TWITTER_PASSWORD', '19920911yy')
+
+
async def crawl_user(user_id):
    """Log in to Twitter (session is cached in cookies.json) and crawl the
    given user's profile into the database.

    Note: despite the name, the argument is passed through to
    TwitterCrawler.crawl_user, which looks the user up by screen name.
    """
    client = Client(language='en-US', proxy="http://127.0.0.1:7890")
    await client.login(
        auth_info_1=USERNAME,
        auth_info_2=EMAIL,
        password=PASSWORD,
        cookies_file='cookies.json'
    )
    crawler = TwitterCrawler(client, DatabaseHandler())
    await crawler.crawl_user(user_id)
+
+
async def main():
    """Continuously re-crawl the tweets of every user already stored in the
    database, sleeping one minute after each user."""
    # Log in once (session cached in cookies.json) and share the client.
    client = Client(language='en-US', proxy="http://127.0.0.1:7890")
    await client.login(
        auth_info_1=USERNAME,
        auth_info_2=EMAIL,
        password=PASSWORD,
        cookies_file='cookies.json'
    )
    db = DatabaseHandler()
    crawler = TwitterCrawler(client, db)

    # Snapshot of targets taken once at startup.
    targets = await db.get_all_user_ids()

    while True:
        for uid in targets:
            await crawler.crawl_user_tweets(uid)
            await asyncio.sleep(60)  # throttle: one user per minute
+
+
+# 运行爬虫
+# asyncio.run(main())