diff --git a/src/paperbot/utils/CCS-DOWN.py b/src/paperbot/utils/CCS-DOWN.py deleted file mode 100644 index edb5397f..00000000 --- a/src/paperbot/utils/CCS-DOWN.py +++ /dev/null @@ -1,1052 +0,0 @@ -# securipaperbot/utils/downloader.py - -from typing import Dict, List, Any, Optional -import aiohttp -import asyncio -import httpx -from pathlib import Path -import urllib.parse -from bs4 import BeautifulSoup -import re -import json -import time -import random -from datetime import datetime -import logging -import traceback - -# 添加动态cookie获取支持 -import traceback -try: - # curl_cffi 0.5.x 版本中, AsyncSession 位于 requests 模块下 - from curl_cffi.requests import AsyncSession - CURL_CFFI_AVAILABLE = True -except ImportError: - from typing import Any as AsyncSession # Mock for type hinting - CURL_CFFI_AVAILABLE = False - print("❌ 'curl_cffi' 导入失败。详细错误信息如下:") - traceback.print_exc() - print("警告: curl_cffi 未安装或无法加载,动态cookie获取功能(如ACM)将受限") - -try: - import cloudscraper - CLOUDSCRAPER_AVAILABLE = True -except ImportError: - CLOUDSCRAPER_AVAILABLE = False - print("警告: cloudscraper 未安装,动态cookie获取功能(如ACM)将受限") - - -# 使用标准日志,避免相对导入问题 -def setup_logger(name): - logger = logging.getLogger(name) - if not logger.handlers: - logger.setLevel(logging.INFO) - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger - - -class PaperDownloader: - """论文下载工具类 - 优化版本,使用持久化会话""" - - # 会议基本信息配置 - CONFERENCE_INFO = { - 'sp': { - 'base_url': 'https://ieeexplore.ieee.org/xpl/conhome/1000487/all-proceedings', - 'headers': { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0' - } - }, - 'ndss': { - 'base_url': 'https://www.ndss-symposium.org', - 'headers': { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0' - } - }, - 'usenix': { - 'base_url': 'https://www.usenix.org/conference/usenixsecurity', - 'headers': { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0' - } - } - } - - async def _download_with_retry(self, url: str) -> Optional[bytes]: - """ - 智能下载实现,带自动重试和反爬处理。 - - Args: - url (str): 要下载的URL - - Returns: - Optional[bytes]: 下载的内容,失败返回None - """ - # 验证并确保会话可用 - if not self.session: - try: - self.logger.info("正在重新创建持久化会话...") - self.session = AsyncSession() - except Exception as e: - self.logger.error(f"创建持久化会话失败: {e}") - return None - - last_error = None - content = None - - for attempt in range(1, self.max_retries + 1): - try: - self.logger.info(f"下载尝试 {attempt}/{self.max_retries}: {url}") - - # 配置特殊headers以绕过反爬 - headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/91.0.4472.124 Safari/537.36' - } - - # 验证会话状态 - if not self.session or getattr(self.session, '_closed', False): - self.logger.warning("会话已关闭,正在重新创建...") - self.session = AsyncSession() - - # 使用curl_cffi的持久化会话和浏览器仿真 - response = await self.session.get( - url, - impersonate="chrome110", - headers=headers, - timeout=60 - ) - - # 检查HTTP状态码 - if response.status_code == 403: - self.logger.warning(f"遇到403 Forbidden,可能是反爬限制 (尝试 {attempt}/{self.max_retries})") - await asyncio.sleep(self.retry_delay * attempt) # 指数退避 - continue - - elif response.status_code == 429: - self.logger.warning(f"遇到429 Too Many Requests,开始等待 (尝试 {attempt}/{self.max_retries})") - await asyncio.sleep(self.retry_delay * 2 * attempt) # 更长的等待 - continue - - elif response.status_code != 200: - self.logger.warning(f"HTTP {response.status_code} (尝试 {attempt}/{self.max_retries})") - await asyncio.sleep(self.retry_delay) - continue - - # 获取响应内容 - content = response.content - - # 验证内容 - if not content or len(content) < 1024: # 小于1KB可能是错误页面 - self.logger.warning(f"响应内容过小: {len(content) if content else 0} bytes") - continue - - # 对于PDF,验证文件头 - if url.lower().endswith('.pdf') and not content.startswith(b'%PDF'): - self.logger.warning("响应不是有效的PDF格式") - continue - - self.logger.info(f"✅ 成功下载: {len(content)} bytes") - return content - - except Exception as e: - last_error = e - self.logger.warning(f"下载出错 (尝试 {attempt}/{self.max_retries}): {e}") - await asyncio.sleep(self.retry_delay) - continue - - # 所有重试都失败 - if last_error: - self.logger.error(f"❌ 下载失败,已达到最大重试次数。最后错误: {last_error}") - else: - self.logger.error("❌ 下载失败,未获得有效内容") - return None - - def _sanitize_filename(self, filename: str) -> str: - """清理并规范化文件名,移除非法字符""" - # 替换 Windows 文件系统不允许的字符 - invalid_chars = r'[\\/:"*?<>|]+' - filename = re.sub(invalid_chars, '_', filename) - - # 将连续的空白字符替换为单个空格 - filename = re.sub(r'\s+', ' ', filename) - - # 去除首尾空白 - filename = filename.strip() - - # 如果文件名为空,使用默认名称 - if not filename: - filename = 
f"paper_{int(time.time())}" - - # 限制文件名长度(Windows 最大路径长度为 260 字符) - max_length = 200 # 留一些余地给路径和扩展名 - if len(filename) > max_length: - filename = filename[:max_length-3] + "..." - - return filename - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.logger = setup_logger(__name__) - self.download_path = Path(self.config.get('download_path', './papers')) - self.download_path.mkdir(parents=True, exist_ok=True) - - self.session: Optional[AsyncSession] = None - - # 配置下载重试参数 - self.max_retries = self.config.get('max_retries', 3) - self.retry_delay = self.config.get('retry_delay', 3) - - # 并发控制 - max_concurrent = 1 - self.semaphore = asyncio.Semaphore(max_concurrent) - - - # 会议URL模板 - self.conference_urls = { - 'ccs': 'https://dl.acm.org/doi/proceedings/', - 'sp': 'https://ieeexplore.ieee.org/xpl/conhome/', - 'ndss': 'https://www.ndss-symposium.org/', - 'usenix': 'https://www.usenix.org/conference/' - } - - async def __aenter__(self): - """创建并返回一个持久化的 curl_cffi 会话.""" - try: - if self.session and not getattr(self.session, '_closed', False): - self.logger.info("使用现有的持久化会话...") - return self - - self.logger.info("正在创建新的持久化会话...") - self.session = AsyncSession() - return self - except Exception as e: - self.logger.error(f"创建持久化会话失败: {e}") - raise - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """关闭持久化会话.""" - try: - if self.session: - # 检查会话是否已经关闭 - is_closed = getattr(self.session, '_closed', True) - - if not is_closed and hasattr(self.session, 'close'): - try: - self.logger.info("正在关闭持久化会话...") - await self.session.close() - except Exception as e: - self.logger.warning(f"关闭会话时出现异常: {e}") - else: - self.logger.info("会话已经关闭,无需再次关闭") - except Exception as e: - self.logger.warning(f"处理会话关闭时出现异常: {e}") - finally: - # 确保会话对象被清理 - self.session = None - - async def download_paper(self, url: str, title: str, paper_index: int = 0, total_papers: int = 0) -> Dict[str, Any]: - """下载单篇论文 - 优化版本""" - async with 
self.semaphore: - try: - # 生成文件名 - 为IEEE论文添加特殊前缀 - safe_title = self._sanitize_filename(title) - - # 使用简化的文件名:只使用论文标题 - filename = f"{safe_title}.pdf" - - file_path = self.download_path / filename - - # 显示下载进度(与NDSS/USENIX保持一致) - if total_papers > 0: - progress = (paper_index + 1) / total_papers * 100 - print(f"💾 [{paper_index+1}/{total_papers}] 下载: {title[:50]}{'...' if len(title) > 50 else ''}") - - # 检查是否已下载并验证文件 - if file_path.exists(): - # 验证文件大小,过小的文件可能是错误页面 - file_size = file_path.stat().st_size - if file_size > 1024: # 大于1KB认为有效 - return { - 'success': True, - 'path': str(file_path), - 'cached': True, - 'size': file_size - } - else: - # 删除无效文件 - file_path.unlink() - self.logger.warning(f"Removed invalid cached file: {file_path}") - - # 下载论文 - content = await self._download_with_retry(url) - if content: - # 验证下载内容 - if len(content) < 1024: - raise Exception(f"Downloaded content too small ({len(content)} bytes), likely an error page") - - # 保存文件 - file_path.write_bytes(content) - file_size = len(content) - - return { - 'success': True, - 'path': str(file_path), - 'cached': False, - 'size': file_size - } - else: - raise Exception("Failed to download paper - no content received") - - except Exception as e: - self.logger.error(f"Error downloading paper {title}: {str(e)}") - return { - 'success': False, - 'error': str(e) - } - - async def _parse_sp_papers(self, year: str) -> List[Dict[str, Any]]: - """解析 IEEE S&P 论文列表""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - - try: - print(f"📚 正在获取 IEEE S&P {full_year} 论文列表...") - conf_info = self.CONFERENCE_INFO['sp'] - base_url = f"{conf_info['base_url']}" - - # 使用会话发送请求 - if not self.session: - self.session = AsyncSession() - - response = await self.session.get( - base_url, - headers=conf_info['headers'], - impersonate="chrome110" - ) - - if response.status_code != 200: - raise Exception(f"获取会议页面失败: HTTP {response.status_code}") - - # 解析页面内容 - soup = BeautifulSoup(response.text, 'html.parser') - 
paper_items = soup.select('div.paper-item') - - for item in paper_items: - title_elem = item.select_one('h3.paper-title') - if not title_elem: - continue - - title = title_elem.text.strip() - url = item.select_one('a[href*=".pdf"]') - if not url: - continue - - pdf_url = url['href'] - if not pdf_url.startswith('http'): - pdf_url = f"https://ieeexplore.ieee.org{pdf_url}" - - papers.append({ - 'title': title, - 'url': pdf_url - }) - - print(f"✅ 找到 {len(papers)} 篇论文") - return papers - - except Exception as e: - print(f"❌ 获取 IEEE S&P {full_year} 论文列表失败: {str(e)}") - return [] - - async def _parse_ndss_papers(self, year: str) -> List[Dict[str, Any]]: - """解析 NDSS 论文列表""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - - try: - print(f"📚 正在获取 NDSS {full_year} 论文列表...") - conf_info = self.CONFERENCE_INFO['ndss'] - base_url = f"{conf_info['base_url']}/ndss{year}/accepted-papers" - - # 使用会话发送请求 - if not self.session: - self.session = AsyncSession() - - response = await self.session.get( - base_url, - headers=conf_info['headers'], - impersonate="chrome110" - ) - - if response.status_code != 200: - raise Exception(f"获取会议页面失败: HTTP {response.status_code}") - - # 解析页面内容 - soup = BeautifulSoup(response.text, 'html.parser') - paper_items = soup.select('div.paper-item, div.accepted-paper') - - for item in paper_items: - title_elem = item.select_one('h3.paper-title, h4.paper-title, div.paper-title') - if not title_elem: - continue - - title = title_elem.text.strip() - url = item.select_one('a[href*=".pdf"]') - if not url: - continue - - pdf_url = url['href'] - if not pdf_url.startswith('http'): - pdf_url = f"{conf_info['base_url']}{pdf_url}" - - papers.append({ - 'title': title, - 'url': pdf_url - }) - - print(f"✅ 找到 {len(papers)} 篇论文") - return papers - - except Exception as e: - print(f"❌ 获取 NDSS {full_year} 论文列表失败: {str(e)}") - return [] - - async def _parse_usenix_papers(self, year: str) -> List[Dict[str, Any]]: - """解析 USENIX Security 论文列表""" - papers = 
[] - full_year = f"20{year}" if len(year) == 2 else year - - try: - print(f"📚 正在获取 USENIX Security {full_year} 论文列表...") - conf_info = self.CONFERENCE_INFO['usenix'] - base_url = f"{conf_info['base_url']}{full_year}/technical-sessions" - - # 使用会话发送请求 - if not self.session: - self.session = AsyncSession() - - response = await self.session.get( - base_url, - headers=conf_info['headers'], - impersonate="chrome110" - ) - - if response.status_code != 200: - raise Exception(f"获取会议页面失败: HTTP {response.status_code}") - - # 解析页面内容 - soup = BeautifulSoup(response.text, 'html.parser') - paper_items = soup.select('div.paper-item, div.node-paper') - - for item in paper_items: - title_elem = item.select_one('h2.node-title, div.field-title') - if not title_elem: - continue - - title = title_elem.text.strip() - url = item.select_one('a[href*=".pdf"]') - if not url: - continue - - pdf_url = url['href'] - if not pdf_url.startswith('http'): - pdf_url = f"https://www.usenix.org{pdf_url}" - - papers.append({ - 'title': title, - 'url': pdf_url - }) - - print(f"✅ 找到 {len(papers)} 篇论文") - return papers - - except Exception as e: - print(f"❌ 获取 USENIX Security {full_year} 论文列表失败: {str(e)}") - return [] - - async def get_conference_papers(self, conference: str, year: str) -> List[Dict[str, Any]]: - """获取会议论文列表 - 带进度显示""" - try: - conf_info = self.CONFERENCE_INFO.get(conference) - if not conf_info and conference != 'ccs': - raise ValueError(f"不支持的会议: {conference}") - - papers = [] - print(f"🔍 正在获取 {conference.upper()} {year} 论文列表...") - - # 根据会议类型选择相应的解析方法 - if conference == 'ccs': - papers = await self._parse_ccs_papers(self.conference_urls[conference], year) - elif conference == 'sp': - papers = await self._parse_sp_papers(year) - elif conference == 'ndss': - papers = await self._parse_ndss_papers(year) - elif conference == 'usenix': - papers = await self._parse_usenix_papers(year) - - if papers: - print(f"✅ 成功获取 {len(papers)} 篇论文信息") - - # 显示找到的论文标题 - print(f"📋 找到的论文列表:") - for i, paper 
in enumerate(papers[:10]): - title = paper.get('title', '未知标题')[:60] - print(f" {i+1:2d}. {title}{'...' if len(paper.get('title', '')) > 60 else ''}") - - if len(papers) > 10: - print(f" ... 和其他 {len(papers) - 10} 篇论文") - - # 开始PDF链接验证与进度显示 - print(f"\n🔗 正在验证PDF链接有效性...") - valid_count = 0 - - for i, paper in enumerate(papers): - # 显示进度 - progress = (i + 1) / len(papers) * 100 - progress_bar = '█' * int(progress // 5) + '░' * (20 - int(progress // 5)) - print(f"\r📋 [进度: {progress_bar}] {progress:.1f}% ({i+1}/{len(papers)}) 验证: {paper.get('title', '未知标题')[:30]}...", end='', flush=True) - - # 检查URL有效性 - if isinstance(paper.get('url'), str) and paper['url'].strip(): - valid_count += 1 - - print(f"\n✅ PDF链接验证完成: {valid_count}/{len(papers)} 个有效链接") - else: - print(f"⚠️ 未找到任何论文") - - return papers - - except Exception as e: - self.logger.error(f"获取论文列表失败: {str(e)}") - raise - """获取会议论文列表 - 带进度显示""" - try: - if conference not in self.conference_urls: - raise ValueError(f"不支持的会议: {conference}") - - base_url = self.conference_urls[conference] - papers = [] - - print(f"🔍 正在获取 {conference.upper()} {year} 论文列表...") - - # 规范化年份格式 - year = self.year_formats[conference](year) - - # 根据会议类型选择相应的解析方法 - papers = await self._get_papers_by_conference(conference, base_url, year) - if papers: - print(f"✨ 成功获取 {len(papers)} 篇论文信息") - return papers - - except Exception as e: - self.logger.error(f"获取论文列表失败: {e}") - raise - - async def _get_papers_by_conference(self, conference: str, base_url: str, year: str) -> List[Dict[str, Any]]: - """根据会议类型获取论文列表""" - try: - if conference == 'sp': - # IEEE S&P - full_url = f"{base_url}{year}" - return await self._get_sp_papers(full_url, year) - elif conference == 'ndss': - # NDSS - return await self._get_ndss_papers(base_url, year) - elif conference == 'usenix': - # USENIX Security - return await self._get_usenix_papers(base_url, year) - elif conference == 'ccs': - # ACM CCS - return await self._get_ccs_papers(base_url, year) - else: - raise 
ValueError(f"不支持的会议: {conference}") - except Exception as e: - self.logger.error(f"获取{conference.upper()} {year}论文列表失败: {e}") - raise - - async def _get_sp_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """获取 IEEE S&P 论文列表""" - papers = [] - try: - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/94.0.4606.81' - } - async with self.session.get(base_url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to fetch SP {year} papers list") - - soup = BeautifulSoup(response.text, 'lxml') - paper_items = soup.find_all('div', class_='article-list__item') - - for item in paper_items: - title_elem = item.find('h3', class_='article-list__title') - if not title_elem: - continue - - title = title_elem.text.strip() - pdf_link = item.find('a', class_='pdf-link') - - if pdf_link and 'href' in pdf_link.attrs: - url = pdf_link['href'] - if not url.startswith('http'): - url = f"https://www.computer.org{url}" - papers.append({ - 'title': title, - 'url': url - }) - - except Exception as e: - self.logger.error(f"解析 SP {year} 论文列表失败: {e}") - raise - - return papers - - async def _get_ndss_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """获取 NDSS 论文列表""" - papers = [] - try: - url = f"{base_url}ndss{year}/accepted-papers" - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/94.0.4606.81' - } - async with self.session.get(url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to fetch NDSS {year} papers list") - - soup = BeautifulSoup(response.text, 'lxml') - paper_items = soup.find_all('div', class_='paper-item') - - for item in paper_items: - title_elem = item.find('h2', class_='title') - if not title_elem: - continue - - title = title_elem.text.strip() - pdf_link = item.find('a', href=lambda x: x and x.endswith('.pdf')) - - if pdf_link and 'href' in pdf_link.attrs: - url = pdf_link['href'] - if not 
url.startswith('http'): - url = f"https://www.ndss-symposium.org{url}" - papers.append({ - 'title': title, - 'url': url - }) - - except Exception as e: - self.logger.error(f"解析 NDSS {year} 论文列表失败: {e}") - raise - - return papers - - async def _get_usenix_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """获取 USENIX Security 论文列表""" - papers = [] - try: - url = f"{base_url}{year}/technical-sessions" - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/94.0.4606.81' - } - async with self.session.get(url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to fetch USENIX {year} papers list") - - soup = BeautifulSoup(response.text, 'lxml') - paper_items = soup.find_all('div', class_='node-paper') - - for item in paper_items: - title_elem = item.find('h2', class_='node-title') - if not title_elem: - continue - - title = title_elem.text.strip() - pdf_link = item.find('a', href=lambda x: x and x.endswith('.pdf')) - - if pdf_link and 'href' in pdf_link.attrs: - url = pdf_link['href'] - if not url.startswith('http'): - url = f"https://www.usenix.org{url}" - papers.append({ - 'title': title, - 'url': url - }) - - except Exception as e: - self.logger.error(f"解析 USENIX {year} 论文列表失败: {e}") - raise - - return papers - - async def get_papers(self, conference: str, year: str) -> List[Dict[str, Any]]: - """ - 获取指定会议和年份的论文列表 - """ - try: - base_url = self.conference_urls.get(conference, {}).get(year) - if not base_url: - self.logger.error(f"未找到 {conference} {year} 的URL配置") - return [] - - if conference == 'ccs': - papers = await self._parse_ccs_papers(base_url, year) - elif conference == 'sp': - papers = await self._parse_sp_papers(base_url, year) - elif conference == 'ndss': - papers = await self._parse_ndss_papers(base_url, year) - elif conference == 'usenix': - papers = await self._parse_usenix_papers(base_url, year) - else: - self.logger.error(f"不支持的会议类型: {conference}") - return [] - - if 
papers: - print(f"✅ 成功获取 {len(papers)} 篇论文信息") - - # 显示找到的论文标题 - print(f"📋 找到的论文列表:") - for i, paper in enumerate(papers[:10]): - title = paper.get('title', '未知标题')[:60] - print(f" {i+1:2d}. {title}{'...' if len(paper.get('title', '')) > 60 else ''}") - - if len(papers) > 10: - print(f" ... 和其他 {len(papers) - 10} 篇论文") - - # 开始PDF链接验证与进度显示 - print(f"\n🔗 正在验证PDF链接有效性...") - valid_count = 0 - - for i, paper in enumerate(papers): - # 显示进度 - progress = (i + 1) / len(papers) * 100 - progress_bar = '█' * int(progress // 5) + '░' * (20 - int(progress // 5)) - print(f"\r📋 [进度: {progress_bar}] {progress:.1f}% ({i+1}/{len(papers)}) 验证: {paper.get('title', '未知标题')[:30]}...", end='', flush=True) - - # 检查URL有效性 - if isinstance(paper.get('url'), str) and paper['url'].strip(): - valid_count += 1 - - print(f"\n✅ PDF链接验证完成: {valid_count}/{len(papers)} 个有效链接") - else: - print(f"⚠️ 未找到任何论文") - - return papers - - except Exception as e: - self.logger.error(f"Error getting papers for {conference} {year}: {str(e)}") - raise - - - - async def _parse_ccs_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """ - 解析CCS论文列表的主入口函数。 - 使用持久化会话来执行所有相关请求。 - - Args: - base_url: 论文列表的基础URL - year: 会议年份 - - Returns: - 论文信息列表 - """ - papers = [] - try: - if not self.session: - raise RuntimeError("持久化会话未初始化。请在 'async with' 块中使用 PaperDownloader。") - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - - paper_dois = await self._get_all_ccs_dois_from_proceedings_page(self.session, year) - if not paper_dois: - self.logger.error(f"❌ 未能为CCS {year} 获取任何论文的DOI。") - return [] - - self.logger.info(f"📚 开始通过API批量解析 {len(paper_dois)} 篇CCS论文的详细信息...") - - papers = await self._fetch_all_ccs_paper_details_via_api(self.session, paper_dois, year) - return papers - - except Exception as e: - self.logger.error(f"❌ CCS论文解析主流程错误: {str(e)}") - raise - - async def _fetch_all_ccs_paper_details_via_api(self, 
session: AsyncSession, dois: List[str], year: str) -> List[Dict[str, Any]]: - """ - 使用POST请求批量获取所有CCS论文的JSON数据并解析。 - """ - api_url = "https://dl.acm.org/action/exportCiteProcCitation" - headers = { - 'Host': 'dl.acm.org', - 'Cookie': '_cf_bm=12; _cfuvid=eKvDTOvVWyHDD5bNf_GLEG_fzdrvwq1g_7YIL.aZOJU-1756624678973-0.0.1.1-604800000', - 'Pragma': 'no-cache', - 'Accept': '*/*', - 'Dnt': '1', - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0', - 'Sec-Ch-Ua-Platform-Version': '"19.0.0"', - 'Origin': 'https://dl.acm.org', - 'Referer': 'https://dl.acm.org/doi/proceedings/10.1145/3658644', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', - 'Priority': 'u=1, i', - } - dois_payload = ",".join(dois) - data_string = f"dois={dois_payload}&targetFile=custom-bibtex&format=json" - content_length = str(len(data_string.encode('utf-8'))) - headers['Content-Length'] = content_length - try: - self.logger.info(f"🚀 正在向 {api_url} 发送单次POST请求以获取 {len(dois)} 篇论文的JSON数据...") - response = await session.post( - api_url, - data=data_string, - headers=headers, - impersonate="chrome110", - timeout=180 - ) - if response.status_code != 200: - self.logger.error(f"❌ 批量获取JSON失败,HTTP状态码: {response.status_code}") - self.logger.error(f"响应内容: {response.text[:500]}") - debug_file = Path(f"debug_ccs_api_error_{response.status_code}.html") - debug_file.write_text(response.text, encoding='utf-8') - self.logger.info(f"🐛 已将错误响应保存到 {debug_file.absolute()} 以供调试。") - return [] - json_data_str = response.text - self.logger.info(f"✅ 成功获取JSON数据,大小: {len(json_data_str)}字节。开始解析...") - debug_json_file = Path("debug_ccs_json_response.json") - debug_json_file.write_text(json_data_str, encoding='utf-8') - self.logger.info(f"🐛 已将原始JSON响应保存到 {debug_json_file.absolute()} 以供分析。") - papers = 
self._parse_json_data(json_data_str, year) - self.logger.info(f"✅ 成功解析 {len(papers)}/{len(dois)} 篇论文的元数据。") - return papers - except Exception as e: - self.logger.error(f"❌ 批量获取和解析JSON数据时发生严重错误: {str(e)}") - import traceback - traceback.print_exc() - return [] - - def _parse_json_data(self, json_data_str: str, year: str) -> List[Dict[str, Any]]: - """ - 解析从ACM API返回的JSON数据。 - """ - papers = [] - try: - data = json.loads(json_data_str) - for item_dict in data.get('items', []): - for doi, details in item_dict.items(): - try: - title = details.get('title', f"未知标题 (DOI: {doi})") - authors_list = [] - for author_info in details.get('author', []): - given_name = author_info.get('given', '') - family_name = author_info.get('family', '') - authors_list.append(f"{given_name} {family_name}".strip()) - abstract = details.get('abstract', '摘要不可用') - pdf_url = f"https://dl.acm.org/doi/pdf/{doi}" - papers.append({ - 'title': title, - 'authors': authors_list, - 'abstract': abstract, - 'url': pdf_url, - 'conference': "CCS", - 'year': year - }) - except Exception as e: - self.logger.warning(f"解析单个JSON条目时出错 (DOI: {doi}): {e}") - continue - return papers - except json.JSONDecodeError as e: - self.logger.error(f"❌ JSON解析失败: {e}") - return [] - except Exception as e: - self.logger.error(f"❌ 处理JSON数据时发生未知错误: {e}") - return [] - - - def _parse_bibtex_data(self, bibtex_data: str, year: str) -> List[Dict[str, Any]]: - """ - 解析BibTeX数据字符串并返回论文列表。 - 使用更健壮的正则表达式来处理复杂的BibTeX格式。 - """ - papers = [] - # 使用更可靠的方式分割条目:按换行符后的'@'分割 - entries = re.split(r'\n@', bibtex_data) - - for entry in entries: - if not entry.strip() or not entry.startswith('inproceedings'): - continue - - try: - # 健壮的DOI提取 - doi_match = re.search(r'doi\s*=\s*\{([^}]+)\}', entry, re.IGNORECASE) - doi = doi_match.group(1).strip() if doi_match else "未知DOI" - - # 健壮的标题提取,能处理嵌套花括号 - title_match = re.search(r'title\s*=\s*\{((?:[^{}]|\{[^{}]*\})+)\}', entry, re.IGNORECASE) - title = title_match.group(1).strip().replace("{", 
"").replace("}", "") if title_match else f"未知标题 (DOI: {doi})" - - # 健壮的作者提取 - author_match = re.search(r'author\s*=\s*\{([^}]+)\}', entry, re.IGNORECASE) - authors_str = author_match.group(1) if author_match else "" - authors_list = [name.strip().replace("{", "").replace("}", "") for name in authors_str.split(' and ')] - - abstract = "摘要需访问论文页面查看" - pdf_url = f"https://dl.acm.org/doi/pdf/{doi}" - - papers.append({ - 'title': title, - 'authors': authors_list, - 'abstract': abstract, - 'url': pdf_url, - 'conference': "CCS", - 'year': year - }) - except Exception as e: - self.logger.warning(f"解析单个BibTeX条目时出错: {e}\n条目内容: {entry[:300]}...") - continue - return papers - - async def _get_all_ccs_dois_from_proceedings_page(self, session: AsyncSession, year: str) -> Optional[List[str]]: - """ - 获取CCS会议指定年份所有论文的DOI列表。 - 使用传入的持久化会话。 - """ - if not CURL_CFFI_AVAILABLE: - self.logger.error("❌ curl_cffi 未安装,无法执行CCS论文抓取。") - return None - - try: - short_year_str = f"'{year}" - full_year_str = f"20{year}" if len(year) == 2 else year - - proceedings_list_url = 'https://dl.acm.org/conference/ccs/proceedings' - self.logger.info(f"🌐 正在通过持久化会话访问CCS会议列表页面: {proceedings_list_url}") - - # 使用更完整的请求头来模拟浏览器行为 - headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', - 'Cache-Control': 'max-age=0', - 'Connection': 'keep-alive', - 'DNT': '1', - 'Host': 'dl.acm.org', - 'Sec-Ch-Ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Microsoft Edge";v="116"', - 'Sec-Ch-Ua-Mobile': '?0', - 'Sec-Ch-Ua-Platform': '"Windows"', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.0.0' - } - - # 尝试多次,使用不同的浏览器模拟配置 - for browser in 
["chrome110", "chrome99", "chrome100", "safari15_3"]: - try: - response = await session.get( - proceedings_list_url, - impersonate=browser, - headers=headers, - timeout=45 - ) - - if response.status_code == 200: - self.logger.info(f"✅ 使用 {browser} 成功访问") - break - else: - self.logger.warning(f"使用 {browser} 失败,HTTP状态码: {response.status_code}") - except Exception as e: - self.logger.warning(f"使用 {browser} 时出错: {str(e)}") - await asyncio.sleep(2) # 失败后等待一下再重试 - continue - - if response.status_code != 200: - self.logger.error(f"❌ 访问CCS会议列表页面失败,HTTP状态码: {response.status_code}") - return None - - self.logger.info("✅ 成功获取会议列表页面。") - soup = BeautifulSoup(response.text, 'html.parser') - - target_proc_url = None - proc_items = soup.select('li.conference__proceedings div.conference__title a') - for item in proc_items: - link_text = item.get_text(strip=True) - if short_year_str in link_text or full_year_str in link_text: - target_proc_url = urllib.parse.urljoin(proceedings_list_url, item['href']) - self.logger.info(f"✅ 找到 CCS {year} 会议录链接: {target_proc_url}") - break - - if not target_proc_url: - self.logger.error(f"❌ 未能在页面上找到 CCS {year} 的会议录链接。") - debug_file = Path("debug_acm_proceedings_list.html") - debug_file.write_text(response.text, encoding='utf-8') - self.logger.info(f"🐛 已将会议列表页面内容保存到 {debug_file.absolute()} 以供调试。") - return None - - self.logger.info(f"🌐 正在访问 CCS {year} 论文列表页面...") - response = await session.get(target_proc_url, impersonate="chrome110", timeout=45) - - if response.status_code != 200: - self.logger.error(f"❌ 访问 CCS {year} 论文列表页面失败,HTTP状态码: {response.status_code}") - return None - - self.logger.info(f"✅ 成功获取 CCS {year} 论文列表页面。") - soup = BeautifulSoup(response.text, 'html.parser') - - all_dois = [] - # 从隐藏的input中提取所有DOI - doi_inputs = soup.select('input.section--dois') - for doi_input in doi_inputs: - dois_str = doi_input.get('value', '') - if dois_str: - all_dois.extend(dois_str.split(',')) - - if not all_dois: - self.logger.warning(f"未能在 CCS {year} 
页面提取到任何论文DOI。请检查页面结构是否已更改。") - debug_file = Path(f"debug_ccs_{year}_papers.html") - debug_file.write_text(response.text, encoding='utf-8') - self.logger.info(f"🐛 已将论文列表页面内容保存到 {debug_file.absolute()} 以供调试。") - return None - - # 去重并清洗 - unique_dois = sorted(list(set(doi.strip() for doi in all_dois if doi.strip()))) - self.logger.info(f"✅ 成功提取 {len(unique_dois)} 个唯一的 CCS {year} 论文DOI。") - return unique_dois - - except Exception as e: - self.logger.error(f"❌ 获取CCS论文DOI时发生严重错误: {str(e)}") - import traceback - traceback.print_exc() - return None - - diff --git a/src/paperbot/utils/__init__.py b/src/paperbot/utils/__init__.py index 10b92850..eb88b7eb 100644 --- a/src/paperbot/utils/__init__.py +++ b/src/paperbot/utils/__init__.py @@ -2,15 +2,12 @@ """ PaperBot 工具函数模块 -包含: +暴露当前仍在主代码路径中使用的通用工具: - logger: 日志配置 - downloader: 论文下载器 - retry_helper: 重试机制 - json_parser: JSON 解析 - text_processing: 文本处理 -- search: 搜索工具 -- analyzer: 分析工具 -- conference_*: 会议相关工具 """ from paperbot.utils.logger import setup_logger, LogContext, log_with_context diff --git a/src/paperbot/utils/acm_extractor.py b/src/paperbot/utils/acm_extractor.py deleted file mode 100644 index 409505bc..00000000 --- a/src/paperbot/utils/acm_extractor.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -简化版ACM论文提取器 -专为downloader模块设计 -""" - -import cloudscraper -import time -import random -import urllib3 -import json -import gzip -import io -import brotli -from bs4 import BeautifulSoup -import re - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -class ACMPaperExtractor: - """简化版ACM论文提取器""" - - def __init__(self): - self.base_url = "https://dl.acm.org" - self.scraper = None - self._init_scraper() - - def _init_scraper(self): - """初始化cloudscraper""" - self.scraper = cloudscraper.create_scraper( - browser={ - 'browser': 'chrome', - 'platform': 'windows', - 'mobile': False - }, - delay=10 - ) - - # 设置基础headers - self.scraper.headers.update({ - 'User-Agent': 
# NOTE(review): the methods below belong to an ACM (dl.acm.org) scraping class whose
# `class` header is outside this view; they rely on instance state `self.scraper`
# (a cloudscraper/requests-like session with `.get`/`.post`/`.headers`) and
# `self.base_url` — confirm against the class definition.

def get_homepage(self):
    """Visit the ACM homepage so the session picks up anti-bot cookies.

    Returns:
        bool: True if the homepage answered HTTP 200, False otherwise.
    """
    print("🏠 访问ACM主页获取cookies...")
    try:
        response = self.scraper.get(self.base_url, timeout=30)
        print(f"主页状态码: {response.status_code}")
        if response.status_code == 200:
            print("✅ 成功获取主页cookies")
            return True
        print(f"❌ 获取主页失败: HTTP {response.status_code}")
        return False
    except Exception as e:
        print(f"❌ 访问主页异常: {str(e)}")
        return False


def get_proceedings_page(self, proceedings_doi):
    """Fetch a CCS proceedings page after priming cookies via the homepage.

    Args:
        proceedings_doi: DOI of the proceedings volume (e.g. ``10.1145/xxxxxxx``).

    Returns:
        str | None: The page HTML, or ``None`` on any failure.
    """
    print(f"📂 访问CCS proceedings页面...")
    if not self.get_homepage():
        return None
    url = f"{self.base_url}/doi/proceedings/{proceedings_doi}"
    # Small random delay so the request pattern looks less like a bot.
    time.sleep(random.uniform(2, 5))
    try:
        response = self.scraper.get(url, timeout=30)
        print(f"Proceedings页面状态码: {response.status_code}")
        if response.status_code == 200:
            print("✅ 成功访问proceedings页面")
            return response.text
        print(f"❌ 访问proceedings页面失败: HTTP {response.status_code}")
        return None
    except Exception as e:
        print(f"❌ 访问proceedings页面异常: {str(e)}")
        return None


def extract_all_paper_dois(self, proceedings_content):
    """Extract every unique paper DOI (``10.1145/...``) from a proceedings page.

    Args:
        proceedings_content: HTML text of the proceedings page (may be empty/None).

    Returns:
        list[str]: DOIs in first-seen order; empty list on error or empty input.
    """
    print("🔍 从proceedings页面提取所有论文DOI...")
    if not proceedings_content:
        print("❌ proceedings页面内容为空")
        return []
    try:
        soup = BeautifulSoup(proceedings_content, 'html.parser')
        dois = []
        seen = set()  # O(1) dedup instead of `doi not in dois` (O(n) per link)
        paper_links = soup.find_all('a', href=re.compile(r'/doi/10\.1145/'))
        for link in paper_links:
            href = link.get('href', '')
            doi_match = re.search(r'/doi/([^/]+/[^/?]+)', href)
            if doi_match:
                doi = doi_match.group(1)
                # Keep only well-formed ACM DOIs, preserving discovery order.
                if doi.startswith('10.1145/') and doi not in seen:
                    seen.add(doi)
                    dois.append(doi)
        print(f"✅ 成功提取到 {len(dois)} 个论文DOI")
        return dois
    except Exception as e:
        print(f"❌ 提取论文DOI时出错: {str(e)}")
        return []


def export_citations(self, doi_list, proceedings_doi):
    """Call ACM's ``exportCiteProcCitation`` API and return the parsed JSON.

    Args:
        doi_list: Paper DOIs, with or without the ``10.1145/`` prefix.
        proceedings_doi: DOI of the proceedings volume (used for the Referer
            and to prime session cookies).

    Returns:
        dict | None: Parsed citation JSON, or ``None`` on any failure.
    """
    # FIX: `gzip`, `io` and `brotli` were referenced below but never imported
    # anywhere in this module, so the decompression branches always raised
    # NameError and silently fell back to `response.text`. Import them locally;
    # brotli is optional third-party, so guard it.
    import gzip
    import io
    try:
        import brotli
    except ImportError:
        brotli = None

    print("📚 使用export citation API导出引用信息...")
    # Make sure homepage + proceedings cookies are in place first.
    self.get_proceedings_page(proceedings_doi)

    api_url = f"{self.base_url}/action/exportCiteProcCitation"
    # Normalize DOIs so every entry carries the 10.1145/ prefix.
    formatted_doi_list = [
        doi if doi.startswith('10.1145/') else f"10.1145/{doi}"
        for doi in doi_list
    ]
    data = {
        'dois': ','.join(formatted_doi_list),
        'targetFile': 'custom-bibtex',
        'format': 'bibTex'
    }
    api_headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept': '*/*',
        'Origin': self.base_url,
        'Referer': f"{self.base_url}/doi/proceedings/{proceedings_doi}",
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'DNT': '1',
        'sec-ch-ua': '"Microsoft Edge";v="139", "Chromium";v="139", "Not A(Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    self.scraper.headers.update(api_headers)
    time.sleep(random.uniform(2, 4))

    try:
        response = self.scraper.post(api_url, data=data, timeout=30)
        print(f"API状态码: {response.status_code}")
        if response.status_code != 200:
            print(f"❌ API请求失败: HTTP {response.status_code}")
            return None
        print("✅ API请求成功")

        content_encoding = response.headers.get('Content-Encoding', '').lower()
        if 'br' in content_encoding:
            print("🔍 检测到Brotli压缩,正在解压...")
            try:
                if brotli is None:
                    raise ImportError("brotli is not installed")
                content = brotli.decompress(response.content).decode('utf-8')
                print("✅ Brotli解压成功")
            except Exception as e:
                print(f"⚠️ Brotli解压失败: {str(e)}")
                content = response.text
        elif 'gzip' in content_encoding:
            print("🔍 检测到gzip压缩,正在解压...")
            try:
                compressed_data = io.BytesIO(response.content)
                with gzip.GzipFile(fileobj=compressed_data) as gzip_file:
                    content = gzip_file.read().decode('utf-8')
                print("✅ gzip解压成功")
            except Exception as e:
                print(f"⚠️ gzip解压失败: {str(e)}")
                content = response.text
        else:
            content = response.text
            print("📄 无压缩或未知压缩格式")

        try:
            citation_data = json.loads(content)
            print("✅ 成功解析JSON响应")
            return citation_data
        except Exception as e:
            print(f"⚠️ JSON解析失败: {str(e)}")
            return None
    except Exception as e:
        print(f"❌ API请求异常: {str(e)}")
        return None


def extract_paper_info(self, citation_data):
    """Flatten an exportCiteProcCitation response into per-paper dicts.

    Args:
        citation_data: Parsed JSON from :func:`export_citations`; expected to
            contain an ``items`` list of ``{doi: metadata}`` maps.

    Returns:
        list[dict]: One dict per paper with title/doi/url/authors/pdf_url/
        abstract/publisher/isbn/pages/keywords; empty list on bad input.
    """
    print("🔍 从引用数据中提取论文信息...")
    if not citation_data or 'items' not in citation_data:
        print("❌ 无效的引用数据")
        return []

    paper_info_list = []
    items = citation_data['items']
    print(f" 处理 {len(items)} 个条目...")

    for item in items:
        # Each item is a dict keyed by DOI.
        for doi, paper_data in item.items():
            try:
                title = paper_data.get('title', 'Unknown Title')
                # CSL-JSON authors: either family/given parts or a literal name.
                author_names = []
                for author in paper_data.get('author', []):
                    if 'family' in author and 'given' in author:
                        author_names.append(f"{author['given']} {author['family']}")
                    elif 'literal' in author:
                        author_names.append(author['literal'])

                url = f"{self.base_url}/doi/{doi}"
                pdf_url = f"{self.base_url}/doi/pdf/{doi}"
                paper_info_list.append({
                    'title': title,
                    'doi': doi,
                    'url': url,
                    'authors': author_names,
                    'pdf_url': pdf_url,
                    'abstract': paper_data.get('abstract', ''),
                    'publisher': paper_data.get('publisher', ''),
                    'isbn': paper_data.get('ISBN', ''),
                    'pages': paper_data.get('page', ''),
                    'keywords': paper_data.get('keyword', '')
                })
            except Exception as e:
                print(f" ⚠️ 处理条目时出错: {str(e)}")
                continue

    print(f"✅ 提取到 {len(paper_info_list)} 个论文信息")
    return paper_info_list
class ConferenceDownloader:
    """Base class for per-conference paper downloaders.

    Subclasses implement :meth:`get_conference_papers`; this class provides the
    aiohttp session lifecycle, filename sanitizing, and cached PDF download.
    """

    def __init__(self, config):
        self.base_url = ""
        # Download directory comes from config (default ./papers) and is created eagerly.
        self.download_path = Path(config.get('download_path', './papers'))
        self.download_path.mkdir(parents=True, exist_ok=True)
        self.session = None

    async def __aenter__(self):
        # One shared HTTP session for the lifetime of the context.
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        if self.session:
            await self.session.close()

    async def get_conference_papers(self, conference, year):
        """Return the paper list for a conference/year; implemented by subclasses."""
        raise NotImplementedError("This method should be implemented by subclasses.")

    def sanitize_filename(self, filename):
        """Strip characters that are invalid in Windows/POSIX filenames."""
        return re.sub(r'[\\/*?:"<>|]', "", filename)

    async def download_paper(self, url, title, paper_index, total_papers):
        """Download one PDF to the download directory, skipping cached files.

        Returns:
            dict: ``{'success': bool, ...}`` with ``path``/``size``/``cached``
            on success or ``error`` on failure.
        """
        if not url or not isinstance(url, str):
            return {'success': False, 'error': 'Invalid URL'}

        sanitized_title = self.sanitize_filename(title)
        pdf_filename = self.download_path / f"{sanitized_title}.pdf"

        # Cache hit: the file was downloaded on a previous run.
        if pdf_filename.exists():
            print(f"[{paper_index + 1}/{total_papers}] 📄 '{sanitized_title}' 已存在 (缓存).")
            return {'success': True, 'cached': True, 'path': str(pdf_filename)}

        try:
            print(f"[{paper_index + 1}/{total_papers}] 📥 开始下载: {title}")
            async with self.session.get(url, timeout=aiohttp.ClientTimeout(total=300)) as response:
                if response.status == 200:
                    content = await response.read()
                    with open(pdf_filename, 'wb') as f:
                        f.write(content)
                    return {'success': True, 'cached': False, 'path': str(pdf_filename), 'size': len(content)}
                return {'success': False, 'error': f"HTTP status {response.status}"}
        except Exception as e:
            return {'success': False, 'error': str(e)}


def setup_logger(name):
    """Create (once) and return a stream logger with a standard format."""
    logger = logging.getLogger(name)
    if not logger.handlers:
        logger.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


class ConferenceHelpers:
    """Fetch + parse paper listings for IEEE S&P, NDSS and USENIX Security.

    All fetchers take a curl_cffi ``AsyncSession`` whose ``get()`` is awaited
    directly and returns a response exposing ``.status_code`` / ``.text``.
    """

    def __init__(self):
        self.logger = setup_logger(__name__)

    async def get_sp_papers(self, session: "AsyncSession", base_url: str, year: str) -> List[Dict[str, Any]]:
        """Fetch the IEEE S&P listing page and return paper dicts."""
        papers = []
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
            }

            # FIX: curl_cffi's AsyncSession.get() is awaited directly and returns a
            # plain response (`.status_code`, `.text`); the original used the aiohttp
            # idiom (`async with`, `.status`, `await .text()`), which fails at
            # runtime with curl_cffi — get_ndss_papers below already did it right.
            response = await session.get(base_url, headers=headers)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                paper_items = soup.find_all('div', class_='article-list__item')

                for item in paper_items:
                    title_elem = item.find('h3', class_='article-list__title')
                    if not title_elem:
                        continue

                    title = title_elem.text.strip()
                    pdf_url = await self._get_ieee_pdf_url(item)
                    if pdf_url:
                        papers.append({
                            'title': title,
                            'url': pdf_url,
                            'conference': 'SP',
                            'year': year
                        })
            else:
                raise Exception(f"Failed to fetch SP {year} papers list")

            return papers

        except Exception as e:
            self.logger.error(f"解析 SP {year} 论文列表失败: {e}")
            raise

    async def get_ndss_papers(self, session: "AsyncSession", base_url: str, year: str) -> List[Dict[str, Any]]:
        """Fetch the NDSS accepted-papers page and return paper dicts."""
        papers = []
        try:
            url = f"{base_url}ndss{year}/accepted-papers"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            response = await session.get(url, headers=headers)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                paper_items = soup.find_all(['div', 'article'], class_=['paper-item', 'accepted-paper'])

                for item in paper_items:
                    title_elem = item.find(['h2', 'h3', 'h4'], class_=['title', 'paper-title'])
                    if not title_elem:
                        continue

                    title = title_elem.text.strip()
                    pdf_link = item.find('a', href=lambda x: x and x.endswith('.pdf'))
                    if pdf_link and 'href' in pdf_link.attrs:
                        pdf_url = pdf_link['href']
                        # Relative links are resolved against the NDSS site root.
                        if not pdf_url.startswith('http'):
                            pdf_url = f"https://www.ndss-symposium.org{pdf_url}"
                        papers.append({
                            'title': title,
                            'url': pdf_url,
                            'conference': 'NDSS',
                            'year': year
                        })
            else:
                raise Exception(f"Failed to fetch NDSS {year} papers list")

            return papers

        except Exception as e:
            self.logger.error(f"解析 NDSS {year} 论文列表失败: {e}")
            raise

    async def get_usenix_papers(self, session: "AsyncSession", base_url: str, year: str) -> List[Dict[str, Any]]:
        """Fetch the USENIX Security technical-sessions page and return paper dicts."""
        papers = []
        # Accept both two-digit ('24') and four-digit ('2024') years.
        full_year = f"20{year}" if len(year) == 2 else year
        url = f"{base_url}{full_year}/technical-sessions"

        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # FIX: same aiohttp-idiom → curl_cffi-idiom correction as get_sp_papers.
            response = await session.get(url, headers=headers)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                paper_nodes = soup.find_all(['article', 'div'], class_=['node-paper', 'paper-item'])

                for node in paper_nodes:
                    title_elem = node.find(['h2', 'div'], class_=['node-title', 'field-title'])
                    if not title_elem:
                        continue

                    title = title_elem.text.strip()
                    pdf_url = await self._get_usenix_pdf_url(node)
                    if pdf_url:
                        papers.append({
                            'title': title,
                            'url': pdf_url,
                            'conference': 'USENIX',
                            'year': year
                        })
            else:
                raise Exception(f"Failed to fetch USENIX {year} papers list")

            return papers

        except Exception as e:
            self.logger.error(f"解析 USENIX {year} 论文列表失败: {e}")
            raise

    async def _get_ieee_pdf_url(self, paper_element) -> Optional[str]:
        """Extract a PDF URL from an IEEE listing element (direct link or stamp URL)."""
        try:
            pdf_link = paper_element.find('a', href=re.compile(r'.*\.pdf'))
            if pdf_link:
                return pdf_link['href']

            # Fall back to building the stamp.jsp URL from the document id.
            article_link = paper_element.find('a', href=re.compile(r'/document/'))
            if article_link:
                doc_id = re.search(r'/document/(\d+)', article_link['href'])
                if doc_id:
                    return f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id.group(1)}"

            return None
        except Exception as e:
            self.logger.error(f"Error extracting IEEE PDF URL: {str(e)}")
            return None

    async def _get_usenix_pdf_url(self, node) -> Optional[str]:
        """Extract an absolute PDF URL from a USENIX paper node, or None."""
        try:
            pdf_link = node.find('a', href=re.compile(r'\.pdf$'))
            if pdf_link and 'href' in pdf_link.attrs:
                url = pdf_link['href']
                if not url.startswith('http'):
                    url = f"https://www.usenix.org{url}"
                return url

            return None
        except Exception as e:
            self.logger.error(f"Error extracting USENIX PDF URL: {str(e)}")
            return None
def setup_logger(name):
    """Create (once) and return a stream logger with a standard format."""
    logger = logging.getLogger(name)
    if not logger.handlers:
        logger.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


class ConferenceParsers:
    """Parse conference listing pages (NDSS / USENIX / IEEE S&P) into paper dicts.

    All parsers take a curl_cffi ``AsyncSession`` whose ``get()`` is awaited
    directly and returns a response exposing ``.status_code`` / ``.text``.
    """

    def __init__(self):
        self.logger = setup_logger(__name__)

    async def parse_ndss_papers(self, base_url: str, year: str, session: "AsyncSession") -> List[Dict[str, Any]]:
        """Parse the NDSS accepted-papers page, with retries and progress output.

        Returns a list of dicts with title/authors/abstract/url/detail_url/doi;
        empty list when every retry fails.
        """
        papers = []
        full_year = f"20{year}" if len(year) == 2 else year
        url = f"{base_url}ndss{full_year}/accepted-papers/"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        print(f"🌐 访问 NDSS {year} 会议页面...")

        max_retries = 3
        for attempt in range(max_retries):
            try:
                # FIX: curl_cffi's AsyncSession.get() is awaited directly and returns
                # a plain response (`.status_code`, `.text`); the original used the
                # aiohttp idiom (`async with`, `.status`, `await .text()`), which
                # fails at runtime — parse_usenix_papers below already did it right.
                response = await session.get(url, headers=headers)
                print(f"⚡ 尝试 {attempt + 1}/{max_retries}: HTTP {response.status_code}")

                if response.status_code == 200:
                    print(f"📝 正在解析页面内容...")
                    soup = BeautifulSoup(response.text, 'html.parser')

                    # Primary NDSS paper container selector, with fallbacks.
                    paper_containers = soup.find_all('div', class_='tag-box rel-paper')
                    print(f"📚 找到 {len(paper_containers)} 个论文容器")

                    if not paper_containers:
                        print(f"⚠️ 未找到论文容器,尝试其他选择器...")
                        paper_containers = soup.find_all('div', class_='paper') or soup.find_all('article')
                        print(f"🔄 备用选择器找到 {len(paper_containers)} 个容器")

                    for idx, container in enumerate(paper_containers):
                        # Lightweight progress display every 5 containers.
                        if idx % 5 == 0 or idx == len(paper_containers) - 1:
                            progress = (idx + 1) / len(paper_containers) * 100
                            print(f"� 解析进度: {progress:.1f}% ({idx+1}/{len(paper_containers)})")

                        try:
                            # Title: try several heading selectors.
                            title_elem = (container.find('h3', class_='blog-post-title') or
                                          container.find('h3') or
                                          container.find('h2') or
                                          container.find('h1'))
                            if not title_elem:
                                continue

                            title = title_elem.get_text().strip()
                            print(f"📄 [{idx+1}/{len(paper_containers)}] 找到论文: {title[:70]}{'...' if len(title) > 70 else ''}")

                            author_elem = container.find('p')
                            authors_text = author_elem.get_text().strip() if author_elem else ''
                            authors = [author.strip() for author in authors_text.split(',')] if authors_text else []

                            detail_link = (container.find('a', class_='paper-link-abs') or
                                           container.find('a', href=True))
                            detail_url = detail_link.get('href') if detail_link else ''

                            pdf_url = ''
                            if detail_url:
                                # Resolve the detail page to an absolute URL first.
                                if not detail_url.startswith('http'):
                                    if detail_url.startswith('//'):
                                        detail_url = f'https:{detail_url}'
                                    elif detail_url.startswith('/'):
                                        detail_url = f'https://www.ndss-symposium.org{detail_url}'
                                    else:
                                        detail_url = f'https://www.ndss-symposium.org/{detail_url}'
                                pdf_url = await self._get_ndss_pdf_from_detail_page(session, detail_url)

                            paper_info = {
                                'title': title,
                                'authors': authors,
                                'abstract': '',
                                'url': pdf_url,
                                'detail_url': detail_url,
                                'doi': ''
                            }

                            # Very short strings are navigation noise, not titles.
                            if title and len(title) > 10:
                                papers.append(paper_info)
                        except Exception as e:
                            self.logger.warning(f"Error parsing paper container {idx}: {str(e)}")
                            continue

                    print(f"\n✅ 成功解析 {len(papers)} 篇论文")
                    return papers

            except Exception as e:
                print(f"❌ 尝试 {attempt + 1} 失败: {str(e)}")
                if attempt < max_retries - 1:
                    delay = 3 * (attempt + 1)  # linear backoff: 3s, 6s
                    print(f"⏳ 等待 {delay} 秒后重试...")
                    await asyncio.sleep(delay)
                else:
                    return []

        print("❌ 所有重试均失败")
        return []

    async def _get_ndss_pdf_from_detail_page(self, session: "AsyncSession", detail_url: str) -> str:
        """Extract an absolute PDF URL from an NDSS paper detail page ('' if none)."""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # FIX: same aiohttp-idiom → curl_cffi-idiom correction as above.
            response = await session.get(detail_url, headers=headers)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')

                # Try several PDF selectors in priority order.
                pdf_selectors = [
                    ('a', {'href': lambda x: x and x.endswith('.pdf')}),
                    ('a', {'href': lambda x: x and 'paper' in x.lower() and '.pdf' in x.lower()}),
                    ('a', {'class': 'file-pdf'}),
                    ('a', {'class': 'download-pdf'}),
                    ('a', {'title': lambda x: x and 'pdf' in x.lower()})
                ]

                for tag, attrs in pdf_selectors:
                    pdf_link = soup.find(tag, attrs)
                    if pdf_link and 'href' in pdf_link.attrs:
                        pdf_url = pdf_link['href']
                        # Resolve relative URLs against the NDSS site root.
                        if not pdf_url.startswith('http'):
                            if pdf_url.startswith('//'):
                                pdf_url = f'https:{pdf_url}'
                            elif pdf_url.startswith('/'):
                                pdf_url = f'https://www.ndss-symposium.org{pdf_url}'
                            else:
                                pdf_url = f'https://www.ndss-symposium.org/{pdf_url}'
                        return pdf_url

            return ''

        except Exception as e:
            self.logger.warning(f"Error getting PDF from detail page: {str(e)}")
            return ''

    async def parse_usenix_papers(self, base_url: str, year: str, session: "AsyncSession") -> List[Dict[str, Any]]:
        """Parse the USENIX Security technical-sessions page into paper dicts."""
        papers: List[Dict[str, Any]] = []
        url = f"https://www.usenix.org/conference/usenixsecurity{year}/technical-sessions"

        print(f"🌐 正在解析 USENIX Security {year} 论文列表...")

        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/91.0.4472.124 Safari/537.36"
            )
        }

        try:
            response = await session.get(url, headers=headers)
            status_code = getattr(response, "status_code", None)

            if status_code == 200:
                print("✅ 页面访问成功,开始解析...")
                html = getattr(response, "text", "")
                soup = BeautifulSoup(html, "html.parser")

                paper_nodes = soup.find_all(["article", "div"], class_=["node-paper", "paper-item"])
                print(f"📚 找到 {len(paper_nodes)} 个论文节点")

                if not paper_nodes:
                    print("⚠️ 未找到论文节点,尝试备用选择器...")
                    paper_nodes = soup.find_all(["div", "article"], class_=["paper", "technical-paper"])

                for idx, node in enumerate(paper_nodes, 1):
                    try:
                        title_elem = (
                            node.find(["h2", "h3"], class_=["node-title", "paper-title"])
                            or node.find("div", class_="field-title")
                        )
                        if not title_elem:
                            continue

                        title = title_elem.text.strip()
                        pdf_url = await self._get_usenix_pdf_url(node)
                        if not pdf_url:
                            continue

                        papers.append(
                            {
                                "title": title,
                                "url": pdf_url,
                                "conference": "USENIX",
                                "year": year,
                            }
                        )
                        print(
                            f"\r📄 处理论文 {idx}/{len(paper_nodes)}: {title[:50]}...",
                            end="",
                            flush=True,
                        )
                    except Exception:
                        continue

                print(f"\n✅ USENIX解析完成: {len(papers)} 篇论文")
                return papers

            if status_code == 404:
                print(f"❌ USENIX {year} 页面不存在")
                return []

            raise Exception(f"HTTP {status_code}")

        except Exception as e:
            print(f"❌ USENIX解析错误: {str(e)}")
            return []

    async def _get_usenix_pdf_url(self, node) -> Optional[str]:
        """Extract a PDF (or presentation-page) URL from a USENIX paper node."""
        try:
            pdf_link = node.find('a', href=re.compile(r'\.pdf$', re.I))
            if pdf_link and pdf_link.get('href'):
                pdf_url = pdf_link['href']
                return self._complete_usenix_url(pdf_url)

            # No direct PDF: fall back to the presentation page link.
            pres_link = node.find('a', href=re.compile(r'/presentation/', re.I))
            if pres_link and pres_link.get('href'):
                pres_url = pres_link['href']
                return self._complete_usenix_url(pres_url)

            return None

        except Exception as e:
            print(f"⚠️ PDF链接提取失败: {str(e)}")
            return None

    def _complete_usenix_url(self, url: str) -> str:
        """Resolve a possibly-relative USENIX link to an absolute https URL."""
        if not url:
            return ''

        if url.startswith('http'):
            return url
        elif url.startswith('//'):
            return f"https:{url}"
        elif url.startswith('/'):
            return f"https://www.usenix.org{url}"
        else:
            return f"https://www.usenix.org/{url}"

    async def parse_sp_papers(self, base_url: str, year: str, session: "AsyncSession") -> List[Dict[str, Any]]:
        """Parse the IEEE S&P listing page into paper dicts ([] on failure)."""
        papers = []
        full_year = f"20{year}" if len(year) == 2 else year

        print(f"🌐 正在获取 IEEE S&P {full_year} 论文列表...")

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1'
        }

        # FIX: the original body was structurally broken — a duplicated
        # `except ... continue` sat at function level outside any loop, and an
        # `else:` followed `return papers` (a syntax error). Restored the intended
        # shape: 200 → parse and return; non-200 → raise; outer except → log, [].
        try:
            response = await session.get(base_url, headers=headers)
            if response.status_code == 200:
                print(f"📝 正在解析页面内容...")
                soup = BeautifulSoup(response.text, 'html.parser')

                paper_items = soup.find_all(['div', 'article'], class_=['paper-item', 'article-item'])
                print(f"📚 找到 {len(paper_items)} 个论文项")

                if not paper_items:
                    paper_items = soup.find_all(['div', 'article'], class_=['paper', 'article'])

                for idx, item in enumerate(paper_items, 1):
                    try:
                        title_elem = item.find(['h3', 'h2'], class_=['paper-title', 'article-title'])
                        if not title_elem:
                            continue

                        title = title_elem.text.strip()
                        pdf_url = await self._get_ieee_pdf_url(item)
                        if pdf_url:
                            papers.append({
                                'title': title,
                                'url': pdf_url,
                                'conference': 'SP',
                                'year': year
                            })

                        print(f"\r📄 处理论文 {idx}/{len(paper_items)}: {title[:50]}...", end='', flush=True)
                    except Exception as e:
                        print(f"\n⚠️ 处理论文时出错: {str(e)}")
                        continue

                print(f"\n✅ SP解析完成: {len(papers)} 篇论文")
                return papers
            else:
                raise Exception(f"HTTP {response.status_code}")

        except Exception as e:
            print(f"❌ SP解析错误: {str(e)}")
            return []

    async def _get_ieee_pdf_url(self, paper_element) -> Optional[str]:
        """Extract a PDF URL from an IEEE listing element (direct link or stamp URL)."""
        try:
            pdf_link = paper_element.find('a', href=re.compile(r'\.pdf$', re.I))
            if pdf_link and pdf_link.get('href'):
                return pdf_link['href']

            # Fall back to building the stamp.jsp URL from the document id.
            article_link = paper_element.find('a', href=re.compile(r'/document/'))
            if article_link and article_link.get('href'):
                doc_match = re.search(r'/document/(\d+)', article_link['href'])
                if doc_match:
                    doc_id = doc_match.group(1)
                    return f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id}"

            return None

        except Exception as e:
            print(f"⚠️ PDF链接提取失败: {str(e)}")
            return None
# NOTE(review): conference_parsers_new.py is a byte-for-byte duplicate of
# conference_parsers.py (same index hash eacab86f in the diff header). It carries
# the same two defects fixed below; one of the two files should be deleted.

def setup_logger(name):
    """Create (once) and return a stream logger with a standard format."""
    logger = logging.getLogger(name)
    if not logger.handlers:
        logger.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


class ConferenceParsers:
    """Parse conference listing pages (NDSS / USENIX / IEEE S&P) into paper dicts.

    All parsers take a curl_cffi ``AsyncSession`` whose ``get()`` is awaited
    directly and returns a response exposing ``.status_code`` / ``.text``.
    """

    def __init__(self):
        self.logger = setup_logger(__name__)

    async def parse_ndss_papers(self, base_url: str, year: str, session: "AsyncSession") -> List[Dict[str, Any]]:
        """Parse the NDSS accepted-papers page, with retries and progress output."""
        papers = []
        full_year = f"20{year}" if len(year) == 2 else year
        url = f"{base_url}ndss{full_year}/accepted-papers/"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        print(f"🌐 访问 NDSS {year} 会议页面...")

        max_retries = 3
        for attempt in range(max_retries):
            try:
                # FIX: awaited curl_cffi call (`.status_code`, `.text`) instead of
                # the aiohttp idiom (`async with`, `.status`, `await .text()`),
                # which fails with curl_cffi's AsyncSession.
                response = await session.get(url, headers=headers)
                print(f"⚡ 尝试 {attempt + 1}/{max_retries}: HTTP {response.status_code}")

                if response.status_code == 200:
                    print(f"📝 正在解析页面内容...")
                    soup = BeautifulSoup(response.text, 'html.parser')

                    paper_containers = soup.find_all('div', class_='tag-box rel-paper')
                    print(f"📚 找到 {len(paper_containers)} 个论文容器")

                    if not paper_containers:
                        print(f"⚠️ 未找到论文容器,尝试其他选择器...")
                        paper_containers = soup.find_all('div', class_='paper') or soup.find_all('article')
                        print(f"🔄 备用选择器找到 {len(paper_containers)} 个容器")

                    for idx, container in enumerate(paper_containers):
                        if idx % 5 == 0 or idx == len(paper_containers) - 1:
                            progress = (idx + 1) / len(paper_containers) * 100
                            print(f"� 解析进度: {progress:.1f}% ({idx+1}/{len(paper_containers)})")

                        try:
                            title_elem = (container.find('h3', class_='blog-post-title') or
                                          container.find('h3') or
                                          container.find('h2') or
                                          container.find('h1'))
                            if not title_elem:
                                continue

                            title = title_elem.get_text().strip()
                            print(f"📄 [{idx+1}/{len(paper_containers)}] 找到论文: {title[:70]}{'...' if len(title) > 70 else ''}")

                            author_elem = container.find('p')
                            authors_text = author_elem.get_text().strip() if author_elem else ''
                            authors = [author.strip() for author in authors_text.split(',')] if authors_text else []

                            detail_link = (container.find('a', class_='paper-link-abs') or
                                           container.find('a', href=True))
                            detail_url = detail_link.get('href') if detail_link else ''

                            pdf_url = ''
                            if detail_url:
                                if not detail_url.startswith('http'):
                                    if detail_url.startswith('//'):
                                        detail_url = f'https:{detail_url}'
                                    elif detail_url.startswith('/'):
                                        detail_url = f'https://www.ndss-symposium.org{detail_url}'
                                    else:
                                        detail_url = f'https://www.ndss-symposium.org/{detail_url}'
                                pdf_url = await self._get_ndss_pdf_from_detail_page(session, detail_url)

                            paper_info = {
                                'title': title,
                                'authors': authors,
                                'abstract': '',
                                'url': pdf_url,
                                'detail_url': detail_url,
                                'doi': ''
                            }

                            if title and len(title) > 10:
                                papers.append(paper_info)
                        except Exception as e:
                            self.logger.warning(f"Error parsing paper container {idx}: {str(e)}")
                            continue

                    print(f"\n✅ 成功解析 {len(papers)} 篇论文")
                    return papers

            except Exception as e:
                print(f"❌ 尝试 {attempt + 1} 失败: {str(e)}")
                if attempt < max_retries - 1:
                    delay = 3 * (attempt + 1)
                    print(f"⏳ 等待 {delay} 秒后重试...")
                    await asyncio.sleep(delay)
                else:
                    return []

        print("❌ 所有重试均失败")
        return []

    async def _get_ndss_pdf_from_detail_page(self, session: "AsyncSession", detail_url: str) -> str:
        """Extract an absolute PDF URL from an NDSS paper detail page ('' if none)."""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # FIX: same aiohttp-idiom → curl_cffi-idiom correction as above.
            response = await session.get(detail_url, headers=headers)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')

                pdf_selectors = [
                    ('a', {'href': lambda x: x and x.endswith('.pdf')}),
                    ('a', {'href': lambda x: x and 'paper' in x.lower() and '.pdf' in x.lower()}),
                    ('a', {'class': 'file-pdf'}),
                    ('a', {'class': 'download-pdf'}),
                    ('a', {'title': lambda x: x and 'pdf' in x.lower()})
                ]

                for tag, attrs in pdf_selectors:
                    pdf_link = soup.find(tag, attrs)
                    if pdf_link and 'href' in pdf_link.attrs:
                        pdf_url = pdf_link['href']
                        if not pdf_url.startswith('http'):
                            if pdf_url.startswith('//'):
                                pdf_url = f'https:{pdf_url}'
                            elif pdf_url.startswith('/'):
                                pdf_url = f'https://www.ndss-symposium.org{pdf_url}'
                            else:
                                pdf_url = f'https://www.ndss-symposium.org/{pdf_url}'
                        return pdf_url

            return ''

        except Exception as e:
            self.logger.warning(f"Error getting PDF from detail page: {str(e)}")
            return ''

    async def parse_usenix_papers(self, base_url: str, year: str, session: "AsyncSession") -> List[Dict[str, Any]]:
        """Parse the USENIX Security technical-sessions page into paper dicts."""
        papers: List[Dict[str, Any]] = []
        url = f"https://www.usenix.org/conference/usenixsecurity{year}/technical-sessions"

        print(f"🌐 正在解析 USENIX Security {year} 论文列表...")

        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/91.0.4472.124 Safari/537.36"
            )
        }

        try:
            response = await session.get(url, headers=headers)
            status_code = getattr(response, "status_code", None)

            if status_code == 200:
                print("✅ 页面访问成功,开始解析...")
                html = getattr(response, "text", "")
                soup = BeautifulSoup(html, "html.parser")

                paper_nodes = soup.find_all(["article", "div"], class_=["node-paper", "paper-item"])
                print(f"📚 找到 {len(paper_nodes)} 个论文节点")

                if not paper_nodes:
                    print("⚠️ 未找到论文节点,尝试备用选择器...")
                    paper_nodes = soup.find_all(["div", "article"], class_=["paper", "technical-paper"])

                for idx, node in enumerate(paper_nodes, 1):
                    try:
                        title_elem = (
                            node.find(["h2", "h3"], class_=["node-title", "paper-title"])
                            or node.find("div", class_="field-title")
                        )
                        if not title_elem:
                            continue

                        title = title_elem.text.strip()
                        pdf_url = await self._get_usenix_pdf_url(node)
                        if not pdf_url:
                            continue

                        papers.append(
                            {
                                "title": title,
                                "url": pdf_url,
                                "conference": "USENIX",
                                "year": year,
                            }
                        )
                        print(
                            f"\r📄 处理论文 {idx}/{len(paper_nodes)}: {title[:50]}...",
                            end="",
                            flush=True,
                        )
                    except Exception:
                        continue

                print(f"\n✅ USENIX解析完成: {len(papers)} 篇论文")
                return papers

            if status_code == 404:
                print(f"❌ USENIX {year} 页面不存在")
                return []

            raise Exception(f"HTTP {status_code}")

        except Exception as e:
            print(f"❌ USENIX解析错误: {str(e)}")
            return []

    async def _get_usenix_pdf_url(self, node) -> Optional[str]:
        """Extract a PDF (or presentation-page) URL from a USENIX paper node."""
        try:
            pdf_link = node.find('a', href=re.compile(r'\.pdf$', re.I))
            if pdf_link and pdf_link.get('href'):
                pdf_url = pdf_link['href']
                return self._complete_usenix_url(pdf_url)

            pres_link = node.find('a', href=re.compile(r'/presentation/', re.I))
            if pres_link and pres_link.get('href'):
                pres_url = pres_link['href']
                return self._complete_usenix_url(pres_url)

            return None

        except Exception as e:
            print(f"⚠️ PDF链接提取失败: {str(e)}")
            return None

    def _complete_usenix_url(self, url: str) -> str:
        """Resolve a possibly-relative USENIX link to an absolute https URL."""
        if not url:
            return ''

        if url.startswith('http'):
            return url
        elif url.startswith('//'):
            return f"https:{url}"
        elif url.startswith('/'):
            return f"https://www.usenix.org{url}"
        else:
            return f"https://www.usenix.org/{url}"

    async def parse_sp_papers(self, base_url: str, year: str, session: "AsyncSession") -> List[Dict[str, Any]]:
        """Parse the IEEE S&P listing page into paper dicts ([] on failure)."""
        papers = []
        full_year = f"20{year}" if len(year) == 2 else year

        print(f"🌐 正在获取 IEEE S&P {full_year} 论文列表...")

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1'
        }

        # FIX: same structural repair as conference_parsers.py — the original had a
        # duplicated `except ... continue` outside any loop and an `else:` after
        # `return papers`, which is a syntax error.
        try:
            response = await session.get(base_url, headers=headers)
            if response.status_code == 200:
                print(f"📝 正在解析页面内容...")
                soup = BeautifulSoup(response.text, 'html.parser')

                paper_items = soup.find_all(['div', 'article'], class_=['paper-item', 'article-item'])
                print(f"📚 找到 {len(paper_items)} 个论文项")

                if not paper_items:
                    paper_items = soup.find_all(['div', 'article'], class_=['paper', 'article'])

                for idx, item in enumerate(paper_items, 1):
                    try:
                        title_elem = item.find(['h3', 'h2'], class_=['paper-title', 'article-title'])
                        if not title_elem:
                            continue

                        title = title_elem.text.strip()
                        pdf_url = await self._get_ieee_pdf_url(item)
                        if pdf_url:
                            papers.append({
                                'title': title,
                                'url': pdf_url,
                                'conference': 'SP',
                                'year': year
                            })

                        print(f"\r📄 处理论文 {idx}/{len(paper_items)}: {title[:50]}...", end='', flush=True)
                    except Exception as e:
                        print(f"\n⚠️ 处理论文时出错: {str(e)}")
                        continue

                print(f"\n✅ SP解析完成: {len(papers)} 篇论文")
                return papers
            else:
                raise Exception(f"HTTP {response.status_code}")

        except Exception as e:
            print(f"❌ SP解析错误: {str(e)}")
            return []

    async def _get_ieee_pdf_url(self, paper_element) -> Optional[str]:
        """Extract a PDF URL from an IEEE listing element (direct link or stamp URL)."""
        try:
            pdf_link = paper_element.find('a', href=re.compile(r'\.pdf$', re.I))
            if pdf_link and pdf_link.get('href'):
                return pdf_link['href']

            article_link = paper_element.find('a', href=re.compile(r'/document/'))
            if article_link and article_link.get('href'):
                doc_match = re.search(r'/document/(\d+)', article_link['href'])
                if doc_match:
                    doc_id = doc_match.group(1)
                    return f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id}"

            return None

        except Exception as e:
            print(f"⚠️ PDF链接提取失败: {str(e)}")
            return None
- 'headers': { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0' - } - }, - 'ndss': { - 'base_url': 'https://www.ndss-symposium.org', - 'headers': { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0' - } - }, - 'usenix': { - 'base_url': 'https://www.usenix.org/conference/usenixsecurity', - 'headers': { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0' - } - } - } - - async def _download_with_retry(self, url: str) -> Optional[bytes]: - """ - 智能下载实现,带自动重试和反爬处理。 - - Args: - url (str): 要下载的URL - - Returns: - Optional[bytes]: 下载的内容,失败返回None - """ - # 验证并确保会话可用 - if not self.session: - try: - self.logger.info("正在重新创建持久化会话...") - self.session = AsyncSession() - except Exception as e: - self.logger.error(f"创建持久化会话失败: {e}") - return None - - last_error = None - content = None - - for attempt in range(1, self.max_retries + 1): - try: - self.logger.info(f"下载尝试 
{attempt}/{self.max_retries}: {url}") - - # 配置特殊headers以绕过反爬 - headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - - # 验证会话状态 - if not self.session or getattr(self.session, '_closed', False): - self.logger.warning("会话已关闭,正在重新创建...") - self.session = AsyncSession() - - # 使用curl_cffi的持久化会话和浏览器仿真 - response = await self.session.get( - url, - impersonate="chrome110", - headers=headers, - timeout=60 - ) - - # 检查HTTP状态码 - if response.status_code == 403: - self.logger.warning(f"遇到403 Forbidden,可能是反爬限制 (尝试 {attempt}/{self.max_retries})") - await asyncio.sleep(self.retry_delay * attempt) # 指数退避 - continue - - elif response.status_code == 429: - self.logger.warning(f"遇到429 Too Many Requests,开始等待 (尝试 {attempt}/{self.max_retries})") - await asyncio.sleep(self.retry_delay * 2 * attempt) # 更长的等待 - continue - - elif response.status_code != 200: - self.logger.warning(f"HTTP {response.status_code} (尝试 {attempt}/{self.max_retries})") - await asyncio.sleep(self.retry_delay) - continue - - # 获取响应内容 - content = response.content - - # 验证内容 - if not content or len(content) < 1024: # 小于1KB可能是错误页面 - self.logger.warning(f"响应内容过小: {len(content) if content else 0} bytes") - continue - - # 对于PDF,验证文件头 - if url.lower().endswith('.pdf') and not content.startswith(b'%PDF'): - self.logger.warning("响应不是有效的PDF格式") - continue - - self.logger.info(f"✅ 成功下载: {len(content)} bytes") - return content - - except Exception as e: - last_error = e - self.logger.warning(f"下载出错 (尝试 {attempt}/{self.max_retries}): {e}") - await asyncio.sleep(self.retry_delay) - continue - - # 所有重试都失败 - if last_error: - self.logger.error(f"❌ 下载失败,已达到最大重试次数。最后错误: {last_error}") - else: - self.logger.error("❌ 下载失败,未获得有效内容") - return None - - 
def _sanitize_filename(self, filename: str) -> str: - """清理并规范化文件名,移除非法字符""" - # 替换 Windows 文件系统不允许的字符 - invalid_chars = r'[\\/:"*?<>|]+' - filename = re.sub(invalid_chars, '_', filename) - - # 将连续的空白字符替换为单个空格 - filename = re.sub(r'\s+', ' ', filename) - - # 去除首尾空白 - filename = filename.strip() - - # 如果文件名为空,使用默认名称 - if not filename: - filename = f"paper_{int(time.time())}" - - # 限制文件名长度(Windows 最大路径长度为 260 字符) - max_length = 200 # 留一些余地给路径和扩展名 - if len(filename) > max_length: - filename = filename[:max_length-3] + "..." - - return filename - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.logger = setup_logger(__name__) - self.download_path = Path(self.config.get('download_path', './papers')) - self.download_path.mkdir(parents=True, exist_ok=True) - - self.session: Optional[AsyncSession] = None - - # 配置下载重试参数 - self.max_retries = self.config.get('max_retries', 3) - self.retry_delay = self.config.get('retry_delay', 3) - - # 并发控制 - max_concurrent = 1 - self.semaphore = asyncio.Semaphore(max_concurrent) - - - # 会议URL模板 - self.conference_urls = { - 'ccs': 'https://dl.acm.org/doi/proceedings/', - 'sp': 'https://ieeexplore.ieee.org/xpl/conhome/', - 'ndss': 'https://www.ndss-symposium.org/', - 'usenix': 'https://www.usenix.org/conference/' - } - - async def __aenter__(self): - """创建并返回一个持久化的 curl_cffi 会话.""" - try: - if self.session and not getattr(self.session, '_closed', False): - self.logger.info("使用现有的持久化会话...") - return self - - self.logger.info("正在创建新的持久化会话...") - self.session = AsyncSession() - return self - except Exception as e: - self.logger.error(f"创建持久化会话失败: {e}") - raise - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """关闭持久化会话.""" - try: - if self.session: - # 检查会话是否已经关闭 - is_closed = getattr(self.session, '_closed', True) - - if not is_closed and hasattr(self.session, 'close'): - try: - self.logger.info("正在关闭持久化会话...") - await self.session.close() - except Exception as e: - 
self.logger.warning(f"关闭会话时出现异常: {e}") - else: - self.logger.info("会话已经关闭,无需再次关闭") - except Exception as e: - self.logger.warning(f"处理会话关闭时出现异常: {e}") - finally: - # 确保会话对象被清理 - self.session = None - - async def download_paper(self, url: str, title: str, paper_index: int = 0, total_papers: int = 0) -> Dict[str, Any]: - """下载单篇论文 - 优化版本""" - async with self.semaphore: - try: - # 生成文件名 - 为IEEE论文添加特殊前缀 - safe_title = self._sanitize_filename(title) - - # 使用简化的文件名:只使用论文标题 - filename = f"{safe_title}.pdf" - - file_path = self.download_path / filename - - # 显示下载进度(与NDSS/USENIX保持一致) - if total_papers > 0: - progress = (paper_index + 1) / total_papers * 100 - print(f"💾 [{paper_index+1}/{total_papers}] 下载: {title[:50]}{'...' if len(title) > 50 else ''}") - - # 检查是否已下载并验证文件 - if file_path.exists(): - # 验证文件大小,过小的文件可能是错误页面 - file_size = file_path.stat().st_size - if file_size > 1024: # 大于1KB认为有效 - return { - 'success': True, - 'path': str(file_path), - 'cached': True, - 'size': file_size - } - else: - # 删除无效文件 - file_path.unlink() - self.logger.warning(f"Removed invalid cached file: {file_path}") - - # 下载论文 - content = await self._download_with_retry(url) - if content: - # 验证下载内容 - if len(content) < 1024: - raise Exception(f"Downloaded content too small ({len(content)} bytes), likely an error page") - - # 保存文件 - file_path.write_bytes(content) - file_size = len(content) - - return { - 'success': True, - 'path': str(file_path), - 'cached': False, - 'size': file_size - } - else: - raise Exception("Failed to download paper - no content received") - - except Exception as e: - self.logger.error(f"Error downloading paper {title}: {str(e)}") - return { - 'success': False, - 'error': str(e) - } - - async def _parse_sp_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """解析IEEE S&P论文列表""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - - try: - print(f"🌐 正在获取 IEEE S&P {full_year} 论文列表...") - conf_info = self.CONFERENCE_INFO['sp'] - api_url = 
f"{conf_info['base_url']}" - - # 使用会话发送请求 - if not self.session: - self.session = AsyncSession() - - response = await self.session.get( - base_url, - headers=conf_info['headers'], - impersonate="chrome110" - ) - - if response.status_code != 200: - raise Exception(f"获取会议页面失败: HTTP {response.status_code}") - - # 解析页面内容 - soup = BeautifulSoup(response.text, 'html.parser') - paper_items = soup.select('div.paper-item') - - for item in paper_items: - title_elem = item.select_one('h3.paper-title') - if not title_elem: - continue - - title = title_elem.text.strip() - url = item.select_one('a[href*=".pdf"]') - if not url: - continue - - pdf_url = url['href'] - if not pdf_url.startswith('http'): - pdf_url = f"https://ieeexplore.ieee.org{pdf_url}" - - papers.append({ - 'title': title, - 'url': pdf_url - }) - - print(f"✅ 找到 {len(papers)} 篇论文") - return papers - - except Exception as e: - print(f"❌ 获取 IEEE S&P {full_year} 论文列表失败: {str(e)}") - return [] - - async def _parse_ndss_papers(self, year: str) -> List[Dict[str, Any]]: - """解析 NDSS 论文列表""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - - try: - print(f"📚 正在获取 NDSS {full_year} 论文列表...") - conf_info = self.CONFERENCE_INFO['ndss'] - base_url = f"{conf_info['base_url']}/ndss{year}/accepted-papers" - - # 使用会话发送请求 - if not self.session: - self.session = AsyncSession() - - response = await self.session.get( - base_url, - headers=conf_info['headers'], - impersonate="chrome110" - ) - - if response.status_code != 200: - raise Exception(f"获取会议页面失败: HTTP {response.status_code}") - - # 解析页面内容 - soup = BeautifulSoup(response.text, 'html.parser') - paper_items = soup.select('div.paper-item, div.accepted-paper') - - for item in paper_items: - title_elem = item.select_one('h3.paper-title, h4.paper-title, div.paper-title') - if not title_elem: - continue - - title = title_elem.text.strip() - url = item.select_one('a[href*=".pdf"]') - if not url: - continue - - pdf_url = url['href'] - if not 
pdf_url.startswith('http'): - pdf_url = f"{conf_info['base_url']}{pdf_url}" - - papers.append({ - 'title': title, - 'url': pdf_url - }) - - print(f"✅ 找到 {len(papers)} 篇论文") - return papers - - except Exception as e: - print(f"❌ 获取 NDSS {full_year} 论文列表失败: {str(e)}") - return [] - - async def _parse_usenix_papers(self, year: str) -> List[Dict[str, Any]]: - """解析 USENIX Security 论文列表""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - - try: - print(f"📚 正在获取 USENIX Security {full_year} 论文列表...") - conf_info = self.CONFERENCE_INFO['usenix'] - base_url = f"{conf_info['base_url']}{full_year}/technical-sessions" - - # 使用会话发送请求 - if not self.session: - self.session = AsyncSession() - - response = await self.session.get( - base_url, - headers=conf_info['headers'], - impersonate="chrome110" - ) - - if response.status_code != 200: - raise Exception(f"获取会议页面失败: HTTP {response.status_code}") - - # 解析页面内容 - soup = BeautifulSoup(response.text, 'html.parser') - paper_items = soup.select('div.paper-item, div.node-paper') - - for item in paper_items: - title_elem = item.select_one('h2.node-title, div.field-title') - if not title_elem: - continue - - title = title_elem.text.strip() - url = item.select_one('a[href*=".pdf"]') - if not url: - continue - - pdf_url = url['href'] - if not pdf_url.startswith('http'): - pdf_url = f"https://www.usenix.org{pdf_url}" - - papers.append({ - 'title': title, - 'url': pdf_url - }) - - print(f"✅ 找到 {len(papers)} 篇论文") - return papers - - except Exception as e: - print(f"❌ 获取 USENIX Security {full_year} 论文列表失败: {str(e)}") - return [] - - async def get_conference_papers(self, conference: str, year: str) -> List[Dict[str, Any]]: - """获取会议论文列表 - 带进度显示""" - try: - if not conference in self.conference_urls: - raise ValueError(f"不支持的会议: {conference}") - - conf_info = self.CONFERENCE_INFO.get(conference) - base_url = self.conference_urls.get(conference) - - papers = [] - print(f"🔍 正在获取 {conference.upper()} {year} 论文列表...") - - if 
conference == 'ccs': - # 使用现有的CCS下载逻辑 - papers = await self._parse_ccs_papers(base_url, year) - else: - # 使用新的会议下载器处理其他会议 - if not self.session: - self.session = AsyncSession() - - conf_downloader = ConferenceDownloader(self.session, self.logger) - papers = await conf_downloader.parse_papers(conference, base_url, year) - - if papers: - print(f"✅ 成功获取 {len(papers)} 篇论文信息") - # 显示找到的论文标题 - print(f"📋 找到的论文列表:") - for i, paper in enumerate(papers[:10]): - title = paper.get('title', '未知标题')[:60] - print(f" {i+1:2d}. {title}{'...' if len(paper.get('title', '')) > 60 else ''}") - - if len(papers) > 10: - print(f" ... 和其他 {len(papers) - 10} 篇论文") - else: - print(f"⚠️ 未找到任何论文") - - return papers - - except Exception as e: - self.logger.error(f"获取论文列表失败: {str(e)}") - raise - try: - conf_info = self.CONFERENCE_INFO.get(conference) - if not conf_info and conference != 'ccs': - raise ValueError(f"不支持的会议: {conference}") - - papers = [] - print(f"🔍 正在获取 {conference.upper()} {year} 论文列表...") - - # 根据会议类型选择相应的解析方法 - if conference == 'ccs': - papers = await self._parse_ccs_papers(self.conference_urls[conference], year) - elif conference == 'sp': - papers = await self._parse_sp_papers(year) - elif conference == 'ndss': - papers = await self._parse_ndss_papers(year) - elif conference == 'usenix': - papers = await self._parse_usenix_papers(year) - - if papers: - print(f"✅ 成功获取 {len(papers)} 篇论文信息") - - # 显示找到的论文标题 - print(f"📋 找到的论文列表:") - for i, paper in enumerate(papers[:10]): - title = paper.get('title', '未知标题')[:60] - print(f" {i+1:2d}. {title}{'...' if len(paper.get('title', '')) > 60 else ''}") - - if len(papers) > 10: - print(f" ... 
和其他 {len(papers) - 10} 篇论文") - - # 开始PDF链接验证与进度显示 - print(f"\n🔗 正在验证PDF链接有效性...") - valid_count = 0 - - for i, paper in enumerate(papers): - # 显示进度 - progress = (i + 1) / len(papers) * 100 - progress_bar = '█' * int(progress // 5) + '░' * (20 - int(progress // 5)) - print(f"\r📋 [进度: {progress_bar}] {progress:.1f}% ({i+1}/{len(papers)}) 验证: {paper.get('title', '未知标题')[:30]}...", end='', flush=True) - - # 检查URL有效性 - if isinstance(paper.get('url'), str) and paper['url'].strip(): - valid_count += 1 - - print(f"\n✅ PDF链接验证完成: {valid_count}/{len(papers)} 个有效链接") - else: - print(f"⚠️ 未找到任何论文") - - return papers - - except Exception as e: - self.logger.error(f"获取论文列表失败: {str(e)}") - raise - """获取会议论文列表 - 带进度显示""" - try: - if conference not in self.conference_urls: - raise ValueError(f"不支持的会议: {conference}") - - base_url = self.conference_urls[conference] - papers = [] - - print(f"🔍 正在获取 {conference.upper()} {year} 论文列表...") - - # 规范化年份格式 - year = self.year_formats[conference](year) - - # 根据会议类型选择相应的解析方法 - papers = await self._get_papers_by_conference(conference, base_url, year) - if papers: - print(f"✨ 成功获取 {len(papers)} 篇论文信息") - return papers - - except Exception as e: - self.logger.error(f"获取论文列表失败: {e}") - raise - - async def _get_papers_by_conference(self, conference: str, base_url: str, year: str) -> List[Dict[str, Any]]: - """根据会议类型获取论文列表""" - try: - if conference == 'sp': - # IEEE S&P - full_url = f"{base_url}{year}" - return await self._get_sp_papers(full_url, year) - elif conference == 'ndss': - # NDSS - return await self._get_ndss_papers(base_url, year) - elif conference == 'usenix': - # USENIX Security - return await self._get_usenix_papers(base_url, year) - elif conference == 'ccs': - # ACM CCS - return await self._get_ccs_papers(base_url, year) - else: - raise ValueError(f"不支持的会议: {conference}") - except Exception as e: - self.logger.error(f"获取{conference.upper()} {year}论文列表失败: {e}") - raise - - async def _get_sp_papers(self, base_url: str, year: str) -> 
List[Dict[str, Any]]: - """获取 IEEE S&P 论文列表""" - papers = [] - try: - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/94.0.4606.81' - } - async with self.session.get(base_url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to fetch SP {year} papers list") - - soup = BeautifulSoup(response.text, 'lxml') - paper_items = soup.find_all('div', class_='article-list__item') - - for item in paper_items: - title_elem = item.find('h3', class_='article-list__title') - if not title_elem: - continue - - title = title_elem.text.strip() - pdf_link = item.find('a', class_='pdf-link') - - if pdf_link and 'href' in pdf_link.attrs: - url = pdf_link['href'] - if not url.startswith('http'): - url = f"https://www.computer.org{url}" - papers.append({ - 'title': title, - 'url': url - }) - - except Exception as e: - self.logger.error(f"解析 SP {year} 论文列表失败: {e}") - raise - - return papers - - async def _get_ndss_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """获取 NDSS 论文列表""" - papers = [] - try: - url = f"{base_url}ndss{year}/accepted-papers" - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/94.0.4606.81' - } - async with self.session.get(url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to fetch NDSS {year} papers list") - - soup = BeautifulSoup(response.text, 'lxml') - paper_items = soup.find_all('div', class_='paper-item') - - for item in paper_items: - title_elem = item.find('h2', class_='title') - if not title_elem: - continue - - title = title_elem.text.strip() - pdf_link = item.find('a', href=lambda x: x and x.endswith('.pdf')) - - if pdf_link and 'href' in pdf_link.attrs: - url = pdf_link['href'] - if not url.startswith('http'): - url = f"https://www.ndss-symposium.org{url}" - papers.append({ - 'title': title, - 'url': url - }) - - except Exception as e: - self.logger.error(f"解析 NDSS {year} 论文列表失败: {e}") - raise - 
- return papers - - async def _get_usenix_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """获取 USENIX Security 论文列表""" - papers = [] - try: - url = f"{base_url}{year}/technical-sessions" - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/94.0.4606.81' - } - async with self.session.get(url, headers=headers) as response: - if response.status_code != 200: - raise Exception(f"Failed to fetch USENIX {year} papers list") - - soup = BeautifulSoup(response.text, 'lxml') - paper_items = soup.find_all('div', class_='node-paper') - - for item in paper_items: - title_elem = item.find('h2', class_='node-title') - if not title_elem: - continue - - title = title_elem.text.strip() - pdf_link = item.find('a', href=lambda x: x and x.endswith('.pdf')) - - if pdf_link and 'href' in pdf_link.attrs: - url = pdf_link['href'] - if not url.startswith('http'): - url = f"https://www.usenix.org{url}" - papers.append({ - 'title': title, - 'url': url - }) - - except Exception as e: - self.logger.error(f"解析 USENIX {year} 论文列表失败: {e}") - raise - - return papers - - async def get_papers(self, conference: str, year: str) -> List[Dict[str, Any]]: - """ - 获取指定会议和年份的论文列表 - """ - try: - base_url = self.conference_urls.get(conference, {}).get(year) - if not base_url: - self.logger.error(f"未找到 {conference} {year} 的URL配置") - return [] - - if conference == 'ccs': - papers = await self._parse_ccs_papers(base_url, year) - elif conference == 'sp': - papers = await self._parse_sp_papers(base_url, year) - elif conference == 'ndss': - papers = await self._parse_ndss_papers(base_url, year) - elif conference == 'usenix': - papers = await self._parse_usenix_papers(base_url, year) - else: - self.logger.error(f"不支持的会议类型: {conference}") - return [] - - if papers: - print(f"✅ 成功获取 {len(papers)} 篇论文信息") - - # 显示找到的论文标题 - print(f"📋 找到的论文列表:") - for i, paper in enumerate(papers[:10]): - title = paper.get('title', '未知标题')[:60] - print(f" {i+1:2d}. {title}{'...' 
if len(paper.get('title', '')) > 60 else ''}") - - if len(papers) > 10: - print(f" ... 和其他 {len(papers) - 10} 篇论文") - - # 开始PDF链接验证与进度显示 - print(f"\n🔗 正在验证PDF链接有效性...") - valid_count = 0 - - for i, paper in enumerate(papers): - # 显示进度 - progress = (i + 1) / len(papers) * 100 - progress_bar = '█' * int(progress // 5) + '░' * (20 - int(progress // 5)) - print(f"\r📋 [进度: {progress_bar}] {progress:.1f}% ({i+1}/{len(papers)}) 验证: {paper.get('title', '未知标题')[:30]}...", end='', flush=True) - - # 检查URL有效性 - if isinstance(paper.get('url'), str) and paper['url'].strip(): - valid_count += 1 - - print(f"\n✅ PDF链接验证完成: {valid_count}/{len(papers)} 个有效链接") - else: - print(f"⚠️ 未找到任何论文") - - return papers - - except Exception as e: - self.logger.error(f"Error getting papers for {conference} {year}: {str(e)}") - raise - - - - async def _parse_ccs_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """ - 解析CCS论文列表的主入口函数。 - 使用持久化会话来执行所有相关请求。 - - Args: - base_url: 论文列表的基础URL - year: 会议年份 - - Returns: - 论文信息列表 - """ - papers = [] - try: - if not self.session: - raise RuntimeError("持久化会话未初始化。请在 'async with' 块中使用 PaperDownloader。") - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - - paper_dois = await self._get_all_ccs_dois_from_proceedings_page(self.session, year) - if not paper_dois: - self.logger.error(f"❌ 未能为CCS {year} 获取任何论文的DOI。") - return [] - - self.logger.info(f"📚 开始通过API批量解析 {len(paper_dois)} 篇CCS论文的详细信息...") - - papers = await self._fetch_all_ccs_paper_details_via_api(self.session, paper_dois, year) - return papers - - except Exception as e: - self.logger.error(f"❌ CCS论文解析主流程错误: {str(e)}") - raise - - async def _fetch_all_ccs_paper_details_via_api(self, session: AsyncSession, dois: List[str], year: str) -> List[Dict[str, Any]]: - """ - 使用POST请求批量获取所有CCS论文的JSON数据并解析。 - """ - api_url = "https://dl.acm.org/action/exportCiteProcCitation" - headers = { - 'Host': 
'dl.acm.org', - 'Cookie': '_cf_bm=12; _cfuvid=eKvDTOvVWyHDD5bNf_GLEG_fzdrvwq1g_7YIL.aZOJU-1756624678973-0.0.1.1-604800000', - 'Pragma': 'no-cache', - 'Accept': '*/*', - 'Dnt': '1', - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0', - 'Sec-Ch-Ua-Platform-Version': '"19.0.0"', - 'Origin': 'https://dl.acm.org', - 'Referer': 'https://dl.acm.org/doi/proceedings/10.1145/3658644', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', - 'Priority': 'u=1, i', - } - dois_payload = ",".join(dois) - data_string = f"dois={dois_payload}&targetFile=custom-bibtex&format=json" - content_length = str(len(data_string.encode('utf-8'))) - headers['Content-Length'] = content_length - try: - self.logger.info(f"🚀 正在向 {api_url} 发送单次POST请求以获取 {len(dois)} 篇论文的JSON数据...") - response = await session.post( - api_url, - data=data_string, - headers=headers, - impersonate="chrome110", - timeout=180 - ) - if response.status_code != 200: - self.logger.error(f"❌ 批量获取JSON失败,HTTP状态码: {response.status_code}") - self.logger.error(f"响应内容: {response.text[:500]}") - debug_file = Path(f"debug_ccs_api_error_{response.status_code}.html") - debug_file.write_text(response.text, encoding='utf-8') - self.logger.info(f"🐛 已将错误响应保存到 {debug_file.absolute()} 以供调试。") - return [] - json_data_str = response.text - self.logger.info(f"✅ 成功获取JSON数据,大小: {len(json_data_str)}字节。开始解析...") - debug_json_file = Path("debug_ccs_json_response.json") - debug_json_file.write_text(json_data_str, encoding='utf-8') - self.logger.info(f"🐛 已将原始JSON响应保存到 {debug_json_file.absolute()} 以供分析。") - papers = self._parse_json_data(json_data_str, year) - self.logger.info(f"✅ 成功解析 {len(papers)}/{len(dois)} 篇论文的元数据。") - return papers - except Exception as e: - self.logger.error(f"❌ 批量获取和解析JSON数据时发生严重错误: {str(e)}") - import traceback 
- traceback.print_exc() - return [] - - def _parse_json_data(self, json_data_str: str, year: str) -> List[Dict[str, Any]]: - """ - 解析从ACM API返回的JSON数据。 - """ - papers = [] - try: - data = json.loads(json_data_str) - for item_dict in data.get('items', []): - for doi, details in item_dict.items(): - try: - title = details.get('title', f"未知标题 (DOI: {doi})") - authors_list = [] - for author_info in details.get('author', []): - given_name = author_info.get('given', '') - family_name = author_info.get('family', '') - authors_list.append(f"{given_name} {family_name}".strip()) - abstract = details.get('abstract', '摘要不可用') - pdf_url = f"https://dl.acm.org/doi/pdf/{doi}" - papers.append({ - 'title': title, - 'authors': authors_list, - 'abstract': abstract, - 'url': pdf_url, - 'conference': "CCS", - 'year': year - }) - except Exception as e: - self.logger.warning(f"解析单个JSON条目时出错 (DOI: {doi}): {e}") - continue - return papers - except json.JSONDecodeError as e: - self.logger.error(f"❌ JSON解析失败: {e}") - return [] - except Exception as e: - self.logger.error(f"❌ 处理JSON数据时发生未知错误: {e}") - return [] - - - def _parse_bibtex_data(self, bibtex_data: str, year: str) -> List[Dict[str, Any]]: - """ - 解析BibTeX数据字符串并返回论文列表。 - 使用更健壮的正则表达式来处理复杂的BibTeX格式。 - """ - papers = [] - # 使用更可靠的方式分割条目:按换行符后的'@'分割 - entries = re.split(r'\n@', bibtex_data) - - for entry in entries: - if not entry.strip() or not entry.startswith('inproceedings'): - continue - - try: - # 健壮的DOI提取 - doi_match = re.search(r'doi\s*=\s*\{([^}]+)\}', entry, re.IGNORECASE) - doi = doi_match.group(1).strip() if doi_match else "未知DOI" - - # 健壮的标题提取,能处理嵌套花括号 - title_match = re.search(r'title\s*=\s*\{((?:[^{}]|\{[^{}]*\})+)\}', entry, re.IGNORECASE) - title = title_match.group(1).strip().replace("{", "").replace("}", "") if title_match else f"未知标题 (DOI: {doi})" - - # 健壮的作者提取 - author_match = re.search(r'author\s*=\s*\{([^}]+)\}', entry, re.IGNORECASE) - authors_str = author_match.group(1) if author_match else "" - authors_list = 
[name.strip().replace("{", "").replace("}", "") for name in authors_str.split(' and ')] - - abstract = "摘要需访问论文页面查看" - pdf_url = f"https://dl.acm.org/doi/pdf/{doi}" - - papers.append({ - 'title': title, - 'authors': authors_list, - 'abstract': abstract, - 'url': pdf_url, - 'conference': "CCS", - 'year': year - }) - except Exception as e: - self.logger.warning(f"解析单个BibTeX条目时出错: {e}\n条目内容: {entry[:300]}...") - continue - return papers - - async def _get_all_ccs_dois_from_proceedings_page(self, session: AsyncSession, year: str) -> Optional[List[str]]: - """ - 获取CCS会议指定年份所有论文的DOI列表。 - 使用传入的持久化会话。 - """ - if not CURL_CFFI_AVAILABLE: - self.logger.error("❌ curl_cffi 未安装,无法执行CCS论文抓取。") - return None - - try: - short_year_str = f"'{year}" - full_year_str = f"20{year}" if len(year) == 2 else year - - proceedings_list_url = 'https://dl.acm.org/conference/ccs/proceedings' - self.logger.info(f"🌐 正在通过持久化会话访问CCS会议列表页面: {proceedings_list_url}") - - # 使用更完整的请求头来模拟浏览器行为 - headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', - 'Cache-Control': 'max-age=0', - 'Connection': 'keep-alive', - 'DNT': '1', - 'Host': 'dl.acm.org', - 'Sec-Ch-Ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Microsoft Edge";v="116"', - 'Sec-Ch-Ua-Mobile': '?0', - 'Sec-Ch-Ua-Platform': '"Windows"', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'none', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.0.0' - } - - # 尝试多次,使用不同的浏览器模拟配置 - for browser in ["chrome110", "chrome99", "chrome100", "safari15_3"]: - try: - response = await session.get( - proceedings_list_url, - impersonate=browser, - headers=headers, - timeout=45 - ) - - if response.status_code == 200: - self.logger.info(f"✅ 
使用 {browser} 成功访问") - break - else: - self.logger.warning(f"使用 {browser} 失败,HTTP状态码: {response.status_code}") - except Exception as e: - self.logger.warning(f"使用 {browser} 时出错: {str(e)}") - await asyncio.sleep(2) # 失败后等待一下再重试 - continue - - if response.status_code != 200: - self.logger.error(f"❌ 访问CCS会议列表页面失败,HTTP状态码: {response.status_code}") - return None - - self.logger.info("✅ 成功获取会议列表页面。") - soup = BeautifulSoup(response.text, 'html.parser') - - target_proc_url = None - proc_items = soup.select('li.conference__proceedings div.conference__title a') - for item in proc_items: - link_text = item.get_text(strip=True) - if short_year_str in link_text or full_year_str in link_text: - target_proc_url = urllib.parse.urljoin(proceedings_list_url, item['href']) - self.logger.info(f"✅ 找到 CCS {year} 会议录链接: {target_proc_url}") - break - - if not target_proc_url: - self.logger.error(f"❌ 未能在页面上找到 CCS {year} 的会议录链接。") - debug_file = Path("debug_acm_proceedings_list.html") - debug_file.write_text(response.text, encoding='utf-8') - self.logger.info(f"🐛 已将会议列表页面内容保存到 {debug_file.absolute()} 以供调试。") - return None - - self.logger.info(f"🌐 正在访问 CCS {year} 论文列表页面...") - response = await session.get(target_proc_url, impersonate="chrome110", timeout=45) - - if response.status_code != 200: - self.logger.error(f"❌ 访问 CCS {year} 论文列表页面失败,HTTP状态码: {response.status_code}") - return None - - self.logger.info(f"✅ 成功获取 CCS {year} 论文列表页面。") - soup = BeautifulSoup(response.text, 'html.parser') - - all_dois = [] - # 从隐藏的input中提取所有DOI - doi_inputs = soup.select('input.section--dois') - for doi_input in doi_inputs: - dois_str = doi_input.get('value', '') - if dois_str: - all_dois.extend(dois_str.split(',')) - - if not all_dois: - self.logger.warning(f"未能在 CCS {year} 页面提取到任何论文DOI。请检查页面结构是否已更改。") - debug_file = Path(f"debug_ccs_{year}_papers.html") - debug_file.write_text(response.text, encoding='utf-8') - self.logger.info(f"🐛 已将论文列表页面内容保存到 {debug_file.absolute()} 以供调试。") - return None - - # 去重并清洗 - 
unique_dois = sorted(list(set(doi.strip() for doi in all_dois if doi.strip()))) - self.logger.info(f"✅ 成功提取 {len(unique_dois)} 个唯一的 CCS {year} 论文DOI。") - return unique_dois - - except Exception as e: - self.logger.error(f"❌ 获取CCS论文DOI时发生严重错误: {str(e)}") - import traceback - traceback.print_exc() - return None - - diff --git a/src/paperbot/utils/downloader_back.py b/src/paperbot/utils/downloader_back.py deleted file mode 100644 index e049c3e7..00000000 --- a/src/paperbot/utils/downloader_back.py +++ /dev/null @@ -1,1206 +0,0 @@ -# securipaperbot/utils/downloader.py - -from typing import Dict, List, Any, Optional -import aiohttp -import asyncio -from pathlib import Path -import urllib.parse -from bs4 import BeautifulSoup -import re -import time -import random -from datetime import datetime -import logging - - -# 使用标准日志,避免相对导入问题 -def setup_logger(name): - logger = logging.getLogger(name) - if not logger.handlers: - logger.setLevel(logging.INFO) - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger - - -class PaperDownloader: - """论文下载工具类 - 优化版本""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.logger = setup_logger(__name__) - self.download_path = Path(self.config.get('download_path', './papers')) - self.download_path.mkdir(parents=True, exist_ok=True) - - # 配置下载重试参数 - 保守稳定的参数 - self.max_retries = self.config.get('max_retries', 3) - self.retry_delay = self.config.get('retry_delay', 3) # 增加到3秒 - - # 完全关闭并发 - 使用单线程确保稳定性 - max_concurrent = 1 # 强制设为1,不使用并发 - self.semaphore = asyncio.Semaphore(max_concurrent) - - # 会议URL模板 - self.conference_urls = { - 'ccs': 'https://dl.acm.org/doi/proceedings', - 'sp': 'https://ieeexplore.ieee.org/xpl/conhome/', - 'ndss': 'https://www.ndss-symposium.org/', - 'usenix': 'https://www.usenix.org/conference/' - } - - async def 
download_paper(self, url: str, title: str, paper_index: int = 0, total_papers: int = 0) -> Dict[str, Any]: - """下载单篇论文 - 优化版本""" - async with self.semaphore: - try: - # 生成文件名 - safe_title = self._sanitize_filename(title) - file_path = self.download_path / f"{safe_title}.pdf" - - # 显示下载进度(与NDSS/USENIX保持一致) - if total_papers > 0: - progress = (paper_index + 1) / total_papers * 100 - print(f"💾 [{paper_index+1}/{total_papers}] 下载: {title[:50]}{'...' if len(title) > 50 else ''}") - - # 检查是否已下载并验证文件 - if file_path.exists(): - # 验证文件大小,过小的文件可能是错误页面 - file_size = file_path.stat().st_size - if file_size > 1024: # 大于1KB认为有效 - return { - 'success': True, - 'path': str(file_path), - 'cached': True, - 'size': file_size - } - else: - # 删除无效文件 - file_path.unlink() - self.logger.warning(f"Removed invalid cached file: {file_path}") - - # 下载论文 - content = await self._download_with_retry(url) - if content: - # 验证下载内容 - if len(content) < 1024: - raise Exception(f"Downloaded content too small ({len(content)} bytes), likely an error page") - - # 保存文件 - file_path.write_bytes(content) - file_size = len(content) - - return { - 'success': True, - 'path': str(file_path), - 'cached': False, - 'size': file_size - } - else: - raise Exception("Failed to download paper - no content received") - - except Exception as e: - self.logger.error(f"Error downloading paper {title}: {str(e)}") - return { - 'success': False, - 'error': str(e) - } - - async def get_conference_papers(self, conference: str, year: str) -> List[Dict[str, Any]]: - """获取会议论文列表 - 带进度显示""" - try: - if conference not in self.conference_urls: - raise ValueError(f"Unsupported conference: {conference}") - - base_url = self.conference_urls[conference] - papers = [] - - print(f"🔍 正在获取 {conference.upper()} {year} 论文列表...") - - # 根据会议类型选择相应的解析方法 - if conference == 'ccs': - papers = await self._parse_ccs_papers(base_url, year) - elif conference == 'sp': - papers = await self._parse_sp_papers(base_url, year) - elif conference == 'ndss': - 
papers = await self._parse_ndss_papers(base_url, year) - elif conference == 'usenix': - papers = await self._parse_usenix_papers(base_url, year) - - if papers: - print(f"✅ 成功获取 {len(papers)} 篇论文信息") - - # 显示找到的论文标题 - print(f"📋 找到的论文列表:") - for i, paper in enumerate(papers[:10]): - title = paper.get('title', '未知标题')[:60] - print(f" {i+1:2d}. {title}{'...' if len(paper.get('title', '')) > 60 else ''}") - - if len(papers) > 10: - print(f" ... 和其他 {len(papers) - 10} 篇论文") - - # 开始PDF链接验证与进度显示 - print(f"\n🔗 正在验证PDF链接有效性...") - valid_count = 0 - - for i, paper in enumerate(papers): - # 显示进度 - progress = (i + 1) / len(papers) * 100 - progress_bar = '█' * int(progress // 5) + '░' * (20 - int(progress // 5)) - print(f"\r📋 [进度: {progress_bar}] {progress:.1f}% ({i+1}/{len(papers)}) 验证: {paper.get('title', '未知标题')[:30]}...", end='', flush=True) - - # 检查URL有效性 - if isinstance(paper.get('url'), str) and paper['url'].strip(): - valid_count += 1 - - print(f"\n✅ PDF链接验证完成: {valid_count}/{len(papers)} 个有效链接") - else: - print(f"⚠️ 未找到任何论文") - - return papers - - except Exception as e: - self.logger.error(f"Error getting papers for {conference} {year}: {str(e)}") - raise - - - async def _parse_ccs_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - return none - - - async def _parse_ndss_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """解析NDSS论文列表 - 优化版本带进度显示""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - url = f"{base_url}ndss{full_year}/accepted-papers/" - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - - enhanced_timeout = aiohttp.ClientTimeout(total=120, connect=30, sock_read=60) - - print(f"🌐 访问 NDSS {year} 会议页面...") - - max_retries = 3 - for attempt in range(max_retries): - try: - async with aiohttp.ClientSession(headers=headers, timeout=enhanced_timeout) as session: - - async with session.get(url) as response: - 
print(f"⚡ 尝试 {attempt + 1}/{max_retries}: HTTP {response.status}") - - if response.status == 200: - print(f"📝 正在解析页面内容...") - html = await response.text() - soup = BeautifulSoup(html, 'html.parser') - - # 查找NDSS论文容器 - paper_containers = soup.find_all('div', class_='tag-box rel-paper') - print(f"📚 找到 {len(paper_containers)} 个论文容器") - - if not paper_containers: - print(f"⚠️ 未找到论文容器,尝试其他选择器...") - # 尝试其他可能的选择器 - paper_containers = soup.find_all('div', class_='paper') or soup.find_all('article') - print(f"🔄 备用选择器找到 {len(paper_containers)} 个容器") - - # 处理论文容器并显示简单进度 - for idx, container in enumerate(paper_containers): - if idx % 5 == 0 or idx == len(paper_containers) - 1: # 每5个显示一次进度 - progress = (idx + 1) / len(paper_containers) * 100 - print(f"🔍 解析进度: {progress:.1f}% ({idx+1}/{len(paper_containers)})") - - try: - # 提取标题 - 尝试多种选择器 - title_elem = (container.find('h3', class_='blog-post-title') or - container.find('h3') or - container.find('h2') or - container.find('h1')) - - if not title_elem: - continue - - title = title_elem.get_text().strip() - - # 显示找到的论文标题 - print(f"📄 [{idx+1}/{len(paper_containers)}] 找到论文: {title[:70]}{'...' 
if len(title) > 70 else ''}") - - # 提取作者信息 - author_elem = container.find('p') - authors_text = author_elem.get_text().strip() if author_elem else '' - authors = [author.strip() for author in authors_text.split(',')] if authors_text else [] - - # 提取详情页链接 - detail_link = (container.find('a', class_='paper-link-abs') or - container.find('a', href=True)) - detail_url = detail_link.get('href') if detail_link else '' - - # 提取PDF链接 - pdf_url = '' - if detail_url: - pdf_url = await self._get_ndss_pdf_from_detail_page(session, detail_url) - - paper_info = { - 'title': title, - 'authors': authors, - 'abstract': '', - 'url': pdf_url, - 'detail_url': detail_url, - 'doi': '' - } - - if title and len(title) > 10: - papers.append(paper_info) - - except Exception as e: - self.logger.warning(f"Error parsing paper container {idx}: {str(e)}") - continue - - print(f"\n✅ 基础信息解析完成: {len(papers)} 篇论文") - return papers - - elif response.status == 404: - print(f"❌ NDSS {year} 页面不存在") - return [] - else: - print(f"⚠️ HTTP {response.status},正在重试...") - if attempt < max_retries - 1: - await asyncio.sleep(2 ** attempt) - continue - else: - raise Exception(f"HTTP {response.status}") - - except asyncio.TimeoutError: - print(f"⏰ 超时 {attempt + 1}/{max_retries}") - if attempt < max_retries - 1: - await asyncio.sleep(5 * (attempt + 1)) - continue - else: - raise Exception("Connection timeout after retries") - - except Exception as e: - print(f"❌ 尝试 {attempt + 1} 失败: {str(e)}") - if attempt < max_retries - 1: - await asyncio.sleep(3 * (attempt + 1)) - continue - else: - raise - - return papers - - async def _get_ndss_pdf_from_detail_page(self, session: aiohttp.ClientSession, detail_url: str) -> str: - """从 NDSS 论文详情页获取 PDF 链接 - 优化版本""" - if not detail_url: - return '' - - try: - # 使用更短的超时时间提高速度 - timeout = aiohttp.ClientTimeout(total=15, connect=5) - - async with session.get(detail_url, timeout=timeout) as response: - if response.status == 200: - html = await response.text() - soup = 
BeautifulSoup(html, 'html.parser') - - # 尝试多种 PDF 链接模式 - pdf_patterns = [ - # 直接 PDF 链接 - soup.find('a', href=re.compile(r'\.pdf$', re.I)), - # 包含 "pdf" 文本的链接 - soup.find('a', string=re.compile(r'pdf', re.I)), - # 包含 "download" 的链接 - soup.find('a', string=re.compile(r'download', re.I)), - # 在 href 中包含 "pdf" 的链接 - soup.find('a', href=re.compile(r'pdf', re.I)) - ] - - for pdf_link in pdf_patterns: - if pdf_link and hasattr(pdf_link, 'get'): - pdf_url = pdf_link.get('href') - if pdf_url and isinstance(pdf_url, str): - # 确保 URL 是完整的 - if not pdf_url.startswith('http'): - if pdf_url.startswith('/'): - pdf_url = f"https://www.ndss-symposium.org{pdf_url}" - else: - # 相对路径 - base_url = '/'.join(detail_url.split('/')[:-1]) - pdf_url = f"{base_url}/{pdf_url}" - - # 验证 URL 是否以 .pdf 结尾或包含 pdf - if pdf_url.lower().endswith('.pdf') or 'pdf' in pdf_url.lower(): - return pdf_url - - # 如果没有找到直接链接,尝试查找内嵌的PDF - iframe_pdf = soup.find('iframe', src=re.compile(r'\.pdf', re.I)) - if iframe_pdf and hasattr(iframe_pdf, 'get'): - pdf_url = iframe_pdf.get('src') - if pdf_url and isinstance(pdf_url, str): - if not pdf_url.startswith('http'): - pdf_url = f"https://www.ndss-symposium.org{pdf_url}" - return pdf_url - - except asyncio.TimeoutError: - self.logger.debug(f"Timeout getting PDF from {detail_url}") - except Exception as e: - self.logger.debug(f"Error getting PDF from {detail_url}: {str(e)}") - - return '' - - async def _parse_sp_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """解析SP论文列表 - 使用Computer.org GraphQL API直接获取proceedings ID""" - papers = [] - full_year = f"20{year}" if len(year) == 2 else year - - print(f"🌐 正在获取 SP {year} proceedings ID...") - - headers = { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36', - 'Accept': 'application/json, text/plain, */*', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 
'Content-Type': 'application/json', - 'Origin': 'https://www.computer.org', - 'Sec-Fetch-Site': 'same-origin', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Dest': 'empty', - 'Referer': 'https://www.computer.org/csdl/proceedings/1000646', - 'Connection': 'keep-alive', - 'Cache-Control': 'no-cache' - } - - try: - async with aiohttp.ClientSession(headers=headers) as session: - # 使用GraphQL API获取SP会议的所有proceedings - graphql_url = "https://www.computer.org/csdl/api/v1/graphql" - graphql_query = { - "variables": {"groupId": "1000646"}, # SP会议的组ID - "query": "query ($groupId: String) {\n proceedings(groupId: $groupId) {\n id\n acronym\n title\n volume\n displayVolume\n year\n __typename\n }\n}" - } - - timeout = aiohttp.ClientTimeout(total=60, connect=30) - async with session.post(graphql_url, json=graphql_query, timeout=timeout) as response: - if response.status == 200: - data = await response.json() - proceedings_list = data.get('data', {}).get('proceedings', []) - - print(f"✅ 获取到 {len(proceedings_list)} 个 proceedings") - - # 查找指定年份的proceedings - target_proceeding = None - for proc in proceedings_list: - if str(proc.get('year')) == full_year: - target_proceeding = proc - break - - if not target_proceeding: - print(f"❌ 未找到 {full_year} 年的proceedings") - return papers - - proceedings_id = target_proceeding.get('id') - print(f"🆔 找到 SP {full_year} proceedings ID: {proceedings_id}") - - # 调用Computer.org API获取论文数据 - all_papers = await self._call_computer_org_api(session, proceedings_id) - - # 处理所有论文 - if all_papers: - print(f"✅ 成功获取 {len(all_papers)} 篇论文") - - # 解析真正的PDF下载链接 - print(f"🔗 开始解析 {len(all_papers)} 个PDF下载链接...") - for i, paper in enumerate(all_papers): - if paper.get('needs_pdf_resolution') and paper.get('url'): - print(f"📋 PDF解析进度: {i+1}/{len(all_papers)} - {paper.get('title', '')[:50]}...") - - real_pdf_url = await self._resolve_ieee_pdf_url(session, paper['url']) - if real_pdf_url: - paper['url'] = real_pdf_url - print(f"✅ 解析成功: {real_pdf_url[:60]}...") - else: - 
print(f"❌ PDF链接解析失败") - paper.pop('needs_pdf_resolution', None) - - papers = all_papers # 返回所有论文 - else: - papers = [] - - return papers - else: - print(f"❌ GraphQL API调用失败: HTTP {response.status}") - return [] - - except Exception as e: - print(f"❌ SP解析错误: {str(e)}") - return [] - - - async def _call_computer_org_api(self, session: aiohttp.ClientSession, proceedings_id: str) -> List[Dict[str, Any]]: - """调用Computer.org API获取论文数据""" - papers = [] - api_url = f"https://www.computer.org/csdl/api/v1/citation/asciitext/proceedings/{proceedings_id}" - - print(f"🔗 调用Computer.org API: {api_url}") - - # 使用用户提供的完整请求头 - api_headers = { - 'Host': 'www.computer.org', - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36', - 'Accept': '*/*', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', - 'Accept-Encoding': 'gzip, deflate, br, zstd', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Referer': f'https://www.computer.org/csdl/proceedings/sp/2024/{proceedings_id}', - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-origin', - 'Priority': 'u=1, i', - 'Pragma': 'no-cache', - 'Cache-Control': 'no-cache' - } - - try: - timeout = aiohttp.ClientTimeout(total=60, connect=30) - async with session.get(api_url, headers=api_headers, timeout=timeout) as response: - if response.status == 200: - print(f"✅ API调用成功") - text_data = await response.text() - papers = self._parse_citation_data(text_data) - print(f"📚 解析到 {len(papers)} 篇论文") - return papers - else: - print(f"❌ API调用失败: HTTP {response.status}") - return [] - - except Exception as e: - print(f"❌ API调用错误: {str(e)}") - return [] - - def _parse_citation_data(self, citation_text: str) -> List[Dict[str, Any]]: - """解析引用数据获取论文信息""" - papers = [] - - try: - # 按条目分割 - entries = re.split(r'\n\s*\n', citation_text.strip()) - - for entry in entries: - if not entry.strip(): - continue - - paper_info = 
self._parse_single_citation(entry.strip()) - if paper_info: - papers.append(paper_info) - - return papers - - except Exception as e: - self.logger.error(f"解析引用数据失败: {str(e)}") - return [] - - def _parse_single_citation(self, citation: str) -> Optional[Dict[str, Any]]: - """解析单个引用条目""" - try: - # 查找标题 (通常在引号内或作为第一行) - title_match = re.search(r'"([^"]+)"', citation) - if not title_match: - # 备用:提取第一行作为标题 - lines = citation.split('\n') - title = lines[0].strip() if lines else '' - else: - title = title_match.group(1) - - if not title or len(title) < 10: - return None - - # 检查是否包含keywords:{}(空大括号),这表示非论文内容 - if re.search(r'keywords:\s*\{\s*\}', citation, re.I): - return None - - # 过滤特定的非论文标题模式 - non_paper_patterns = [ - r'author\s+index', - r'table\s+of\s+contents', - r'program\s+committee', - r'organiz(ing|ation)\s+committee', - r'chair\s+message', - r'welcome\s+message', - r'foreword', - r'preface', - r'index\s+terms', - r'subject\s+index' - ] - - for pattern in non_paper_patterns: - if re.search(pattern, title, re.I): - return None - - # 查找作者 - authors = [] - author_match = re.search(r'Author\(s\):\s*([^\n]+)', citation) - if author_match: - authors_text = author_match.group(1) - authors = [author.strip() for author in authors_text.split(',')] - - # 查找所有链接,只保留包含doi.ieeecomputersociety.org的链接 - all_urls = re.findall(r'https?://[^\s]+', citation) - doi_url = '' - - for url in all_urls: - if 'doi.ieeecomputersociety.org' in url: - doi_url = url - break - - # 如果没找到包含doi.ieeecomputersociety.org的链接,跳过这篇论文 - if not doi_url: - return None - - return { - 'title': title, - 'authors': authors, - 'abstract': '', - 'url': doi_url, # 这里先保存DOI链接,稍后会解析真正的PDF链接 - 'doi': doi_url, - 'needs_pdf_resolution': True # 标记需要解析PDF链接 - } - - except Exception as e: - self.logger.error(f"解析单个引用失败: {str(e)}") - return None - - async def _resolve_ieee_pdf_url(self, session: aiohttp.ClientSession, doi_url: str) -> str: - """从IEEE DOI页面解析真正的PDF下载链接""" - try: - if not doi_url: - return '' - - 
print(f"🔍 解析PDF链接: {doi_url[:50]}...") - - # 访问DOI页面,跟随重定向 - timeout = aiohttp.ClientTimeout(total=30, connect=10) - - # 设置请求头模拟浏览器 - headers = { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1' - } - - async with session.get(doi_url, timeout=timeout, headers=headers, allow_redirects=True) as response: - if response.status == 200: - html = await response.text() - soup = BeautifulSoup(html, 'html.parser') - - print(f"✅ 成功访问页面: {str(response.url)[:60]}...") - - # 查找PDF下载链接的多种模式 - pdf_patterns = [ - # Computer.org特定的DOWNLOAD PDF按钮 - soup.find('a', string=re.compile(r'DOWNLOAD PDF', re.I)), - soup.find('a', text=re.compile(r'download.*pdf', re.I)), - soup.find('button', string=re.compile(r'download.*pdf', re.I)), - - # 带有PDF相关class的链接 - soup.find('a', class_=re.compile(r'download|pdf', re.I)), - soup.find('a', attrs={'aria-label': re.compile(r'download|pdf', re.I)}), - - # 直接PDF链接 - soup.find('a', href=re.compile(r'\.pdf$', re.I)), - - # Meta标签中的PDF链接 - soup.find('meta', attrs={'name': 'citation_pdf_url'}), - soup.find('meta', attrs={'property': 'citation_pdf_url'}) - ] - - for pattern in pdf_patterns: - if pattern: - if pattern.name == 'meta': - pdf_url = pattern.get('content') - else: - pdf_url = pattern.get('href') - - if pdf_url: - # 补全相对URL - if not pdf_url.startswith('http'): - if pdf_url.startswith('/'): - pdf_url = f"https://www.computer.org{pdf_url}" - else: - base_url = '/'.join(str(response.url).split('/')[:-1]) - pdf_url = f"{base_url}/{pdf_url}" - - print(f"✅ 找到PDF链接: {pdf_url[:60]}...") - return pdf_url - - # 如果没找到直接链接,尝试查找data-*属性中PDF链接 - for element in soup.find_all(attrs={'data-pdf-url': 
True}): - pdf_url = element.get('data-pdf-url') - if pdf_url: - if not pdf_url.startswith('http'): - pdf_url = f"https://www.computer.org{pdf_url}" - print(f"🔗 从data属性找到PDF: {pdf_url[:60]}...") - return pdf_url - - # 尝试查找包含PDF的所有链接 - all_links = soup.find_all('a', href=True) - for link in all_links: - href = link.get('href') - if href and ('pdf' in href.lower() or 'download' in href.lower()): - if not href.startswith('http'): - if href.startswith('/'): - href = f"https://www.computer.org{href}" - else: - base_url = '/'.join(str(response.url).split('/')[:-1]) - href = f"{base_url}/{href}" - print(f"🔍 候选PDF链接: {href[:60]}...") - return href - - # 最后尝试:从当前页面URL构造PDF链接 - current_url = str(response.url) - if '/proceedings-article/' in current_url: - # 提取文章ID - parts = current_url.rstrip('/').split('/') - if parts: - article_id = parts[-1] - # 使用真正的Computer.org PDF下载API - pdf_url = f"https://www.computer.org/csdl/pds/api/csdl/proceedings/download-article/{article_id}/pdf" - print(f"🔗 构造PDF API链接: {pdf_url}") - return pdf_url - - else: - print(f"❌ DOI访问失败: HTTP {response.status}") - - except Exception as e: - print(f"❌ 解析PDF链接失败: {str(e)}") - - return '' - - async def _parse_usenix_papers(self, base_url: str, year: str) -> List[Dict[str, Any]]: - """解析USENIX Security论文列表 - 精确修复版本""" - papers = [] - url = f"https://www.usenix.org/conference/usenixsecurity{year}/technical-sessions" - - print(f"🌐 正在解析 USENIX Security {year} 论文列表...") - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - - try: - async with aiohttp.ClientSession(headers=headers) as session: - timeout = aiohttp.ClientTimeout(total=30, connect=10) - - async with session.get(url, timeout=timeout) as response: - if response.status == 200: - print(f"✅ 页面访问成功,开始解析...") - html = await response.text() - soup = BeautifulSoup(html, 'html.parser') - - # 使用正确的选择器找到论文节点 - paper_nodes = soup.find_all('article', 
class_='node node-paper view-mode-schedule') - - print(f"📚 找到 {len(paper_nodes)} 个论文节点") - - if not paper_nodes: - print(f"⚠️ 未找到有效的论文节点") - return papers - - for idx, node in enumerate(paper_nodes): - # 显示进度 - if idx % 10 == 0 or idx == len(paper_nodes) - 1: - progress = (idx + 1) / len(paper_nodes) * 100 - print(f"🔍 解析进度: {progress:.1f}% ({idx+1}/{len(paper_nodes)})") - - try: - # 提取标题 - 使用h2标签 - title_elem = node.find('h2') - if not title_elem: - continue - - # 从链接中获取标题文本 - link_elem = title_elem.find('a') - if not link_elem: - continue - - title = link_elem.get_text().strip() - - # 验证是否为有效论文标题 - if not self._is_valid_paper_title(title): - continue - - # 显示找到的论文标题 - print(f"📄 [{idx+1}/{len(paper_nodes)}] 找到论文: {title[:70]}{'...' if len(title) > 70 else ''}") - - # 提取作者信息 - authors = [] - author_container = node.find('div', class_='field-name-field-paper-people-text') - if author_container: - authors_text = author_container.get_text().strip() - if authors_text: - # 简单解析作者列表 - authors = [authors_text.split(',')[0].strip()] if ',' in authors_text else [authors_text] - - # 获取PDF链接 - pdf_url = await self._get_usenix_pdf_url_simple(session, node) - - if pdf_url: - papers.append({ - 'title': title, - 'authors': authors, - 'abstract': '', - 'url': pdf_url, - 'doi': '' - }) - - except Exception as e: - self.logger.debug(f"解析节点 {idx} 失败: {str(e)}") - continue - - print(f"✅ USENIX 解析完成: {len(papers)} 篇论文") - return papers - - elif response.status == 404: - print(f"❌ USENIX {year} 页面不存在") - return [] - else: - print(f"❌ HTTP {response.status}") - return [] - - except asyncio.TimeoutError: - print(f"⏰ 访问超时") - return [] - except Exception as e: - print(f"❌ 解析错误: {str(e)}") - return [] - - async def _get_usenix_pdf_url_simple(self, session: aiohttp.ClientSession, node) -> str: - """简化的USENIX PDF链接获取方法""" - try: - # 1. 
首先查找直接的PDF链接 - pdf_link = node.find('a', href=re.compile(r'\.pdf$', re.I)) - if pdf_link: - href = pdf_link.get('href') - if href: - return self._complete_usenix_url(href) - - # 2. 查找presentation页面链接 - presentation_link = node.find('a', href=re.compile(r'/presentation/', re.I)) - if presentation_link: - presentation_url = presentation_link.get('href') - if presentation_url: - presentation_url = self._complete_usenix_url(presentation_url) - - # 从presentation页面获取PDF链接 - try: - timeout = aiohttp.ClientTimeout(total=10, connect=5) - async with session.get(presentation_url, timeout=timeout) as response: - if response.status == 200: - html = await response.text() - soup = BeautifulSoup(html, 'html.parser') - - # 查找PDF下载链接 - pdf_link = soup.find('a', href=re.compile(r'\.pdf$', re.I)) - if pdf_link: - pdf_url = pdf_link.get('href') - if pdf_url: - return self._complete_usenix_url(pdf_url) - except Exception as e: - self.logger.debug(f"presentation页面获取失败: {str(e)}") - - return '' - - except Exception as e: - self.logger.debug(f"获取PDF链接失败: {str(e)}") - return '' - - def _is_valid_paper_title(self, title: str) -> bool: - """验证是否为有效的论文标题""" - if not title or len(title) < 10: # 降低最小长度要求 - return False - - # 只排除明显的非论文内容,减少过滤 - exclude_keywords = [ - 'technical session', 'session chair', 'keynote', 'tutorial', - 'workshop', 'break', 'lunch', 'coffee break', 'opening remarks', - 'closing remarks', 'panel discussion', 'poster session' - ] - - title_lower = title.lower() - for keyword in exclude_keywords: - if keyword in title_lower: - return False - - # 放宽条件,只要有一定长度就认为是有效论文 - return len(title) > 15 - - def _complete_usenix_url(self, url: str) -> str: - """补全USENIX URL""" - if not url: - return '' - - if url.startswith('http'): - return url - elif url.startswith('/'): - return f"https://www.usenix.org{url}" - else: - return f"https://www.usenix.org/{url}" - - - def _extract_ieee_pdf_url(self, paper_element) -> str: - """从IEEE页面元素中提取PDF链接""" - try: - # 查找PDF链接 - pdf_link = 
paper_element.find('a', href=re.compile(r'.*\.pdf')) - if pdf_link: - return pdf_link['href'] - - # 查找文章链接并构造PDF URL - article_link = paper_element.find('a', href=re.compile(r'/document/')) - if article_link: - doc_id = re.search(r'/document/(\d+)', article_link['href']) - if doc_id: - return f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id.group(1)}" - - return '' - except Exception as e: - self.logger.error(f"Error extracting IEEE PDF URL: {str(e)}") - return '' - """从IEEE页面元素中提取PDF链接""" - try: - # 查找PDF链接 - pdf_link = paper_element.find('a', href=re.compile(r'.*\.pdf')) - if pdf_link: - return pdf_link['href'] - - # 查找文章链接并构造PDF URL - article_link = paper_element.find('a', href=re.compile(r'/document/')) - if article_link: - doc_id = re.search(r'/document/(\d+)', article_link['href']) - if doc_id: - return f"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber={doc_id.group(1)}" - - return '' - except Exception as e: - self.logger.error(f"Error extracting IEEE PDF URL: {str(e)}") - return '' - - - - async def _download_with_retry(self, url: str) -> Optional[bytes]: - """带重试机制的下载 - 支持CCS反爬策略""" - if not url: - return None - - # 对ACM CCS论文使用特殊处理 - if 'dl.acm.org/doi/pdf/' in url: - return await self._download_acm_ccs_pdf_enhanced(url) - - # 对Computer.org的SP论文使用特殊处理 - if 'computer.org/csdl/pds/api' in url: - return await self._download_computer_org_pdf(url) - - # 其他链接使用原有方法 - return await self._download_with_aiohttp(url) - - async def _download_acm_ccs_pdf_enhanced(self, url: str) -> Optional[bytes]: - """增强版ACM CCS PDF下载方法 - 完整的反爬策略""" - try: - # 1. 首先访问数据库主页获取session - await self._warm_up_acm_session() - - # 2. 提取DOI信息 - doi_match = re.search(r'/doi/pdf/(.+)', url) - if not doi_match: - raise Exception("Invalid DOI URL format") - - doi = doi_match.group(1) - acm_page_url = f"https://dl.acm.org/doi/{doi}" - - # 3. 
先访问论文页面获取cookies和referer - print(f"🔍 访问论文页面: {acm_page_url[:60]}...") - - page_headers = self._get_anti_crawler_headers() - - async with aiohttp.ClientSession(headers=page_headers) as session: - # 随机延迟 - await asyncio.sleep(random.uniform(2, 5)) - - timeout = aiohttp.ClientTimeout(total=30, connect=10) - async with session.get(acm_page_url, timeout=timeout) as response: - if response.status == 200: - print(f"✅ 成功访问论文页面") - - # 4. 现在下载PDF,使用论文页面作为referer - pdf_headers = self._get_pdf_download_headers(acm_page_url) - - # 添加更长的延迟 - await asyncio.sleep(random.uniform(3, 7)) - - print(f"📥 开始下载PDF: {url[:60]}...") - - async with session.get(url, headers=pdf_headers, timeout=timeout) as pdf_response: - if pdf_response.status == 200: - content = await pdf_response.read() - - # 验证PDF文件 - if self._is_valid_pdf(content): - print(f"✅ PDF下载成功 (size: {len(content)/1024:.1f}KB)") - return content - else: - raise Exception("Downloaded content is not a valid PDF") - elif pdf_response.status == 403: - raise Exception("Access denied - may need institutional access or VPN") - elif pdf_response.status == 429: - raise Exception("Rate limited - need to slow down requests") - else: - raise Exception(f"PDF download failed: HTTP {pdf_response.status}") - elif response.status == 403: - raise Exception("Access to paper page denied - may need institutional access") - else: - raise Exception(f"Paper page access failed: HTTP {response.status}") - - except Exception as e: - self.logger.warning(f"Enhanced ACM download failed: {str(e)}") - # 如果增强方法失败,尝试简单方法 - return await self._download_acm_ccs_pdf_simple(url) - - async def _warm_up_acm_session(self): - """预热 ACM session,获取必要的 cookies""" - try: - headers = self._get_anti_crawler_headers() - - async with aiohttp.ClientSession(headers=headers) as session: - # 访问 ACM 主页 - await asyncio.sleep(random.uniform(1, 3)) - timeout = aiohttp.ClientTimeout(total=20, connect=10) - - async with session.get('https://dl.acm.org/', timeout=timeout) as response: - if 
response.status == 200: - print(f"🔥 ACM session 预热成功") - else: - print(f"⚠️ ACM session 预热失败: HTTP {response.status}") - - except Exception as e: - self.logger.debug(f"ACM session warm-up failed: {str(e)}") - - async def _download_acm_ccs_pdf_simple(self, url: str) -> Optional[bytes]: - """简单版ACM CCS PDF下载方法作为备用""" - try: - headers = self._get_pdf_download_headers() - - async with aiohttp.ClientSession(headers=headers) as session: - await asyncio.sleep(random.uniform(2, 5)) - - timeout = aiohttp.ClientTimeout(total=60, connect=20) - async with session.get(url, timeout=timeout) as response: - if response.status == 200: - content = await response.read() - if self._is_valid_pdf(content): - return content - - return None - - except Exception as e: - self.logger.warning(f"Simple ACM download failed: {str(e)}") - return None - - async def _download_with_retry(self, url: str) -> Optional[bytes]: - """带重试机制的下载 - 修复版本""" - if not url: - return None - - # 对Computer.org的SP论文使用特殊处理 - if 'computer.org/csdl/pds/api' in url: - return await self._download_computer_org_pdf(url) - - # 对ACM CCS论文使用特殊处理 - if 'dl.acm.org/doi/pdf/' in url: - return await self._download_acm_ccs_pdf(url) - - # 其他链接使用原有方法 - return await self._download_with_aiohttp(url) - - async def _download_acm_ccs_pdf(self, url: str) -> Optional[bytes]: - """专门为ACM CCS PDF下载的方法 - 使用curl绕过保护机制""" - # 尝试多次下载,增加成功率 - for attempt in range(3): - result = await self._download_with_curl(url) - if result: - return result - # 添加延迟 - import asyncio - import random - delay = random.uniform(5, 10) - await asyncio.sleep(delay) - return None - - async def _download_computer_org_pdf(self, api_url: str) -> Optional[bytes]: - """专门为Computer.org PDF下载的方法 - 使用curl""" - return await self._download_with_curl(api_url) - - async def _download_with_curl(self, url: str) -> Optional[bytes]: - """使用curl命令下载,绕过ACM保护机制""" - try: - import asyncio - import subprocess - import random - import re - - # 提取DOI用于Referer头 - doi_match = 
re.search(r'/doi/pdf/(.+)', url) - referer = f"https://dl.acm.org/doi/{doi_match.group(1)}" if doi_match else url - - # 随机选择User-Agent,模拟不同浏览器 - user_agents = [ - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36' - ] - - # 构建curl命令,模拟真实浏览器请求 - cmd = [ - 'curl', - '-L', # 跟随重定向 - '-s', # 静默模式 - '--max-time', '60', # 最大超时时间 - '--user-agent', random.choice(user_agents), - '--header', 'Accept: application/pdf,application/octet-stream,*/*;q=0.8', - '--header', f'Referer: {referer}', - '--header', 'Accept-Language: en-US,en;q=0.9', - '--header', 'Accept-Encoding: gzip, deflate, br', - '--header', 'Connection: keep-alive', - '--header', 'Upgrade-Insecure-Requests: 1', - '--header', 'Sec-Fetch-Dest: document', - '--header', 'Sec-Fetch-Mode: navigate', - '--header', 'Sec-Fetch-Site: same-origin', - '--header', 'Cache-Control: max-age=0', - '--header', 'DNT: 1', - '--header', 'Sec-Ch-Ua: "Not;A=Brand";v="99", "Microsoft Edge";v="139", "Chromium";v="139"', - '--header', 'Sec-Ch-Ua-Mobile: ?0', - '--header', 'Sec-Ch-Ua-Platform: "Windows"', - url - ] - - # 使用subprocess异步执行curl - process = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE - ) - - stdout, stderr = await process.communicate() - - if process.returncode == 0 and stdout and len(stdout) > 1024: - # 验证PDF文件类型 - if self._is_valid_pdf(stdout): - return stdout - else: - return None - else: - error_msg = stderr.decode('utf-8', errors='ignore') if stderr else 'Unknown error' - - except Exception as e: - pass - - return None - - async def 
_download_with_aiohttp(self, url: str) -> Optional[bytes]: - """原有的aiohttp下载方法""" - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Accept': 'application/pdf,application/octet-stream,*/*', - 'Accept-Encoding': 'gzip, deflate, br', - 'Connection': 'keep-alive' - } - - for attempt in range(self.max_retries): - try: - # 为每次重试创建新的Session,避免连接问题 - connector = aiohttp.TCPConnector( - limit=5, - limit_per_host=2, - force_close=True, # 强制关闭连接避免复用问题 - enable_cleanup_closed=True - ) - - timeout = aiohttp.ClientTimeout( - total=90, - connect=30, - sock_read=60 - ) - - async with aiohttp.ClientSession( - headers=headers, - timeout=timeout, - connector=connector - ) as session: - async with session.get(url) as response: - if response.status == 200: - content = await response.read() - if len(content) > 0: - return content - else: - self.logger.warning(f"Empty content from {url}") - elif response.status in [429, 503, 502]: - self.logger.warning( - f"Server busy (attempt {attempt + 1}/{self.max_retries}): " - f"HTTP {response.status} for {url}" - ) - await asyncio.sleep(self.retry_delay * (attempt + 1) * 2) - continue - else: - self.logger.warning( - f"Download failed (attempt {attempt + 1}/{self.max_retries}): " - f"HTTP {response.status} for {url}" - ) - - except asyncio.TimeoutError: - self.logger.warning( - f"Download timeout (attempt {attempt + 1}/{self.max_retries}): {url}" - ) - except Exception as e: - self.logger.warning( - f"Download error (attempt {attempt + 1}/{self.max_retries}): {str(e)} for {url}" - ) - - # 重试间隔 - if attempt < self.max_retries - 1: - delay = self.retry_delay * (attempt + 1) - await asyncio.sleep(delay) - - self.logger.error(f"Failed to download after {self.max_retries} attempts: {url}") - return None - - def _sanitize_filename(self, filename: str) -> str: - """清理文件名""" - # 移除不允许的字符 - invalid_chars = '<>:"/\\|?*' - for char in invalid_chars: - 
filename = filename.replace(char, '') - - # 将空格替换为下划线 - filename = filename.replace(' ', '_') - - # 限制长度 - max_length = 255 - len('.pdf') - if len(filename) > max_length: - filename = filename[:max_length] - - return filename.strip('._') - - def _is_valid_pdf(self, content: bytes) -> bool: - """验证是否为有效的PDF文件""" - if not content or len(content) < 4: - return False - - # 检查PDF文件头(%PDF-) - return content.startswith(b'%PDF-') - - diff --git a/src/paperbot/utils/keyword_optimizer.py b/src/paperbot/utils/keyword_optimizer.py deleted file mode 100644 index 4c33e330..00000000 --- a/src/paperbot/utils/keyword_optimizer.py +++ /dev/null @@ -1,491 +0,0 @@ -""" -关键词优化器 - -参考: BettaFish/MediaEngine/tools/ -适配: PaperBot 学者追踪 - 提升 Semantic Scholar 查询命中率 - -功能: -- 查询扩展 (同义词、相关术语) -- 查询重写 (LLM 辅助) -- 安全领域术语优化 -""" - -from typing import List, Dict, Any, Optional, Set -from dataclasses import dataclass, field -from loguru import logger - - -# ===== 安全领域术语库 ===== - -SECURITY_SYNONYMS: Dict[str, List[str]] = { - # 攻击类型 - "vulnerability": ["security flaw", "weakness", "exploit", "bug", "CVE"], - "malware": ["virus", "trojan", "ransomware", "worm", "spyware", "rootkit"], - "phishing": ["social engineering", "spear phishing", "whaling"], - "ddos": ["denial of service", "distributed denial of service", "DoS attack"], - "injection": ["SQL injection", "code injection", "command injection", "XSS"], - "overflow": ["buffer overflow", "stack overflow", "heap overflow", "memory corruption"], - - # 防御技术 - "detection": ["intrusion detection", "anomaly detection", "threat detection"], - "encryption": ["cryptography", "cipher", "AES", "RSA", "TLS", "SSL"], - "authentication": ["identity verification", "MFA", "2FA", "biometrics"], - "firewall": ["network security", "packet filtering", "WAF"], - "antivirus": ["anti-malware", "endpoint protection", "EDR"], - - # 研究领域 - "fuzzing": ["fuzz testing", "mutation testing", "coverage-guided fuzzing", "AFL"], - "binary analysis": ["reverse engineering", 
"disassembly", "decompilation"], - "program analysis": ["static analysis", "dynamic analysis", "taint analysis"], - "formal verification": ["model checking", "theorem proving", "symbolic execution"], - "machine learning security": ["adversarial ML", "ML robustness", "AI security"], - - # 会议/标准 - "top venue": ["S&P", "CCS", "USENIX Security", "NDSS"], - "IEEE S&P": ["Oakland", "IEEE Symposium on Security and Privacy"], - "CCS": ["ACM CCS", "ACM Conference on Computer and Communications Security"], - "USENIX Security": ["USENIX Security Symposium"], - "NDSS": ["Network and Distributed System Security"], -} - -# 常见缩写展开 -ABBREVIATION_EXPANSIONS: Dict[str, str] = { - "ML": "machine learning", - "DL": "deep learning", - "AI": "artificial intelligence", - "NLP": "natural language processing", - "IoT": "Internet of Things", - "ICS": "industrial control systems", - "SCADA": "supervisory control and data acquisition", - "APT": "advanced persistent threat", - "CVE": "Common Vulnerabilities and Exposures", - "CTF": "capture the flag", - "PWN": "binary exploitation", - "RE": "reverse engineering", - "ROP": "return-oriented programming", - "ASLR": "address space layout randomization", - "DEP": "data execution prevention", - "CFI": "control flow integrity", - "SGX": "Software Guard Extensions", - "TEE": "trusted execution environment", - "TLS": "Transport Layer Security", - "PKI": "public key infrastructure", -} - - -# ===== 数据结构 ===== - -@dataclass -class OptimizedQuery: - """优化后的查询""" - original: str - optimized: str - expansions: List[str] = field(default_factory=list) - score: float = 1.0 - metadata: Dict[str, Any] = field(default_factory=dict) - - -# ===== 关键词优化器 ===== - -class KeywordOptimizer: - """ - 关键词优化器 - - 提供查询扩展和优化功能,提升搜索命中率 - """ - - def __init__( - self, - synonyms: Optional[Dict[str, List[str]]] = None, - abbreviations: Optional[Dict[str, str]] = None, - llm_client: Any = None, - ): - """ - 初始化优化器 - - Args: - synonyms: 自定义同义词词典 - abbreviations: 自定义缩写词典 - 
llm_client: LLM 客户端 (用于高级重写) - """ - self.synonyms = {**SECURITY_SYNONYMS, **(synonyms or {})} - self.abbreviations = {**ABBREVIATION_EXPANSIONS, **(abbreviations or {})} - self.llm_client = llm_client - - def expand_abbreviations(self, query: str) -> str: - """ - 展开查询中的缩写 - - Args: - query: 原始查询 - - Returns: - 展开后的查询 - """ - words = query.split() - expanded = [] - - for word in words: - upper_word = word.upper() - if upper_word in self.abbreviations: - # 保留原词并添加展开形式 - expanded.append(f"{word} ({self.abbreviations[upper_word]})") - else: - expanded.append(word) - - return " ".join(expanded) - - def get_synonyms(self, term: str) -> List[str]: - """ - 获取术语的同义词 - - Args: - term: 术语 - - Returns: - 同义词列表 - """ - term_lower = term.lower() - - # 直接匹配 - if term_lower in self.synonyms: - return self.synonyms[term_lower] - - # 部分匹配 - results = [] - for key, values in self.synonyms.items(): - if term_lower in key or key in term_lower: - results.extend(values) - for value in values: - if term_lower in value.lower(): - results.append(key) - break - - return list(set(results)) - - def expand_query(self, query: str, max_expansions: int = 3) -> OptimizedQuery: - """ - 扩展查询 - - Args: - query: 原始查询 - max_expansions: 最大扩展数量 - - Returns: - 优化后的查询 - """ - # 1. 展开缩写 - expanded = self.expand_abbreviations(query) - - # 2. 收集同义词 - words = query.lower().split() - all_synonyms: Set[str] = set() - - for word in words: - syns = self.get_synonyms(word) - all_synonyms.update(syns[:max_expansions]) - - # 3. 
构建扩展查询 - expansions = list(all_synonyms)[:max_expansions] - - if expansions: - expansion_str = " OR ".join(f'"{e}"' for e in expansions) - optimized = f"({expanded}) OR ({expansion_str})" - else: - optimized = expanded - - return OptimizedQuery( - original=query, - optimized=optimized, - expansions=expansions, - ) - - def optimize_for_semantic_scholar(self, query: str) -> OptimizedQuery: - """ - 针对 Semantic Scholar API 优化查询 - - Args: - query: 原始查询 - - Returns: - 优化后的查询 - """ - # Semantic Scholar 搜索技巧: - # - 使用引号进行精确匹配 - # - 简洁明了的关键词效果更好 - # - 避免过长的查询 - - # 1. 基本清理 - cleaned = query.strip() - - # 2. 检测并保留已有引号 - if '"' in cleaned: - # 用户已经使用了精确匹配,保持原样 - optimized = cleaned - expansions = [] - else: - # 3. 识别核心术语 - core_terms = self._extract_core_terms(cleaned) - - # 4. 展开缩写但不过度扩展 - expanded_terms = [] - expansions = [] - - for term in core_terms: - upper_term = term.upper() - if upper_term in self.abbreviations: - expanded_terms.append(self.abbreviations[upper_term]) - expansions.append(self.abbreviations[upper_term]) - else: - expanded_terms.append(term) - - optimized = " ".join(expanded_terms) - - return OptimizedQuery( - original=query, - optimized=optimized, - expansions=expansions, - metadata={"target": "semantic_scholar"}, - ) - - def _extract_core_terms(self, query: str) -> List[str]: - """提取核心术语""" - # 简单的停用词过滤 - stop_words = { - "a", "an", "the", "in", "on", "at", "to", "for", "of", - "and", "or", "but", "is", "are", "was", "were", "be", - "with", "by", "from", "as", "into", "through", "during", - "before", "after", "above", "below", "between", "under", - } - - words = query.lower().split() - core = [w for w in words if w not in stop_words and len(w) > 2] - - return core if core else words - - def generate_search_variants(self, query: str, num_variants: int = 3) -> List[OptimizedQuery]: - """ - 生成多个搜索变体 - - Args: - query: 原始查询 - num_variants: 变体数量 - - Returns: - 查询变体列表 - """ - variants = [] - - # 变体1: 原始查询 - variants.append(OptimizedQuery( - original=query, 
- optimized=query, - score=1.0, - metadata={"type": "original"}, - )) - - # 变体2: 缩写展开 - expanded = self.expand_abbreviations(query) - if expanded != query: - variants.append(OptimizedQuery( - original=query, - optimized=expanded, - score=0.9, - metadata={"type": "abbreviation_expanded"}, - )) - - # 变体3: 同义词扩展 - syn_query = self.expand_query(query) - if syn_query.optimized != query: - syn_query.score = 0.8 - syn_query.metadata["type"] = "synonym_expanded" - variants.append(syn_query) - - # 变体4: 精确匹配 (如果查询足够短) - if len(query.split()) <= 4: - variants.append(OptimizedQuery( - original=query, - optimized=f'"{query}"', - score=0.7, - metadata={"type": "exact_match"}, - )) - - return variants[:num_variants] - - async def rewrite_with_llm(self, query: str, context: Optional[str] = None) -> OptimizedQuery: - """ - 使用 LLM 重写查询 - - Args: - query: 原始查询 - context: 额外上下文 - - Returns: - 重写后的查询 - """ - if not self.llm_client: - logger.warning("LLM 客户端未配置,返回原始查询") - return OptimizedQuery(original=query, optimized=query) - - prompt = f"""你是一个学术搜索专家。请将以下查询优化为更适合在学术数据库(如 Semantic Scholar)中搜索的形式。 - -原始查询: {query} -{f"上下文: {context}" if context else ""} - -要求: -1. 使用标准学术术语 -2. 保持简洁(不超过6个关键词) -3. 去除无关词汇 -4. 
如有必要,展开缩写 - -只返回优化后的查询,不要解释。""" - - try: - response = await self.llm_client.invoke_async( - messages=[{"role": "user", "content": prompt}] - ) - optimized = response.strip().strip('"') - - return OptimizedQuery( - original=query, - optimized=optimized, - metadata={"type": "llm_rewritten"}, - ) - except Exception as e: - logger.error(f"LLM 重写失败: {e}") - return OptimizedQuery(original=query, optimized=query) - - -# ===== 安全论文查询构建器 ===== - -class SecurityPaperQueryBuilder: - """ - 安全论文查询构建器 - - 专门用于构建安全领域的学术搜索查询 - """ - - TOP_VENUES = [ - "IEEE S&P", - "CCS", - "USENIX Security", - "NDSS", - ] - - ATTACK_CATEGORIES = { - "web": ["XSS", "CSRF", "SQL injection", "SSRF", "web vulnerability"], - "network": ["DDoS", "man-in-the-middle", "network attack", "traffic analysis"], - "system": ["buffer overflow", "kernel exploit", "privilege escalation", "rootkit"], - "mobile": ["Android security", "iOS security", "mobile malware", "app vulnerability"], - "iot": ["IoT security", "smart device", "embedded security", "firmware"], - "ml": ["adversarial", "model poisoning", "backdoor attack", "evasion attack"], - } - - DEFENSE_CATEGORIES = { - "detection": ["intrusion detection", "anomaly detection", "malware detection"], - "prevention": ["access control", "sandboxing", "isolation", "mitigation"], - "analysis": ["static analysis", "dynamic analysis", "fuzzing", "symbolic execution"], - "crypto": ["encryption", "authentication", "secure protocol", "key management"], - } - - def __init__(self, optimizer: Optional[KeywordOptimizer] = None): - """ - 初始化构建器 - - Args: - optimizer: 关键词优化器 - """ - self.optimizer = optimizer or KeywordOptimizer() - - def build_attack_query(self, attack_type: str, specific_terms: Optional[List[str]] = None) -> str: - """ - 构建攻击类型查询 - - Args: - attack_type: 攻击类别 (web, network, system, mobile, iot, ml) - specific_terms: 特定术语 - - Returns: - 查询字符串 - """ - base_terms = self.ATTACK_CATEGORIES.get(attack_type.lower(), [attack_type]) - all_terms = base_terms + 
(specific_terms or []) - - return " OR ".join(f'"{term}"' for term in all_terms) - - def build_defense_query(self, defense_type: str, specific_terms: Optional[List[str]] = None) -> str: - """ - 构建防御类型查询 - - Args: - defense_type: 防御类别 (detection, prevention, analysis, crypto) - specific_terms: 特定术语 - - Returns: - 查询字符串 - """ - base_terms = self.DEFENSE_CATEGORIES.get(defense_type.lower(), [defense_type]) - all_terms = base_terms + (specific_terms or []) - - return " OR ".join(f'"{term}"' for term in all_terms) - - def build_venue_filter(self, venues: Optional[List[str]] = None) -> str: - """ - 构建会议/期刊过滤 - - Args: - venues: 会议/期刊列表,默认为四大安全顶会 - - Returns: - venue 过滤字符串 - """ - target_venues = venues or self.TOP_VENUES - return " OR ".join(f'venue:"{v}"' for v in target_venues) - - def build_comprehensive_query( - self, - topic: str, - attack_type: Optional[str] = None, - defense_type: Optional[str] = None, - top_venues_only: bool = False, - year_range: Optional[str] = None, - ) -> str: - """ - 构建综合查询 - - Args: - topic: 主题 - attack_type: 攻击类别 - defense_type: 防御类别 - top_venues_only: 是否只搜索顶会 - year_range: 年份范围 - - Returns: - 综合查询字符串 - """ - parts = [topic] - - if attack_type: - parts.append(f"({self.build_attack_query(attack_type)})") - - if defense_type: - parts.append(f"({self.build_defense_query(defense_type)})") - - query = " AND ".join(parts) - - if top_venues_only: - query = f"({query}) AND ({self.build_venue_filter()})" - - return query - - -__all__ = [ - # 数据结构 - "OptimizedQuery", - # 优化器 - "KeywordOptimizer", - "SecurityPaperQueryBuilder", - # 常量 - "SECURITY_SYNONYMS", - "ABBREVIATION_EXPANSIONS", -] diff --git a/src/paperbot/utils/smart_downloader.py b/src/paperbot/utils/smart_downloader.py deleted file mode 100644 index 35333aaf..00000000 --- a/src/paperbot/utils/smart_downloader.py +++ /dev/null @@ -1,243 +0,0 @@ -# utils/smart_downloader.py - -import asyncio -import time -from typing import Dict, List, Any, Optional -from collections import deque -from 
dataclasses import dataclass -import logging -from paperbot.utils.downloader import PaperDownloader - -@dataclass -class DownloadStats: - """下载统计信息""" - total_attempts: int = 0 - successful_downloads: int = 0 - failed_downloads: int = 0 - cached_hits: int = 0 - avg_download_time: float = 0.0 - current_success_rate: float = 1.0 - consecutive_failures: int = 0 - -class SmartDownloadManager: - """智能下载管理器 - 动态调整并发数""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.logger = logging.getLogger(__name__) - - # 创建基础下载器 - self.downloader = PaperDownloader(config) - - # 并发控制参数 - self.min_concurrent = 1 # 最小并发数 - self.max_concurrent = 4 # 最大并发数 - self.current_concurrent = 2 # 当前并发数 - - # 性能监控参数 - self.stats = DownloadStats() - self.recent_times = deque(maxlen=10) # 最近10次下载时间 - self.adjustment_threshold = 5 # 调整并发数的评估周期 - - # 安全参数 - self.failure_threshold = 0.3 # 失败率阈值 (30%) - self.slow_threshold = 10.0 # 慢下载阈值 (10秒) - self.rest_interval = 1.0 # 请求间隔 - - # 创建信号量 - self.semaphore = asyncio.Semaphore(self.current_concurrent) - - self.logger.info(f"智能下载管理器初始化 - 并发范围: {self.min_concurrent}-{self.max_concurrent}") - - async def download_papers_smart(self, papers: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """智能批量下载论文""" - if not papers: - return [] - - valid_papers = [p for p in papers if p.get('url') and p.get('url').strip()] - if not valid_papers: - self.logger.warning("没有找到有效的PDF下载链接") - return [] - - self.logger.info(f"🚀 开始智能下载 {len(valid_papers)} 篇论文") - self.logger.info(f"📊 初始并发数: {self.current_concurrent}") - - start_time = time.time() - results = [] - - # 分批处理,每批动态调整并发数 - batch_size = max(8, self.current_concurrent * 2) # 批次大小 - - for i in range(0, len(valid_papers), batch_size): - batch = valid_papers[i:i + batch_size] - batch_results = await self._process_batch(batch, i, len(valid_papers)) - results.extend(batch_results) - - # 动态调整并发数 - await self._adjust_concurrency() - - # 批次间休息 - if i + batch_size < 
len(valid_papers): - await asyncio.sleep(self.rest_interval) - - # 最终统计 - total_time = time.time() - start_time - self._print_final_stats(results, total_time) - - return results - - async def _process_batch(self, batch: List[Dict[str, Any]], start_idx: int, total: int) -> List[Dict[str, Any]]: - """处理一个批次的下载""" - self.logger.info(f"\n📦 处理批次 [{start_idx+1}-{min(start_idx+len(batch), total)}/{total}] - 并发数: {self.current_concurrent}") - - # 创建下载任务 - tasks = [] - for i, paper in enumerate(batch): - task = self._download_with_monitoring(paper, start_idx + i + 1, total) - tasks.append(task) - - # 执行批次下载 - results = await asyncio.gather(*tasks, return_exceptions=True) - - # 处理异常结果 - processed_results = [] - for result in results: - if isinstance(result, Exception): - self.logger.error(f"批次下载异常: {str(result)}") - processed_results.append({'success': False, 'error': str(result)}) - else: - processed_results.append(result) - - return processed_results - - async def _download_with_monitoring(self, paper: Dict[str, Any], index: int, total: int) -> Dict[str, Any]: - """带监控的单篇论文下载""" - async with self.semaphore: - start_time = time.time() - - try: - print(f"🔄 [{index}/{total}] 下载: {paper['title'][:50]}...") - - # 执行下载 - result = await self.downloader.download_paper(paper['url'], paper['title']) - download_time = time.time() - start_time - - # 更新统计信息 - self._update_stats(result, download_time) - - # 显示结果 - if result and result.get('success'): - if result.get('cached'): - print(f"📋 缓存命中 (耗时: {download_time:.1f}s)") - else: - size_kb = result.get('size', 0) / 1024 - print(f"✅ 下载成功 (耗时: {download_time:.1f}s, 大小: {size_kb:.1f}KB)") - else: - error_msg = result.get('error', '未知错误') if result else '下载失败' - print(f"❌ 下载失败: {error_msg}") - - return result or {'success': False, 'error': '下载失败'} - - except Exception as e: - download_time = time.time() - start_time - self._update_stats({'success': False}, download_time) - self.logger.error(f"下载异常 [{index}/{total}]: {str(e)}") - print(f"❌ 
下载异常: {str(e)}") - return {'success': False, 'error': str(e)} - - def _update_stats(self, result: Dict[str, Any], download_time: float): - """更新下载统计信息""" - self.stats.total_attempts += 1 - - if result and result.get('success'): - if result.get('cached'): - self.stats.cached_hits += 1 - else: - self.stats.successful_downloads += 1 - self.recent_times.append(download_time) - self.stats.consecutive_failures = 0 - else: - self.stats.failed_downloads += 1 - self.stats.consecutive_failures += 1 - - # 计算成功率 - if self.stats.total_attempts > 0: - self.stats.current_success_rate = ( - self.stats.successful_downloads + self.stats.cached_hits - ) / self.stats.total_attempts - - # 计算平均下载时间 - if self.recent_times: - self.stats.avg_download_time = sum(self.recent_times) / len(self.recent_times) - - async def _adjust_concurrency(self): - """动态调整并发数""" - if self.stats.total_attempts < self.adjustment_threshold: - return # 样本太少,不调整 - - old_concurrent = self.current_concurrent - - # 决策逻辑 - if self.stats.consecutive_failures >= 3: - # 连续失败,降低并发 - self.current_concurrent = max(self.min_concurrent, self.current_concurrent - 1) - reason = f"连续失败{self.stats.consecutive_failures}次" - - elif self.stats.current_success_rate < self.failure_threshold: - # 成功率太低,降低并发 - self.current_concurrent = max(self.min_concurrent, self.current_concurrent - 1) - reason = f"成功率过低({self.stats.current_success_rate:.1%})" - - elif self.stats.avg_download_time > self.slow_threshold: - # 平均速度太慢,降低并发 - self.current_concurrent = max(self.min_concurrent, self.current_concurrent - 1) - reason = f"平均速度过慢({self.stats.avg_download_time:.1f}s)" - - elif (self.stats.current_success_rate > 0.8 and - self.stats.avg_download_time < 5.0 and - self.stats.consecutive_failures == 0): - # 表现良好,增加并发 - self.current_concurrent = min(self.max_concurrent, self.current_concurrent + 1) - reason = f"性能良好(成功率{self.stats.current_success_rate:.1%})" - else: - return # 保持当前并发数 - - # 如果并发数发生变化,更新信号量 - if old_concurrent != 
self.current_concurrent: - self.logger.info(f"🔧 调整并发数: {old_concurrent} → {self.current_concurrent} ({reason})") - - # 创建新的信号量 - self.semaphore = asyncio.Semaphore(self.current_concurrent) - - # 调整后稍作休息 - await asyncio.sleep(2.0) - - def _print_final_stats(self, results: List[Dict[str, Any]], total_time: float): - """打印最终统计信息""" - success_count = sum(1 for r in results if r.get('success')) - cached_count = sum(1 for r in results if r.get('success') and r.get('cached')) - download_count = success_count - cached_count - - print(f"\n🎉 智能下载完成统计:") - print(f"✅ 成功下载: {success_count}/{len(results)} 篇论文") - print(f"📋 缓存命中: {cached_count} 篇") - print(f"⬇️ 实际下载: {download_count} 篇") - print(f"⏱️ 总耗时: {total_time:.1f} 秒") - print(f"🔧 最终并发数: {self.current_concurrent}") - - if download_count > 0: - avg_time = total_time / download_count - print(f"📊 平均速度: {avg_time:.2f} 秒/篇") - - success_rate = success_count / len(results) if results else 0 - print(f"📈 成功率: {success_rate:.1%}") - - # 性能总结 - if success_rate >= 0.9: - print(f"🏆 下载性能: 优秀") - elif success_rate >= 0.8: - print(f"👍 下载性能: 良好") - elif success_rate >= 0.6: - print(f"⚠️ 下载性能: 一般") - else: - print(f"❌ 下载性能: 需要优化") \ No newline at end of file diff --git a/tests/unit/test_utils_cleanup_contracts.py b/tests/unit/test_utils_cleanup_contracts.py new file mode 100644 index 00000000..a90bffc1 --- /dev/null +++ b/tests/unit/test_utils_cleanup_contracts.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import ast +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[2] +UTILS_ROOT = ROOT / "src" / "paperbot" / "utils" +SCAN_ROOTS = ( + ROOT / "main.py", + ROOT / "src", + ROOT / "tests", + ROOT / "cli", +) +REMOVED_FILES = ( + "CCS-DOWN.py", + "acm_extractor.py", + "conference_downloader.py", + "conference_helpers.py", + "conference_parsers.py", + "conference_parsers_new.py", + "downloader - ccs.py", + "downloader_back.py", + "keyword_optimizer.py", + "smart_downloader.py", +) +REMOVED_MODULES = ( + 
"acm_extractor", + "conference_downloader", + "conference_helpers", + "conference_parsers", + "conference_parsers_new", + "downloader_back", + "keyword_optimizer", + "smart_downloader", +) + + +def _iter_python_files(): + for path in SCAN_ROOTS: + if path.is_file(): + yield path + continue + yield from sorted(path.rglob("*.py")) + + +def test_orphaned_utils_variants_are_removed() -> None: + for filename in REMOVED_FILES: + assert not (UTILS_ROOT / filename).exists(), filename + + +def test_repo_does_not_import_removed_utils_modules() -> None: + removed_imports = {f"paperbot.utils.{name}" for name in REMOVED_MODULES} + for path in _iter_python_files(): + tree = ast.parse(path.read_text(encoding="utf-8-sig"), filename=str(path)) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + assert alias.name not in removed_imports, f"{path.relative_to(ROOT)}: import {alias.name}" + elif isinstance(node, ast.ImportFrom) and node.module: + assert node.module not in removed_imports, ( + f"{path.relative_to(ROOT)}: from {node.module} import ..." + ) + + +def test_utils_directory_has_no_backup_or_invalid_python_filenames() -> None: + invalid = [] + + for path in UTILS_ROOT.glob("*.py"): + name = path.name + if " " in name or name != name.lower(): + invalid.append(name) + if name.endswith("_back.py") or name.endswith("_new.py"): + invalid.append(name) + + assert invalid == []