#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
并发下载图片工具 (Python版本 - 纯下载模式)

功能：
1. 从 vod_items 表读取 vod_en 和 vod_pic
2. 使用多线程并发下载图片
3. 使用 MD5 分目录存储
4. 仅检查文件是否存在，不存在则下载

使用方法：python download-img.py
"""

import os
import sys
import hashlib
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import pymysql
import time
import urllib3

# 禁用 SSL 警告
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# ==================== 配置区域 ====================
# 数据库配置
DB_CONFIG = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'password': 'MuYehenlihai888#',
    'database': 'movie_db',
    'charset': 'utf8mb4'
}

# 上传目录（相对于脚本位置）
UPLOAD_DIR = os.path.join(os.path.dirname(__file__), '../www/', 'uploads')

# 服务器 IP 列表（用于绑定源 IP）
SERVER_IPS = [
    '137.175.67.148',
    '137.175.67.149',
    '137.175.67.150',
    '137.175.67.151',
    '137.175.67.152'
]

# 每个 IP 的并发数
CONCURRENT_PER_IP = 2

# 总并发线程数（IP数 × 每个IP的并发数）
MAX_WORKERS = len(SERVER_IPS) * CONCURRENT_PER_IP

# 超时设置（秒）
REQUEST_TIMEOUT = 5

# 每批处理数量
BATCH_SIZE = 3000

# 重试次数
MAX_RETRIES = 2

# SSL 验证（禁用自签名证书验证）
SSL_VERIFY = False


def get_db_connection():
    """获取数据库连接"""
    return pymysql.connect(
        host=DB_CONFIG['host'],
        port=DB_CONFIG['port'],
        user=DB_CONFIG['user'],
        password=DB_CONFIG['password'],
        database=DB_CONFIG['database'],
        charset=DB_CONFIG['charset'],
        cursorclass=pymysql.cursors.DictCursor
    )


def generate_image_path(vod_en, base_dir, extension='jpg'):
    """
    根据 ID 生成图片路径（MD5 分目录存储）
    
    Args:
        vod_en: 图片ID
        base_dir: 基础目录
        extension: 文件扩展名
    
    Returns:
        tuple: (相对路径, 完整路径)
    """
    # 生成 MD5 哈希值
    hash_value = hashlib.md5(str(vod_en).encode()).hexdigest()
    
    # 前2位作为目录名
    dir_name = hash_value[:2]
    
    # 接下来10位作为文件名
    file_name = hash_value[2:12]
    
    # 完整物理路径
    full_dir = os.path.join(base_dir, dir_name)
    
    # 创建目录（如果不存在）
    os.makedirs(full_dir, mode=0o755, exist_ok=True)
    
    # 相对路径（用于访问）
    relative_path = f'/{dir_name}/{file_name}.{extension}'
    
    # 完整保存路径
    save_path = os.path.join(full_dir, f'{file_name}.{extension}')
    
    return relative_path, save_path


def format_friendly_size(size_bytes):
    """
    格式化友好尺寸（统一用KB表示，保留一位小数）
    
    Args:
        size_bytes: 文件大小（字节）
    
    Returns:
        float: KB 值，保留一位小数
    """
    kb = size_bytes / 1024
    return round(kb, 1)


def download_single_image(args):
    """
    下载单张图片（简化版，只下载不更新数据库）
    
    Args:
        args: dict，包含 vod_en, vod_pic, upload_dir, source_ip
    
    Returns:
        dict: 结果信息
    """
    vod_en = args['vod_en']
    vod_pic = args['vod_pic']
    upload_dir = args['upload_dir']
    source_ip = args.get('source_ip')
    session = args.get('session')
    
    result = {
        'vod_en': vod_en,
        'success': False,
        'skipped': False,
        'error': None
    }
    
    try:
        # 获取文件扩展名
        extension = os.path.splitext(vod_pic)[1].lstrip('.')
        if not extension:
            extension = 'jpg'
        
        # 生成文件路径
        relative_path, save_path = generate_image_path(vod_en, upload_dir, extension)
        
        # 检查文件是否已存在
        if os.path.exists(save_path):
            result['skipped'] = True
            return result
        
        # 如果没有提供 session，创建一个临时的
        use_session = session
        close_session = False
        if not use_session:
            use_session = requests.Session()
            if source_ip:
                from requests.adapters import HTTPAdapter
                
                class SourceIPAdapter(HTTPAdapter):
                    def __init__(self, source_ip, *args, **kwargs):
                        self.source_ip = source_ip
                        super().__init__(*args, **kwargs)
                    
                    def init_poolmanager(self, connections, maxsize, block=False):
                        super().init_poolmanager(connections, maxsize, block, source_address=(self.source_ip, 0))
                
                adapter = SourceIPAdapter(source_ip)
                use_session.mount('http://', adapter)
                use_session.mount('https://', adapter)
            close_session = True
        
        # 下载图片（带重试）
        for attempt in range(MAX_RETRIES + 1):
            try:
                response = use_session.get(
                    vod_pic,
                    timeout=REQUEST_TIMEOUT,
                    verify=SSL_VERIFY,
                    stream=True
                )
                response.raise_for_status()
                
                # 保存图片
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                
                result['success'] = True
                break  # 成功则跳出重试循环
                
            except Exception as e:
                if attempt == MAX_RETRIES:
                    result['error'] = str(e)
                    if os.path.exists(save_path):
                        os.remove(save_path)
                else:
                    time.sleep(0.1)
        
        if close_session:
            use_session.close()
                    
    except Exception as e:
        result['error'] = str(e)
    
    return result



def main():
    """主函数"""
    print("=" * 60)
    print("  并发下载图片工具 (Python版)")
    print("=" * 60)
    print()
    
    # 创建上传目录
    if not os.path.exists(UPLOAD_DIR):
        print(f"创建上传目录: {UPLOAD_DIR}")
        os.makedirs(UPLOAD_DIR, mode=0o777, exist_ok=True)
        print("✓ 目录创建成功\n")
    else:
        print("✓ 上传目录已存在\n")
    
    # 连接数据库
    print("正在连接数据库...")
    try:
        conn = get_db_connection()
        print("✓ 数据库连接成功\n")
    except Exception as e:
        print(f"✗ 数据库连接失败: {e}")
        sys.exit(1)
    
    # 获取总记录数（只查询有图片的记录）
    print("正在查询 vod_items 表...")
    try:
        with conn.cursor() as cursor:
            sql = "SELECT COUNT(*) as total FROM vod_items WHERE vod_pic IS NOT NULL AND vod_pic != ''"
            cursor.execute(sql)
            result = cursor.fetchone()
            total = result['total']
        
        print(f"共找到 {total} 条记录\n")
        
        if total == 0:
            print("✗ 没有需要下载的图片")
            conn.close()
            sys.exit(1)
            
    except Exception as e:
        print(f"✗ 查询失败: {e}")
        conn.close()
        sys.exit(1)
    
    # 分批处理
    batch_size = BATCH_SIZE
    offset = 0
    processed_batches = 0
    
    # 统计信息
    total_success = 0
    total_fail = 0
    total_skip = 0
    total_size = 0
    
    start_time = time.time()
    
    print(f"开始分批下载图片（每批 {batch_size} 条）...")
    print(f"服务器 IP: {', '.join(SERVER_IPS)}")
    print(f"并发配置: {CONCURRENT_PER_IP} 线程/IP × {len(SERVER_IPS)} IP = {MAX_WORKERS} 总线程")
    print("-" * 60)
    
    while offset < total:
        # 获取当前批次数据
        try:
            with conn.cursor() as cursor:
                sql = """SELECT vod_en, vod_pic FROM vod_items 
                         WHERE vod_pic IS NOT NULL AND vod_pic != '' 
                         LIMIT %s OFFSET %s"""
                cursor.execute(sql, (batch_size, offset))
                rows = cursor.fetchall()
            
            current_batch = len(rows)
            if current_batch == 0:
                break
            
            print(f"\n处理第 {processed_batches + 1} 批，共 {current_batch} 条记录...")
            
            # 准备下载任务（轮询分配 IP）
            tasks = []
            ip_index = 0
            for row in rows:
                if row['vod_pic'] and row['vod_pic'].strip():
                    # 轮询分配源 IP
                    source_ip = SERVER_IPS[ip_index % len(SERVER_IPS)]
                    tasks.append({
                        'vod_en': row['vod_en'],
                        'vod_pic': row['vod_pic'].strip(),
                        'upload_dir': UPLOAD_DIR,
                        'source_ip': source_ip
                    })
                    ip_index += 1
            
            if not tasks:
                offset += batch_size
                processed_batches += 1
                continue
            
            # 使用线程池并发下载
            batch_success = 0
            batch_fail = 0
            batch_skip = 0
            
            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
                # 提交所有任务
                future_to_task = {
                    executor.submit(download_single_image, task): task 
                    for task in tasks
                }
                
                # 使用进度条
                with tqdm(total=len(tasks), desc=f"第{processed_batches + 1}批进度") as pbar:
                    for future in as_completed(future_to_task):
                        task = future_to_task[future]
                        try:
                            result = future.result()
                            
                            if result['skipped']:
                                batch_skip += 1
                            elif result['success']:
                                batch_success += 1
                            else:
                                batch_fail += 1
                                if batch_fail <= 20:
                                    print(f"✗ 下载失败: {result['vod_en']} - {result['error']}")
                            
                            pbar.update(1)
                            
                        except Exception as e:
                            batch_fail += 1
                            if batch_fail <= 20:
                                print(f"✗ 任务异常: {task['vod_en']} - {e}")
                            pbar.update(1)
            
            # 更新统计
            total_success += batch_success
            total_fail += batch_fail
            total_skip += batch_skip
            
            print(f"本批完成 | 成功: {batch_success} | 失败: {batch_fail} | 跳过: {batch_skip}")
            print(f"累计 | 成功: {total_success} | 失败: {total_fail} | 跳过: {total_skip}")
            
            # 释放结果集
            offset += batch_size
            processed_batches += 1
            
        except Exception as e:
            print(f"✗ 批次处理失败: {e}")
            break
    
    end_time = time.time()
    elapsed = end_time - start_time
    
    # 显示统计信息
    print("\n" + "-" * 60)
    print("下载完成！\n")
    print("统计信息:")
    print("-" * 40)
    print(f"成功下载:       {total_success}")
    print(f"失败:           {total_fail}")
    print(f"跳过(已存在):   {total_skip}")
    print(f"总大小:         {format_size(total_size)}")
    print(f"总耗时:         {elapsed:.2f} 秒")
    if elapsed > 0:
        print(f"平均速度:       {total_success / elapsed:.2f} 个/秒")
        print(f"网络速度:       {format_size(total_size / elapsed)}/s")
    print(f"并发线程数:     {MAX_WORKERS}")
    
    # 关闭数据库连接
    conn.close()
    
    print("\n" + "=" * 60)
    print("工具执行完毕")
    print("=" * 60)


def format_size(size_bytes):
    """格式化文件大小（用于显示）"""
    if size_bytes == 0:
        return '0 B'
    
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    i = 0
    while size_bytes >= 1024 and i < len(units) - 1:
        size_bytes /= 1024
        i += 1
    
    return f"{size_bytes:.2f} {units[i]}"


if __name__ == '__main__':
    main()
