initial

2025-09-05 00:40:39 +08:00
commit d092984a85
4 changed files with 1233 additions and 0 deletions
--- a/api.py
+++ b/api.py
@@ -0,0 +1,487 @@
+"""
+Digital Human API Module
+处理语音生成、视频生成、数字人创建等功能的API模块
+"""
+
+import requests
+from PIL import Image, ImageDraw, ImageFont
+from moviepy import ImageClip, VideoFileClip, TextClip, CompositeVideoClip
+import tempfile
+import os
+import subprocess
+import whisper
+from flask import send_file, abort
+import mimetypes
+import logging
+from typing import Optional, Dict, Any, Tuple, List
+import json
+from pathlib import Path
+import time
+ # Assumption: this module is used to handle file uploads and file management
+
+# Configure logging: set the root logger to INFO when this module is imported
+# and create the module-level logger used by every function below.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# 配置常量
+class Config:
+    """Service endpoints, filesystem paths and default request parameters."""
+    # Base URLs of the local voice and video services
+    VOICE_SERVICE_URL = "http://127.0.0.1:18180"
+    VIDEO_SERVICE_URL = "http://127.0.0.1:8383"
+    
+    # Filesystem locations. os.path.expanduser resolves a leading "~";
+    # the absolute /mnt paths pass through it unchanged.
+    RESOURCE_DIR = os.path.expanduser("/mnt/docker/resource")
+    TEMP_DIR = os.path.expanduser("/mnt/docker/code/data/temp")
+    VOICE_DATA_DIR = os.path.expanduser("~/heygem_data/voice/data")
+    FACE2FACE_TEMP_DIR = os.path.expanduser("~/heygem_data/face2face/temp")
+    
+    # Recognised file extensions (lower-case, including the leading dot)
+    VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm']
+    AUDIO_EXTENSIONS = ['.wav', '.mp3', '.aac', '.flac', '.ogg', '.m4a', '.wma']
+    
+    # Default parameters merged into every voice synthesis request
+    DEFAULT_VOICE_PARAMS = {
+        "format": "wav",
+        "topP": 0.7,
+        "max_new_tokens": 1024,
+        "chunk_length": 100,
+        "repetition_penalty": 1.2,
+        "temperature": 0.7,
+        "need_asr": False,
+        "streaming": False,
+        "is_fixed_seed": 0,
+        "is_norm": 0,
+    }
+
+class APIException(Exception):
+    """自定义API异常类"""
+    def __init__(self, message: str, status_code: int = 500):
+        self.message = message
+        self.status_code = status_code
+        super().__init__(self.message)
+
+def _make_request(url: str, data: Dict[str, Any], method: str = "POST") -> Dict[str, Any]:
+    """
+    统一的HTTP请求处理函数
+    """
+    try:
+        response = None
+        if method.upper() == "POST":
+            response = requests.post(url, json=data, timeout=6000000)
+        else:
+            response = requests.get(url, params=data, timeout=600000)
+            
+        response.raise_for_status() # 检查HTTP状态码
+        
+        if response.headers.get('content-type', '').startswith('application/json'):
+            return response.json()
+        return {"content": response.content}
+            
+    except requests.exceptions.HTTPError as e:
+        logger.error(f"HTTP request failed to {url}: {e.response.status_code} - {e.response.text}")
+        raise APIException(f"Request failed: {e.response.text}", e.response.status_code)
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Network error during request to {url}: {str(e)}")
+        raise APIException(f"Network error: {str(e)}", 500)
+
+
+def _ensure_directory(directory: str) -> str:
+    """
+    确保目录存在
+    """
+    expanded_dir = os.path.expanduser(directory)
+    os.makedirs(expanded_dir, exist_ok=True)
+    return expanded_dir
+
+
+def _copy_file(source: str, destination: str) -> None:
+    """
+    安全地复制文件
+    """
+    try:
+        subprocess.run(["cp", source, destination], check=True)
+        logger.info(f"File copied from {source} to {destination}")
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Failed to copy file: {str(e)}")
+        raise APIException(f"Failed to copy file: {str(e)}", 500)
+    except Exception as e:
+        logger.error(f"Unexpected error during file copy: {str(e)}")
+        raise APIException(f"Unexpected error during file copy: {str(e)}", 500)
+
+
+def _convert_file_format(input_path: str, output_format: str) -> str:
+    """
+    使用ffmpeg将文件转换为指定格式。
+    
+    Args:
+        input_path: 输入文件路径
+        output_format: 目标格式，例如 "mp4" 或 "wav"
+        
+    Returns:
+        转换后的新文件路径
+        
+    Raises:
+        APIException: 转换失败时抛出异常
+    """
+    input_filename = os.path.basename(input_path)
+    output_filename = f"{os.path.splitext(input_filename)[0]}.{output_format}"
+    output_path = os.path.join(tempfile.gettempdir(), output_filename)
+    
+    logger.info(f"Converting {input_path} to {output_format} format...")
+    
+    try:
+        if output_format == "wav":
+            command = ["ffmpeg", "-y", "-i", input_path, "-acodec", "pcm_s16le", "-ar", "16000", output_path]
+        elif output_format == "mp4":
+            command = ["ffmpeg", "-y", "-i", input_path, "-c", "copy", output_path]
+        else:
+            raise ValueError(f"Unsupported output format: {output_format}")
+            
+        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        logger.info(f"File successfully converted to: {output_path}")
+        return output_path
+        
+    except subprocess.CalledProcessError as e:
+        logger.error(f"FFmpeg conversion failed: {e.stderr.decode('utf-8')}")
+        raise APIException(f"File conversion failed: {e.stderr.decode('utf-8')}", 500)
+    except Exception as e:
+        logger.error(f"Unexpected error during file conversion: {str(e)}")
+        raise APIException(f"Unexpected error during file conversion: {str(e)}", 500)
+
+
+def _split_video_and_audio(video_path: str, audio_path: str) -> str:
+    """
+    将视频分离为静音视频，并将音频保存到指定路径。
+    
+    Args:
+        video_path: 输入视频路径
+        audio_path: 输出音频路径
+        
+    Returns:
+        静音视频的临时文件路径
+        
+    Raises:
+        APIException: 分离失败时抛出异常
+    """
+    try:
+        # 提取音频
+        video_clip = VideoFileClip(video_path)
+        video_clip.audio.write_audiofile(audio_path, verbose=False, logger=None)
+        
+        # 创建静音视频
+        silent_video_path = tempfile.mktemp(suffix=".mp4")
+        video_clip.write_videofile(silent_video_path, audio=False, verbose=False, logger=None)
+        
+        video_clip.close()
+        logger.info(f"Video separated. Audio saved to {audio_path}, silent video to {silent_video_path}")
+        return silent_video_path
+        
+    except Exception as e:
+        logger.error(f"Failed to split video and audio: {str(e)}")
+        raise APIException(f"Failed to split video and audio: {str(e)}", 500)
+
+
+def _preprocess_voice(voice_file_name: str) -> Dict[str, Any]:
+    """
+    调用语音服务进行预处理和训练
+    """
+    logger.info(f"Preprocessing voice model with file: {voice_file_name}")
+    url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
+    request_body = {
+        "format": "wav",
+        "reference_audio": voice_file_name,
+        "lang": "zh"
+    }
+    return _make_request(url, request_body)
+
+
+def _synthesize_voice(text: str, uuid: str, preprocess_result: Dict[str, Any]) -> str:
+    """
+    调用语音服务进行语音合成
+    """
+    logger.info(f"Synthesizing voice for UUID: {uuid}")
+    url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
+    request_body = {
+        "speaker": uuid,
+        "text": text,
+        "reference_audio": preprocess_result.get('asr_format_audio_url'),
+        "reference_text": preprocess_result.get('reference_audio_text'),
+        **Config.DEFAULT_VOICE_PARAMS
+    }
+    
+    response = requests.post(url, json=request_body, timeout=60)
+    response.raise_for_status()
+    
+    output_filename = f"{uuid}output.wav"
+    output_path = os.path.join(Config.TEMP_DIR, output_filename)
+    with open(output_path, "wb") as f:
+        f.write(response.content)
+    logger.info(f"Generated voice saved to: {output_path}")
+    return output_path
+
+
+def _submit_video_generation(audio_file_name: str, video_file_name: str, task_id: str) -> Dict[str, Any]:
+    """
+    调用视频服务提交视频合成任务
+    """
+    logger.info(f"Submitting video generation for task: {task_id}")
+    url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
+    request_body = {
+        "audio_url": audio_file_name,
+        "video_url": video_file_name,
+        "code": task_id,
+        "chaofen": 0,
+        "watermark_switch": 0,
+        "pn": 1
+    }
+    return _make_request(url, request_body)
+
+
+def generate_digital_human(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
+    """
+    生成数字人视频的主函数。
+    
+    Args:
+        speech_text: 语音文本
+        sample_video: 样本视频文件名
+        sample_voice: 样本语音文件名
+        uuid: 唯一标识符
+        
+    Returns:
+        生成结果，包含任务ID
+        
+    Raises:
+        APIException: 生成失败时抛出异常
+    """
+    logger.info(f"Starting digital human generation for UUID: {uuid}")
+    
+    # 步骤1: 确保音频文件位于正确的目录并转换为wav格式
+    try:
+        _ensure_directory(Config.VOICE_DATA_DIR)
+        voice_source_path = os.path.join(Config.RESOURCE_DIR, 'uploads', "audio", sample_voice)
+        
+        # 转换为.wav格式
+        converted_voice_path = _convert_file_format(voice_source_path, "wav")
+        converted_voice_filename = os.path.basename(converted_voice_path)
+        
+        # 复制转换后的文件到工作目录
+        voice_dest_path = os.path.join(Config.VOICE_DATA_DIR, converted_voice_filename)
+        _copy_file(converted_voice_path, voice_dest_path)
+        
+        voice_preprocess_result = _preprocess_voice(converted_voice_filename)
+        
+        # 步骤2: 生成新的音频
+        generated_audio_path = _synthesize_voice(speech_text, uuid, voice_preprocess_result)
+        
+        # 步骤3: 确保视频文件位于正确的目录并转换为mp4格式
+        _ensure_directory(Config.FACE2FACE_TEMP_DIR)
+        video_source_path = os.path.join(Config.RESOURCE_DIR, "uploads", "video", sample_video)
+        
+        # 转换为.mp4格式
+        converted_video_path = _convert_file_format(video_source_path, "mp4")
+        converted_video_filename = os.path.basename(converted_video_path)
+        
+        # 复制转换后的文件到工作目录
+        video_dest_path = os.path.join(Config.FACE2FACE_TEMP_DIR, converted_video_filename)
+        _copy_file(converted_video_path, video_dest_path)
+
+        # 步骤4: 复制生成的音频到视频服务的工作目录
+        generated_audio_filename = os.path.basename(generated_audio_path)
+        video_service_audio_path = os.path.join(Config.FACE2FACE_TEMP_DIR, generated_audio_filename)
+        _copy_file(generated_audio_path, video_service_audio_path)
+        
+        # 步骤5: 提交视频合成任务
+        task_code = f"{uuid}_video_{int(time.time())}" # 生成一个独立的任务ID
+        result = _submit_video_generation(generated_audio_filename, converted_video_filename, task_code)
+
+        logger.info(f"Digital human generation submitted successfully with task code: {task_code}")
+        return result
+
+    except APIException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to generate digital human: {str(e)}")
+        raise APIException(f"Failed to generate digital human: {str(e)}", 500)
+
+
+def get_video_generate_process(task_id: str) -> Dict[str, Any]:
+    """
+    获取视频生成进度
+    """
+    logger.info(f"Checking video generation progress for task: {task_id}")
+    url = f"{Config.VIDEO_SERVICE_URL}/easy/query"
+    return _make_request(url, {"code": task_id}, method="GET")
+
+def download_generated_video(uuid: str, task_id: Optional[str] = None) -> Any:
+    """
+    下载生成的视频文件
+    """
+    try:
+        # 尝试不同的可能路径和命名模式
+        possible_paths = [
+            # 兼容旧路径
+            os.path.join(Config.RESOURCE_DIR, f"{uuid}output.mp4"),
+            os.path.join(Config.TEMP_DIR, f"{uuid}output.mp4"),
+            # 新路径模式
+            os.path.join(Config.FACE2FACE_TEMP_DIR, f"{task_id}_result.mp4") if task_id else None,
+            os.path.join(Config.FACE2FACE_TEMP_DIR, f"{uuid}_video_*_result.mp4")
+        ]
+        
+        file_path = None
+        for path in possible_paths:
+            if not path: continue
+            expanded_path = os.path.expanduser(path)
+            if '*' in expanded_path:
+                # 处理通配符
+                for p in Path(os.path.dirname(expanded_path)).glob(os.path.basename(expanded_path)):
+                    file_path = str(p)
+                    break
+            if os.path.exists(expanded_path):
+                file_path = expanded_path
+            if file_path:
+                break
+        
+        if not file_path:
+            raise APIException(f"Generated video not found for UUID: {uuid}", 404)
+        
+        filename = os.path.basename(file_path)
+        logger.info(f"Downloading generated video: {filename}")
+        
+        return send_file(
+            file_path,
+            as_attachment=True,
+            download_name=filename,
+            mimetype='video/mp4'
+        )
+        
+    except APIException:
+        raise
+    except Exception as e:
+        logger.error(f"Error downloading generated video for UUID {uuid}: {str(e)}")
+        raise APIException(f"Error downloading generated video: {str(e)}", 500)
+
+# ... (other functions that remain unchanged, e.g. download_audio, list_available_files, etc.)
+
+def list_available_files(directory: str = None, file_type: str = "all") -> Dict[str, Any]:
+    """
+    列出系统中可用的文件
+    
+    Args:
+        directory: 目录路径，默认使用配置中的资源目录
+        file_type: 文件类型过滤器 ("video", "audio", "all")
+        
+    Returns:
+        包含文件信息的字典
+        
+    Raises:
+            APIException: 列举失败时抛出异常
+    """
+    if directory is None:
+        directory = Config.RESOURCE_DIR
+        
+    try:
+        if not os.path.exists(directory):
+            raise APIException(f"Directory '{directory}' not found", 404)
+        
+        files_info = []
+        
+        for filename in os.listdir(directory):
+            file_path = os.path.join(directory, filename)
+            
+            if os.path.isfile(file_path):
+                file_ext = os.path.splitext(filename)[1].lower()
+                is_video = file_ext in Config.VIDEO_EXTENSIONS
+                is_audio = file_ext in Config.AUDIO_EXTENSIONS
+                
+                # 根据文件类型过滤
+                if file_type == "video" and not is_video:
+                    continue
+                elif file_type == "audio" and not is_audio:
+                    continue
+                elif file_type == "all" and not (is_video or is_audio):
+                    continue
+                
+                # 获取文件信息
+                file_stats = os.stat(file_path)
+                file_info = {
+                    "filename": filename,
+                    "size": file_stats.st_size,
+                    "type": "video" if is_video else "audio" if is_audio else "other",
+                    "modified": file_stats.st_mtime,
+                    "download_url_video": f"/download/video/{filename}" if is_video else None,
+                    "download_url_audio": f"/download/audio/{filename}" if is_audio else None
+                }
+                files_info.append(file_info)
+        
+        logger.info(f"Listed {len(files_info)} files from {directory}")
+        return {
+            "directory": directory,
+            "total_files": len(files_info),
+            "files": files_info
+        }
+        
+    except APIException:
+        raise
+    except Exception as e:
+        logger.error(f"Error listing files in {directory}: {str(e)}")
+        raise APIException(f"Error listing files: {str(e)}", 500)
+
+
+def find_generated_files(uuid: str) -> List[Dict[str, Any]]:
+    """
+    查找指定UUID的生成文件
+    
+    Args:
+        uuid: 唯一标识符
+        
+    Returns:
+        生成文件信息列表
+    """
+    generated_files = []
+    
+    # 检查生成的音频文件
+    audio_paths = [
+        f"{Config.RESOURCE_DIR}/{uuid}output.wav",
+        f"{Config.TEMP_DIR}/{uuid}output.wav",
+        f"{Config.VOICE_DATA_DIR}/{uuid}output.wav"
+    ]
+    
+    for path in audio_paths:
+        expanded_path = os.path.expanduser(path)
+        if os.path.exists(expanded_path):
+            file_stats = os.stat(expanded_path)
+            generated_files.append({
+                "type": "audio",
+                "filename": os.path.basename(expanded_path),
+                "path": expanded_path,
+                "size": file_stats.st_size,
+                "modified": file_stats.st_mtime,
+                "download_url": f"/download/generated/audio/{uuid}"
+            })
+            break
+    
+    # 检查生成的视频文件
+    video_paths = [
+        f"{Config.RESOURCE_DIR}/{uuid}_output.mp4",
+        f"{Config.RESOURCE_DIR}/{uuid}output.mp4",
+        f"{Config.TEMP_DIR}/{uuid}output.mp4",
+        f"{Config.FACE2FACE_TEMP_DIR}/{uuid}_result.mp4",
+        f"{Config.FACE2FACE_TEMP_DIR}/{uuid}result.mp4"
+    ]
+    
+    for path in video_paths:
+        expanded_path = os.path.expanduser(path)
+        if os.path.exists(expanded_path):
+            file_stats = os.stat(expanded_path)
+            generated_files.append({
+                "type": "video",
+                "filename": os.path.basename(expanded_path),
+                "path": expanded_path,
+                "size": file_stats.st_size,
+                "modified": file_stats.st_mtime,
+                "download_url": f"/download/generated/video/{uuid}"
+            })
+            break
+    
+    return generated_files
--- a/file_upload.py
+++ b/file_upload.py
@@ -0,0 +1,279 @@
+"""
+文件上传和管理模块
+支持多种文件类型的上传、验证和存储
+"""
+
+import os
+import uuid
+from werkzeug.utils import secure_filename
+from flask import request, jsonify
+import mimetypes
+from typing import Dict, List, Optional, Tuple
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+class FileManager:
+    """文件管理器 - 支持多目录文件保存"""
+    
+    # 允许的文件扩展名
+    ALLOWED_EXTENSIONS = {
+        'video': {'.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm'},
+        'audio': {'.wav', '.mp3', '.aac', '.flac', '.ogg', '.m4a'},
+        'image': {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp'}
+    }
+    
+    # 文件大小限制 (MB)
+    MAX_FILE_SIZES = {
+        'video': 500,  # 500MB
+        'audio': 50,   # 50MB
+        'image': 10    # 10MB
+    }
+    
+    def __init__(self, upload_dir: str = "/mnt/docker/resource/uploads"):
+        self.upload_dir = upload_dir
+        # Docker容器挂载目录配置
+        self.docker_dirs = {
+            'voice_data': os.path.expanduser("~/heygem_data/voice/data"),
+            'face2face_data': os.path.expanduser("~/heygem_data/face2face/temp")
+        }
+        self._ensure_upload_directories()
+    
+    def _ensure_upload_directories(self):
+        """确保所有上传目录存在"""
+        # 确保本地上传目录存在
+        for file_type in ['video', 'audio', 'image', 'temp']:
+            dir_path = os.path.join(self.upload_dir, file_type)
+            os.makedirs(dir_path, exist_ok=True)
+        
+        # 确保Docker挂载目录存在
+        for dir_path in self.docker_dirs.values():
+            os.makedirs(dir_path, exist_ok=True)
+            logger.info(f"Ensured Docker mount directory exists: {dir_path}")
+    
+    def copy_audio_for_tts(self, filename: str, uuid: str) -> str:
+        """为TTS服务复制音频文件"""
+        import shutil
+        
+        source_path = os.path.join(self.upload_dir, 'audio', filename)
+        if not os.path.exists(source_path):
+            raise FileNotFoundError(f"Source audio file not found: {source_path}")
+        
+        # 复制到TTS服务目录，使用UUID命名
+        tts_filename = f"{uuid}.wav"
+        tts_dest = os.path.join(self.docker_dirs['voice_data'], tts_filename)
+        
+        try:
+            shutil.copy2(source_path, tts_dest)
+            logger.info(f"Copied audio for TTS: {source_path} -> {tts_dest}")
+            return tts_filename
+        except Exception as e:
+            logger.error(f"Failed to copy audio for TTS: {e}")
+            raise
+    
+    def copy_files_for_face2face(self, video_filename: str, audio_filename: str, uuid: str) -> Tuple[str, str]:
+        """为Face2Face服务复制视频和音频文件"""
+        import shutil
+        
+        # 复制视频文件
+        video_source = os.path.join(self.upload_dir, 'video', video_filename)
+        if not os.path.exists(video_source):
+            raise FileNotFoundError(f"Source video file not found: {video_source}")
+        
+        video_ext = os.path.splitext(video_filename)[1]
+        face2face_video = f"{uuid}{video_ext}"
+        video_dest = os.path.join(self.docker_dirs['face2face_data'], face2face_video)
+        
+        # 复制音频文件
+        audio_source = os.path.join(self.upload_dir, 'audio', audio_filename)
+        if not os.path.exists(audio_source):
+            raise FileNotFoundError(f"Source audio file not found: {audio_source}")
+        
+        audio_ext = os.path.splitext(audio_filename)[1]
+        face2face_audio = f"{uuid}{audio_ext}"
+        audio_dest = os.path.join(self.docker_dirs['face2face_data'], face2face_audio)
+        
+        try:
+            shutil.copy2(video_source, video_dest)
+            shutil.copy2(audio_source, audio_dest)
+            logger.info(f"Copied files for Face2Face: video={face2face_video}, audio={face2face_audio}")
+            return face2face_video, face2face_audio
+        except Exception as e:
+            logger.error(f"Failed to copy files for Face2Face: {e}")
+            raise
+    
+    def copy_generated_file_to_resource(self, source_path: str, filename: str, file_type: str = 'output') -> str:
+        """将生成的文件复制到资源目录"""
+        import shutil
+        
+        # 确保资源目录存在
+        resource_dir = "/mnt/docker/resource"
+        os.makedirs(resource_dir, exist_ok=True)
+        
+        dest_path = os.path.join(resource_dir, filename)
+        
+        try:
+            shutil.copy2(source_path, dest_path)
+            logger.info(f"Copied generated file to resource: {source_path} -> {dest_path}")
+            return dest_path
+        except Exception as e:
+            logger.error(f"Failed to copy generated file: {e}")
+            raise
+    
+    def validate_file(self, file, file_type: str) -> Tuple[bool, str]:
+        """验证文件"""
+        if not file or not file.filename:
+            return False, "没有选择文件"
+        
+        # 检查文件扩展名
+        filename = secure_filename(file.filename)
+        file_ext = os.path.splitext(filename)[1].lower()
+        
+        if file_ext not in self.ALLOWED_EXTENSIONS.get(file_type, set()):
+            return False, f"不支持的{file_type}文件格式: {file_ext}"
+        
+        # 检查文件大小
+        file.seek(0, os.SEEK_END)
+        file_size = file.tell()
+        file.seek(0)  # 重置文件指针
+        
+        max_size = self.MAX_FILE_SIZES.get(file_type, 10) * 1024 * 1024  # 转换为字节
+        if file_size > max_size:
+            return False, f"文件大小超出限制 ({self.MAX_FILE_SIZES.get(file_type)}MB)"
+        
+        return True, "文件验证通过"
+    
+    def save_file(self, file, file_type: str, custom_filename: str = None) -> Dict[str, str]:
+    
+    
+        is_valid, message = self.validate_file(file, file_type)
+        if not is_valid:
+            raise ValueError(message)
+
+        # 确保目录存在
+        save_dir = os.path.join(self.upload_dir, file_type)
+        os.makedirs(save_dir, exist_ok=True)
+
+        # 生成文件名
+        if custom_filename:
+            filename = secure_filename(custom_filename)
+        else:
+            if not file.filename:
+                raise ValueError("上传的文件缺少文件名")
+            original_filename = secure_filename(file.filename)
+            file_ext = os.path.splitext(original_filename)[1]
+            filename = f"{uuid.uuid4().hex}{file_ext}"
+
+        # 拼接完整路径
+        
+        file_path = os.path.join(save_dir, filename)
+        print(file_path)
+
+        # 保存文件
+        file.save(file_path)
+
+        # 获取文件信息
+        file_size = os.path.getsize(file_path)
+        mime_type, _ = mimetypes.guess_type(file_path)
+
+        logger.info(f"Saved {file_type} file to backup storage: {filename} ({file_size} bytes)")
+
+        return {
+            "filename": filename,
+            "file_path": file_path,
+            "file_type": file_type,
+            "file_size": file_size,
+            "mime_type": mime_type,
+            "relative_path": f"uploads/{file_type}/{filename}",
+            "download_url": f"/download/upload/{file_type}/{filename}"
+        }
+
+    
+    def delete_file(self, file_path: str) -> bool:
+        """删除文件"""
+        try:
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                logger.info(f"Deleted file: {file_path}")
+                return True
+            return False
+        except Exception as e:
+            logger.error(f"Failed to delete file {file_path}: {str(e)}")
+            return False
+    
+    def get_file_info(self, file_type: str, filename: str) -> Optional[Dict[str, str]]:
+        """获取文件信息"""
+        file_path = os.path.join(self.upload_dir, file_type, filename)
+        if not os.path.exists(file_path):
+            return None
+        
+        file_size = os.path.getsize(file_path)
+        mime_type, _ = mimetypes.guess_type(file_path)
+        
+        return {
+            "filename": filename,
+            "file_path": file_path,
+            "file_type": file_type,
+            "file_size": file_size,
+            "mime_type": mime_type,
+            "relative_path": f"uploads/{file_type}/{filename}",
+            "download_url": f"/download/upload/{file_type}/{filename}"
+        }
+    
+    def list_files(self, file_type: str = None) -> List[Dict[str, str]]:
+        """列出文件"""
+        files = []
+        
+        if file_type:
+            file_types = [file_type]
+        else:
+            file_types = ['video', 'audio', 'image']
+        
+        for ft in file_types:
+            type_dir = os.path.join(self.upload_dir, ft)
+            if os.path.exists(type_dir):
+                for filename in os.listdir(type_dir):
+                    file_info = self.get_file_info(ft, filename)
+                    if file_info:
+                        files.append(file_info)
+        
+        return files
+    
+    def cleanup_temp_files(self, older_than_hours: int = 24):
+        """清理临时文件"""
+        import time
+        
+        temp_dir = os.path.join(self.upload_dir, 'temp')
+        if not os.path.exists(temp_dir):
+            return
+        
+        current_time = time.time()
+        cutoff_time = current_time - (older_than_hours * 3600)
+        
+        for filename in os.listdir(temp_dir):
+            file_path = os.path.join(temp_dir, filename)
+            if os.path.isfile(file_path):
+                file_mtime = os.path.getmtime(file_path)
+                if file_mtime < cutoff_time:
+                    self.delete_file(file_path)
+                    logger.info(f"Cleaned up temp file: {filename}")
+
+# Module-level FileManager instance used by the helper functions below.
+# NOTE: it is created at import time, and FileManager.__init__ creates the
+# upload and Docker mount directories as a side effect.
+file_manager = FileManager()
+
+def save_uploaded_file(file, file_type: str, custom_filename: Optional[str] = None) -> Dict[str, str]:
+    """Save an uploaded file through the module-level ``file_manager``.
+
+    Thin wrapper around ``FileManager.save_file``; arguments and the return
+    value are passed through unchanged.
+    """
+    return file_manager.save_file(file, file_type, custom_filename)
+
+def get_uploaded_file_info(file_type: str, filename: str) -> Optional[Dict[str, str]]:
+    """Return the info dict of an uploaded file, or None if it does not exist.
+
+    Thin wrapper around ``FileManager.get_file_info`` on the module-level
+    ``file_manager``.
+    """
+    return file_manager.get_file_info(file_type, filename)
+
+def list_uploaded_files(file_type: Optional[str] = None) -> List[Dict[str, str]]:
+    """List uploaded files of one category, or of all categories when omitted.
+
+    Thin wrapper around ``FileManager.list_files`` on the module-level
+    ``file_manager``.
+    """
+    return file_manager.list_files(file_type)
+
+def validate_uploaded_file(file, file_type: str) -> Tuple[bool, str]:
+    """Validate an uploaded file and return ``(is_valid, message)``.
+
+    Thin wrapper around ``FileManager.validate_file`` on the module-level
+    ``file_manager``.
+    """
+    return file_manager.validate_file(file, file_type)
--- a/flask_api.py
+++ b/flask_api.py
@@ -0,0 +1,308 @@
+from flask import Flask, request, jsonify, render_template, abort, send_from_directory, Response
+import logging
+import os
+from moviepy import VideoFileClip
+import uuid
+from typing import Optional
+from flask_cors import CORS
+from typing import Dict, Any
+import threading
+# 导入API模块中的所有函数和类
+from api import (
+    APIException,
+    # train_voice,
+    # generate_voice,
+    # generate_video,
+    get_video_generate_process,
+    generate_digital_human,
+    # download_video,
+    # download_audio,
+    download_generated_video,
+
+    # download_generated_audio,
+    list_available_files,
+    find_generated_files,
+    Config
+)
+
+# 导入文件上传模块 (假设文件存在)
+try:
+    from file_upload import FileManager
+    file_manager_class=FileManager()
+    
+ 
+except ImportError:
+    # 如果file_upload模块不存在，创建一个简单的模拟版本
+    class MockFileManager:
+        def copy_audio_for_tts(self, voice_file_name, temp_uuid):
+            # 模拟文件复制操作，返回一个文件名
+            return f"tts_copy_{voice_file_name}"
+    file_manager = MockFileManager()
+    logging.warning("Mock file_upload.file_manager is used. Actual file operations might fail.")
+
+
+# In-memory task registry: maps task_id -> task metadata dict (status, type, params, result, progress)
+TASKS: Dict[str, Dict[str, Any]] = {}
+
+def create_task(task_type: str, params: Dict[str, Any]) -> str:
+    """Create a new task and store metadata"""
+    task_id = str(uuid.uuid4())
+    TASKS[task_id] = {
+        "status": "pending",
+        "type": task_type,
+        "params": params,
+        "result": None,
+        "progress": 0
+    }
+    return task_id
+
+def update_task(task_id: str, status: str, progress: int = None, result: Dict[str, Any] = None):
+    """Update task metadata"""
+    if task_id in TASKS:
+        TASKS[task_id]["status"] = status
+        if progress is not None:
+            TASKS[task_id]["progress"] = progress
+        if result is not None:
+            TASKS[task_id]["result"] = result
+
+def get_task(task_id):
+    return TASKS.get(task_id, None)
+
+def generate_digital_human(speech_text, sample_video, sample_voice, gen_uuid):
+    """Placeholder generator: sleeps for five seconds and reports success.
+
+    ``speech_text``, ``sample_video`` and ``sample_voice`` are accepted but
+    not used. Returns ``{"code": 200, "uuid": gen_uuid}``.
+
+    NOTE(review): this definition rebinds the ``generate_digital_human`` name
+    imported from ``api`` at the top of this file, so the imported
+    implementation is never called from this module - confirm that the stub
+    is intentional.
+    """
+    import time
+    time.sleep(5)  # simulate heavy work
+    return {"code": 200, "uuid": gen_uuid}
+
+
+def run_extract_audio(task_id, video_path, audio_uuid):
+    try:
+        update_task(task_id, "running", 10)
+        if not os.path.exists(video_path):
+            raise FileNotFoundError(f"Video file not found: {video_path}")
+
+        clip = VideoFileClip(video_path)
+
+        # Save extracted audio into temp dir
+        out_dir = Config.RESOURCE_DIR+'uploads/audio'
+        audio_path = os.path.join(out_dir, f"{audio_uuid}.wav")
+        clip.audio.write_audiofile(audio_path, codec="pcm_s16le")
+        clip.close()
+
+        update_task(task_id, "completed", 100, {
+            "uuid": audio_uuid,
+            "download_url": f"/download/generated/audio/{audio_uuid}.wav"
+        })
+    except Exception as e:
+        update_task(task_id, "failed", 100, {"error": str(e)})
+
+# === Background runner ===
+def run_generate_task(task_id, data, gen_uuid):
+    try:
+        update_task(task_id, "running", 10)
+        result = generate_digital_human(
+            data["speech_text"],
+            data["sample_video"],
+            data["sample_voice"],
+            gen_uuid
+        )
+        update_task(task_id, "completed", 100, {
+            "uuid": gen_uuid,
+            "download_url": f"/download/generated/video/{gen_uuid}?task_id={result.get('code')}"
+        })
+    except Exception as e:
+        update_task(task_id, "failed", 100, {"error": str(e)})
+# --- Flask app setup ---
+app = Flask(__name__)
+# Load settings from api.Config into app.config (read later as app.config['RESOURCE_DIR'], etc.)
+app.config.from_object('api.Config')
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Enable CORS on the application with the flask-cors defaults
+CORS(app)
+
+# --- Error handlers ---
+@app.errorhandler(APIException)
+def handle_api_exception(e):
+    return jsonify({"error": e.message}), e.status_code
+
+@app.errorhandler(404)
+def handle_not_found(e):
+    return jsonify({"error": "Resource not found"}), 404
+
+@app.errorhandler(500)
+def handle_server_error(e):
+    return jsonify({"error": "Internal server error"}), 500
+
+@app.route("/api/video/extract_audio", methods=["POST"])
+def api_extract_audio():
+    data = request.json
+    if not data or "video_path" not in data:
+        return jsonify({"error": "Missing field 'video_path'"}), 400
+
+    video_path = data["video_path"]
+    audio_uuid = str(uuid.uuid4())
+
+    # Create async task
+    task_id = create_task("extract_audio", {"video_path": video_path})
+    thread = threading.Thread(target=run_extract_audio, args=(task_id, video_path, audio_uuid))
+    thread.start()
+
+    return jsonify({
+        "status": "submitted",
+        "task_id": task_id,
+        "query_url": f"/api/task/status/{task_id}"
+    })
+
+# --- Example: Digital Human ---
+@app.route("/api/digital_human/generate", methods=["POST"])
+def api_generate_digital_human():
+    data = request.json
+    required_fields = ["speech_text", "sample_video", "sample_voice"]
+    if not data or not all(f in data for f in required_fields):
+        return jsonify({
+            "error": f"Missing one of the required fields: {', '.join(required_fields)}"
+        }), 400
+
+    # Create task
+    task_id = create_task("digital_human", data)
+    gen_uuid = data.get("uuid", str(uuid.uuid4()))
+
+    # Run async in background thread
+    thread = threading.Thread(target=run_generate_task, args=(task_id, data, gen_uuid))
+    thread.start()
+
+    return jsonify({
+        "status": "submitted",
+        "task_id": task_id,
+        "query_url": f"/api/task/status/{task_id}"
+    })
+# ---
+## 文件操作API
+
+@app.route('/api/files/list', methods=['GET'])
+def api_list_files():
+    """
+    列出可用的文件
+    可选参数:
+      - directory: 目录路径
+      - file_type: 文件类型 ("video", "audio", "all")
+    """
+    directory = request.args.get('directory', app.config['RESOURCE_DIR'])
+    file_type = request.args.get('file_type', 'all')
+    
+    result = list_available_files(directory, file_type)
+    return jsonify(result)
+
+@app.route('/api/files/find/<uuid>', methods=['GET'])
+def api_find_generated_files(uuid: str):
+    """
+    查找指定UUID的生成文件
+    """
+    files = find_generated_files(uuid)
+    if not files:
+        return jsonify({"message": "No generated files found for this UUID"}), 404
+    return jsonify({"status": "success", "files": files})
+
+
+# Root directory for uploads.
+# NOTE(review): same literal path as Config.RESOURCE_DIR - consider reusing that setting.
+UPLOAD_FOLDER = "/mnt/docker/resource"
+# Ensure the upload folder exists; create it if it doesn't (runs at import time)
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+# Set the upload folder for the Flask app
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+# Define allowed video extensions for security (lower-case, without the leading dot)
+ALLOWED_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'flv', 'webm'}
+
+# --- Helper Function for File Type Validation ---
+def allowed_file(filename):
+    """
+    Checks if the uploaded file's extension is in the ALLOWED_EXTENSIONS set.
+    """
+    return '.' in filename and \
+           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+
+# --- Video Upload API Endpoint ---
+@app.route('/api/upload/video', methods=['POST'])
+def upload_video():
+   
+    """上传视频文件"""
+    try:
+        if 'file' not in request.files:
+            return jsonify({"error": "没有文件"}), 400
+        
+        file = request.files['file']
+        custom_name = request.form.get('custom_name')
+        
+        file_info = file_manager_class.save_file(file, 'video', custom_name)
+        
+        return jsonify({
+            "success": True,
+            "message": "视频上传成功",
+            "file_info": file_info
+        })
+        
+    except Exception as e:
+        logger.error(f"Video upload failed: {str(e)}")
+        return jsonify({"error": f"上传失败: {str(e)}"}), 500
+    
+
+@app.route('/api/upload/audio', methods=['POST'])
+def upload_audio():
+   
+    """上传音频文件"""
+    try:
+        if 'file' not in request.files:
+            return jsonify({"error": "没有文件"}), 400
+        
+        file = request.files['file']
+        custom_name = request.form.get('custom_name')
+        
+        file_info = file_manager_class.save_file(file, 'audio', custom_name)
+        
+        return jsonify({
+            "success": True,
+            "message": "音频上传成功",
+            "file_info": file_info
+        })
+        
+    except Exception as e:
+        logger.error(f"Video upload failed: {str(e)}")
+        return jsonify({"error": f"上传失败: {str(e)}"}), 500
+
+@app.route("/api/task/status/<task_id>", methods=["GET"])
+def api_task_status(task_id):
+    task = get_task(task_id)
+    if not task:
+        return jsonify({"error": "Task not found"}), 404
+    return jsonify(task)
+
+    
+@app.route('/download/generated/video/<uuid>', methods=['GET'])
+def download_generated_video_route(uuid: str):
+    """
+    下载指定UUID的生成的视频文件
+    可选参数:
+      - task_id: 任务ID
+    """
+    task_id = request.args.get('task_id')
+    return download_generated_video(uuid, task_id)
+
+
+# ---
+## 根路由和首页
+
+@app.route('/')
+def home():
+    """Serve the landing page by rendering the ``index.html`` template."""
+    return render_template('index.html')
+
+
+if __name__ == '__main__':
+    CORS(app) 
+    # 确保必要的目录存在
+    _ = app.config['RESOURCE_DIR']
+    _ = app.config['TEMP_DIR']
+    _ = os.path.expanduser(app.config['VOICE_DATA_DIR'])
+    _ = os.path.expanduser(app.config['FACE2FACE_TEMP_DIR'])
+    
+    # 运行Flask应用
+    app.run(host='0.0.0.0', port=5001, debug=True)
--- a/test_one.html
+++ b/test_one.html
@@ -0,0 +1,159 @@
+<!DOCTYPE html>
+<html lang="zh">
+<head>
+  <meta charset="UTF-8">
+  <title>数字人 API 测试</title>
+  <style>
+    body { font-family: Arial, sans-serif; margin: 20px; }
+    section { margin-bottom: 40px; padding: 15px; border: 1px solid #ccc; border-radius: 8px; }
+    input, textarea { width: 100%; margin: 8px 0; padding: 6px; }
+    button { margin-top: 5px; padding: 8px 14px; cursor: pointer; }
+    #log { white-space: pre-wrap; background: #f7f7f7; border: 1px solid #ccc; padding: 10px; margin-top: 20px; }
+  </style>
+</head>
+<body>
+  <h1>数字人 API 测试页面</h1>
+
+  <!-- 数字人生成 -->
+  <section>
+    <h2>生成数字人</h2>
+    <label>语音文本:</label>
+    <textarea id="speech_text">你好，这是数字人生成测试。</textarea>
+    <label>样例视频 (文件名或路径):</label>
+    <input type="text" id="sample_video" value="sample_video.mp4">
+    <label>样例音频 (文件名或路径):</label>
+    <input type="text" id="sample_voice" value="sample_voice.wav">
+    <button onclick="submitDigitalHuman()">提交任务</button>
+  </section>
+
+  <!-- 视频转音频 -->
+  <section>
+    <h2>视频提取音频</h2>
+    <label>视频路径:</label>
+    <input type="text" id="video_path" value="sample_video.mp4">
+    <button onclick="submitExtractAudio()">提取音频</button>
+  </section>
+
+  <!-- 文件上传 -->
+  <section>
+    <h2>上传文件</h2>
+    <form id="uploadVideoForm">
+      <label>上传视频:</label>
+      <input type="file" name="file" id="video_file">
+      <input type="text" name="custom_name" placeholder="自定义文件名(可选)">
+      <button type="button" onclick="uploadFile('video')">上传视频</button>
+    </form>
+
+    <form id="uploadAudioForm">
+      <label>上传音频:</label>
+      <input type="file" name="file" id="audio_file">
+      <input type="text" name="custom_name" placeholder="自定义文件名(可选)">
+      <button type="button" onclick="uploadFile('audio')">上传音频</button>
+    </form>
+  </section>
+
+  <h2>日志输出</h2>
+  <div id="log">等待操作...</div>
+
+  <script>
+    // Base address of the API server; every request below is sent to this host.
+    const BASE_URL = "http://113.108.60.116:5001";
+    // Handle of the active status-polling timer, or null when nothing is being polled.
+    let pollInterval = null;
+
+    function log(msg) {
+      document.getElementById("log").innerText += "\n" + msg;
+    }
+
+    async function submitDigitalHuman() {
+      document.getElementById("log").innerText = "提交数字人任务...";
+      const speech_text = document.getElementById("speech_text").value;
+      const sample_video = document.getElementById("sample_video").value;
+      const sample_voice = document.getElementById("sample_voice").value;
+
+      try {
+        const resp = await fetch(`${BASE_URL}/api/digital_human/generate`, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({ speech_text, sample_video, sample_voice })
+        });
+        const data = await resp.json();
+        log("响应: " + JSON.stringify(data));
+        if (data.task_id) startPolling(data.task_id);
+      } catch (err) {
+        log("请求失败: " + err);
+      }
+    }
+
+    async function submitExtractAudio() {
+      document.getElementById("log").innerText = "提交提取音频任务...";
+      const video_path = document.getElementById("video_path").value;
+
+      try {
+        const resp = await fetch(`${BASE_URL}/api/video/extract_audio`, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({ video_path })
+        });
+        const data = await resp.json();
+        log("响应: " + JSON.stringify(data));
+        if (data.task_id) startPolling(data.task_id);
+      } catch (err) {
+        log("请求失败: " + err);
+      }
+    }
+
+    async function uploadFile(type) {
+      const form = new FormData();
+      const fileInput = document.getElementById(type + "_file");
+      if (!fileInput.files.length) {
+        log("请选择文件");
+        return;
+      }
+      form.append("file", fileInput.files[0]);
+      const customName = document.querySelector(`#upload${capitalize(type)}Form input[name="custom_name"]`).value;
+      if (customName) form.append("custom_name", customName);
+
+      try {
+        const resp = await fetch(`${BASE_URL}/api/upload/${type}`, {
+          method: "POST",
+          body: form
+        });
+        const data = await resp.json();
+        log("上传结果: " + JSON.stringify(data));
+      } catch (err) {
+        log("上传失败: " + err);
+      }
+    }
+
+    function startPolling(taskId) {
+      if (pollInterval) clearInterval(pollInterval);
+      pollInterval = setInterval(() => checkStatus(taskId), 2000);
+    }
+
+    async function checkStatus(taskId) {
+      try {
+        const resp = await fetch(`${BASE_URL}/api/task/status/${taskId}`);
+        const data = await resp.json();
+        log("任务状态: " + JSON.stringify(data));
+        if (data.status === "completed" || data.status === "failed") {
+          clearInterval(pollInterval);
+          if (data.result && data.result.download_url) {
+            const a = document.createElement("a");
+            a.href = BASE_URL + data.result.download_url;
+            a.textContent = "下载结果文件";
+            a.target = "_blank";
+            document.getElementById("log").appendChild(document.createElement("br"));
+            document.getElementById("log").appendChild(a);
+          }
+        }
+      } catch (err) {
+        log("查询失败: " + err);
+        clearInterval(pollInterval);
+      }
+    }
+
+    // Return s with its first character upper-cased and the rest unchanged
+    // (used to build the "upload<Type>Form" element ids).
+    function capitalize(s) {
+      return s.charAt(0).toUpperCase() + s.slice(1);
+    }
+  </script>
+</body>
+</html>