function script

2025-09-05 00:43:20 +08:00
parent 96c7f5b347
commit 6c6b19e03e
5 changed files with 1864 additions and 0 deletions
--- a/function/digital_human_api.py
+++ b/function/digital_human_api.py
@@ -0,0 +1,211 @@
+"""
+Optimized digital human generation API.
+Designed according to the new file management architecture.
+"""
+
+import requests
+import os
+import time
+import logging
+from typing import Dict, Any
+from api import Config, APIException, _make_request, _copy_file
+
+logger = logging.getLogger(__name__)
+
+def generate_digital_human_v2(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
+    """
+    生成数字人 (优化版本)
+    
+    Args:
+        speech_text: 语音文本
+        sample_video: 样本视频文件名（在上传目录中）
+        sample_voice: 样本语音文件名（在上传目录中）
+        uuid: 唯一标识符
+        
+    Returns:
+        生成结果
+        
+    Raises:
+        APIException: 生成失败时抛出异常
+    """
+    logger.info(f"Generating digital human for UUID: {uuid}")
+    
+    try:
+        from file_upload import file_manager
+        
+        # 步骤1: 将音频文件复制到TTS服务目录进行预处理
+        tts_audio_filename = file_manager.copy_audio_for_tts(sample_voice, f"dh_{uuid}")
+        
+        # 步骤2: 预处理和训练语音模型
+        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
+        request_body_one = {
+            "format": "wav",
+            "reference_audio": tts_audio_filename,  # 使用TTS目录中的文件名
+            "lang": "zh"
+        }
+        
+        response_one = _make_request(url, request_body_one)
+        
+        # 检查响应中是否包含必要字段
+        if 'asr_format_audio_url' not in response_one:
+            logger.error(f"Voice preprocessing response missing asr_format_audio_url: {response_one}")
+            raise APIException(f"Voice preprocessing failed: missing asr_format_audio_url in response", 500)
+            
+        asr_format_audio_url = response_one['asr_format_audio_url']
+        reference_audio_text = response_one.get('reference_audio_text', '')
+
+        # 步骤3: 生成语音
+        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
+        request_body_two = {
+            "speaker": uuid,
+            "text": speech_text,
+            "reference_audio": asr_format_audio_url,
+            "reference_text": reference_audio_text,
+            **Config.DEFAULT_VOICE_PARAMS
+        }
+
+        response = requests.post(url, json=request_body_two, timeout=60)
+        logger.info(f"Voice generation response status: {response.status_code}")
+
+        if response.status_code != 200:
+            logger.error(f"Voice generation failed: {response.text}")
+            raise APIException(f"Voice generation failed: {response.text}", response.status_code)
+
+        # 保存生成的音频文件到resource目录
+        generated_audio_filename = f"{uuid}output.wav"
+        generated_audio_path = os.path.join(Config.RESOURCE_DIR, generated_audio_filename)
+        with open(generated_audio_path, "wb") as f:
+            f.write(response.content)
+        logger.info(f"Generated audio saved: {generated_audio_path}")
+
+        # 步骤4: 将视频和生成的音频复制到Face2Face服务目录
+        # 首先将生成的音频从resource目录复制到uploads目录（临时）
+        upload_audio_dir = "/mnt/docker/resource/uploads/audio"
+        os.makedirs(upload_audio_dir, exist_ok=True)
+        upload_audio_path = os.path.join(upload_audio_dir, generated_audio_filename)
+        _copy_file(generated_audio_path, upload_audio_path)
+        
+        face2face_video, face2face_audio = file_manager.copy_files_for_face2face(
+            sample_video, generated_audio_filename, uuid
+        )
+
+        # 步骤5: 提交视频生成任务
+        url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
+        request_body = {
+            "audio_url": face2face_audio,
+            "video_url": face2face_video,
+            "code": str(int(time.time())),  # 使用时间戳作为唯一任务ID
+            "chaofen": 0,
+            "watermark_switch": 0,
+            "pn": 1
+        }
+        
+        result = _make_request(url, request_body)
+        logger.info(f"Digital human generation submitted successfully: {result}")
+        
+        # 清理临时文件
+        try:
+            os.remove(upload_audio_path)
+        except:
+            pass
+            
+        return result
+
+    except Exception as e:
+        logger.error(f"Failed to generate digital human: {str(e)}")
+        if isinstance(e, APIException):
+            raise
+        raise APIException(f"Failed to generate digital human: {str(e)}", 500)
+
+
+def generate_voice_v2(text: str, reference_audio: str, reference_text: str, uuid: str) -> str:
+    """
+    生成语音 (优化版本)
+    
+    Args:
+        text: 要转换的文本
+        reference_audio: 参考音频文件名（在上传目录中）
+        reference_text: 参考文本
+        uuid: 唯一标识符
+        
+    Returns:
+        生成的音频文件路径（在resource目录中）
+        
+    Raises:
+        APIException: 生成失败时抛出异常
+    """
+    logger.info(f"Generating voice for UUID: {uuid}")
+    
+    try:
+        from file_upload import file_manager
+        
+        # 先将参考音频复制到TTS服务目录
+        tts_audio_filename = file_manager.copy_audio_for_tts(reference_audio, uuid)
+        
+        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
+        request_body = {
+            "speaker": uuid,
+            "text": text,
+            "reference_audio": tts_audio_filename,  # 使用TTS目录中的文件名
+            "reference_text": reference_text,
+            **Config.DEFAULT_VOICE_PARAMS
+        }
+        
+        response = requests.post(url, json=request_body, timeout=60)
+        logger.info(f"Voice generation response status: {response.status_code}")
+        
+        if response.status_code == 200:
+            # 保存生成的音频到resource目录
+            output_filename = f"{uuid}output.wav"
+            output_path = os.path.join(Config.RESOURCE_DIR, output_filename)
+            with open(output_path, "wb") as f:
+                f.write(response.content)
+            logger.info(f"Generated voice saved to: {output_path}")
+            return output_path
+        else:
+            logger.error(f"Voice generation failed: {response.text}")
+            raise APIException(f"Voice generation failed: {response.text}", response.status_code)
+            
+    except Exception as e:
+        if isinstance(e, APIException):
+            raise
+        logger.error(f"Network error during voice generation: {str(e)}")
+        raise APIException(f"Voice generation error: {str(e)}", 500)
+
+
+def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
+    """
+    训练语音模型 (优化版本)
+    
+    Args:
+        voice_file_name: 语音文件名（在上传目录中）
+        
+    Returns:
+        训练结果
+        
+    Raises:
+        APIException: 训练失败时抛出异常
+    """
+    logger.info(f"Training voice model with file: {voice_file_name}")
+    
+    try:
+        from file_upload import file_manager
+        
+        # 将音频文件复制到TTS服务目录
+        temp_uuid = "train_" + str(int(time.time()))
+        tts_audio_filename = file_manager.copy_audio_for_tts(voice_file_name, temp_uuid)
+        
+        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
+        request_body = {
+            "format": "wav",
+            "reference_audio": tts_audio_filename,  # 使用TTS目录中的文件名
+            "lang": "zh"
+        }
+        
+        return _make_request(url, request_body)
+        
+    except Exception as e:
+        if isinstance(e, APIException):
+            raise
+        logger.error(f"Voice training error: {str(e)}")
+        raise APIException(f"Voice training error: {str(e)}", 500)