# digital_human_backend/function/digital_human_api.py

"""
优化后的数字人生成API
按照新的文件管理架构设计
"""

import requests
import os
import time
import logging
from typing import Dict, Any
from api import Config, APIException, _make_request, _copy_file

logger = logging.getLogger(__name__)

def generate_digital_human_v2(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
    """
    Generate a digital human (optimized version).

    Pipeline, as implemented below:
      1. Copy the sample voice into the TTS service directory.
      2. Call the voice service's preprocess-and-train endpoint.
      3. Call the voice service's invoke endpoint to synthesize ``speech_text``
         and save the returned bytes under ``Config.RESOURCE_DIR``.
      4. Copy the sample video and the generated audio into the Face2Face
         service directory (via a temporary copy in the uploads directory).
      5. Submit the video generation task to the video service.

    The temporary uploads-directory copy of the generated audio is removed
    whether or not the later steps succeed.

    Args:
        speech_text: Text to be spoken.
        sample_video: Sample video file name (in the upload directory).
        sample_voice: Sample voice file name (in the upload directory).
        uuid: Unique identifier for this generation job.

    Returns:
        The result of the video task submission request.

    Raises:
        APIException: If any step fails. Non-API errors are wrapped with
            status code 500 and chained to the original exception.
    """
    logger.info(f"Generating digital human for UUID: {uuid}")

    # Set once the temporary uploads copy is about to be created, so that the
    # `finally` clause knows whether there is anything to clean up.
    upload_audio_path = None

    try:
        from file_upload import file_manager

        # Step 1: copy the audio file into the TTS service directory for preprocessing.
        tts_audio_filename = file_manager.copy_audio_for_tts(sample_voice, f"dh_{uuid}")

        # Step 2: preprocess the audio and train the voice model.
        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        request_body_one = {
            "format": "wav",
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "lang": "zh"
        }

        response_one = _make_request(url, request_body_one)

        # The next step cannot run without the preprocessed audio reference.
        if 'asr_format_audio_url' not in response_one:
            logger.error(f"Voice preprocessing response missing asr_format_audio_url: {response_one}")
            raise APIException("Voice preprocessing failed: missing asr_format_audio_url in response", 500)

        asr_format_audio_url = response_one['asr_format_audio_url']
        # The reference text is optional; fall back to an empty string.
        reference_audio_text = response_one.get('reference_audio_text', '')

        # Step 3: synthesize the speech audio.
        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        request_body_two = {
            "speaker": uuid,
            "text": speech_text,
            "reference_audio": asr_format_audio_url,
            "reference_text": reference_audio_text,
            **Config.DEFAULT_VOICE_PARAMS
        }

        # The response body is the raw audio, so `requests` is called directly
        # here instead of going through `_make_request`.
        response = requests.post(url, json=request_body_two, timeout=60)
        logger.info(f"Voice generation response status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Voice generation failed: {response.text}")
            raise APIException(f"Voice generation failed: {response.text}", response.status_code)

        # Persist the generated audio in the resource directory.
        generated_audio_filename = f"{uuid}output.wav"
        generated_audio_path = os.path.join(Config.RESOURCE_DIR, generated_audio_filename)
        with open(generated_audio_path, "wb") as f:
            f.write(response.content)
        logger.info(f"Generated audio saved: {generated_audio_path}")

        # Step 4: copy the video and the generated audio into the Face2Face service directory.
        # The generated audio is first copied (temporarily) from the resource
        # directory into the uploads directory.
        # NOTE(review): this path is hard-coded rather than read from Config.
        upload_audio_dir = "/mnt/docker/resource/uploads/audio"
        os.makedirs(upload_audio_dir, exist_ok=True)
        upload_audio_path = os.path.join(upload_audio_dir, generated_audio_filename)
        _copy_file(generated_audio_path, upload_audio_path)

        face2face_video, face2face_audio = file_manager.copy_files_for_face2face(
            sample_video, generated_audio_filename, uuid
        )

        # Step 5: submit the video generation task.
        url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
        request_body = {
            "audio_url": face2face_audio,
            "video_url": face2face_video,
            # Timestamp (seconds) used as the task ID.
            # NOTE(review): two submissions within the same second get the same code.
            "code": str(int(time.time())),
            "chaofen": 0,
            "watermark_switch": 0,
            "pn": 1
        }

        result = _make_request(url, request_body)
        logger.info(f"Digital human generation submitted successfully: {result}")

        return result

    except APIException as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise APIException(f"Failed to generate digital human: {str(e)}", 500) from e
    finally:
        # Best-effort removal of the temporary uploads copy, on success and on
        # failure alike; a cleanup problem must never mask the real outcome.
        if upload_audio_path is not None:
            try:
                os.remove(upload_audio_path)
            except FileNotFoundError:
                # The copy never materialized (e.g. the copy step itself failed).
                pass
            except OSError as cleanup_error:
                logger.warning(f"Failed to remove temporary audio file {upload_audio_path}: {cleanup_error}")


def generate_voice_v2(text: str, reference_audio: str, reference_text: str, uuid: str) -> str:
    """
    Generate speech audio (optimized version).

    Copies the reference audio into the TTS service directory, posts a
    synthesis request to the voice service, and writes the returned bytes to
    ``<uuid>output.wav`` under ``Config.RESOURCE_DIR``.

    Args:
        text: Text to convert to speech.
        reference_audio: Reference audio file name (in the upload directory).
        reference_text: Transcript of the reference audio.
        uuid: Unique identifier; used as the speaker and in the output file name.

    Returns:
        Path of the generated audio file (in the resource directory).

    Raises:
        APIException: If generation fails. A non-200 response carries the
            service's status code; any other error is reported with 500.
    """
    logger.info(f"Generating voice for UUID: {uuid}")

    try:
        from file_upload import file_manager

        # The voice service reads the reference audio from the TTS directory.
        tts_reference_name = file_manager.copy_audio_for_tts(reference_audio, uuid)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        payload = {
            "speaker": uuid,
            "text": text,
            "reference_audio": tts_reference_name,  # file name inside the TTS directory
            "reference_text": reference_text,
            **Config.DEFAULT_VOICE_PARAMS
        }

        reply = requests.post(endpoint, json=payload, timeout=60)
        logger.info(f"Voice generation response status: {reply.status_code}")

        # Guard clause: anything other than 200 is a failure.
        if reply.status_code != 200:
            logger.error(f"Voice generation failed: {reply.text}")
            raise APIException(f"Voice generation failed: {reply.text}", reply.status_code)

        # Persist the generated audio in the resource directory.
        saved_path = os.path.join(Config.RESOURCE_DIR, f"{uuid}output.wav")
        with open(saved_path, "wb") as sink:
            sink.write(reply.content)
        logger.info(f"Generated voice saved to: {saved_path}")
        return saved_path

    except APIException:
        # Already in the shape callers expect; propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Network error during voice generation: {str(e)}")
        raise APIException(f"Voice generation error: {str(e)}", 500)


def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
    """
    Train a voice model (optimized version).

    Copies the voice file into the TTS service directory under a
    timestamp-based tag, then sends it to the voice service's
    preprocess-and-train endpoint.

    Args:
        voice_file_name: Voice file name (in the upload directory).

    Returns:
        The result of the preprocess-and-train request.

    Raises:
        APIException: If training fails; non-API errors are reported with 500.
    """
    logger.info(f"Training voice model with file: {voice_file_name}")

    try:
        from file_upload import file_manager

        # Tag the TTS copy with the current time in whole seconds.
        copy_tag = f"train_{int(time.time())}"
        tts_copy_name = file_manager.copy_audio_for_tts(voice_file_name, copy_tag)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        return _make_request(
            endpoint,
            {
                "format": "wav",
                "reference_audio": tts_copy_name,  # file name inside the TTS directory
                "lang": "zh",
            },
        )

    except APIException:
        # Already in the shape callers expect; propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Voice training error: {str(e)}")
        raise APIException(f"Voice training error: {str(e)}", 500)