"""
Optimized digital-human generation API.

Designed around the new file-management architecture: input files are
staged into each downstream service's directory through ``file_manager``
before the corresponding service endpoint is called.
"""

import logging
import os
import time
from typing import Any, Dict, Tuple

import requests

from api import Config, APIException, _make_request, _copy_file

logger = logging.getLogger(__name__)

# Directory where the generated audio is staged temporarily before
# ``file_manager.copy_files_for_face2face`` is called.
_UPLOAD_AUDIO_DIR = "/mnt/docker/resource/uploads/audio"

# Timeout, in seconds, for the voice synthesis HTTP request.
_VOICE_REQUEST_TIMEOUT = 60


def _preprocess_voice(tts_audio_filename: str) -> Dict[str, Any]:
    """POST the preprocess-and-train request for an audio file name.

    Args:
        tts_audio_filename: File name as returned by
            ``file_manager.copy_audio_for_tts``.

    Returns:
        Whatever ``_make_request`` returns for the endpoint.
    """
    url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
    request_body = {
        "format": "wav",
        "reference_audio": tts_audio_filename,
        "lang": "zh",
    }
    return _make_request(url, request_body)


def _synthesize_voice(
    uuid: str, text: str, reference_audio: str, reference_text: str
) -> Tuple[str, str]:
    """Call the voice ``/v1/invoke`` endpoint and save the returned audio.

    The response body is written to ``Config.RESOURCE_DIR`` as
    ``<uuid>output.wav``.

    Args:
        uuid: Identifier sent as the ``speaker`` and used in the file name.
        text: Text to synthesize.
        reference_audio: Reference audio value forwarded to the service.
        reference_text: Reference text forwarded to the service.

    Returns:
        A ``(filename, path)`` tuple for the saved audio file.

    Raises:
        APIException: If the service answers with a non-200 status code.
    """
    url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
    request_body = {
        "speaker": uuid,
        "text": text,
        "reference_audio": reference_audio,
        "reference_text": reference_text,
        **Config.DEFAULT_VOICE_PARAMS,
    }

    response = requests.post(
        url, json=request_body, timeout=_VOICE_REQUEST_TIMEOUT
    )
    logger.info("Voice generation response status: %s", response.status_code)

    if response.status_code != 200:
        logger.error("Voice generation failed: %s", response.text)
        raise APIException(
            f"Voice generation failed: {response.text}", response.status_code
        )

    output_filename = f"{uuid}output.wav"
    output_path = os.path.join(Config.RESOURCE_DIR, output_filename)
    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_filename, output_path


def _remove_quietly(path: str) -> None:
    """Best-effort removal of a temporary file; failures are only logged."""
    try:
        os.remove(path)
    except OSError as exc:
        # Cleanup must never mask the real outcome of the request.
        logger.debug("Could not remove temporary file %s: %s", path, exc)


def generate_digital_human_v2(
    speech_text: str, sample_video: str, sample_voice: str, uuid: str
) -> Dict[str, Any]:
    """Generate a digital human (optimized version).

    Args:
        speech_text: Text to be spoken.
        sample_video: Sample video file name (in the upload directory).
        sample_voice: Sample voice file name (in the upload directory).
        uuid: Unique identifier for this generation.

    Returns:
        The result of the video-generation submit request.

    Raises:
        APIException: If any step of the generation fails.
    """
    logger.info("Generating digital human for UUID: %s", uuid)

    try:
        # NOTE(review): imported locally, presumably to avoid an import
        # cycle with file_upload; confirm before moving to module level.
        from file_upload import file_manager

        # Step 1: copy the audio file into the TTS service directory.
        tts_audio_filename = file_manager.copy_audio_for_tts(
            sample_voice, f"dh_{uuid}"
        )

        # Step 2: preprocess the audio and train the voice model.
        preprocess_result = _preprocess_voice(tts_audio_filename)

        # The synthesis step needs the preprocessed audio URL.
        if 'asr_format_audio_url' not in preprocess_result:
            logger.error(
                "Voice preprocessing response missing asr_format_audio_url: %s",
                preprocess_result,
            )
            raise APIException(
                "Voice preprocessing failed: missing asr_format_audio_url in response",
                500,
            )

        asr_format_audio_url = preprocess_result['asr_format_audio_url']
        reference_audio_text = preprocess_result.get('reference_audio_text', '')

        # Step 3: synthesize the speech and save it to the resource directory.
        generated_audio_filename, generated_audio_path = _synthesize_voice(
            uuid, speech_text, asr_format_audio_url, reference_audio_text
        )
        logger.info("Generated audio saved: %s", generated_audio_path)

        # Step 4: stage the generated audio in the uploads directory, then
        # copy video and audio into the Face2Face service directory.
        os.makedirs(_UPLOAD_AUDIO_DIR, exist_ok=True)
        upload_audio_path = os.path.join(
            _UPLOAD_AUDIO_DIR, generated_audio_filename
        )
        try:
            _copy_file(generated_audio_path, upload_audio_path)

            face2face_video, face2face_audio = (
                file_manager.copy_files_for_face2face(
                    sample_video, generated_audio_filename, uuid
                )
            )

            # Step 5: submit the video generation task.
            url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
            request_body = {
                "audio_url": face2face_audio,
                "video_url": face2face_video,
                # Timestamp used as the task ID.
                # NOTE(review): second resolution, so two submissions within
                # the same second share a code; confirm this is acceptable.
                "code": str(int(time.time())),
                "chaofen": 0,
                "watermark_switch": 0,
                "pn": 1,
            }

            result = _make_request(url, request_body)
            logger.info(
                "Digital human generation submitted successfully: %s", result
            )
            return result
        finally:
            # Remove the staged copy on success and on failure alike so a
            # failed submission does not leave files behind.
            _remove_quietly(upload_audio_path)

    except APIException as exc:
        logger.error("Failed to generate digital human: %s", exc)
        raise
    except Exception as exc:
        logger.error("Failed to generate digital human: %s", exc)
        raise APIException(
            f"Failed to generate digital human: {exc}", 500
        ) from exc


def generate_voice_v2(
    text: str, reference_audio: str, reference_text: str, uuid: str
) -> str:
    """Generate speech (optimized version).

    Args:
        text: Text to convert to speech.
        reference_audio: Reference audio file name (in the upload directory).
        reference_text: Reference text.
        uuid: Unique identifier for this generation.

    Returns:
        Path of the generated audio file (in the resource directory).

    Raises:
        APIException: If generation fails.
    """
    logger.info("Generating voice for UUID: %s", uuid)

    try:
        # NOTE(review): imported locally, presumably to avoid an import
        # cycle with file_upload; confirm before moving to module level.
        from file_upload import file_manager

        # Copy the reference audio into the TTS service directory first.
        tts_audio_filename = file_manager.copy_audio_for_tts(
            reference_audio, uuid
        )

        _, output_path = _synthesize_voice(
            uuid, text, tts_audio_filename, reference_text
        )
        logger.info("Generated voice saved to: %s", output_path)
        return output_path

    except APIException:
        raise
    except Exception as exc:
        logger.error("Network error during voice generation: %s", exc)
        raise APIException(f"Voice generation error: {exc}", 500) from exc


def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
    """Train the voice model (optimized version).

    Args:
        voice_file_name: Voice file name (in the upload directory).

    Returns:
        The result of the preprocess-and-train request.

    Raises:
        APIException: If training fails.
    """
    logger.info("Training voice model with file: %s", voice_file_name)

    try:
        # NOTE(review): imported locally, presumably to avoid an import
        # cycle with file_upload; confirm before moving to module level.
        from file_upload import file_manager

        # Copy the audio file into the TTS service directory under a
        # timestamp-based temporary identifier.
        temp_uuid = "train_" + str(int(time.time()))
        tts_audio_filename = file_manager.copy_audio_for_tts(
            voice_file_name, temp_uuid
        )

        return _preprocess_voice(tts_audio_filename)

    except APIException:
        raise
    except Exception as exc:
        logger.error("Voice training error: %s", exc)
        raise APIException(f"Voice training error: {exc}", 500) from exc