Files
digital_human_backend/function/digital_human_api.py
2025-09-05 00:43:20 +08:00

212 lines
7.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
优化后的数字人生成API
按照新的文件管理架构设计
"""
import requests
import os
import time
import logging
from typing import Dict, Any
from api import Config, APIException, _make_request, _copy_file
logger = logging.getLogger(__name__)
def generate_digital_human_v2(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
    """
    Generate a digital-human video from text, a sample video and a sample voice.

    The function runs the following pipeline:

    1. Stage the sample voice for the TTS service through
       ``file_manager.copy_audio_for_tts``.
    2. POST to ``{VOICE_SERVICE_URL}/v1/preprocess_and_tran`` to preprocess the
       reference audio; the response must contain ``asr_format_audio_url``.
    3. POST to ``{VOICE_SERVICE_URL}/v1/invoke`` to synthesize ``speech_text``
       and write the response body to ``{RESOURCE_DIR}/{uuid}output.wav``.
    4. Copy the synthesized audio into the uploads audio directory and stage
       video + audio through ``file_manager.copy_files_for_face2face``.
    5. POST the job to ``{VIDEO_SERVICE_URL}/easy/submit``.

    The temporary copy created in step 4 is removed whether or not the
    pipeline succeeds. The synthesized audio in ``RESOURCE_DIR`` is kept.

    Args:
        speech_text: Text to be spoken.
        sample_video: Sample video file name (in the upload directory).
        sample_voice: Sample voice file name (in the upload directory).
        uuid: Unique identifier; used as the TTS speaker and in file names.

    Returns:
        The value returned by ``_make_request`` for the submit call.

    Raises:
        APIException: If any step fails. Non-``APIException`` errors are
            wrapped in an ``APIException`` with status 500.
    """
    logger.info(f"Generating digital human for UUID: {uuid}")

    # Set once the temporary uploads copy is about to be created, so the
    # ``finally`` clause knows whether there is anything to clean up.
    upload_audio_path = None

    try:
        from file_upload import file_manager

        # Step 1: copy the sample voice into the TTS service directory.
        tts_audio_filename = file_manager.copy_audio_for_tts(sample_voice, f"dh_{uuid}")

        # Step 2: preprocess the reference audio.
        # The endpoint name is kept verbatim as the original code spells it.
        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        request_body_one = {
            "format": "wav",
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "lang": "zh"
        }
        response_one = _make_request(url, request_body_one)

        # The synthesis step cannot run without the preprocessed audio URL.
        if 'asr_format_audio_url' not in response_one:
            logger.error(f"Voice preprocessing response missing asr_format_audio_url: {response_one}")
            raise APIException("Voice preprocessing failed: missing asr_format_audio_url in response", 500)

        asr_format_audio_url = response_one['asr_format_audio_url']
        reference_audio_text = response_one.get('reference_audio_text', '')

        # Step 3: synthesize the speech.
        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        request_body_two = {
            "speaker": uuid,
            "text": speech_text,
            "reference_audio": asr_format_audio_url,
            "reference_text": reference_audio_text,
            **Config.DEFAULT_VOICE_PARAMS
        }
        # Called directly (not via _make_request) because the raw response
        # bytes are written to disk below.
        response = requests.post(url, json=request_body_two, timeout=60)
        logger.info(f"Voice generation response status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Voice generation failed: {response.text}")
            raise APIException(f"Voice generation failed: {response.text}", response.status_code)

        # Persist the synthesized audio in the resource directory.
        generated_audio_filename = f"{uuid}output.wav"
        generated_audio_path = os.path.join(Config.RESOURCE_DIR, generated_audio_filename)
        with open(generated_audio_path, "wb") as f:
            f.write(response.content)
        logger.info(f"Generated audio saved: {generated_audio_path}")

        # Step 4: stage the video and the synthesized audio for Face2Face.
        # The audio is first copied temporarily into the uploads directory.
        # NOTE(review): this path is hard-coded; presumably it must match the
        # directory file_manager reads from - confirm against file_upload.
        upload_audio_dir = "/mnt/docker/resource/uploads/audio"
        os.makedirs(upload_audio_dir, exist_ok=True)
        upload_audio_path = os.path.join(upload_audio_dir, generated_audio_filename)
        _copy_file(generated_audio_path, upload_audio_path)

        face2face_video, face2face_audio = file_manager.copy_files_for_face2face(
            sample_video, generated_audio_filename, uuid
        )

        # Step 5: submit the video generation task.
        url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
        request_body = {
            "audio_url": face2face_audio,
            "video_url": face2face_video,
            # Timestamp (seconds) used as the task id.
            # NOTE(review): two submissions within the same second get the
            # same code - confirm whether the video service tolerates that.
            "code": str(int(time.time())),
            "chaofen": 0,
            "watermark_switch": 0,
            "pn": 1
        }
        result = _make_request(url, request_body)
        logger.info(f"Digital human generation submitted successfully: {result}")
        return result

    except Exception as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        if isinstance(e, APIException):
            raise
        # Chain the original error so the root cause stays in the traceback.
        raise APIException(f"Failed to generate digital human: {str(e)}", 500) from e

    finally:
        # Best-effort removal of the temporary uploads copy, on success and on
        # failure alike. A cleanup problem must never mask the real outcome.
        if upload_audio_path is not None:
            try:
                os.remove(upload_audio_path)
            except FileNotFoundError:
                # The copy never materialised or was already removed.
                pass
            except OSError as cleanup_error:
                logger.warning(f"Failed to remove temporary audio file {upload_audio_path}: {cleanup_error}")
def generate_voice_v2(text: str, reference_audio: str, reference_text: str, uuid: str) -> str:
    """
    Synthesize speech for ``text`` using a reference recording.

    The reference audio is first staged for the TTS service through
    ``file_manager.copy_audio_for_tts``. The text is then sent to
    ``{VOICE_SERVICE_URL}/v1/invoke`` and the response body is written to
    ``{RESOURCE_DIR}/{uuid}output.wav``.

    Args:
        text: Text to convert to speech.
        reference_audio: Reference audio file name (in the upload directory).
        reference_text: Transcript of the reference audio.
        uuid: Unique identifier; used as the speaker and in the output name.

    Returns:
        Path of the generated audio file inside the resource directory.

    Raises:
        APIException: If the service answers with a non-200 status, or if any
            other error occurs (wrapped with status 500).
    """
    logger.info(f"Generating voice for UUID: {uuid}")

    try:
        from file_upload import file_manager

        # Stage the reference audio in the TTS service directory first.
        staged_reference = file_manager.copy_audio_for_tts(reference_audio, uuid)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        payload = {
            "speaker": uuid,
            "text": text,
            "reference_audio": staged_reference,  # file name inside the TTS directory
            "reference_text": reference_text,
            **Config.DEFAULT_VOICE_PARAMS
        }

        reply = requests.post(endpoint, json=payload, timeout=60)
        logger.info(f"Voice generation response status: {reply.status_code}")

        # Guard clause: anything but 200 is a failure reported by the service.
        if reply.status_code != 200:
            logger.error(f"Voice generation failed: {reply.text}")
            raise APIException(f"Voice generation failed: {reply.text}", reply.status_code)

        # Store the synthesized audio in the resource directory.
        saved_path = os.path.join(Config.RESOURCE_DIR, f"{uuid}output.wav")
        with open(saved_path, "wb") as audio_file:
            audio_file.write(reply.content)

        logger.info(f"Generated voice saved to: {saved_path}")
        return saved_path

    except APIException:
        # Already in the caller-facing form; propagate untouched.
        raise
    except Exception as err:
        logger.error(f"Network error during voice generation: {str(err)}")
        raise APIException(f"Voice generation error: {str(err)}", 500)
def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
    """
    Submit a voice sample to the voice service for preprocessing/training.

    The sample is staged for the TTS service through
    ``file_manager.copy_audio_for_tts`` under a temporary ``train_<timestamp>``
    identifier, then posted to ``{VOICE_SERVICE_URL}/v1/preprocess_and_tran``.

    Args:
        voice_file_name: Voice file name (in the upload directory).

    Returns:
        The value returned by ``_make_request`` for the training call.

    Raises:
        APIException: If the request fails; other errors are wrapped with
            status 500.
    """
    logger.info(f"Training voice model with file: {voice_file_name}")

    try:
        from file_upload import file_manager

        # Copy the sample into the TTS service directory under an id derived
        # from the current time in whole seconds.
        staging_id = f"train_{int(time.time())}"
        staged_audio = file_manager.copy_audio_for_tts(voice_file_name, staging_id)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        payload = {
            "format": "wav",
            "reference_audio": staged_audio,  # file name inside the TTS directory
            "lang": "zh"
        }
        training_result = _make_request(endpoint, payload)
        return training_result

    except APIException:
        # Already in the caller-facing form; propagate untouched.
        raise
    except Exception as err:
        logger.error(f"Voice training error: {str(err)}")
        raise APIException(f"Voice training error: {str(err)}", 500)