212 lines
7.5 KiB
Python
212 lines
7.5 KiB
Python
"""
|
||
优化后的数字人生成API
|
||
按照新的文件管理架构设计
|
||
"""
|
||
|
||
import requests
|
||
import os
|
||
import time
|
||
import logging
|
||
from typing import Dict, Any
|
||
from api import Config, APIException, _make_request, _copy_file
|
||
|
||
# Module-level logger named after this module (via __name__).
logger = logging.getLogger(__name__)
|
||
|
||
def generate_digital_human_v2(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
    """
    Generate a digital human video (optimized version).

    The function runs the following pipeline:

    1. Copy the sample voice into the TTS service directory.
    2. Call the voice service ``/v1/preprocess_and_tran`` endpoint on that copy.
    3. Call the voice service ``/v1/invoke`` endpoint to synthesize
       ``speech_text`` and save the returned bytes as
       ``{uuid}output.wav`` under ``Config.RESOURCE_DIR``.
    4. Stage the generated audio in the uploads directory and copy the
       sample video plus that audio into the Face2Face service directory.
    5. Submit the video generation task to ``/easy/submit``.

    The temporary copy created in the uploads directory (step 4) is removed
    on every exit path, including failures.

    Args:
        speech_text: Text to be spoken by the digital human.
        sample_video: Sample video file name (in the upload directory).
        sample_voice: Sample voice file name (in the upload directory).
        uuid: Unique identifier used for the speaker and the output file names.

    Returns:
        The value returned by ``_make_request`` for the submit call.

    Raises:
        APIException: If any step fails. Non-API errors are wrapped with
            status code 500 and chained to the original exception.
    """
    logger.info(f"Generating digital human for UUID: {uuid}")

    # Set just before the temporary upload copy is created so that the
    # ``finally`` clause knows whether there is anything to remove.
    upload_audio_path = None

    try:
        from file_upload import file_manager

        # Step 1: copy the audio file into the TTS service directory for preprocessing.
        tts_audio_filename = file_manager.copy_audio_for_tts(sample_voice, f"dh_{uuid}")

        # Step 2: preprocess the audio and train the voice model.
        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        request_body_one = {
            "format": "wav",
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "lang": "zh"
        }

        response_one = _make_request(url, request_body_one)

        # The next step cannot run without the preprocessed audio reference.
        # The isinstance guard turns an unexpected non-dict response (e.g. None)
        # into the same explicit error instead of an opaque TypeError.
        if not isinstance(response_one, dict) or 'asr_format_audio_url' not in response_one:
            logger.error(f"Voice preprocessing response missing asr_format_audio_url: {response_one}")
            raise APIException("Voice preprocessing failed: missing asr_format_audio_url in response", 500)

        asr_format_audio_url = response_one['asr_format_audio_url']
        reference_audio_text = response_one.get('reference_audio_text', '')

        # Step 3: synthesize the speech.
        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        request_body_two = {
            "speaker": uuid,
            "text": speech_text,
            "reference_audio": asr_format_audio_url,
            "reference_text": reference_audio_text,
            **Config.DEFAULT_VOICE_PARAMS
        }

        response = requests.post(url, json=request_body_two, timeout=60)
        logger.info(f"Voice generation response status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Voice generation failed: {response.text}")
            raise APIException(f"Voice generation failed: {response.text}", response.status_code)

        # Persist the generated audio in the resource directory.
        generated_audio_filename = f"{uuid}output.wav"
        generated_audio_path = os.path.join(Config.RESOURCE_DIR, generated_audio_filename)
        with open(generated_audio_path, "wb") as f:
            f.write(response.content)
        logger.info(f"Generated audio saved: {generated_audio_path}")

        # Step 4: copy the video and the generated audio to the Face2Face
        # service directory. The generated audio is first staged (temporarily)
        # in the uploads directory; presumably that is where
        # copy_files_for_face2face looks for it -- TODO confirm.
        upload_audio_dir = "/mnt/docker/resource/uploads/audio"
        os.makedirs(upload_audio_dir, exist_ok=True)
        upload_audio_path = os.path.join(upload_audio_dir, generated_audio_filename)
        _copy_file(generated_audio_path, upload_audio_path)

        face2face_video, face2face_audio = file_manager.copy_files_for_face2face(
            sample_video, generated_audio_filename, uuid
        )

        # Step 5: submit the video generation task.
        url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
        request_body = {
            "audio_url": face2face_audio,
            "video_url": face2face_video,
            # NOTE(review): a second-resolution timestamp is used as the task ID;
            # two submissions within the same second would share a code.
            "code": str(int(time.time())),
            "chaofen": 0,
            "watermark_switch": 0,
            "pn": 1
        }

        result = _make_request(url, request_body)
        logger.info(f"Digital human generation submitted successfully: {result}")

        return result

    except APIException as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise APIException(f"Failed to generate digital human: {str(e)}", 500) from e
    finally:
        # Best-effort removal of the temporary upload copy on every exit path.
        # A cleanup failure must never mask the real outcome of the request.
        if upload_audio_path is not None:
            try:
                os.remove(upload_audio_path)
            except FileNotFoundError:
                # The copy never materialized (e.g. _copy_file failed early).
                pass
            except OSError as cleanup_error:
                logger.warning(
                    f"Failed to remove temporary audio file {upload_audio_path}: {cleanup_error}"
                )
|
||
|
||
|
||
def generate_voice_v2(text: str, reference_audio: str, reference_text: str, uuid: str) -> str:
    """
    Synthesize speech with the voice service (optimized version).

    The reference audio is first copied into the TTS service directory, then
    the ``/v1/invoke`` endpoint is called and the returned bytes are written
    to ``{uuid}output.wav`` under ``Config.RESOURCE_DIR``.

    Args:
        text: Text to convert to speech.
        reference_audio: Reference audio file name (in the upload directory).
        reference_text: Transcript of the reference audio.
        uuid: Unique identifier, used as the speaker and in the output file name.

    Returns:
        Path of the generated audio file (inside the resource directory).

    Raises:
        APIException: If the service answers with a non-200 status, or if any
            other error occurs (wrapped with status code 500).
    """
    logger.info(f"Generating voice for UUID: {uuid}")

    try:
        from file_upload import file_manager

        # The voice service is given the name of the copy inside the TTS directory.
        staged_reference = file_manager.copy_audio_for_tts(reference_audio, uuid)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        payload = {
            "speaker": uuid,
            "text": text,
            "reference_audio": staged_reference,
            "reference_text": reference_text,
        }
        # Defaults are applied last, so they override the keys above on a clash.
        payload.update(Config.DEFAULT_VOICE_PARAMS)

        reply = requests.post(endpoint, json=payload, timeout=60)
        logger.info(f"Voice generation response status: {reply.status_code}")

        # Guard clause: anything other than 200 is a failure.
        if reply.status_code != 200:
            logger.error(f"Voice generation failed: {reply.text}")
            raise APIException(f"Voice generation failed: {reply.text}", reply.status_code)

        # Save the synthesized audio in the resource directory.
        destination = os.path.join(Config.RESOURCE_DIR, f"{uuid}output.wav")
        with open(destination, "wb") as audio_file:
            audio_file.write(reply.content)
        logger.info(f"Generated voice saved to: {destination}")
        return destination

    except APIException:
        # Already in the shape callers expect; propagate untouched.
        raise
    except Exception as err:
        logger.error(f"Network error during voice generation: {str(err)}")
        raise APIException(f"Voice generation error: {str(err)}", 500)
|
||
|
||
|
||
def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
    """
    Train a voice model from an uploaded audio file (optimized version).

    The audio file is copied into the TTS service directory under a
    timestamp-based ``train_<epoch seconds>`` identifier, and the voice
    service ``/v1/preprocess_and_tran`` endpoint is then called on that copy.

    Args:
        voice_file_name: Voice file name (in the upload directory).

    Returns:
        The value returned by ``_make_request`` for the training call.

    Raises:
        APIException: If training fails; non-API errors are wrapped with
            status code 500.
    """
    logger.info(f"Training voice model with file: {voice_file_name}")

    try:
        from file_upload import file_manager

        # Stage the audio in the TTS service directory under a temporary identifier.
        staging_id = f"train_{int(time.time())}"
        staged_audio = file_manager.copy_audio_for_tts(voice_file_name, staging_id)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        payload = {
            "format": "wav",
            "reference_audio": staged_audio,  # file name inside the TTS directory
            "lang": "zh",
        }
        outcome = _make_request(endpoint, payload)
        return outcome

    except APIException:
        # Already an API-level error; let it through unchanged.
        raise
    except Exception as err:
        logger.error(f"Voice training error: {str(err)}")
        raise APIException(f"Voice training error: {str(err)}", 500)
|