function script
This commit is contained in:
211
function/digital_human_api.py
Normal file
211
function/digital_human_api.py
Normal file
@@ -0,0 +1,211 @@
|
||||
"""
Optimized digital human generation API.
Designed according to the new file management architecture.
"""
|
||||
|
||||
import requests
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
from api import Config, APIException, _make_request, _copy_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def generate_digital_human_v2(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
    """
    Generate a digital human video (optimized version).

    The function runs these steps in order:
      1. Hand the sample voice to ``file_manager.copy_audio_for_tts``.
      2. POST to ``/v1/preprocess_and_tran`` on the voice service.
      3. POST to ``/v1/invoke`` on the voice service and write the response
         body to ``Config.RESOURCE_DIR`` as ``{uuid}output.wav``.
      4. Stage that audio in the uploads directory and call
         ``file_manager.copy_files_for_face2face``.
      5. POST the job to ``/easy/submit`` on the video service.

    Args:
        speech_text: Text to be spoken.
        sample_video: Sample video file name (in the upload directory).
        sample_voice: Sample voice file name (in the upload directory).
        uuid: Unique identifier for this generation request.

    Returns:
        Whatever ``_make_request`` returns for the video submit request.

    Raises:
        APIException: If any step fails. Errors that are not already an
            ``APIException`` are wrapped with status code 500 and chained
            to the original exception.
    """
    logger.info(f"Generating digital human for UUID: {uuid}")

    # Path of the temporary staging copy in the uploads directory. It stays
    # None until the path is known, so cleanup never touches anything this
    # call did not create.
    upload_audio_path = None

    try:
        from file_upload import file_manager

        # Step 1: copy the audio file to the TTS service directory for preprocessing.
        tts_audio_filename = file_manager.copy_audio_for_tts(sample_voice, f"dh_{uuid}")

        # Step 2: preprocess the audio and train the voice model.
        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        request_body_one = {
            "format": "wav",
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "lang": "zh"
        }

        response_one = _make_request(url, request_body_one)

        # The next step cannot run without the preprocessed audio reference.
        if 'asr_format_audio_url' not in response_one:
            logger.error(f"Voice preprocessing response missing asr_format_audio_url: {response_one}")
            raise APIException("Voice preprocessing failed: missing asr_format_audio_url in response", 500)

        asr_format_audio_url = response_one['asr_format_audio_url']
        reference_audio_text = response_one.get('reference_audio_text', '')

        # Step 3: synthesize the speech.
        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        request_body_two = {
            "speaker": uuid,
            "text": speech_text,
            "reference_audio": asr_format_audio_url,
            "reference_text": reference_audio_text,
            **Config.DEFAULT_VOICE_PARAMS
        }

        response = requests.post(url, json=request_body_two, timeout=60)
        logger.info(f"Voice generation response status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Voice generation failed: {response.text}")
            raise APIException(f"Voice generation failed: {response.text}", response.status_code)

        # Save the generated audio into the resource directory.
        generated_audio_filename = f"{uuid}output.wav"
        generated_audio_path = os.path.join(Config.RESOURCE_DIR, generated_audio_filename)
        with open(generated_audio_path, "wb") as f:
            f.write(response.content)
        logger.info(f"Generated audio saved: {generated_audio_path}")

        # Step 4: copy the video and the generated audio to the Face2Face
        # service directory. The generated audio is first copied from the
        # resource directory into the uploads directory (temporary copy).
        upload_audio_dir = "/mnt/docker/resource/uploads/audio"
        os.makedirs(upload_audio_dir, exist_ok=True)
        upload_audio_path = os.path.join(upload_audio_dir, generated_audio_filename)
        _copy_file(generated_audio_path, upload_audio_path)

        face2face_video, face2face_audio = file_manager.copy_files_for_face2face(
            sample_video, generated_audio_filename, uuid
        )

        # Step 5: submit the video generation task.
        url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
        request_body = {
            "audio_url": face2face_audio,
            "video_url": face2face_video,
            "code": str(int(time.time())),  # timestamp used as the task id
            "chaofen": 0,
            "watermark_switch": 0,
            "pn": 1
        }

        result = _make_request(url, request_body)
        logger.info(f"Digital human generation submitted successfully: {result}")

        return result

    except APIException as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise APIException(f"Failed to generate digital human: {str(e)}", 500) from e
    finally:
        # Remove the temporary staging copy on success and on failure alike,
        # so a failed copy or submit does not leave it behind. Cleanup is
        # best-effort: a failure here must not mask the real outcome.
        if upload_audio_path is not None:
            try:
                os.remove(upload_audio_path)
            except FileNotFoundError:
                # The copy never got created; nothing to clean up.
                pass
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary audio file {upload_audio_path}: {cleanup_error}")
|
||||
|
||||
|
||||
def generate_voice_v2(text: str, reference_audio: str, reference_text: str, uuid: str) -> str:
    """
    Generate speech audio (optimized version).

    Passes the reference audio to ``file_manager.copy_audio_for_tts``, POSTs
    a synthesis request to ``/v1/invoke`` on the voice service, and writes
    the response body to ``Config.RESOURCE_DIR`` as ``{uuid}output.wav``.

    Args:
        text: Text to convert to speech.
        reference_audio: Reference audio file name (in the upload directory).
        reference_text: Transcript of the reference audio.
        uuid: Unique identifier; used as the speaker id and as the output
            file name prefix.

    Returns:
        Path of the generated audio file (in the resource directory).

    Raises:
        APIException: If the voice service answers with a non-200 status
            (carrying that status code), or if any other error occurs
            (status code 500, chained to the original exception).
    """
    logger.info(f"Generating voice for UUID: {uuid}")

    try:
        from file_upload import file_manager

        # Copy the reference audio to the TTS service directory first.
        tts_audio_filename = file_manager.copy_audio_for_tts(reference_audio, uuid)

        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        request_body = {
            "speaker": uuid,
            "text": text,
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "reference_text": reference_text,
            **Config.DEFAULT_VOICE_PARAMS
        }

        response = requests.post(url, json=request_body, timeout=60)
        logger.info(f"Voice generation response status: {response.status_code}")

        # Guard clause: anything other than 200 is a failed synthesis.
        if response.status_code != 200:
            logger.error(f"Voice generation failed: {response.text}")
            raise APIException(f"Voice generation failed: {response.text}", response.status_code)

        # Save the generated audio into the resource directory.
        output_filename = f"{uuid}output.wav"
        output_path = os.path.join(Config.RESOURCE_DIR, output_filename)
        with open(output_path, "wb") as f:
            f.write(response.content)
        logger.info(f"Generated voice saved to: {output_path}")
        return output_path

    except APIException:
        # Already in the shape callers expect; propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Network error during voice generation: {str(e)}")
        # Chain the original exception so the root cause stays in the traceback.
        raise APIException(f"Voice generation error: {str(e)}", 500) from e
|
||||
|
||||
|
||||
def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
    """
    Train a voice model (optimized version).

    Passes the voice file to ``file_manager.copy_audio_for_tts`` under a
    timestamp-based temporary id, then POSTs to ``/v1/preprocess_and_tran``
    on the voice service.

    Args:
        voice_file_name: Voice file name (in the upload directory).

    Returns:
        Whatever ``_make_request`` returns for the preprocessing/training
        request.

    Raises:
        APIException: If training fails. Errors that are not already an
            ``APIException`` are wrapped with status code 500 and chained
            to the original exception.
    """
    logger.info(f"Training voice model with file: {voice_file_name}")

    try:
        from file_upload import file_manager

        # Copy the audio file to the TTS service directory.
        # NOTE(review): the id has one-second resolution, so two calls in the
        # same second get the same id -- confirm whether that can collide.
        temp_uuid = "train_" + str(int(time.time()))
        tts_audio_filename = file_manager.copy_audio_for_tts(voice_file_name, temp_uuid)

        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        request_body = {
            "format": "wav",
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "lang": "zh"
        }

        return _make_request(url, request_body)

    except APIException:
        # Already in the shape callers expect; propagate untouched.
        raise
    except Exception as e:
        logger.error(f"Voice training error: {str(e)}")
        # Chain the original exception so the root cause stays in the traceback.
        raise APIException(f"Voice training error: {str(e)}", 500) from e
|
||||
Reference in New Issue
Block a user