花了一天时间,直接上代码
语音识别
# Author: Alex.Zhang
# Speech recognition script: fetch an OAuth access token from Baidu, then POST
# a base64-encoded WAV file to the recognition endpoint and print the result.
import os
import requests
import json
import base64

# Basic configuration for the token request.
# NOTE(review): the API key and secret key are hard-coded in source; consider
# loading them from the environment or a config file instead.
baidu_server = 'https://aip.baidubce.com/oauth/2.0/token?'
grant_type = 'client_credentials'
client_id = 'umuduD7RyyO7OIsAGWHyuZeG'  # API KEY
client_secret = 'ay0ih0NhwAInGCgIdpmbvSG9nbl0KEw3'  # Secret KEY

# Assemble the URL used to request the token.
url = (baidu_server + 'grant_type=' + grant_type
       + '&client_id=' + client_id
       + '&client_secret=' + client_secret)

# Fetch the token.
res = requests.get(url).text
data = json.loads(res)
token = data['access_token']

# Audio properties: sample rate, file name, format, etc.
VOICE_RATE = 16000
FILE_NAME = '666.wav'
USER_ID = 'Xu.zh'  # any identifier works here; the author used a nickname
FILE_TYPE = 'wav'

# Read the file's binary content and base64-encode it. The `with` block makes
# sure the file handle is closed once the content has been read.
with open(FILE_NAME, 'rb') as f_obj:
    content = base64.b64encode(f_obj.read())
speech = str(content, 'utf8')
# Size in bytes of the file on disk (the raw audio, not the base64 text).
size = os.path.getsize(FILE_NAME)

# Wrap the request parameters as a JSON string.
datas = json.dumps({
    'format': FILE_TYPE,
    'rate': VOICE_RATE,
    'channel': 1,
    'cuid': USER_ID,
    'token': token,
    'speech': speech,
    'len': size,
})

# Request headers and endpoint URL.
headers = {'Content-Type': 'application/json'}
url = 'https://vop.baidu.com/server_api'

# Send the data with POST. `headers` must be passed by keyword: the third
# positional parameter of requests.post is `json`, not `headers`, so passing
# it positionally would leave the Content-Type header unset.
request = requests.post(url, data=datas, headers=headers)
result = json.loads(request.text)

# Use .get() so a response without a 'result' key falls through to the error
# branch below instead of raising KeyError.
text = result.get('result')
if result.get('err_no') == 0:
    print(text)
else:
    print('返回错误!')
文字转语音
# Text-to-speech script: synthesize four lines of dialogue with the Baidu
# AipSpeech client and write each one to its own numbered .mp3 file.
from aip import AipSpeech

# Your APPID, API key (AK) and secret key (SK).
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# (text, value of the 'per' option) for each line, in output order.
# The 'per' value alternates between 3 and 4 from one line to the next.
_LINES = [
    ('你瞅啥?', 3),
    ('瞅你咋地。', 4),
    ('再瞅一个试试!', 3),
    ('试试就试试。', 4),
]

for i, (content, per) in enumerate(_LINES):
    result = client.synthesis(
        content, 'zh', 1, {'spd': 0, 'vol': 5, 'per': per})

    # On success the call returns the audio as binary data; on failure it
    # returns a dict describing the error.
    filename = str(i)
    if not isinstance(result, dict):
        # NOTE(review): the path prefix below looks like placeholder text
        # ("file save path") — replace it with a real directory before use.
        with open('文件的保存路径' + filename + '.mp3', 'wb') as f:
            f.write(result)
    else:
        # Report the error dict instead of silently skipping the line.
        print('语音合成失败:', result)