我们正在尝试进行声学训练,但无法创建带转录文本(transcript)的音频文件,应该如何创建?另外,我们使用了GetTranscript和AppendTranscript,但如果以READWRITE模式打开流,就无法从ISpStream获取ISpTranscript接口(interface),那么该如何创建带转录文本的wav文件?

// First run: bind a stream to the existing wav file (read-only), attach a
// transcript to it and read the transcript back.
// NOTE(review): none of the HRESULTs below is checked before the next call.
hr = SPBindToFile(L"e:\\file1.wav", SPFM_OPEN_READONLY,
    &cpStream);
hr = cpStream.QueryInterface(&cpTranscript);
// The QueryInterface above fails with E_NOINTERFACE (written "E_NONINTERFACE"
// by the asker) when the file is opened with SPFM_OPEN_READWRITE instead.
hr = cpTranscript->AppendTranscript(sCorrectText);
hr = cpTranscript->GetTranscript(&pwszTranscript);
// At this point pwszTranscript holds the correct transcript.

// Second run (next execution of the program): reading the same file again
// does not give the transcript back.

hr = SPBindToFile(L"e:\\file1.wav", SPFM_OPEN_READONLY,
    &cpStream);
hr = cpStream.QueryInterface(&cpTranscript);
// This QueryInterface fails with E_NOINTERFACE (written "E_NONINTERFACE" by the asker).

完成此操作后,我们需要将文件路径添加到注册表。我们通过以下代码执行此操作。
// Token of the recognizer's current recognition profile (filled in by GetRecoProfile below).
CComPtr<ISpObjectToken> cpObjToken;



// Locally declared folder constants.
// NOTE(review): CSIDL_LOCAL_APPDATA is declared but never used in this snippet.
ULONG                     CSIDL_LOCAL_APPDATA = 28;
ULONG                     CSIDL_FLAG_CREATE = 32768;
GUID guid0;        // receives a freshly generated GUID, passed as the first argument below
LPWSTR FileName2;  // receives the file path returned by GetStorageFileName

hr = cpRecognizerBase->GetRecoProfile(&cpObjToken);
hr = CoCreateGuid(&guid0);
// NOTE(review): only CSIDL_FLAG_CREATE is passed as the folder argument; presumably it
// should be combined with a folder id such as CSIDL_LOCAL_APPDATA - confirm against the
// ISpObjectToken::GetStorageFileName documentation.
hr = cpObjToken->GetStorageFileName(guid0, L"Test",   L"F:\\sample6.wav",CSIDL_FLAG_CREATE, &FileName2);
// Reported symptom: this code runs without error but the file is never added to the registry.

如能给予任何指点,将不胜感激。这个问题与此处提出的问题有关:Speech training files and registry locations

谢谢

最佳答案



如果ISpStream没有内容,就会出现E_NOINTERFACE(即问题中所写的E_NONINTERFACE)。例如,文件为空;或者调用实际上并未成功,却仍然返回了S_OK(出于某种原因它会这样做)。因此,通常我会先检查流中是否确实有内容。您可以通过检查其大小来做到这一点:

下面是一个例子。如果它的大小为0,或者是某个大得离谱的值,那么显然它没有返回正确的值。请注意,返回的大小是ULARGE_INTEGER类型。

STATSTG streamInfo;
cpStream->Stat(&streamInfo, STATFLAG_DEFAULT);
ULARGE_INTEGER streamSizeULI;
streamSizeULI = streamInfo.cbSize;

SPBindToFile仅适用于SPFM_OPEN_READONLY和SPFM_CREATE_ALWAYS,因此您只能使用其中之一。

至于如何保存追加的转录文本,似乎如果wav文件已经存在,就无法直接保存(至少我不知道该怎么做)。如果该文件尚不存在,则可以创建一个新的ISpStream,在通过语音合成或麦克风把音频数据写入它时(网上有很多示例),您可以追加一段转录文本,这段文本就会保留在文件中。我在下面给出一个例子。

将转录文本追加到新文件上:
void recordAndAppendTranscriptInOneOperation() {
HRESULT             hr = S_OK;
CComPtr <ISpVoice>      cpVoice;
CComPtr <ISpStream>     cpStream;
CComPtr<ISpTranscript>  cpTranscript;
CSpStreamFormat         cAudioFmt;

//Create    a   SAPI    Voice
hr  =   cpVoice.CoCreateInstance(CLSID_SpVoice);

char filePathOut[] = R"(C:\SAPI\SampleOutput\SP_Sample.wav)";

//Set   the audio   format
if(SUCCEEDED(hr))
{
    hr  =   cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
}

//Call  SPBindToFile,   a   SAPI    helper  method,     to  bind    the audio
if(SUCCEEDED(hr))
{
    hr = SPBindToFile(filePathOut, SPFM_CREATE_ALWAYS, &cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
}

//set   the output  to  cpStream    so  that    the output  audio   data    wil
if(SUCCEEDED(hr))
{
    hr = cpVoice->SetOutput(cpStream, TRUE);
}

//Speak the text    “hello  world”  synchronously
if(SUCCEEDED(hr))
{
    hr = cpVoice->Speak(L"Hello World", SPF_DEFAULT, NULL);
}

//close the stream
if(SUCCEEDED(hr))
{
    PWCHAR                      pwszTranscript;
    char NewTranscriptAsString[] = R"(This is a test)";
    LPCWSTR NewTranscript = charToLPSTRW(NewTranscriptAsString);

    hr = cpStream.QueryInterface(&cpTranscript);
    hr = cpTranscript->AppendTranscript(NULL);
    hr = cpTranscript->AppendTranscript(NewTranscript);
    hr = cpTranscript->GetTranscript(&pwszTranscript);

    hr  =   cpStream->Close();
}

//Release   the stream  and voice   object
cpStream.Release();
cpVoice.Release();
 }

Bill Hutchinson(下面链接的来源之一)提供了一些可用于训练识别器的代码,无需进行任何注册表编辑等操作。我已将其附在本文末尾。他编写了一个函数(TrainOneFile),通过内存流逐个文件地训练识别器。您可以把已有的WAV文件传给它:既可以是带转录文本的WAV,也可以是不带转录文本的WAV(此时在调用该函数时另行提供转录文本)。请看一看这段代码,它非常有用。

这是我找到的所有与SAPI相关资料的汇总,希望对其他想弄清楚这个问题的人有用。我也会尽快发布自己完整的SAPI训练解决方案:
  • How to use the function GetStorageFileName for adding training files to registry?
  • Acoustic training using SAPI 5.3 Speech API
  • Training sapi : Creating transcripted wav files and adding file paths to registry
  • https://groups.google.com/forum/#!topic/microsoft.public.speech_tech.sdk/fTq-PJrVd_Q
  • https://documentation.help/SAPI-5/documentation.pdf

  • 示例训练代码:

    由于比尔·哈钦森(Bill Hutchinson)的代码是网上为数不多的、演示如何使用SAPI进行训练的可靠示例之一,为防止它有朝一日被删除或丢失,我在下面附上他在Google网上论坛的帖子:
    #include "stdafx.h"
    #include "sphelper.h"
    #include <sapi.h>
    #include <string.h>
    //MAIN() is last function below
    // Waits on the recognition context's notification event and drains the queued
    // events. On the first SPEI_RECOGNITION event it stores the recognition result
    // in *ppResult, adds a reference for the caller (when the result is non-null)
    // and returns hr, which is still S_OK at that point.
    //
    //   pRecoCtxt  recognition context to wait on and pull events from
    //   ppResult   receives the recognition result; the caller must Release it
    //
    // NOTE(review): this listing is abridged - "case [OTHER EVENTS]" is a
    // placeholder for elided code, and the closing braces of the switch and of
    // the two while loops are missing, so the function does not compile as shown.
    inline HRESULT ReturnResult(ISpRecoContext * pRecoCtxt, ISpRecoResult
    ** ppResult)
    {
            HRESULT hr = S_OK;
            CSpEvent spEvent;
            // Wait (without timeout) until the context signals that events are pending.
            while (S_OK == pRecoCtxt->WaitForNotifyEvent(INFINITE))
            {
                    // Pull the pending events off the context one at a time.
                    while (S_OK == spEvent.GetFrom(pRecoCtxt))
                    {
                            switch (spEvent.eEventId)
                            {
                                    case SPEI_RECOGNITION:
                                            *ppResult = spEvent.RecoResult();
                                            if (*ppResult)
                                            {
                                                    // Extra reference so the caller owns the result.
                                                    (*ppResult)->AddRef();
                                            }
                                            return hr;
                                    // Placeholder from the original post: handling of other events was elided here.
                                    case [OTHER EVENTS]
                        spEvent.Clear();
            }
            return hr;
    }
    /**
     * Runs one wav file ("C:\\XX.wav") through the recognizer and commits the
     * known-correct text for it, so the engine can adapt to the speaker.
     *
     * @param cpRecoCtxt        recognition context used to wait for the result
     * @param cpRecognizerBase  recognizer whose input is set to the wav stream
     * @param cpGrammar         dictation grammar, activated for the recognition
     * @return the HRESULT of the first failing step, otherwise that of the last
     *         step executed. If no recognition result was produced, the
     *         correction step is skipped.
     *
     * The transcript calls show both variants from the original post: reading a
     * transcript already stored in the file, and appending sCorrectText for
     * files that have none.
     */
    inline HRESULT TrainOneFile(ISpRecoContext * cpRecoCtxt, ISpRecognizer * cpRecognizerBase, ISpRecoGrammar * cpGrammar)
    {
            CComPtr<ISpStream>      cpStream;
            CComPtr<ISpRecoResult>  cpResult;
            CComPtr<ISpTranscript>  cpTranscript;
            PWCHAR                  pwszTranscript = NULL;
            // Reference text for the recording; used for the transcript and the correction.
            LPCWSTR sCorrectText = L"Anyone who has spent time on a farm knows there is a rhythm to the year.";

            HRESULT hr = cpStream.CoCreateInstance(CLSID_SpStream);
            // Bind a stream to an existing wavefile
            if (SUCCEEDED(hr)) {
                    hr = cpStream->BindToFile(L"C:\\XX.wav",
                            SPFM_OPEN_READONLY,
                            NULL,
                            NULL,
                            SPFEI_ALL_EVENTS);
            }
            if (SUCCEEDED(hr)) {
                    hr = cpStream.QueryInterface(&cpTranscript);
            }
            if (SUCCEEDED(hr)) {
                    hr = cpTranscript->GetTranscript(&pwszTranscript);
                    // The buffer is owned by the caller; free it before the pointer is reused.
                    ::CoTaskMemFree(pwszTranscript);
                    pwszTranscript = NULL;
            }
            // Alternate path for sound files that don't have a transcript attached:
            // append the known-correct text, then read the transcript back.
            if (SUCCEEDED(hr)) {
                    hr = cpTranscript->AppendTranscript(sCorrectText);
            }
            if (SUCCEEDED(hr)) {
                    hr = cpTranscript->GetTranscript(&pwszTranscript);
                    ::CoTaskMemFree(pwszTranscript);
                    pwszTranscript = NULL;
            }
            if (SUCCEEDED(hr)) {
                    hr = cpRecognizerBase->SetInput(cpStream, TRUE);
            }
            CSpDynamicString dstrText;
            if (SUCCEEDED(hr)) {
                    hr = cpGrammar->SetDictationState(SPRS_ACTIVE);
            }
            if (SUCCEEDED(hr)) {
                    hr = ReturnResult(cpRecoCtxt, &cpResult);
            }
            if (SUCCEEDED(hr)) {
                    hr = cpGrammar->SetDictationState(SPRS_INACTIVE);
            }
            if (cpResult && SUCCEEDED(hr)) {
                    // Recognized text (kept in dstrText, handy for comparing runs in a debugger).
                    hr = cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
            }
            CComPtr<ISpRecoResult2> cpResult2;
            // Only query when there is a result: CComPtr::QueryInterface must not be
            // called on a null pointer.
            if (cpResult && SUCCEEDED(hr)) {
                    hr = cpResult.QueryInterface<ISpRecoResult2>(&cpResult2);
            }
            if (cpResult2 && SUCCEEDED(hr)) {
                    // CommitText should force adaptation of the models to the correct text
                    // (though it should be redundant with SetTrainingState()?).
                    hr = cpResult2->CommitText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, sCorrectText, SPCF_DEFINITE_CORRECTION);
            }
            // The CComPtr destructors release the stream, transcript and result objects.
            return hr;
    }
    
    int _tmain(int argc, _TCHAR* argv[])
    {
            HRESULT hr = S_OK;
            CComPtr<ISpRecognizer2> cpRecognizer;
            CComPtr<ISpRecoContext> cpRecoCtxt;
            CComPtr<ISpRecoGrammar> cpGrammar;
            CComPtr<ISpRecognizer> cpRecognizerBase;
            hr = ::CoInitialize(NULL);
                if (SUCCEEDED(hr)) {
                    hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
            }
            if (SUCCEEDED(hr)){
                    hr = cpRecognizer.QueryInterface<ISpRecognizer>(&cpRecognizerBase);
            }
            if (SUCCEEDED(hr)){
                    hr = cpRecognizerBase->CreateRecoContext(&cpRecoCtxt);
            }
            if (cpRecoCtxt){
                    hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar);
            }
            if (SUCCEEDED(hr)){
                    hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC);
            }
            if (SUCCEEDED(hr)){
                    hr = cpRecognizer->SetTrainingState(TRUE, TRUE);
            }
            if (SUCCEEDED(hr)){
                    hr = cpRecoCtxt->SetNotifyWin32Event();
            }
            if (SUCCEEDED(hr)){
                    hr = cpRecoCtxt->SetInterest(
                            SPFEI(SPEI_RECOGNITION)|
                            SPFEI(SPEI_HYPOTHESIS)|
                            SPFEI(SPEI_FALSE_RECOGNITION),
                            SPFEI(SPEI_RECOGNITION)|
                            SPFEI(SPEI_HYPOTHESIS)|
                            SPFEI(SPEI_FALSE_RECOGNITION));
            }
            if (SUCCEEDED(hr)){
                    hr = TrainOneFile(cpRecoCtxt, cpRecognizerBase, cpGrammar);
            }
            if (SUCCEEDED(hr)){//RERUN TO CHECK FOR IMPROVEMENT
                    hr = TrainOneFile(cpRecoCtxt, cpRecognizerBase, cpGrammar);
            }
            cpRecognizer->SetTrainingState(FALSE, TRUE);//should turn off and
    save changes
            ::CoUninitialize();
            return 0;
    }
    

    10-04 23:39