标题说明了我的问题。

我想做的很简单:

  • 加载MP3轨道(通过libmpg123)
  • 读取样本
  • 在样本上应用Kiss FFT

    到目前为止我尝试过的
    // Scales a value expressed in 16-bit sample units: negative values are
    // multiplied by 1/32768 and non-negative values by 1/32767, so the signed
    // 16-bit range [-32768, 32767] ends up in [-1, 1].
    inline float scale(kiss_fft_scalar val)
    {
        return val < 0 ? val*(1/32768.0f ) : val*(1/32767.0f);
    }
    
    void main()
    {
        mpg123_handle *m = NULL;
        int  channels = 0, encoding = 0;
        long rate = 0;
        int err = MPG123_OK;
    
        err = mpg123_init();
        m = mpg123_new(NULL, &err);
        mpg123_open(m, "L:\\audio-io\\audio-analysis\\samples\\zero.mp3");
        mpg123_getformat(m, &rate, &channels, &encoding);
    
        err = mpg123_format_none(m);
        err = mpg123_format(m, rate, channels, encoding);
    
        // Get 2048 samples
        const int TIME = 2048;
    
        // 16-bit integer encoded in bytes, hence x2 size
        unsigned char* buffer = new unsigned char[TIME*2];
        size_t done = 0;
        err = mpg123_read(m, buffer, TIME*2, &done);
    
        short* samples = new short[done/2];
        int index = 0;
    
        // Iterate 2 bytes at a time
        for (int i = 0; i < done; i += 2)
        {
            unsigned char first = buffer[i];
            unsigned char second = buffer[i + 1];
            samples[index++] = (first | (second << 8));
        }
    
        // Array to store the calculated data
        int speclen = TIME / 2 + 1;
        float* output = new float[speclen];
    
        kiss_fftr_cfg config;
        kiss_fft_cpx* spectrum;
    
        config = kiss_fftr_alloc(TIME, 0, NULL, NULL);
        spectrum = (kiss_fft_cpx*) malloc(sizeof(kiss_fft_cpx) * TIME);
    
        // Right here...
        kiss_fftr(config, (kiss_fft_scalar*) samples, spectrum);
    
        for (int i = 0; i < speclen; i++)
        {
            float re = scale(spectrum[i].r) * TIME;
            float im = scale(spectrum[i].i) * TIME;
    
            output[i] = sqrtf(re*re + im*im);
        }
    
        return;
    }
    

    问题出现在这一行:kiss_fftr(config, (kiss_fft_scalar*) samples, spectrum); 其中samples包含音频样本(16位),而spectrum用于保存输出数据。

    函数执行完成后,调试器窗口中显示的输出值为NaN(原帖此处附有调试器截图)。

    有人可以给我一个简单的示例,说明如何在音频(16位编码)样本上应用Kiss FFT函数吗?

    最佳答案

    您需要在代码中找到错误。我的测试代码似乎可以正常工作。

    使用浮点数的复数正向FFT:

    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>
    #include "kiss_fft.h"
    
    #ifndef M_PI
    #define M_PI 3.14159265358979324
    #endif
    
    #define N 16
    
    /* Prints `title`, runs a forward complex FFT of N points from `in` into
     * `out`, then prints every input value next to its output bin. Exits with
     * status -1 when the FFT configuration cannot be allocated. */
    void TestFft(const char* title, const kiss_fft_cpx in[N], kiss_fft_cpx out[N])
    {
      kiss_fft_cfg plan;
      size_t k;

      printf("%s\n", title);

      plan = kiss_fft_alloc(N, 0/*is_inverse_fft*/, NULL, NULL);
      if (plan == NULL)
      {
        printf("not enough memory?\n");
        exit(-1);
      }

      kiss_fft(plan, in, out);
      /* The configuration is only needed for the transform itself. */
      free(plan);

      for (k = 0; k < N; k++)
      {
        printf(" in[%2zu] = %+f , %+f    "
               "out[%2zu] = %+f , %+f\n",
               k, in[k].r, in[k].i,
               k, out[k].r, out[k].i);
      }
    }
    
    /* Feeds three complex test signals (all zeroes, all ones, and a sine wave
     * with 4 cycles over N samples) through TestFft. */
    int main(void)
    {
      kiss_fft_cpx in[N], out[N];
      size_t idx;

      /* Signal 1: silence. */
      for (idx = 0; idx < N; idx++)
      {
        in[idx].i = 0;
        in[idx].r = 0;
      }
      TestFft("Zeroes (complex)", in, out);

      /* Signal 2: constant (DC only). */
      for (idx = 0; idx < N; idx++)
      {
        in[idx].r = 1;
        in[idx].i = 0;
      }
      TestFft("Ones (complex)", in, out);

      /* Signal 3: purely real sine wave. */
      for (idx = 0; idx < N; idx++)
      {
        in[idx].r = sin(2 * M_PI * 4 * idx / N);
        in[idx].i = 0;
      }
      TestFft("SineWave (complex)", in, out);

      return 0;
    }
    

    输出:
    Zeroes (complex)
     in[ 0] = +0.000000 , +0.000000    out[ 0] = +0.000000 , +0.000000
     in[ 1] = +0.000000 , +0.000000    out[ 1] = +0.000000 , +0.000000
     in[ 2] = +0.000000 , +0.000000    out[ 2] = +0.000000 , +0.000000
     in[ 3] = +0.000000 , +0.000000    out[ 3] = +0.000000 , +0.000000
     in[ 4] = +0.000000 , +0.000000    out[ 4] = +0.000000 , +0.000000
     in[ 5] = +0.000000 , +0.000000    out[ 5] = +0.000000 , +0.000000
     in[ 6] = +0.000000 , +0.000000    out[ 6] = +0.000000 , +0.000000
     in[ 7] = +0.000000 , +0.000000    out[ 7] = +0.000000 , +0.000000
     in[ 8] = +0.000000 , +0.000000    out[ 8] = +0.000000 , +0.000000
     in[ 9] = +0.000000 , +0.000000    out[ 9] = +0.000000 , +0.000000
     in[10] = +0.000000 , +0.000000    out[10] = +0.000000 , +0.000000
     in[11] = +0.000000 , +0.000000    out[11] = +0.000000 , +0.000000
     in[12] = +0.000000 , +0.000000    out[12] = +0.000000 , +0.000000
     in[13] = +0.000000 , +0.000000    out[13] = +0.000000 , +0.000000
     in[14] = +0.000000 , +0.000000    out[14] = +0.000000 , +0.000000
     in[15] = +0.000000 , +0.000000    out[15] = +0.000000 , +0.000000
    Ones (complex)
     in[ 0] = +1.000000 , +0.000000    out[ 0] = +16.000000 , +0.000000
     in[ 1] = +1.000000 , +0.000000    out[ 1] = +0.000000 , +0.000000
     in[ 2] = +1.000000 , +0.000000    out[ 2] = +0.000000 , +0.000000
     in[ 3] = +1.000000 , +0.000000    out[ 3] = +0.000000 , +0.000000
     in[ 4] = +1.000000 , +0.000000    out[ 4] = +0.000000 , +0.000000
     in[ 5] = +1.000000 , +0.000000    out[ 5] = +0.000000 , +0.000000
     in[ 6] = +1.000000 , +0.000000    out[ 6] = +0.000000 , +0.000000
     in[ 7] = +1.000000 , +0.000000    out[ 7] = +0.000000 , +0.000000
     in[ 8] = +1.000000 , +0.000000    out[ 8] = +0.000000 , +0.000000
     in[ 9] = +1.000000 , +0.000000    out[ 9] = +0.000000 , +0.000000
     in[10] = +1.000000 , +0.000000    out[10] = +0.000000 , +0.000000
     in[11] = +1.000000 , +0.000000    out[11] = +0.000000 , +0.000000
     in[12] = +1.000000 , +0.000000    out[12] = +0.000000 , +0.000000
     in[13] = +1.000000 , +0.000000    out[13] = +0.000000 , +0.000000
     in[14] = +1.000000 , +0.000000    out[14] = +0.000000 , +0.000000
     in[15] = +1.000000 , +0.000000    out[15] = +0.000000 , +0.000000
    SineWave (complex)
     in[ 0] = +0.000000 , +0.000000    out[ 0] = +0.000000 , +0.000000
     in[ 1] = +1.000000 , +0.000000    out[ 1] = +0.000000 , +0.000000
     in[ 2] = +0.000000 , +0.000000    out[ 2] = +0.000000 , +0.000000
     in[ 3] = -1.000000 , +0.000000    out[ 3] = +0.000000 , +0.000000
     in[ 4] = +0.000000 , +0.000000    out[ 4] = +0.000000 , -8.000000
     in[ 5] = +1.000000 , +0.000000    out[ 5] = +0.000000 , +0.000000
     in[ 6] = +0.000000 , +0.000000    out[ 6] = +0.000000 , +0.000000
     in[ 7] = -1.000000 , +0.000000    out[ 7] = +0.000000 , +0.000000
     in[ 8] = +0.000000 , +0.000000    out[ 8] = +0.000000 , +0.000000
     in[ 9] = +1.000000 , +0.000000    out[ 9] = +0.000000 , +0.000000
     in[10] = +0.000000 , +0.000000    out[10] = +0.000000 , +0.000000
     in[11] = -1.000000 , +0.000000    out[11] = +0.000000 , +0.000000
     in[12] = +0.000000 , +0.000000    out[12] = +0.000000 , +8.000000
     in[13] = +1.000000 , +0.000000    out[13] = +0.000000 , +0.000000
     in[14] = +0.000000 , +0.000000    out[14] = +0.000000 , +0.000000
     in[15] = -1.000000 , +0.000000    out[15] = +0.000000 , +0.000000
    

    使用浮点数的实数正向FFT:
    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>
    #include "kiss_fftr.h"
    
    #ifndef M_PI
    #define M_PI 3.14159265358979324
    #endif
    
    #define N 16
    
    /* Prints `title`, runs a forward real-input FFT of N samples from `in` into
     * `out`, then prints one line per input sample. Only the first N/2 + 1
     * lines also show an output bin, because that is how many bins `out`
     * holds. Exits with status -1 when the FFT configuration cannot be
     * allocated. */
    void TestFftReal(const char* title, const kiss_fft_scalar in[N], kiss_fft_cpx out[N / 2 + 1])
    {
      kiss_fftr_cfg plan;
      size_t k;

      printf("%s\n", title);

      plan = kiss_fftr_alloc(N, 0/*is_inverse_fft*/, NULL, NULL);
      if (plan == NULL)
      {
        printf("not enough memory?\n");
        exit(-1);
      }

      kiss_fftr(plan, in, out);
      /* The configuration is only needed for the transform itself. */
      free(plan);

      for (k = 0; k < N; k++)
      {
        printf(" in[%2zu] = %+f    ",
               k, in[k]);
        if (k <= N / 2)
        {
          printf("out[%2zu] = %+f , %+f",
                 k, out[k].r, out[k].i);
        }
        printf("\n");
      }
    }
    
    /* Feeds three real test signals (all zeroes, all ones, and a sine wave
     * with 4 cycles over N samples) through TestFftReal. */
    int main(void)
    {
      kiss_fft_scalar in[N];
      kiss_fft_cpx out[N / 2 + 1];
      size_t idx;

      /* Signal 1: silence. */
      for (idx = 0; idx < N; idx++)
      {
        in[idx] = 0;
      }
      TestFftReal("Zeroes (real)", in, out);

      /* Signal 2: constant (DC only). */
      for (idx = 0; idx < N; idx++)
      {
        in[idx] = 1;
      }
      TestFftReal("Ones (real)", in, out);

      /* Signal 3: sine wave. */
      for (idx = 0; idx < N; idx++)
      {
        in[idx] = sin(2 * M_PI * 4 * idx / N);
      }
      TestFftReal("SineWave (real)", in, out);

      return 0;
    }
    

    输出:
    Zeroes (real)
     in[ 0] = +0.000000    out[ 0] = +0.000000 , +0.000000
     in[ 1] = +0.000000    out[ 1] = +0.000000 , +0.000000
     in[ 2] = +0.000000    out[ 2] = +0.000000 , +0.000000
     in[ 3] = +0.000000    out[ 3] = +0.000000 , +0.000000
     in[ 4] = +0.000000    out[ 4] = +0.000000 , +0.000000
     in[ 5] = +0.000000    out[ 5] = +0.000000 , +0.000000
     in[ 6] = +0.000000    out[ 6] = +0.000000 , +0.000000
     in[ 7] = +0.000000    out[ 7] = +0.000000 , +0.000000
     in[ 8] = +0.000000    out[ 8] = +0.000000 , +0.000000
     in[ 9] = +0.000000
     in[10] = +0.000000
     in[11] = +0.000000
     in[12] = +0.000000
     in[13] = +0.000000
     in[14] = +0.000000
     in[15] = +0.000000
    Ones (real)
     in[ 0] = +1.000000    out[ 0] = +16.000000 , +0.000000
     in[ 1] = +1.000000    out[ 1] = +0.000000 , +0.000000
     in[ 2] = +1.000000    out[ 2] = +0.000000 , +0.000000
     in[ 3] = +1.000000    out[ 3] = +0.000000 , +0.000000
     in[ 4] = +1.000000    out[ 4] = +0.000000 , +0.000000
     in[ 5] = +1.000000    out[ 5] = +0.000000 , +0.000000
     in[ 6] = +1.000000    out[ 6] = +0.000000 , +0.000000
     in[ 7] = +1.000000    out[ 7] = +0.000000 , +0.000000
     in[ 8] = +1.000000    out[ 8] = +0.000000 , +0.000000
     in[ 9] = +1.000000
     in[10] = +1.000000
     in[11] = +1.000000
     in[12] = +1.000000
     in[13] = +1.000000
     in[14] = +1.000000
     in[15] = +1.000000
    SineWave (real)
     in[ 0] = +0.000000    out[ 0] = +0.000000 , +0.000000
     in[ 1] = +1.000000    out[ 1] = +0.000000 , +0.000000
     in[ 2] = +0.000000    out[ 2] = +0.000000 , +0.000000
     in[ 3] = -1.000000    out[ 3] = +0.000000 , +0.000000
     in[ 4] = +0.000000    out[ 4] = +0.000000 , -8.000000
     in[ 5] = +1.000000    out[ 5] = +0.000000 , +0.000000
     in[ 6] = +0.000000    out[ 6] = +0.000000 , +0.000000
     in[ 7] = -1.000000    out[ 7] = +0.000000 , +0.000000
     in[ 8] = +0.000000    out[ 8] = +0.000000 , +0.000000
     in[ 9] = +1.000000
     in[10] = +0.000000
     in[11] = -1.000000
     in[12] = +0.000000
     in[13] = +1.000000
     in[14] = +0.000000
     in[15] = -1.000000
    

    关于c++ - 在音频样本上应用Kiss FFT并获得NaN输出?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/14536950/

    10-15 22:44