python - 为什么“conv1d”在 C 代码、Python 和 PyTorch 中的计算结果不同

我想用C代码重现pytorch的“ Conv1D”结果。

我尝试用三种方法（C 代码、Python、PyTorch）实现“Conv1D”，但结果并不相同：三者只在小数点后大约七位以内一致。如果网络结构中有多层 conv1d，小数位的精度还会逐层下降。

根据大家的建议，我试图将输入数据的C代码类型更改为double，但是结果仍然不正确。
我做错什么了吗？

例如：

Pytorch的输出：0.2380688339471817017

Python的输出：0.2380688637495040894

C代码的输出（单精度 float）：0.2380688637

C代码输出（双精度）：0.238068885344539680

这是我当前的实现

输入：

输入维度 = 80，输出维度 = 128，卷积核大小 = 5

Pytorch：Conv1D_input.npy，Conv1D_weight.npy

Python：Conv1D_input.npy，Conv1D_weight.npy（与Pytorch相同）

C代码：Conv1D_input.txt，Conv1D_weight.txt（从Pytorch转换，IEEE 754单精度）

PyTorch

import torch
import numpy as np
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import argparse
import sys
import io
import time
import os

class RNN(nn.Module):
    """Wrap a single bias-free ``nn.Conv1d`` whose kernel is loaded from disk.

    Despite the name, the module contains no recurrent layer: it only holds
    one 1-D convolution with ``kernel_size=5``.
    """

    def __init__(self, input_size, hidden_size):
        """Create the convolution and overwrite its weight with the saved one.

        Args:
            input_size: Number of input channels of the convolution.
            hidden_size: Number of output channels of the convolution.

        The weight is read from ``CONV1D_WEIGHT.npy`` relative to the current
        working directory and replaces the randomly initialised kernel.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.c1 = nn.Conv1d(input_size, hidden_size, kernel_size=5, bias=False)
        saved_kernel = np.load("CONV1D_WEIGHT.npy")
        self.c1.weight = torch.nn.Parameter(torch.Tensor(saved_kernel))

    def forward(self, inputs):
        """Return the convolution of ``inputs`` with the loaded kernel."""
        return self.c1(inputs)

# Layer dimensions for this experiment.
# NOTE(review): kernel_size is not used below; RNN hard-codes kernel_size=5.
input_size, hidden_size, kernel_size = 80, 128, 5

rnn = RNN(input_size, hidden_size)

# NOTE(review): the pure-Python script in this document loads
# "CONV1D_INPUT.npy" while this one loads "CONV1D_IN.npy" -- confirm both
# refer to the same data before comparing outputs.
loaded_input = np.load("CONV1D_IN.npy")
inputs = torch.nn.Parameter(torch.Tensor(loaded_input))
print("inputs", inputs)

outputs = rnn(inputs)

# Only the first entry along dimension 0 is written to the text file.
first_item = outputs[0]
sub_np456 = first_item.cpu().detach().numpy()
np.savetxt("Pytorch_CONV1D_OUTPUT.txt", sub_np456)
print('outputs', outputs)

Python

import struct
import numpy as np

def conv1d_valid(signal, weight):
    """Return the un-padded, stride-1 1-D cross-correlation of *signal*.

    Every product is accumulated one at a time into a ``float32`` output
    element (input channel outer, kernel tap inner). The summation order is
    kept deliberately: changing it changes the low-order bits of the result.

    Args:
        signal: 2-D array indexed as ``signal[in_channel][position]``.
        weight: 3-D array indexed as
            ``weight[out_channel][in_channel][tap]``.

    Returns:
        ``float32`` array of shape
        ``(out_channels, positions - kernel_size + 1)``.

    Raises:
        ValueError: If the channel counts disagree or the signal is shorter
            than the kernel.
    """
    out_channels, in_channels, kernel_size = weight.shape
    if signal.shape[0] != in_channels:
        raise ValueError(
            f"signal has {signal.shape[0]} channels, weight expects {in_channels}"
        )
    out_len = signal.shape[1] - kernel_size + 1
    if out_len <= 0:
        raise ValueError(
            f"signal length {signal.shape[1]} is shorter than kernel size {kernel_size}"
        )

    feature_maps = np.zeros((out_channels, out_len), dtype=np.float32)
    for out_ch in range(out_channels):
        # Work on a float32 copy of the current filter.
        kernel = weight[out_ch].astype(np.float32)
        out_row = feature_maps[out_ch]
        for pos in range(out_len):
            for in_ch in range(in_channels):
                for tap in range(kernel_size):
                    # Storing back into the float32 array rounds after every
                    # addition; do not replace this with a wider accumulator.
                    out_row[pos] = (
                        out_row[pos] + signal[in_ch][pos + tap] * kernel[in_ch][tap]
                    )
    return feature_maps


if __name__ == "__main__":
    spectrum = np.load("CONV1D_INPUT.npy")
    weight = np.load("CONV1D_WEIGHT.npy")

    # Derive the layout from the weight instead of hard-coding 80 x 327.
    in_dim = weight.shape[1]
    spectrum_2d = spectrum.reshape(in_dim, -1)

    featureMaps = conv1d_valid(spectrum_2d, weight)

    np.savetxt("Python_CONV1D_OUTPUT.txt", featureMaps)

C代码（单精度 float）

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

/* Path of the text file with the convolution weights (passed to conv_1D by main). */
const char CONV1D_WEIGHT[] = "CONV1D_WEIGHT.txt";
/* Path of the text file with the input matrix (opened by main). */
const char CONV1D_INPUT[] = "CONV1D_INPUT.txt";

/*
 * Release a matrix that was allocated row by row.
 *
 * matrix: array of `row` row pointers; NULL is accepted and ignored.
 * row:    number of row pointers stored in `matrix`.
 */
void parameterFree(float **matrix, int row)
{
    int i = 0;

    /* Nothing to release; avoids dereferencing a NULL matrix below. */
    if(matrix == NULL)
        return;

    for(i=0; i<row; i++)
        free(matrix[i]);    /* free(NULL) is a no-op, so NULL rows are fine */
    free(matrix);
}

/*
 * Allocate a row x col matrix of float as an array of separately allocated
 * rows. The elements are NOT initialised.
 *
 * Returns the matrix, or NULL if any allocation fails. On failure a message
 * is printed and everything allocated so far is released, so the caller
 * never receives a partially built matrix.
 */
float** createMatrix_2D(int row, int col)
{
    int i = 0;
    float **matrix = (float**)malloc(sizeof(float*) * row);

    if(matrix == NULL)
    {
        printf("Matrix2D malloc failed\n");
        return NULL;
    }

    for(i=0; i<row; i++)
    {
        matrix[i] = (float*)malloc(sizeof(float) * col);
        if(matrix[i] == NULL)
        {
            printf("Matrix2D malloc failed\n");
            /* Roll back the rows that were already allocated. */
            while(i-- > 0)
                free(matrix[i]);
            free(matrix);
            return NULL;
        }
    }

    return matrix;
}

/*
 * Un-padded, stride-1 1-D convolution (cross-correlation) of `source` with
 * the filters stored in `weightFile`.
 *
 * weightFile:  text file of whitespace-separated hexadecimal tokens; each
 *              token is the bit pattern of one float. For every output
 *              channel the file holds in_dim * kernel_size tokens, ordered
 *              input channel first, kernel tap second.
 * source:      input matrix indexed source[in_channel][position]. This
 *              function takes ownership and frees it before returning.
 * row, col:    in: dimensions of `source`; out: dimensions of the result
 *              (res_out_dim x (col - kernel_size + 1)).
 * in_dim:      number of input channels.
 * res_out_dim: number of output channels (filters).
 * kernel_size: number of taps per filter.
 *
 * Returns the newly allocated result matrix. It is zero-filled first, so it
 * is fully defined even if the weight file cannot be opened or is truncated;
 * in that case a message is printed and the unprocessed rows stay zero.
 */
float** conv_1D(const char weightFile[], float **source, int *row, int *col, int in_dim, int res_out_dim, int kernel_size)
{
    const int out_len = *col-kernel_size+1;
    float **filter = createMatrix_2D(in_dim, kernel_size);
    float **featureMaps = createMatrix_2D(res_out_dim, out_len);
    int i = 0, j = 0, k = 0, count = 0;
    int ok = 1;
    char str[32];
    /* "%x" must store through an unsigned int*; the union then reinterprets
       those bits as a float (assumes both are 32 bits wide). */
    union { unsigned int bits; float value; } word;
    FILE *fp = fopen(weightFile, "r");

    /*initial featureMaps*/
    for(i=0; i<res_out_dim; i++)
    {
        for(j=0; j<out_len; j++)
        {
            featureMaps[i][j] = 0.0f;
        }
    }

    if(fp == NULL)
        printf("Resnet file open failed\n");
    else
    {
        /*next filter*/
        for(i=0; ok && i<res_out_dim; i++)
        {
            /*read filter*/
            for(j=0; ok && j<in_dim; j++)
            {
                for(k=0; k<kernel_size; k++)
                {
                    /* The field width keeps a long token from overflowing str. */
                    if(fscanf(fp, "%31s", str) != 1 || sscanf(str, "%x", &word.bits) != 1)
                    {
                        printf("Resnet file read failed\n");
                        ok = 0;
                        break;
                    }
                    filter[j][k] = word.value;
                }
            }
            if(!ok)
                break;

            /* (part of source * filter) */
            /* Accumulate directly into the output element, in this exact
               order, so the float rounding sequence stays unchanged. */
            for(count=0; count<out_len; count++)
            {
                for(j=0; j<in_dim; j++)
                {
                    for(k=count; k<kernel_size+count; k++)
                    {
                        featureMaps[i][count] += source[j][k]*filter[j][k-count];
                    }
                }
            }
        }
        fclose(fp);
    }
    parameterFree(source, *row);
    parameterFree(filter, in_dim);
    *row = res_out_dim;
    *col = out_len;

    return featureMaps;
}

int main()
{
    int row = 80;
    int col = 327;
    int in_dim = 80;
    int res_out_dim = 128;
    int kernel_size = 5;
    int i, j;
    float data;
    char str[10];

    float **input = createMatrix_2D(row, col);
    FILE *fp = fopen(CONV1D_INPUT, "r");
    FILE *fp2 = fopen("C code_CONV1D_OUTPUT.txt", "w");
    if(fp == NULL)
        printf("File open failed\n");
    else
    {
        for(i=0; i<row; i++)
        {
            for(j=0; j<col; j++)
            {
                fscanf(fp, "%s", str);
                sscanf(str, "%x", &data);
                input[i][j] = data;
            }
        }
    }

    float **CONV1D_ANS = conv_1D(CONV1D_WEIGHT, input, &row, &col, in_dim, res_out_dim, kernel_size);

    for(i=0; i<row; i++)
    {
        for(j=0; j<col; j++)
        {
            fprintf(fp2, "[%.12f] ", CONV1D_ANS[i][j]);
        }
        fprintf(fp2, "\n");
    }

    return 0;
}

C代码（双精度）

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

/* Path of the text file with the convolution weights (passed to conv_1D by main). */
const char CONV1D_WEIGHT[] = "CONV1D_WEIGHT.txt";
/* Path of the text file with the input matrix (opened by main). */
const char CONV1D_INPUT[] = "CONV1D_INPUT.txt";

/*
 * Release a matrix that was allocated row by row.
 *
 * matrix: array of `row` row pointers; NULL is accepted and ignored.
 * row:    number of row pointers stored in `matrix`.
 */
void parameterFree(double **matrix, int row)
{
    int i = 0;

    /* Nothing to release; avoids dereferencing a NULL matrix below. */
    if(matrix == NULL)
        return;

    for(i=0; i<row; i++)
        free(matrix[i]);    /* free(NULL) is a no-op, so NULL rows are fine */
    free(matrix);
}

/*
 * Allocate a row x col matrix of double as an array of separately allocated
 * rows. The elements are NOT initialised.
 *
 * Returns the matrix, or NULL if any allocation fails. On failure a message
 * is printed and everything allocated so far is released, so the caller
 * never receives a partially built matrix.
 */
double** createMatrix_2D(int row, int col)
{
    int i = 0;
    double **matrix = (double**)malloc(sizeof(double*) * row);

    if(matrix == NULL)
    {
        printf("Matrix2D malloc failed\n");
        return NULL;
    }

    for(i=0; i<row; i++)
    {
        matrix[i] = (double*)malloc(sizeof(double) * col);
        if(matrix[i] == NULL)
        {
            printf("Matrix2D malloc failed\n");
            /* Roll back the rows that were already allocated. */
            while(i-- > 0)
                free(matrix[i]);
            free(matrix);
            return NULL;
        }
    }

    return matrix;
}

/*
 * Un-padded, stride-1 1-D convolution (cross-correlation) of `source` with
 * the filters stored in `weightFile`, accumulated in double precision.
 *
 * weightFile:  text file of whitespace-separated hexadecimal tokens; each
 *              token is the bit pattern of one single-precision float, which
 *              is widened to double after reading. For every output channel
 *              the file holds in_dim * kernel_size tokens, ordered input
 *              channel first, kernel tap second.
 * source:      input matrix indexed source[in_channel][position]. This
 *              function takes ownership and frees it before returning.
 * row, col:    in: dimensions of `source`; out: dimensions of the result
 *              (res_out_dim x (col - kernel_size + 1)).
 * in_dim:      number of input channels.
 * res_out_dim: number of output channels (filters).
 * kernel_size: number of taps per filter.
 *
 * Returns the newly allocated result matrix. It is zero-filled first, so it
 * is fully defined even if the weight file cannot be opened or is truncated;
 * in that case a message is printed and the unprocessed rows stay zero.
 */
double** conv_1D(const char weightFile[], double **source, int *row, int *col, int in_dim, int res_out_dim, int kernel_size)
{
    const int out_len = *col-kernel_size+1;
    double **filter = createMatrix_2D(in_dim, kernel_size);
    double **featureMaps = createMatrix_2D(res_out_dim, out_len);
    int i = 0, j = 0, k = 0, count = 0;
    int ok = 1;
    char str[32];
    /* "%x" must store through an unsigned int*; the union then reinterprets
       those bits as a float (assumes both are 32 bits wide). */
    union { unsigned int bits; float value; } word;
    FILE *fp = fopen(weightFile, "r");

    /*initial featureMaps*/
    for(i=0; i<res_out_dim; i++)
    {
        for(j=0; j<out_len; j++)
        {
            featureMaps[i][j] = 0.0;
        }
    }

    if(fp == NULL)
        printf("Resnet file open failed\n");
    else
    {
        /*next filter*/
        for(i=0; ok && i<res_out_dim; i++)
        {
            /*read filter*/
            for(j=0; ok && j<in_dim; j++)
            {
                for(k=0; k<kernel_size; k++)
                {
                    /* The field width keeps a long token from overflowing str. */
                    if(fscanf(fp, "%31s", str) != 1 || sscanf(str, "%x", &word.bits) != 1)
                    {
                        printf("Resnet file read failed\n");
                        ok = 0;
                        break;
                    }
                    filter[j][k] = (double)word.value;
                }
            }
            if(!ok)
                break;

            /* (part of source * filter) */
            /* Accumulate directly into the output element, in this exact
               order, so the rounding sequence stays unchanged. */
            for(count=0; count<out_len; count++)
            {
                for(j=0; j<in_dim; j++)
                {
                    for(k=count; k<kernel_size+count; k++)
                    {
                        featureMaps[i][count] += source[j][k]*filter[j][k-count];
                    }
                }
            }
        }
        fclose(fp);
    }
    parameterFree(source, *row);
    parameterFree(filter, in_dim);
    *row = res_out_dim;
    *col = out_len;

    return featureMaps;
}

int main()
{
    int row = 80;
    int col = 327;
    int in_dim = 80;
    int res_out_dim = 128;
    int kernel_size = 5;
    int i, j;
    float data;
    char str[10];

    double **input = createMatrix_2D(row, col);
    FILE *fp = fopen(CONV1D_INPUT, "r");
    FILE *fp2 = fopen("C code_CONV1D_OUTPUT.txt", "w");
    if(fp == NULL)
        printf("File open failed\n");
    else
    {
        for(i=0; i<row; i++)
        {
            for(j=0; j<col; j++)
            {
                fscanf(fp, "%s", str);
                sscanf(str, "%x", &data);
                input[i][j] = (double)data;
            }
        }
    }

    double **CONV1D_ANS = conv_1D(CONV1D_WEIGHT, input, &row, &col, in_dim, res_out_dim, kernel_size);

    for(i=0; i<row; i++)
    {
        for(j=0; j<col; j++)
        {
            fprintf(fp2, "[%.18f] ", CONV1D_ANS[i][j]);
        }
        fprintf(fp2, "\n");
    }

    return 0;
}

最佳答案

浮点数本身就是不精确的（设计如此）。运算的执行顺序不同，结果就可能不同。更糟的是，同一个解析表达式的某些计算形式在数值上是不稳定的，而另一些等价的形式却是稳定的。另外，单精度浮点数只有大约 7 位有效十进制数字，所以三种实现从第 7～8 位开始出现差异是正常现象。

编译器通常会为了优化而重新排列语句，而卷积恰恰是一种包含大量运算和循环的操作。因此，除非直接比较实际执行的字节码（机器码），否则去猜测差异的具体来源是没有意义的。