我想用C代码重现PyTorch的“Conv1D”结果。
我尝试用三种方式(C代码、Python、PyTorch)实现“Conv1D”,但结果各不相同,只有前七位小数是一致的。如果网络结构中有多层Conv1D,小数部分的精度会逐层下降。
根据大家的建议,我把C代码中输入数据的类型改成了double,但结果仍然不一致。
我做错什么了吗?
例如:
Pytorch的输出:0.2380688339471817017
Python的输出:0.2380688637495040894
C代码的输出(浮点):0.2380688637
C代码输出(双精度):0.238068885344539680
这是我当前的实现
输入:
输入维度(input dim)= 80,输出维度(output dim)= 128,卷积核大小(kernel size)= 5
Pytorch:Conv1D_input.npy,Conv1D_weight.npy
Python:Conv1D_input.npy,Conv1D_weight.npy(与Pytorch相同)
C代码:Conv1D_input.txt,Conv1D_weight.txt(从Pytorch转换,IEEE 754单精度)
PyTorch
import torch
import numpy as np
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
import argparse
import sys
import io
import time
import os
class RNN(nn.Module):
    """Wrap a single bias-free ``nn.Conv1d`` (kernel size 5) with weights read from disk.

    Despite its name, the module contains no recurrent layer: ``forward``
    only applies the convolution ``c1``.

    Args:
        input_size: Number of input channels of the convolution.
        hidden_size: Number of output channels of the convolution.

    Note:
        Constructing the module reads ``CONV1D_WEIGHT.npy`` from the current
        working directory; ``np.load`` raises if the file is missing.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.c1 = nn.Conv1d(input_size, hidden_size, kernel_size=5, bias=False)
        # Replace the randomly initialised kernel with the stored one. The
        # explicit .float() keeps the parameter in float32 whatever dtype the
        # .npy file was saved with.
        stored_weight = torch.from_numpy(np.load("CONV1D_WEIGHT.npy")).float()
        self.c1.weight = torch.nn.Parameter(stored_weight)

    def forward(self, inputs):
        """Return ``self.c1(inputs)``, the convolution output."""
        return self.c1(inputs)
# Layer geometry used for this experiment. kernel_size is recorded here for
# reference only; RNN is constructed from the two sizes.
input_size, hidden_size, kernel_size = 80, 128, 5

# Build the model before touching the input file, as the original script did.
rnn = RNN(input_size, hidden_size)

# Load the stored input and wrap it exactly as before (float tensor -> Parameter).
inputs = torch.nn.Parameter(torch.Tensor(np.load("CONV1D_IN.npy")))
print("inputs", inputs)

outputs = rnn(inputs)

# Only the slice at index 0 of the output's first dimension is written to disk.
sub_np456 = outputs[0].detach().cpu().numpy()
np.savetxt("Pytorch_CONV1D_OUTPUT.txt", sub_np456)
print('outputs', outputs)
Python
import struct
import numpy as np
def conv1d_reference(spectrum_2d, weight):
    """Compute a stride-1, unpadded 1-D cross-correlation with explicit loops.

    The accumulation order (input channel outermost, kernel tap innermost)
    and the float32 running sum are kept exactly as in the original script,
    because the purpose of this code is to compare rounding behaviour
    against other implementations.

    Args:
        spectrum_2d: 2-D array indexed as ``[input_channel][time]``.
        weight: 3-D array indexed as ``[output_channel][input_channel][tap]``.

    Returns:
        A float32 array indexed as ``[output_channel][time]`` with
        ``time_len - kernel_size + 1`` columns (zero columns when the input
        is shorter than the kernel).
    """
    out_dim, in_dim, kernel_size = weight.shape
    n_steps = spectrum_2d.shape[1] - kernel_size + 1
    feature_maps = np.zeros((out_dim, max(n_steps, 0)), dtype=np.float32)

    for i in range(out_dim):
        # The original copied each filter into a float32 buffer first.
        taps = weight[i].astype(np.float32)
        for t in range(n_steps):
            for j in range(in_dim):
                for k in range(kernel_size):
                    # Storing into the float32 array rounds after every
                    # addition, just like the original element-wise update.
                    feature_maps[i][t] = (
                        feature_maps[i][t] + spectrum_2d[j][t + k] * taps[j][k]
                    )
    return feature_maps


if __name__ == "__main__":
    spectrum = np.load("CONV1D_INPUT.npy")
    weight = np.load("CONV1D_WEIGHT.npy")
    # Derive the channel count from the weights instead of hard-coding 80 x 327.
    in_dim = weight.shape[1]
    spectrum_2d = spectrum.reshape(in_dim, -1)
    featureMaps = conv1d_reference(spectrum_2d, weight)
    np.savetxt("Python_CONV1D_OUTPUT.txt", featureMaps)
C代码(单精度浮点 float)
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
const char CONV1D_WEIGHT[] = "CONV1D_WEIGHT.txt";
const char CONV1D_INPUT[] = "CONV1D_INPUT.txt";
/*
 * Release a matrix built as a table of `row` separately allocated rows.
 *
 * matrix: table of row pointers; may be NULL, in which case nothing happens.
 * row:    number of row pointers stored in the table.
 */
void parameterFree(float **matrix, int row)
{
    int r;

    /* Guard: the loop below would otherwise dereference a NULL table. */
    if (matrix == NULL)
        return;

    for (r = 0; r < row; r++)
        free(matrix[r]); /* free(NULL) is a no-op, so NULL rows are fine */
    free(matrix);
}
/*
 * Allocate a row x col matrix of float as a table of row pointers.
 * The elements are NOT initialised.
 *
 * Returns the table, or NULL (after printing a message and releasing any
 * partial allocation) when a dimension is negative or malloc fails.
 */
float** createMatrix_2D(int row, int col)
{
    int r, done;
    float **matrix;

    if (row < 0 || col < 0)
    {
        printf("Matrix2D malloc failed\n");
        return NULL;
    }

    matrix = (float**)malloc(sizeof(float*) * (size_t)row);
    if (matrix == NULL)
    {
        printf("Matrix2D malloc failed\n");
        return NULL;
    }

    for (r = 0; r < row; r++)
    {
        matrix[r] = (float*)malloc(sizeof(float) * (size_t)col);
        if (matrix[r] == NULL)
        {
            printf("Matrix2D malloc failed\n");
            /* Undo the rows allocated so far; never hand back a half-built matrix. */
            for (done = 0; done < r; done++)
                free(matrix[done]);
            free(matrix);
            return NULL;
        }
    }
    return matrix;
}
/*
 * Read one whitespace-separated hexadecimal token and reinterpret its bits
 * as a float (the text files hold IEEE 754 single-precision bit patterns).
 * Returns 1 on success, 0 on end-of-file or a malformed token.
 */
static int readWeightValue(FILE *fp, float *value)
{
    char token[32];
    /* Assumes unsigned int and float have the same size, as the original
       "%x"-into-float code implicitly did. */
    union { unsigned int bits; float number; } pun;

    /* Width-limited read: the original unbounded "%s" into char[10]
       overflowed on a 10-character token such as "0x3f800000". */
    if (fscanf(fp, "%31s", token) != 1)
        return 0;
    if (sscanf(token, "%x", &pun.bits) != 1)
        return 0;
    *value = pun.number;
    return 1;
}

/*
 * Stride-1, unpadded 1-D convolution (no kernel flip) of `source` with the
 * filters stored in `weightFile`.
 *
 * weightFile:  text file of hex tokens, read in [out][in][tap] order.
 * source:      in_dim x *col input matrix; it is freed by this function.
 * row, col:    in: dimensions of `source`; out: dimensions of the result
 *              (res_out_dim x (*col - kernel_size + 1)).
 *
 * Returns the result matrix. If the weight file cannot be opened or read, a
 * message is printed and rows that were not computed stay zero. Returns NULL
 * (leaving *row and *col untouched) only if a work buffer cannot be allocated.
 */
float** conv_1D(const char weightFile[], float **source, int *row, int *col, int in_dim, int res_out_dim, int kernel_size)
{
    const int out_len = *col - kernel_size + 1;
    float **filter = createMatrix_2D(in_dim, kernel_size);
    float **featureMaps = createMatrix_2D(res_out_dim, out_len);
    int o, c, tap, t;
    int ok = 1;
    FILE *fp;

    if (filter == NULL || featureMaps == NULL)
    {
        if (filter != NULL)
            parameterFree(filter, in_dim);
        if (featureMaps != NULL)
            parameterFree(featureMaps, res_out_dim);
        if (source != NULL)
            parameterFree(source, *row);
        return NULL;
    }

    /* Zero the accumulators first so the result is well defined even when
       the weight file is unusable. */
    for (o = 0; o < res_out_dim; o++)
        for (t = 0; t < out_len; t++)
            featureMaps[o][t] = 0.0f;

    fp = fopen(weightFile, "r");
    if (fp == NULL)
        printf("Resnet file open failed\n");
    else
    {
        for (o = 0; o < res_out_dim; o++)
        {
            /* Load the filter of output channel `o`. */
            for (c = 0; ok && c < in_dim; c++)
                for (tap = 0; ok && tap < kernel_size; tap++)
                    ok = readWeightValue(fp, &filter[c][tap]);
            if (!ok)
            {
                printf("Resnet file read failed\n");
                break;
            }

            /* Slide the filter along the time axis. The summation order
               (channel outer, tap inner, accumulating straight into the
               output cell) matches the original so rounding is unchanged. */
            for (t = 0; t < out_len; t++)
                for (c = 0; c < in_dim; c++)
                    for (tap = 0; tap < kernel_size; tap++)
                        featureMaps[o][t] += source[c][t + tap] * filter[c][tap];
        }
        fclose(fp);
    }

    parameterFree(source, *row);
    parameterFree(filter, in_dim);
    *row = res_out_dim;
    *col = out_len;
    return featureMaps;
}
int main()
{
int row = 80;
int col = 327;
int in_dim = 80;
int res_out_dim = 128;
int kernel_size = 5;
int i, j;
float data;
char str[10];
float **input = createMatrix_2D(row, col);
FILE *fp = fopen(CONV1D_INPUT, "r");
FILE *fp2 = fopen("C code_CONV1D_OUTPUT.txt", "w");
if(fp == NULL)
printf("File open failed\n");
else
{
for(i=0; i<row; i++)
{
for(j=0; j<col; j++)
{
fscanf(fp, "%s", str);
sscanf(str, "%x", &data);
input[i][j] = data;
}
}
}
float **CONV1D_ANS = conv_1D(CONV1D_WEIGHT, input, &row, &col, in_dim, res_out_dim, kernel_size);
for(i=0; i<row; i++)
{
for(j=0; j<col; j++)
{
fprintf(fp2, "[%.12f] ", CONV1D_ANS[i][j]);
}
fprintf(fp2, "\n");
}
return 0;
}
C代码(双精度)
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
const char CONV1D_WEIGHT[] = "CONV1D_WEIGHT.txt";
const char CONV1D_INPUT[] = "CONV1D_INPUT.txt";
/*
 * Release a matrix built as a table of `row` separately allocated rows.
 *
 * matrix: table of row pointers; may be NULL, in which case nothing happens.
 * row:    number of row pointers stored in the table.
 */
void parameterFree(double **matrix, int row)
{
    int r;

    /* Guard: the loop below would otherwise dereference a NULL table. */
    if (matrix == NULL)
        return;

    for (r = 0; r < row; r++)
        free(matrix[r]); /* free(NULL) is a no-op, so NULL rows are fine */
    free(matrix);
}
/*
 * Allocate a row x col matrix of double as a table of row pointers.
 * The elements are NOT initialised.
 *
 * Returns the table, or NULL (after printing a message and releasing any
 * partial allocation) when a dimension is negative or malloc fails.
 */
double** createMatrix_2D(int row, int col)
{
    int r, done;
    double **matrix;

    if (row < 0 || col < 0)
    {
        printf("Matrix2D malloc failed\n");
        return NULL;
    }

    matrix = (double**)malloc(sizeof(double*) * (size_t)row);
    if (matrix == NULL)
    {
        printf("Matrix2D malloc failed\n");
        return NULL;
    }

    for (r = 0; r < row; r++)
    {
        matrix[r] = (double*)malloc(sizeof(double) * (size_t)col);
        if (matrix[r] == NULL)
        {
            printf("Matrix2D malloc failed\n");
            /* Undo the rows allocated so far; never hand back a half-built matrix. */
            for (done = 0; done < r; done++)
                free(matrix[done]);
            free(matrix);
            return NULL;
        }
    }
    return matrix;
}
/*
 * Read one whitespace-separated hexadecimal token, reinterpret its bits as a
 * float (the text files hold IEEE 754 single-precision bit patterns) and
 * widen it to double. Returns 1 on success, 0 on end-of-file or a malformed
 * token.
 */
static int readWeightValue(FILE *fp, double *value)
{
    char token[32];
    /* Assumes unsigned int and float have the same size, as the original
       "%x"-into-float code implicitly did. */
    union { unsigned int bits; float number; } pun;

    /* Width-limited read: the original unbounded "%s" into char[10]
       overflowed on a 10-character token such as "0x3f800000". */
    if (fscanf(fp, "%31s", token) != 1)
        return 0;
    if (sscanf(token, "%x", &pun.bits) != 1)
        return 0;
    *value = (double)pun.number;
    return 1;
}

/*
 * Stride-1, unpadded 1-D convolution (no kernel flip) of `source` with the
 * filters stored in `weightFile`, accumulated in double precision.
 *
 * weightFile:  text file of hex tokens, read in [out][in][tap] order.
 * source:      in_dim x *col input matrix; it is freed by this function.
 * row, col:    in: dimensions of `source`; out: dimensions of the result
 *              (res_out_dim x (*col - kernel_size + 1)).
 *
 * Returns the result matrix. If the weight file cannot be opened or read, a
 * message is printed and rows that were not computed stay zero. Returns NULL
 * (leaving *row and *col untouched) only if a work buffer cannot be allocated.
 */
double** conv_1D(const char weightFile[], double **source, int *row, int *col, int in_dim, int res_out_dim, int kernel_size)
{
    const int out_len = *col - kernel_size + 1;
    double **filter = createMatrix_2D(in_dim, kernel_size);
    double **featureMaps = createMatrix_2D(res_out_dim, out_len);
    int o, c, tap, t;
    int ok = 1;
    FILE *fp;

    if (filter == NULL || featureMaps == NULL)
    {
        if (filter != NULL)
            parameterFree(filter, in_dim);
        if (featureMaps != NULL)
            parameterFree(featureMaps, res_out_dim);
        if (source != NULL)
            parameterFree(source, *row);
        return NULL;
    }

    /* Zero the accumulators first so the result is well defined even when
       the weight file is unusable. */
    for (o = 0; o < res_out_dim; o++)
        for (t = 0; t < out_len; t++)
            featureMaps[o][t] = 0.0;

    fp = fopen(weightFile, "r");
    if (fp == NULL)
        printf("Resnet file open failed\n");
    else
    {
        for (o = 0; o < res_out_dim; o++)
        {
            /* Load the filter of output channel `o`. */
            for (c = 0; ok && c < in_dim; c++)
                for (tap = 0; ok && tap < kernel_size; tap++)
                    ok = readWeightValue(fp, &filter[c][tap]);
            if (!ok)
            {
                printf("Resnet file read failed\n");
                break;
            }

            /* Slide the filter along the time axis. The summation order
               (channel outer, tap inner, accumulating straight into the
               output cell) matches the original so rounding is unchanged. */
            for (t = 0; t < out_len; t++)
                for (c = 0; c < in_dim; c++)
                    for (tap = 0; tap < kernel_size; tap++)
                        featureMaps[o][t] += source[c][t + tap] * filter[c][tap];
        }
        fclose(fp);
    }

    parameterFree(source, *row);
    parameterFree(filter, in_dim);
    *row = res_out_dim;
    *col = out_len;
    return featureMaps;
}
int main()
{
int row = 80;
int col = 327;
int in_dim = 80;
int res_out_dim = 128;
int kernel_size = 5;
int i, j;
float data;
char str[10];
double **input = createMatrix_2D(row, col);
FILE *fp = fopen(CONV1D_INPUT, "r");
FILE *fp2 = fopen("C code_CONV1D_OUTPUT.txt", "w");
if(fp == NULL)
printf("File open failed\n");
else
{
for(i=0; i<row; i++)
{
for(j=0; j<col; j++)
{
fscanf(fp, "%s", str);
sscanf(str, "%x", &data);
input[i][j] = (double)data;
}
}
}
double **CONV1D_ANS = conv_1D(CONV1D_WEIGHT, input, &row, &col, in_dim, res_out_dim, kernel_size);
for(i=0; i<row; i++)
{
for(j=0; j<col; j++)
{
fprintf(fp2, "[%.18f] ", CONV1D_ANS[i][j]);
}
fprintf(fp2, "\n");
}
return 0;
}
最佳答案
浮点数(按其设计)本身就是不精确的。运算的执行顺序不同,结果就可能不同。更糟糕的是,同一个解析表达式的某些计算形式在数值上是不稳定的,而另一些等价的形式却是稳定的。
编译器通常会为了优化而重新排列语句。卷积恰恰是一种包含大量运算和循环的操作。因此,除非您直接比较实际执行的字节码,否则这类猜测毫无意义。