diff --git a/src/backends/cuda/layer_ops.cu b/src/backends/cuda/layer_ops.cu index 69eb374..f8c760a 100644 --- a/src/backends/cuda/layer_ops.cu +++ b/src/backends/cuda/layer_ops.cu @@ -98,7 +98,7 @@ CUDANet::Tensor& CUDA::conv2d( dim3 grid( (out_shape[0] + block.x - 1) / block.x, (out_shape[1] + block.y - 1) / block.y, - (out_shape[3] + block.z - 1) / block.z + (out_shape[2] + block.z - 1) / block.z ); Kernels::convolution<<>>( @@ -212,6 +212,7 @@ CUDANet::Tensor& CUDA::batch_norm( CUDA_CHECK(cudaGetLastError()); } CUDA_CHECK(cudaDeviceSynchronize()); + return output; } CUDANet::Tensor& CUDA::concat( diff --git a/src/layers/add.cpp b/src/layers/add.cpp index 2abbad4..72206e0 100644 --- a/src/layers/add.cpp +++ b/src/layers/add.cpp @@ -19,4 +19,10 @@ Add::~Add() {} CUDANet::Tensor& Add::forward(CUDANet::Tensor& input_a, CUDANet::Tensor& input_b) { output.zero(); + backend->add( + input_a, + input_b, + output + ); + return output; } diff --git a/src/layers/avg_pooling.cpp b/src/layers/avg_pooling.cpp index 4570331..7ecb032 100644 --- a/src/layers/avg_pooling.cpp +++ b/src/layers/avg_pooling.cpp @@ -76,7 +76,7 @@ size_t AvgPool2d::input_size() { } size_t AvgPool2d::output_size() { - return sizeof(float) * out_shape[0] * out_shape[1] * out_shape[3]; + return sizeof(float) * out_shape[0] * out_shape[1] * out_shape[2]; } void AvgPool2d::set_weights(void* input) {} diff --git a/src/layers/conv2d.cpp b/src/layers/conv2d.cpp index e8a4b3f..58a4deb 100644 --- a/src/layers/conv2d.cpp +++ b/src/layers/conv2d.cpp @@ -47,7 +47,7 @@ Conv2d::Conv2d( }; output = CUDANet::Tensor( - Shape{out_shape[0], out_shape[1], out_shape[3]}, + Shape{out_shape[0], out_shape[1], out_shape[2]}, CUDANet::DType::FLOAT32, backend ); diff --git a/src/layers/dense.cpp b/src/layers/dense.cpp index b7f6c77..240b6bb 100644 --- a/src/layers/dense.cpp +++ b/src/layers/dense.cpp @@ -30,6 +30,7 @@ Dense::Dense(CUDANet::Shape in_shape, CUDANet::Shape out_shape, CUDANet::Backend Dense::~Dense() {} CUDANet::Tensor& Dense::forward(CUDANet::Tensor& input) { + output.zero(); backend->dense(weights, biases, input, output, in_shape[0], out_shape[0]); return output; } diff --git a/src/layers/max_pool.cpp b/src/layers/max_pool.cpp index d9ca6f6..0a4a61e 100644 --- a/src/layers/max_pool.cpp +++ b/src/layers/max_pool.cpp @@ -41,7 +41,7 @@ MaxPool2d::MaxPool2d( }; output = CUDANet::Tensor( - Shape{out_shape[0] * out_shape[1] * out_shape[3]}, + Shape{out_shape[0] * out_shape[1] * out_shape[2]}, CUDANet::DType::FLOAT32, backend ); }