WIP Migrate Dense layer

This commit is contained in:
2025-11-18 19:33:51 +01:00
parent 24606491a3
commit 64eac7050b
8 changed files with 90 additions and 158 deletions

View File

@@ -17,7 +17,7 @@ public:
// Tensor ops // Tensor ops
virtual void print(const CUDANet::Tensor &input) = 0; virtual void print(const CUDANet::Tensor &input) = 0;
virtual void clear(CUDANet::Tensor &input) = 0; virtual void zero(CUDANet::Tensor &input) = 0;
virtual void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) = 0; virtual void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) = 0;
virtual void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) = 0; virtual void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) = 0;

View File

@@ -13,7 +13,7 @@ public:
// Tensor ops // Tensor ops
void print(const CUDANet::Tensor &input) override; void print(const CUDANet::Tensor &input) override;
void clear(CUDANet::Tensor &input) override; void zero(CUDANet::Tensor &input) override;
void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override; void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override; void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;

View File

@@ -13,7 +13,7 @@ public:
// Tensor ops // Tensor ops
void print(const CUDANet::Tensor &input) override; void print(const CUDANet::Tensor &input) override;
void clear(CUDANet::Tensor &input) override; void zero(CUDANet::Tensor &input) override;
void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override; void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override; void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;

View File

@@ -1,9 +1,8 @@
#ifndef CUDANET_DENSE_LAYER_H #pragma once
#define CUDANET_DENSE_LAYER_H
#include <vector> #include <vector>
#include "activation.hpp" #include "backend.hpp"
#include "layer.hpp" #include "layer.hpp"
namespace CUDANet::Layers { namespace CUDANet::Layers {
@@ -12,121 +11,67 @@ namespace CUDANet::Layers {
* @brief Dense (fully connected) layer * @brief Dense (fully connected) layer
* *
*/ */
class Dense : public WeightedLayer { class Dense : public Layer {
public: public:
/**
* @brief Construct a new Dense layer
*
* @param inputSize Size of the input vector
* @param outputSize Size of the output vector
* @param activationType Activation function type ('RELU', 'SIGMOID',
* 'SOFTMAX' or 'NONE')
*/
Dense(int inputSize, int outputSize, Layers::ActivationType activationType);
/** Dense(CUDANet::Backend *backend, CUDANet::Shape input_shape, CUDANet::Shape output_shape);
* @brief Destroy the Dense layer
*
*/
~Dense(); ~Dense();
/** CUDANet::Tensor& forward(CUDANet::Tensor &input);
* @brief Forward pass of the dense layer
*
* @param d_input Device pointer to the input vector
* @return Device pointer to the output vector
*/
float* forward(const float* d_input);
/** CUDANet::Shape input_shape();
* @brief Set the weights of the layer
*
* @param weights Pointer to vector of weights
*/
void setWeights(const float* weights);
/** CUDANet::Shape output_shape();
* @brief Get the weights of the layer
*
* @return Vector of weights
*/
std::vector<float> getWeights();
/** size_t input_size();
* @brief Set the biases of the layer
*
* @param biases Pointer to vector of biases
*/
void setBiases(const float* biases);
/** size_t output_size();
* @brief Get the biases of the layer
*
* @return Vector of biases
*/
std::vector<float> getBiases();
/** void set_weights(CUDANet::Tensor &input);
* @brief Get output size
*
* @return int output size
*/
int getOutputSize();
/** CUDANet::Tensor& get_weights();
* @brief Get input size
* void set_biases(CUDANet::Tensor &input);
* @return int input size
*/ CUDANet::Tensor& get_biases();
int getInputSize();
private: private:
int inputSize; CUDANet::Backend *backend;
int outputSize;
std::vector<float> weights; CUDANet::Shape in_shape;
std::vector<float> biases; CUDANet::Shape out_shape;
Layers::Activation* activation; CUDANet::Tensor weights;
CUDANet::Tensor biases;
/**
* @brief Initialize the weights to zeros
*
*/
void initializeWeights();
/** void init_weights();
* @brief Initialize the biases to zeros void init_biases();
*
*/
void initializeBiases();
float* forwardCPU(const float* input); // #ifdef USE_CUDA
// float* d_output;
#ifdef USE_CUDA // float* d_weights;
float* d_output; // float* d_biases;
float* d_weights; // // Precompute kernel launch parameters
float* d_biases; // int forwardGridSize;
// int biasGridSize;
// Precompute kernel launch parameters // /**
int forwardGridSize; // * @brief Copy the weights and biases to the device
int biasGridSize; // *
// */
// void toCuda();
/** // void initCUDA();
* @brief Copy the weights and biases to the device // void delCUDA();
*
*/
void toCuda();
void initCUDA(); // float* forwardCUDA(const float* d_input);
void delCUDA(); // #endif
float* forwardCUDA(const float* d_input);
#endif
}; };
} // namespace CUDANet::Layers } // namespace CUDANet::Layers
#endif // CUDANET_DENSE_LAYER_H

View File

@@ -33,6 +33,11 @@ public:
template <typename T> template <typename T>
T* data(); T* data();
void zero();
template <typename T>
void set_data(T *data);
private: private:
Shape shape; Shape shape;
DType dtype; DType dtype;

View File

@@ -1,13 +1,13 @@
#include <iostream> #include <iostream>
#include "backend/backend.hpp" #include "backend.hpp"
#include "backend/cuda.cuh" #include "backend/cuda.cuh"
#include "utils/cuda_helper.cuh" #include "utils/cuda_helper.cuh"
#include "kernels/matmul.cuh" #include "kernels/matmul.cuh"
using namespace CUDANet::Backend; using namespace CUDANet::Backend;
void CUDA::print(const CUDANet::Backend::Tensor &input) { void CUDA::print(const CUDANet::Tensor &input) {
auto length = input.numel(); auto length = input.numel();
std::vector<float> h_vec(input.numel()); std::vector<float> h_vec(input.numel());
@@ -22,11 +22,11 @@ void CUDA::print(const CUDANet::Backend::Tensor &input) {
std::cout << std::endl; std::cout << std::endl;
} }
void CUDA::clear(CUDANet::Backend::Tensor &input) { void CUDA::zero(CUDANet::Tensor &input) {
CUDA_CHECK(cudaMemset(input.data<float>(), 0, sizeof(float) * input.numel())); CUDA_CHECK(cudaMemset(input.data<float>(), 0, sizeof(float) * input.numel()));
} }
void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) { void CUDA::sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) {
auto length = input.numel(); auto length = input.numel();
const int gridSize = ( + BLOCK_SIZE - 1) / BLOCK_SIZE; const int gridSize = ( + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -45,7 +45,7 @@ void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &
} }
} }
void CUDA::max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) { void CUDA::max(const CUDANet::Tensor &input, CUDANet::Tensor &max) {
auto length = input.numel(); auto length = input.numel();
const int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE; const int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;

View File

@@ -50,3 +50,7 @@ template <typename T>
T* Tensor::data() { T* Tensor::data() {
return static_cast<T*>(d_ptr); return static_cast<T*>(d_ptr);
} }
void Tensor::zero() {
backend->zero(*this);
}

View File

@@ -1,80 +1,58 @@
#include "dense.hpp" #include <format>
#include <stdexcept> #include <stdexcept>
#include "activation.hpp" #include "dense.hpp"
using namespace CUDANet::Layers; using namespace CUDANet::Layers;
Dense::Dense(int inputSize, int outputSize, ActivationType activationType) Dense::Dense(CUDANet::Backend *backend, CUDANet::Shape input_shape, CUDANet::Shape output_shape)
: inputSize(inputSize), outputSize(outputSize) { : backend(backend), in_shape(input_shape), out_shape(output_shape) {
// Allocate memory for weights and biases // Allocate memory for weights and biases
weights.resize(outputSize * inputSize);
biases.resize(outputSize);
initializeWeights(); if (input_shape.size() != 1) {
initializeBiases(); throw std::runtime_error(std::format("Invalid shape. Expected [1], got {}", input_shape));
}
if (output_shape.size() != 1) {
throw std::runtime_error(std::format("Invalid shape. Expected [1], got {}", output_shape));
}
activation = new Activation(activationType, outputSize); auto input_len = input_shape[0];
auto output_len = output_shape[0];
#ifdef USE_CUDA auto weights = CUDANet::Tensor{Shape(input_len * output_len), CUDANet::DType::FLOAT32, backend};
initCUDA(); auto biases = CUDANet::Tensor(Shape(output_len), CUDANet::DType::FLOAT32, backend);
#endif
weights.zero();
biases.zero();
} }
Dense::~Dense() { CUDANet::Tensor& Dense::forward(CUDANet::Tensor &input);
delete activation;
#ifdef USE_CUDA CUDANet::Shape Dense::input_shape() {
delCUDA(); return in_shape;
#endif
} }
void Dense::initializeWeights() { CUDANet::Shape Dense::output_shape() {
std::fill(weights.begin(), weights.end(), 0.0f); return out_shape;
} }
void Dense::initializeBiases() { size_t Dense::input_size() {
std::fill(biases.begin(), biases.end(), 0.0f); return in_shape[0];
} };
float* Dense::forwardCPU(const float* input) { size_t Dense::output_size() {
throw std::logic_error("Not implemented"); return out_shape[0];
} };
float* Dense::forward(const float* input) { void Dense::set_weights(CUDANet::Tensor &input);
#ifdef USE_CUDA
return forwardCUDA(input);
#else
return forwardCPU(input);
#endif
}
void Dense::setWeights(const float* weights_input) { CUDANet::Tensor& Dense::get_weights() {
std::copy(weights_input, weights_input + weights.size(), weights.begin());
#ifdef USE_CUDA
toCuda();
#endif
}
std::vector<float> Dense::getWeights() {
return weights; return weights;
} }
void Dense::setBiases(const float* biases_input) { void Dense::set_biases(CUDANet::Tensor &input);
std::copy(biases_input, biases_input + biases.size(), biases.begin());
#ifdef USE_CUDA
toCuda();
#endif
}
std::vector<float> Dense::getBiases() { CUDANet::Tensor& Dense::get_biases() {
return biases; return biases;
}
int Dense::getOutputSize() {
return outputSize;
}
int Dense::getInputSize() {
return inputSize;
} }