Mirror of https://github.com/lordmathis/CUDANet.git (synced 2025-12-22 14:24:22 +00:00)
WIP Migrate Dense layer
@@ -17,7 +17,7 @@ public:
     // Tensor ops
     virtual void print(const CUDANet::Tensor &input) = 0;
-    virtual void clear(CUDANet::Tensor &input) = 0;
+    virtual void zero(CUDANet::Tensor &input) = 0;
     virtual void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) = 0;
     virtual void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) = 0;

@@ -13,7 +13,7 @@ public:
     // Tensor ops
     void print(const CUDANet::Tensor &input) override;
-    void clear(CUDANet::Tensor &input) override;
+    void zero(CUDANet::Tensor &input) override;
     void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
     void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;

@@ -13,7 +13,7 @@ public:
     // Tensor ops
     void print(const CUDANet::Tensor &input) override;
-    void clear(CUDANet::Tensor &input) override;
+    void zero(CUDANet::Tensor &input) override;
    void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
     void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;

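The clear → zero rename in the abstract Backend interface has to be mirrored in each concrete backend's overrides, which is what the two hunks above do. As a rough illustration only (the class name CPU and host-resident storage are assumptions, not taken from this commit), a host-side implementation of the new zero could be as small as:

#include <algorithm>

// Sketch under assumptions: a CPU backend whose Tensor::data<float>() returns
// a host pointer. Not part of this commit.
void CPU::zero(CUDANet::Tensor &input) {
    // Overwrite every element with 0.0f in host memory.
    std::fill_n(input.data<float>(), input.numel(), 0.0f);
}
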
@@ -1,9 +1,8 @@
-#ifndef CUDANET_DENSE_LAYER_H
-#define CUDANET_DENSE_LAYER_H
+#pragma once

 #include <vector>

-#include "activation.hpp"
+#include "backend.hpp"
 #include "layer.hpp"

 namespace CUDANet::Layers {
@@ -12,121 +11,67 @@ namespace CUDANet::Layers {
  * @brief Dense (fully connected) layer
  *
  */
-class Dense : public WeightedLayer {
+class Dense : public Layer {
   public:
-    /**
-     * @brief Construct a new Dense layer
-     *
-     * @param inputSize Size of the input vector
-     * @param outputSize Size of the output vector
-     * @param activationType Activation function type ('RELU', 'SIGMOID',
-     * 'SOFTMAX' or 'NONE')
-     */
-    Dense(int inputSize, int outputSize, Layers::ActivationType activationType);
-
-    /**
-     * @brief Destroy the Dense layer
-     *
-     */
+    Dense(CUDANet::Backend *backend, CUDANet::Shape input_shape, CUDANet::Shape output_shape);

     ~Dense();

-    /**
-     * @brief Forward pass of the dense layer
-     *
-     * @param d_input Device pointer to the input vector
-     * @return Device pointer to the output vector
-     */
-    float* forward(const float* d_input);
+    CUDANet::Tensor& forward(CUDANet::Tensor &input);

-    /**
-     * @brief Set the weights of the layer
-     *
-     * @param weights Pointer to vector of weights
-     */
-    void setWeights(const float* weights);
+    CUDANet::Shape input_shape();

-    /**
-     * @brief Get the weights of the layer
-     *
-     * @return Vector of weights
-     */
-    std::vector<float> getWeights();
+    CUDANet::Shape output_shape();

-    /**
-     * @brief Set the biases of the layer
-     *
-     * @param biases Pointer to vector of biases
-     */
-    void setBiases(const float* biases);
+    size_t input_size();

-    /**
-     * @brief Get the biases of the layer
-     *
-     * @return Vector of biases
-     */
-    std::vector<float> getBiases();
+    size_t output_size();

-    /**
-     * @brief Get output size
-     *
-     * @return int output size
-     */
-    int getOutputSize();
+    void set_weights(CUDANet::Tensor &input);

-    /**
-     * @brief Get input size
-     *
-     * @return int input size
-     */
-    int getInputSize();
+    CUDANet::Tensor& get_weights();
+
+    void set_biases(CUDANet::Tensor &input);
+
+    CUDANet::Tensor& get_biases();

   private:
-    int inputSize;
-    int outputSize;
+    CUDANet::Backend *backend;

-    std::vector<float> weights;
-    std::vector<float> biases;
+    CUDANet::Shape in_shape;
+    CUDANet::Shape out_shape;

-    Layers::Activation* activation;
+    CUDANet::Tensor weights;
+    CUDANet::Tensor biases;

-    /**
-     * @brief Initialize the weights to zeros
-     *
-     */
-    void initializeWeights();
-
-    /**
-     * @brief Initialize the biases to zeros
-     *
-     */
-    void initializeBiases();
+    void init_weights();
+    void init_biases();

-    float* forwardCPU(const float* input);
-
-#ifdef USE_CUDA
-    float* d_output;
-
-    float* d_weights;
-    float* d_biases;
-
-    // Precompute kernel launch parameters
-    int forwardGridSize;
-    int biasGridSize;
-
-    /**
-     * @brief Copy the weights and biases to the device
-     *
-     */
-    void toCuda();
-
-    void initCUDA();
-    void delCUDA();
-
-    float* forwardCUDA(const float* d_input);
-#endif
+    // #ifdef USE_CUDA
+    // float* d_output;
+
+    // float* d_weights;
+    // float* d_biases;
+
+    // // Precompute kernel launch parameters
+    // int forwardGridSize;
+    // int biasGridSize;
+
+    // /**
+    //  * @brief Copy the weights and biases to the device
+    //  *
+    //  */
+    // void toCuda();
+
+    // void initCUDA();
+    // void delCUDA();
+
+    // float* forwardCUDA(const float* d_input);
+    // #endif

 };

 } // namespace CUDANet::Layers
-
-#endif // CUDANET_DENSE_LAYER_H
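The migrated header drops the raw-pointer setWeights/getWeights style API in favour of Backend, Shape, and Tensor. A minimal usage sketch, assuming a constructed backend and input tensor; the concrete sizes are chosen purely for illustration and do not appear in the commit (forward is still a stub at this point):

#include "backend.hpp"
#include "dense.hpp"

// Hypothetical driver: `backend` and `input` are assumed to exist already.
void run_dense(CUDANet::Backend *backend, CUDANet::Tensor &input) {
    // 1-D input of 784 values mapped to a 1-D output of 10 values.
    CUDANet::Layers::Dense dense(backend, CUDANet::Shape(784), CUDANet::Shape(10));

    // Weights and biases start zeroed; real parameters would be loaded via
    // set_weights / set_biases before calling forward.
    CUDANet::Tensor &output = dense.forward(input);
    (void)output;
}
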
@@ -33,6 +33,11 @@ public:
     template <typename T>
     T* data();

+    void zero();
+
+    template <typename T>
+    void set_data(T *data);
+
 private:
     Shape shape;
     DType dtype;
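Together with data<T>(), the new zero() and set_data() let callers work with tensors without reaching for raw device pointers. A minimal sketch of the intended call pattern, assuming `backend` is a valid Backend instance and mirroring the constructor arguments used by the Dense layer elsewhere in this commit:

// Assumed: `backend` points at a concrete Backend implementation.
CUDANet::Tensor t(CUDANet::Shape(16), CUDANet::DType::FLOAT32, backend);
t.zero();                      // dispatches to backend->zero(t)
float *buf = t.data<float>();  // typed view of the underlying buffer
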
@@ -1,13 +1,13 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "backend/backend.hpp"
|
||||
#include "backend.hpp"
|
||||
#include "backend/cuda.cuh"
|
||||
#include "utils/cuda_helper.cuh"
|
||||
#include "kernels/matmul.cuh"
|
||||
|
||||
using namespace CUDANet::Backend;
|
||||
|
||||
void CUDA::print(const CUDANet::Backend::Tensor &input) {
|
||||
void CUDA::print(const CUDANet::Tensor &input) {
|
||||
auto length = input.numel();
|
||||
std::vector<float> h_vec(input.numel());
|
||||
|
||||
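print() stages the device buffer into a host vector before streaming it to std::cout; the middle of the function falls outside this hunk. A plausible reconstruction of that middle, for orientation only (the exact copy call and output loop are assumptions):

// Assumed body between the lines above and the std::cout << std::endl below.
CUDA_CHECK(cudaMemcpy(
    h_vec.data(), input.data<float>(),
    sizeof(float) * length, cudaMemcpyDeviceToHost));

for (int i = 0; i < length; ++i) {
    std::cout << h_vec[i] << " ";
}
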
@@ -22,11 +22,11 @@ void CUDA::print(const CUDANet::Backend::Tensor &input) {
     std::cout << std::endl;
 }

-void CUDA::clear(CUDANet::Backend::Tensor &input) {
+void CUDA::zero(CUDANet::Tensor &input) {
     CUDA_CHECK(cudaMemset(input.data<float>(), 0, sizeof(float) * input.numel()));
 }

-void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) {
+void CUDA::sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) {
     auto length = input.numel();
     const int gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
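Both reductions size their launch grid with the usual ceiling-division idiom, (length + BLOCK_SIZE - 1) / BLOCK_SIZE, i.e. the smallest number of blocks that covers length elements. A standalone check of the arithmetic (the BLOCK_SIZE value of 256 is an assumption for this example):

#include <cstdio>

int main() {
    const int BLOCK_SIZE = 256;
    for (int length : {1, 256, 257, 1000}) {
        const int gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;  // ceil(length / BLOCK_SIZE)
        std::printf("length=%4d -> gridSize=%d\n", length, gridSize);
    }
    return 0;
}
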
@@ -45,7 +45,7 @@ void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &
     }
 }

-void CUDA::max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) {
+void CUDA::max(const CUDANet::Tensor &input, CUDANet::Tensor &max) {
     auto length = input.numel();
     const int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -50,3 +50,7 @@ template <typename T>
 T* Tensor::data() {
     return static_cast<T*>(d_ptr);
 }
+
+void Tensor::zero() {
+    backend->zero(*this);
+}
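Tensor::zero() simply forwards to the owning backend. The set_data() declared in the header hunk above is not defined anywhere in this commit; if it follows the same delegation pattern, it would need a host-to-device copy hook on Backend. A sketch under that assumption only — Backend::copy_from_host is hypothetical and does not exist in this diff:

// Hypothetical completion; copy_from_host(src, dst, bytes) is an assumed Backend method.
template <typename T>
void Tensor::set_data(T *data) {
    backend->copy_from_host(data, *this, sizeof(T) * numel());
}
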
@@ -1,80 +1,58 @@
+#include "dense.hpp"
+
+#include <format>
 #include <stdexcept>

-#include "activation.hpp"
-#include "dense.hpp"

 using namespace CUDANet::Layers;

-Dense::Dense(int inputSize, int outputSize, ActivationType activationType)
-    : inputSize(inputSize), outputSize(outputSize) {
-    // Allocate memory for weights and biases
-    weights.resize(outputSize * inputSize);
-    biases.resize(outputSize);
-
-    initializeWeights();
-    initializeBiases();
-
-    activation = new Activation(activationType, outputSize);
-
-#ifdef USE_CUDA
-    initCUDA();
-#endif
+Dense::Dense(CUDANet::Backend *backend, CUDANet::Shape input_shape, CUDANet::Shape output_shape)
+    : backend(backend), in_shape(input_shape), out_shape(output_shape) {
+    if (input_shape.size() != 1) {
+        throw std::runtime_error(std::format("Invalid input shape: expected 1 dimension, got {}", input_shape.size()));
+    }
+
+    if (output_shape.size() != 1) {
+        throw std::runtime_error(std::format("Invalid output shape: expected 1 dimension, got {}", output_shape.size()));
+    }
+
+    auto input_len  = input_shape[0];
+    auto output_len = output_shape[0];
+
+    weights = CUDANet::Tensor{CUDANet::Shape(input_len * output_len), CUDANet::DType::FLOAT32, backend};
+    biases  = CUDANet::Tensor(CUDANet::Shape(output_len), CUDANet::DType::FLOAT32, backend);
+
+    weights.zero();
+    biases.zero();
 }

-Dense::~Dense() {
-    delete activation;
-#ifdef USE_CUDA
-    delCUDA();
-#endif
-}
-
-void Dense::initializeWeights() {
-    std::fill(weights.begin(), weights.end(), 0.0f);
-}
-
-void Dense::initializeBiases() {
-    std::fill(biases.begin(), biases.end(), 0.0f);
-}
-
-float* Dense::forwardCPU(const float* input) {
-    throw std::logic_error("Not implemented");
-}
-
-float* Dense::forward(const float* input) {
-#ifdef USE_CUDA
-    return forwardCUDA(input);
-#else
-    return forwardCPU(input);
-#endif
-}
+CUDANet::Tensor& Dense::forward(CUDANet::Tensor &input) {
+    throw std::logic_error("Not implemented");
+}
+
+CUDANet::Shape Dense::input_shape() {
+    return in_shape;
+}
+
+CUDANet::Shape Dense::output_shape() {
+    return out_shape;
+}
+
+size_t Dense::input_size() {
+    return in_shape[0];
+}
+
+size_t Dense::output_size() {
+    return out_shape[0];
+}

-void Dense::setWeights(const float* weights_input) {
-    std::copy(weights_input, weights_input + weights.size(), weights.begin());
-#ifdef USE_CUDA
-    toCuda();
-#endif
-}
+void Dense::set_weights(CUDANet::Tensor &input) {
+    throw std::logic_error("Not implemented");
+}

-std::vector<float> Dense::getWeights() {
+CUDANet::Tensor& Dense::get_weights() {
     return weights;
 }

-void Dense::setBiases(const float* biases_input) {
-    std::copy(biases_input, biases_input + biases.size(), biases.begin());
-#ifdef USE_CUDA
-    toCuda();
-#endif
-}
+void Dense::set_biases(CUDANet::Tensor &input) {
+    throw std::logic_error("Not implemented");
+}

-std::vector<float> Dense::getBiases() {
+CUDANet::Tensor& Dense::get_biases() {
     return biases;
 }

-int Dense::getOutputSize() {
-    return outputSize;
-}
-
-int Dense::getInputSize() {
-    return inputSize;
-}
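set_weights and set_biases are still unimplemented stubs in this commit. One plausible completion, using only API already visible in this diff (Tensor::numel, Tensor::data, Tensor::set_data); the size check, error message, and the exact semantics of set_data are assumptions:

// Hedged sketch, not part of the commit.
void Dense::set_weights(CUDANet::Tensor &input) {
    if (input.numel() != weights.numel()) {
        throw std::runtime_error(std::format(
            "Invalid weights size: expected {}, got {}", weights.numel(), input.numel()));
    }
    weights.set_data(input.data<float>());
}

The matching set_biases would do the same against the biases tensor.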