您的位置:首页 > 科技 > 能源 > 珠海网站制作专业_企业建设流程_宁波seo网络推广咨询价格_苏州网站制作推广

珠海网站制作专业_企业建设流程_宁波seo网络推广咨询价格_苏州网站制作推广

2025/8/30 18:38:32 来源:https://blog.csdn.net/stallion5632/article/details/144025722  浏览:    关键词:珠海网站制作专业_企业建设流程_宁波seo网络推广咨询价格_苏州网站制作推广
珠海网站制作专业_企业建设流程_宁波seo网络推广咨询价格_苏州网站制作推广

文章目录

    • 0. 引言
    • 1. 实现功能概述
    • 2. 完整代码
    • 3. 代码解释
      • 3.1 CUDA 核心逻辑
      • 3.2 主机端逻辑
      • 3.3 OpenCV 图像处理
    • 4. 编译与运行

0. 引言

本文将以一个简单的例子展示如何使用 CUDA 将 RGB 图像转换为灰度图,并结合 OpenCV 完成图像的加载与保存。

1. 实现功能概述

我们要实现以下功能:

  1. 使用 CUDA 内核,将每个像素从 RGBA 格式转换为灰度值。
  2. 灰度化公式为:Gray = 0.299 * R + 0.587 * G + 0.114 * B
  3. 使用 OpenCV 进行图像加载和保存处理。

代码分为三个部分:

  1. CUDA 核心逻辑:负责灰度化转换的 CUDA 内核实现。
  2. 主机端逻辑:完成内存管理、数据传递以及对 CUDA 内核的调用。
  3. OpenCV 图像操作:用于读取和保存图像文件。

2. 完整代码

// rgba_to_greyscale.h
#ifndef RGBA_TO_GREYSCALE_H_
#define RGBA_TO_GREYSCALE_H_

#include <cstdint>

// Callback function type for handling the output grey image.
// Arguments:
// - height: Image height in pixels.
// - width: Image width in pixels.
// - h_grey_image: Host pointer to height * width greyscale bytes (one per
//   pixel). The buffer is owned by RgbaToGreyscale and is freed as soon as the
//   callback returns, so the callback must copy anything it wants to keep.
typedef void (*CallbackFun)(int32_t height, int32_t width, uint8_t* h_grey_image);

// Converts an RGBA image to greyscale using CUDA.
// Parameters:
// - height: Image height.
// - width: Image width.
// - data: Pointer to RGBA image data (4 bytes per pixel, height * width pixels,
//   read as one contiguous buffer).
// - callback: Callback function to handle the output grey image. May be
//   nullptr, in which case the result is computed and then discarded.
// Returns 0 on success and -1 on any failure (a message is written to stderr).
int32_t RgbaToGreyscale(int32_t height, int32_t width, uint8_t* data, CallbackFun callback = nullptr);

#endif  // RGBA_TO_GREYSCALE_H_
// rgba_to_greyscale.cu
#include "rgba_to_greyscale.h"

#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <iostream>
#include <cstring>

namespace {

// CUDA kernel for RGBA to greyscale conversion.
//
// Launch layout: a 1D grid (only blockIdx.x is used) of 2D thread blocks. Each
// thread handles at most one pixel; threads whose flat index falls past the
// last pixel do nothing, so the grid may be rounded up freely.
//
// Parameters:
// - rgba_image: Device pointer to num_rows * num_cols RGBA pixels.
// - grey_image: Device pointer to num_rows * num_cols output bytes.
// - num_rows, num_cols: Image dimensions in pixels.
__global__ void RgbaToGreyscaleKernel(const uchar4* rgba_image, uint8_t* grey_image,
                                      int32_t num_rows, int32_t num_cols) {
  // 64-bit arithmetic: both the pixel count and the flat thread index can
  // exceed the 32-bit range for very large images.
  const int64_t num_pixels = static_cast<int64_t>(num_rows) * num_cols;
  const int64_t id = static_cast<int64_t>(blockIdx.x) * blockDim.x * blockDim.y +
                     static_cast<int64_t>(threadIdx.y) * blockDim.x + threadIdx.x;
  if (id < num_pixels) {
    const uchar4 pixel = rgba_image[id];
    // Weighted sum of R, G and B; the alpha channel (w) is ignored. The cast
    // truncates the fractional part.
    grey_image[id] =
        static_cast<uint8_t>(0.299f * pixel.x + 0.587f * pixel.y + 0.114f * pixel.z);
  }
}

}  // namespace

// Converts an RGBA host image to greyscale on the GPU and hands the result to
// `callback`.
//
// Parameters:
// - height, width: Image dimensions in pixels; both must be positive.
// - data: Host pointer to height * width RGBA pixels stored contiguously.
// - callback: Optional; invoked with the host greyscale buffer, which is freed
//   right after the callback returns.
//
// Returns 0 on success, -1 on invalid arguments or any CUDA failure (a message
// is printed to stderr and every buffer allocated so far is released).
int32_t RgbaToGreyscale(int32_t height, int32_t width, uint8_t* data, CallbackFun callback) {
  if (data == nullptr) {
    std::cerr << "Input data is null." << std::endl;
    return -1;
  }
  // Zero-sized images would produce an empty (invalid) grid, and two negative
  // dimensions would multiply to a bogus positive pixel count.
  if (height <= 0 || width <= 0) {
    std::cerr << "Image dimensions must be positive." << std::endl;
    return -1;
  }

  // size_t arithmetic so that width * height cannot overflow a 32-bit int.
  const size_t num_pixels = static_cast<size_t>(height) * static_cast<size_t>(width);
  const size_t rgba_bytes = sizeof(uchar4) * num_pixels;
  const size_t grey_bytes = sizeof(uint8_t) * num_pixels;

  const int32_t threads_per_block = 16;  // side of the square 2D thread block
  const size_t block_threads = static_cast<size_t>(threads_per_block) * threads_per_block;
  const size_t num_blocks = (num_pixels + block_threads - 1) / block_threads;  // ceil-div
  // gridDim.x is limited to 2^31 - 1 on compute capability 3.0 and newer;
  // refuse rather than let the value be truncated when converted to dim3.
  const size_t max_grid_blocks = 2147483647u;
  if (num_blocks > max_grid_blocks) {
    std::cerr << "Image is too large for a single kernel launch." << std::endl;
    return -1;
  }

  const uchar4* h_rgba_image = reinterpret_cast<const uchar4*>(data);
  uchar4* d_rgba_image = nullptr;
  uint8_t* d_grey_image = nullptr;
  uint8_t* h_grey_image = nullptr;

  // Frees whatever has been allocated so far; pointers that were never
  // allocated are still nullptr (cudaFree accepts a null pointer).
  const auto release_buffers = [&]() {
    cudaFree(d_rgba_image);
    cudaFree(d_grey_image);
    if (h_grey_image != nullptr) {
      cudaFreeHost(h_grey_image);
    }
  };
  // Reports an error, cleans up and yields the failure return code.
  const auto fail = [&](const char* message) -> int32_t {
    std::cerr << message << std::endl;
    release_buffers();
    return -1;
  };

  if (cudaMalloc(&d_rgba_image, rgba_bytes) != cudaSuccess) {
    return fail("Failed to allocate device memory for RGBA image.");
  }
  if (cudaMalloc(&d_grey_image, grey_bytes) != cudaSuccess) {
    return fail("Failed to allocate device memory for greyscale image.");
  }
  if (cudaHostAlloc(&h_grey_image, grey_bytes, cudaHostAllocMapped) != cudaSuccess) {
    return fail("Failed to allocate host memory for greyscale image.");
  }
  if (cudaMemcpy(d_rgba_image, h_rgba_image, rgba_bytes, cudaMemcpyHostToDevice) != cudaSuccess) {
    return fail("Failed to copy RGBA image data to device.");
  }

  const dim3 block_size(threads_per_block, threads_per_block);
  const dim3 grid_size(static_cast<unsigned int>(num_blocks));
  RgbaToGreyscaleKernel<<<grid_size, block_size>>>(d_rgba_image, d_grey_image, height, width);
  // A kernel launch returns nothing; configuration errors are only visible
  // through cudaGetLastError().
  if (cudaGetLastError() != cudaSuccess) {
    return fail("CUDA kernel launch failed.");
  }
  // Faults raised while the kernel runs surface at the next synchronizing call.
  if (cudaDeviceSynchronize() != cudaSuccess) {
    return fail("CUDA kernel execution failed.");
  }
  if (cudaMemcpy(h_grey_image, d_grey_image, grey_bytes, cudaMemcpyDeviceToHost) != cudaSuccess) {
    return fail("Failed to copy greyscale image data to host.");
  }

  if (callback != nullptr) {
    callback(height, width, h_grey_image);
  }
  release_buffers();
  return 0;
}
// main.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <iostream>
#include <string>
#include <cstdint>

#include "rgba_to_greyscale.h"

namespace {

// Outcome of the last SaveGreyscaleImage call. The callback type returns void
// and carries no user data, so this flag is how main() learns whether the
// output file was actually written.
bool g_output_written = false;

// Saves the greyscale image to a file.
// Wraps the buffer in a single-channel 8-bit cv::Mat header (no pixel copy)
// and writes it to "out.png"; the result is recorded in g_output_written.
void SaveGreyscaleImage(int32_t height, int32_t width, uint8_t* h_grey_image) {
  const std::string output_file = "out.png";
  cv::Mat out_image(height, width, CV_8UC1, static_cast<void*>(h_grey_image));
  g_output_written = cv::imwrite(output_file, out_image);
  if (!g_output_written) {
    std::cerr << "Failed to write the output image: " << output_file << std::endl;
  }
}

}  // namespace

// Loads "src.png", converts it to RGBA, runs the CUDA greyscale conversion and
// saves the result through SaveGreyscaleImage.
// Returns 0 on success and -1 if reading, converting or writing fails.
int main() {
  const std::string input_file = "src.png";
  cv::Mat image = cv::imread(input_file, cv::IMREAD_COLOR);
  if (image.empty()) {
    std::cerr << "Failed to read input image: " << input_file << std::endl;
    return -1;
  }

  // cv::imread yields BGR channel order; the CUDA code expects 4-byte RGBA.
  cv::Mat image_rgba;
  cv::cvtColor(image, image_rgba, cv::COLOR_BGR2RGBA);

  if (RgbaToGreyscale(image_rgba.rows, image_rgba.cols, image_rgba.ptr<uint8_t>(),
                      SaveGreyscaleImage) != 0) {
    std::cerr << "Failed to convert image to greyscale." << std::endl;
    return -1;
  }
  // The conversion can succeed while the write fails; the callback has already
  // printed the reason, so just report failure through the exit code.
  if (!g_output_written) {
    return -1;
  }

  std::cout << "Converted greyscale image saved successfully." << std::endl;
  return 0;
}

3. 代码解释

3.1 CUDA 核心逻辑

CUDA 内核函数实现每个线程对一个像素的灰度化处理。以下是核心实现代码:

// CUDA kernel for RGBA to greyscale conversion.
//
// Launch layout: a 1D grid (only blockIdx.x is used) of 2D thread blocks. Each
// thread handles at most one pixel; threads whose flat index falls past the
// last pixel do nothing, so the grid may be rounded up freely.
//
// Parameters:
// - rgba_image: Device pointer to num_rows * num_cols RGBA pixels.
// - grey_image: Device pointer to num_rows * num_cols output bytes.
// - num_rows, num_cols: Image dimensions in pixels.
__global__ void RgbaToGreyscaleKernel(const uchar4* rgba_image, uint8_t* grey_image,
                                      int32_t num_rows, int32_t num_cols) {
  // 64-bit arithmetic: both the pixel count and the flat thread index can
  // exceed the 32-bit range for very large images.
  const int64_t num_pixels = static_cast<int64_t>(num_rows) * num_cols;
  const int64_t id = static_cast<int64_t>(blockIdx.x) * blockDim.x * blockDim.y +
                     static_cast<int64_t>(threadIdx.y) * blockDim.x + threadIdx.x;
  if (id < num_pixels) {
    const uchar4 pixel = rgba_image[id];
    // Weighted sum of R, G and B; the alpha channel (w) is ignored. The cast
    // truncates the fractional part.
    grey_image[id] =
        static_cast<uint8_t>(0.299f * pixel.x + 0.587f * pixel.y + 0.114f * pixel.z);
  }
}

说明:

  • 每个线程计算图像中的一个像素点,提升了并行处理效率。
  • 输入参数 rgba_image 是 RGBA 格式的像素数据,输出参数 grey_image 是灰度图像数据。

3.2 主机端逻辑

主机端负责完成 CUDA 内存管理、内核调用以及数据传输。以下是代码实现:


// Converts an RGBA host image to greyscale on the GPU and hands the result to
// `callback`.
//
// Parameters:
// - height, width: Image dimensions in pixels; both must be positive.
// - data: Host pointer to height * width RGBA pixels stored contiguously.
// - callback: Optional; invoked with the host greyscale buffer, which is freed
//   right after the callback returns.
//
// Returns 0 on success, -1 on invalid arguments or any CUDA failure (a message
// is printed to stderr and every buffer allocated so far is released).
int32_t RgbaToGreyscale(int32_t height, int32_t width, uint8_t* data, CallbackFun callback) {
  if (data == nullptr) {
    std::cerr << "Input data is null." << std::endl;
    return -1;
  }
  // Zero-sized images would produce an empty (invalid) grid, and two negative
  // dimensions would multiply to a bogus positive pixel count.
  if (height <= 0 || width <= 0) {
    std::cerr << "Image dimensions must be positive." << std::endl;
    return -1;
  }

  // size_t arithmetic so that width * height cannot overflow a 32-bit int.
  const size_t num_pixels = static_cast<size_t>(height) * static_cast<size_t>(width);
  const size_t rgba_bytes = sizeof(uchar4) * num_pixels;
  const size_t grey_bytes = sizeof(uint8_t) * num_pixels;

  const int32_t threads_per_block = 16;  // side of the square 2D thread block
  const size_t block_threads = static_cast<size_t>(threads_per_block) * threads_per_block;
  const size_t num_blocks = (num_pixels + block_threads - 1) / block_threads;  // ceil-div
  // gridDim.x is limited to 2^31 - 1 on compute capability 3.0 and newer;
  // refuse rather than let the value be truncated when converted to dim3.
  const size_t max_grid_blocks = 2147483647u;
  if (num_blocks > max_grid_blocks) {
    std::cerr << "Image is too large for a single kernel launch." << std::endl;
    return -1;
  }

  const uchar4* h_rgba_image = reinterpret_cast<const uchar4*>(data);
  uchar4* d_rgba_image = nullptr;
  uint8_t* d_grey_image = nullptr;
  uint8_t* h_grey_image = nullptr;

  // Frees whatever has been allocated so far; pointers that were never
  // allocated are still nullptr (cudaFree accepts a null pointer).
  const auto release_buffers = [&]() {
    cudaFree(d_rgba_image);
    cudaFree(d_grey_image);
    if (h_grey_image != nullptr) {
      cudaFreeHost(h_grey_image);
    }
  };
  // Reports an error, cleans up and yields the failure return code.
  const auto fail = [&](const char* message) -> int32_t {
    std::cerr << message << std::endl;
    release_buffers();
    return -1;
  };

  if (cudaMalloc(&d_rgba_image, rgba_bytes) != cudaSuccess) {
    return fail("Failed to allocate device memory for RGBA image.");
  }
  if (cudaMalloc(&d_grey_image, grey_bytes) != cudaSuccess) {
    return fail("Failed to allocate device memory for greyscale image.");
  }
  if (cudaHostAlloc(&h_grey_image, grey_bytes, cudaHostAllocMapped) != cudaSuccess) {
    return fail("Failed to allocate host memory for greyscale image.");
  }
  if (cudaMemcpy(d_rgba_image, h_rgba_image, rgba_bytes, cudaMemcpyHostToDevice) != cudaSuccess) {
    return fail("Failed to copy RGBA image data to device.");
  }

  const dim3 block_size(threads_per_block, threads_per_block);
  const dim3 grid_size(static_cast<unsigned int>(num_blocks));
  RgbaToGreyscaleKernel<<<grid_size, block_size>>>(d_rgba_image, d_grey_image, height, width);
  // A kernel launch returns nothing; configuration errors are only visible
  // through cudaGetLastError().
  if (cudaGetLastError() != cudaSuccess) {
    return fail("CUDA kernel launch failed.");
  }
  // Faults raised while the kernel runs surface at the next synchronizing call.
  if (cudaDeviceSynchronize() != cudaSuccess) {
    return fail("CUDA kernel execution failed.");
  }
  if (cudaMemcpy(h_grey_image, d_grey_image, grey_bytes, cudaMemcpyDeviceToHost) != cudaSuccess) {
    return fail("Failed to copy greyscale image data to host.");
  }

  if (callback != nullptr) {
    callback(height, width, h_grey_image);
  }
  release_buffers();
  return 0;
}

主要功能:

  1. 将图像数据从主机传输到设备。
  2. 调用 CUDA 内核执行灰度化处理。
  3. 将处理结果从设备传回主机,并调用回调函数保存图像。

3.3 OpenCV 图像处理

主程序中使用 OpenCV 加载原始图像,将其转换为 RGBA 格式,随后调用 RgbaToGreyscale 完成灰度化处理,最后保存结果图像:

// Saves the greyscale image to a file.
void SaveGreyscaleImage(int32_t height, int32_t width, uint8_t* h_grey_image) {const std::string output_file = "out.png";cv::Mat out_image(height, width, CV_8UC1, static_cast<void*>(h_grey_image));if (!cv::imwrite(output_file, out_image)) {std::cerr << "Failed to write the output image: " << output_file << std::endl;}
}}  // namespaceint32_t main() {const std::string input_file = "src.png";cv::Mat image = cv::imread(input_file, cv::IMREAD_COLOR);if (image.empty()) {std::cerr << "Failed to read input image: " << input_file << std::endl;return -1;}cv::Mat image_rgba;cv::cvtColor(image, image_rgba, cv::COLOR_BGR2RGBA);if (RgbaToGreyscale(image_rgba.rows, image_rgba.cols, image_rgba.ptr<uint8_t>(), SaveGreyscaleImage) != 0) {std::cerr << "Failed to convert image to greyscale." << std::endl;return -1;}std::cout << "Converted greyscale image saved successfully." << std::endl;return 0;
}

OpenCV 功能说明:

  1. cv::imread:加载输入图像。
  2. cv::cvtColor:将图像从 BGR 转换为 RGBA。
  3. cv::imwrite:保存灰度化处理后的图像。

4. 编译与运行

编译命令:

# cv::imread and cv::imwrite are provided by the imgcodecs module in OpenCV 3
# and later, so that library has to be linked in addition to core/highgui/imgproc.
nvcc -o rgba_to_greyscale main.cpp rgba_to_greyscale.cu -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs

运行命令:
将输入图片命名为 src.png 并放在运行程序时的当前工作目录下,然后运行程序:

./rgba_to_greyscale

程序执行完成后,灰度图像将保存为当前工作目录下的 out.png。

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com