# PyOpenCL example program, runnable as a single file - 摩杜云 developer community

import pyopencl as cl
import numpy as np
import time

# Seed NumPy's global RNG so every run generates the same input image and the
# printed means are comparable between runs.
np.random.seed(1)

# Create an OpenCL context and a command queue on it.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Input: a random float32 array of shape (1920, 1080), i.e. 1920 rows of 1080
# columns. `output` is the host array that will receive the filtered result.
image = np.random.rand(1920, 1080).astype(np.float32)
print(f"input numpy mean = {image.mean()}")
output = np.zeros_like(image)

# Device buffers. The input is copied to the device when the buffer is created.
# The output buffer is only allocated (no COPY_HOST_PTR): the kernel writes
# every element, so uploading the host-side zeros first would be a wasted
# transfer.
buf_image = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=image)
buf_output = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, size=output.nbytes)

# OpenCL C source for a 3x3 mean filter. One work-item handles one pixel:
# global id 0 is the row (y) and global id 1 is the column (x). The image
# height and width are read from the global work size, so the kernel has to be
# launched with a global size equal to the image shape. Neighbours that fall
# outside the image are mirrored about the border pixel without repeating it
# (index -1 -> 1, index `size` -> size - 2), the same rule as
# np.pad(..., mode='reflect'). `k_size` in the source is the window radius
# (3 / 2 == 1), and the sum is divided by 9, the number of pixels in the window.
kernel_source = """  
__kernel void mean_filter(__global const float *input,
                          __global float *output_arr) {
  int k_size = 3 / 2; // 滤波核大小
  int height = get_global_size(0);
  int width = get_global_size(1);
  int y = get_global_id(0);
  int x = get_global_id(1);
  float sum = 0.0f;
  for (int m = -k_size; m <= k_size; ++m) {
    int bias_y = y + m;
    bias_y = select(bias_y, -bias_y, bias_y < 0);
    bias_y = select(bias_y, (height * 2 - bias_y - 2), bias_y >= height);
    for (int n = -k_size; n <= k_size; ++n) {
      int bias_x = x + n;
      bias_x = select(bias_x, -bias_x, bias_x < 0);
      bias_x = select(bias_x, (width * 2 - bias_x - 2), bias_x >= width);
      sum += input[bias_y * width + bias_x];
    }
  }
  output_arr[y * width + x] =sum/9; // output[y + x * N] = sum * (1.0 / (M * N))
}
"""
# NOTE: despite its name, `kernel` holds the built program; the kernel itself
# is reached as the `mean_filter` attribute below.
kernel = cl.Program(ctx, kernel_source).build()

# Launch the kernel over the whole image and time it. queue.finish() is called
# before the clock is stopped so the measurement covers the kernel run and not
# just the enqueue call. perf_counter() is used because it is monotonic and
# intended for measuring short durations.
start = time.perf_counter()
kernel.mean_filter(queue, image.shape, None, buf_image, buf_output)  # (queue, global size, local size, kernel args...)
queue.finish()
end = time.perf_counter()
t = (end - start) * 1000
print(f"opencl time = {t:.0f}ms")

# Copy the result from the device buffer back into the host array and report
# its mean.
cl.enqueue_copy(queue, output, buf_output)
print(f"output opencl mean = {output.mean()}")


# ------ 使用numpy计算一下均值滤波，对比下结果 ------
def mean_filter(image_array, kernel_size):
    """Apply a box (mean) filter over the first two axes of an array.

    Each output element is the average of the square window centred on it.
    Borders are handled by padding the input with
    ``np.pad(..., mode='reflect')``.

    The window is evaluated for all pixels at once by summing shifted views
    of the padded array, instead of looping over pixels in Python.

    Args:
        image_array: NumPy array whose first two axes are (height, width).
            Any further axes (for example colour channels) are filtered
            independently of each other.
        kernel_size: Nominal side length of the window. The window actually
            used is ``2 * (kernel_size // 2) + 1`` elements wide, so an even
            value behaves like the next odd one.

    Returns:
        A new array with the same shape and dtype as ``image_array``. The
        sum is accumulated in at least float64 and cast back at the end, so
        integer inputs are truncated by that cast and float32 results may
        differ in the last bits from a pure float32 accumulation.
    """
    height, width = image_array.shape[:2]
    pad = kernel_size // 2
    side = 2 * pad + 1

    # Pad only the two spatial axes; any trailing axes are left untouched.
    pad_width = [(pad, pad), (pad, pad)] + [(0, 0)] * (image_array.ndim - 2)
    padded_image = np.pad(image_array, pad_width, mode='reflect')

    # Each (dy, dx) offset selects one tap of the window for every pixel
    # simultaneously; adding all side * side taps gives the window sums.
    acc_dtype = np.result_type(image_array.dtype, np.float64)
    window_sum = np.zeros(image_array.shape, dtype=acc_dtype)
    for dy in range(side):
        for dx in range(side):
            window_sum += padded_image[dy:dy + height, dx:dx + width]

    return (window_sum / (side * side)).astype(image_array.dtype)


# Run the NumPy reference on the same input and time it. perf_counter() is
# used because it is monotonic and intended for measuring durations.
start = time.perf_counter()
np_output = mean_filter(image, 3)
end = time.perf_counter()
t = (end - start) * 1000
print(f"numpy  time = {t:.0f}ms")
print(f"output numpy  mean = {np_output.mean()}")
# Two arrays can share a mean and still differ, so also report the largest
# element-wise difference between the OpenCL result and the NumPy reference.
print(f"max abs diff = {np.abs(output - np_output).max()}")