Simple OpenCL example

Adapted from here:

This version is tidied up a little and made to work on MacOS. It creates an OpenCL executor “thing” and tells it to run a kernel that writes “Hello World” to a buffer, which is then sent back to the main C++ program.

#include "cl.hpp"
#include <iostream>
#include <string>

// OpenCL C source for the "hello" kernel, handed to the OpenCL compiler at
// run time. Each work item copies one character of the greeting into `out`.
// Adjacent string literals are concatenated by the compiler, so no line
// continuations are needed.
std::string prog(
"#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
"__constant char hw[] = \"Hello World\"; \n"
"__kernel void hello(__global char * out) {\n"
"  size_t tid = get_global_id(0); \n"
// The host launches one work item per byte of its output buffer, which is
// larger than the greeting, so work items past the end of `hw` write a NUL
// instead of reading out of bounds.
"  out[tid] = (tid < sizeof(hw)) ? hw[tid] : '\\0'; \n"
"}\n");

int main(void) {
 cl_int err;

 // Get list of platforms (things that can execute OpenCL on this host), get a "context" on the first executor.
 std::vector<cl::Platform> platformList;
 cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0};
 cl::Context context( CL_DEVICE_TYPE_CPU, cprops, NULL, NULL, &amp;amp;amp;err);

 // Allocate 100 bytes of memory which we will use to communicate with the OpenCL context, executor, whatever.
 size_t mem_size = 100;
 char * outH = new char[mem_size];
 cl::Buffer outCL( context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, mem_size, outH, &amp;amp;amp;err);

 // Give the OpenCL program embedded in the string above to OpenCL.
 cl::Program::Sources source(1, std::make_pair(prog.c_str(), prog.length()+1));

 // Get devices used in this "context"
 std::vector<cl::Device> devices;
 devices = context.getInfo<CL_CONTEXT_DEVICES>();

 // Compile program against device
 cl::Program program(context, source);
 err =,"");

 // create a kernel object, tell it we are using the kernel called "hello", give it an argument which is the memory we alloc'd above.
 cl::Kernel kernel(program, "hello", &amp;amp;amp;err);
 err = kernel.setArg(0, outCL);

 // Queue the kernel up to run
 cl::CommandQueue queue(context, devices[0], 0, &amp;amp;amp;err);
 cl::Event event;
 err = queue.enqueueNDRangeKernel( kernel, cl::NullRange, cl::NDRange(mem_size), cl::NDRange(1, 1), NULL, &amp;amp;amp;event);

 // Use the event object above to block until processing has completed

 // Read the results out of the shared memory area.
 err = queue.enqueueReadBuffer( outCL, CL_TRUE, 0, mem_size, outH);

 // Write to screen
 std::cout << outH;

On MacOS compile it with: g++ ./simple.cpp -framework OpenCL -o simple

You’ll also need the C++ bindings for OpenCL, which are available here:

A tarball containing all this is HERE; untar it and type make to build.