Source code for neurokernel.tools.gpu

#!/usr/bin/env python

import numbers

import numpy as np
import pycuda.driver as drv
import pycuda.elementwise as elementwise
import pycuda.gpuarray as gpuarray
from pycuda.tools import dtype_to_ctype

# List of available numerical types provided by numpy: 
# XXX This try/except is an ugly hack to prevent the doc build on
# ReadTheDocs from failing:
try:
    num_types = [np.typeDict[t] for t in \
                 np.typecodes['AllInteger']+np.typecodes['AllFloat']]
except TypeError:
    num_types = []

# Numbers of bytes occupied by each numerical type:
num_nbytes = dict((np.dtype(t), t(1).nbytes) for t in num_types)

[docs]def set_realloc(x_gpu, data): """ Transfer data into a GPUArray instance. Copies the contents of a numpy array into a GPUArray instance. If the array has a different type or dimensions than the instance, the GPU memory used by the instance is reallocated and the instance updated appropriately. Parameters ---------- x_gpu : pycuda.gpuarray.GPUArray GPUArray instance to modify. data : numpy.ndarray Array of data to transfer to the GPU. Examples -------- >>> import pycuda.gpuarray as gpuarray >>> import pycuda.autoinit >>> import numpy as np >>> import misc >>> x = np.asarray(np.random.rand(5), np.float32) >>> x_gpu = gpuarray.to_gpu(x) >>> x = np.asarray(np.random.rand(10, 1), np.float64) >>> set_realloc(x_gpu, x) >>> np.allclose(x, x_gpu.get()) True """ # Only reallocate if absolutely necessary: if x_gpu.shape != data.shape or x_gpu.size != data.size or \ x_gpu.strides != data.strides or x_gpu.dtype != data.dtype: # Free old memory: x_gpu.gpudata.free() # Allocate new memory: nbytes = num_nbytes[data.dtype] x_gpu.gpudata = drv.mem_alloc(nbytes*data.size) # Set array attributes: x_gpu.shape = data.shape x_gpu.size = data.size x_gpu.strides = data.strides x_gpu.dtype = data.dtype # Update the GPU memory: x_gpu.set(data)
[docs]def bufint(a): """ Return buffer interface to GPU or numpy array. Parameters ---------- a : pycuda.gpuarray.GPUArray or numpy.ndarray GPU or numpy array. Returns ------- b : buffer Buffer interface to array. Returns None if `a` has a length of 0. """ if not a.size: return None else: if isinstance(a, gpuarray.GPUArray): return a.gpudata.as_buffer(a.nbytes) elif isinstance(a, np.ndarray): return a.data else: raise TypeError('argument must be a GPU or numpy array')
def get_by_inds(src_gpu, ind): """ Get values in a GPUArray by index. Parameters ---------- src_gpu : pycuda.gpuarray.GPUArray GPUArray instance from which to get values. ind : pycuda.gpuarray.GPUArray or numpy.ndarray 1D array of element indices to set. Must have an integer dtype. Returns ------- result : numpy.ndarray Extracted data. Notes ----- Only supports 1D index arrays. May not be efficient for certain index patterns because of lack of inability to coalesce memory operations. """ dest_gpu = gpuarray.empty(ind.shape, src_gpu.dtype) set_by_inds(dest_gpu, ind, src_gpu, 'src') return dest_gpu.get()
[docs]def set_by_inds(dest_gpu, ind, src_gpu, ind_which='dest'): """ Set values in a GPUArray by index. Parameters ---------- dest_gpu : pycuda.gpuarray.GPUArray GPUArray instance to modify. ind : pycuda.gpuarray.GPUArray or numpy.ndarray 1D array of element indices to set. Must have an integer dtype. src_gpu : pycuda.gpuarray.GPUArray GPUArray instance from which to set values. ind_which : str If set to 'dest', set the elements in `dest_gpu` with indices `ind` to the successive values in `src_gpu`; the lengths of `ind` and `src_gpu` must be equal. If set to 'src', set the successive values in `dest_gpu` to the values in `src_gpu` with indices `ind`; the lengths of `ind` and `dest_gpu` must be equal. Examples -------- >>> import pycuda.gpuarray as gpuarray >>> import pycuda.autoinit >>> import numpy as np >>> from nk.tools.gpu import set_by_inds >>> dest_gpu = gpuarray.to_gpu(np.arange(5, dtype=np.float32)) >>> ind = gpuarray.to_gpu(np.array([0, 2, 4])) >>> src_gpu = gpuarray.to_gpu(np.array([1, 1, 1], dtype=np.float32)) >>> set_by_inds(dest_gpu, ind, src_gpu, 'dest') >>> np.allclose(dest_gpu.get(), np.array([1, 1, 1, 3, 1], dtype=np.float32)) True >>> dest_gpu = gpuarray.to_gpu(np.zeros(3, dtype=np.float32)) >>> ind = gpuarray.to_gpu(np.array([0, 2, 4])) >>> src_gpu = gpuarray.to_gpu(np.arange(5, dtype=np.float32)) >>> set_by_inds(dest_gpu, ind, src_gpu, 'src') >>> np.allclose(dest_gpu.get(), np.array([0, 2, 4], dtype=np.float32)) True Notes ----- Only supports 1D index arrays. May not be efficient for certain index patterns because of lack of inability to coalesce memory operations. """ if np.isscalar(src_gpu) or np.isscalar(dest_gpu): raise ValueError('data must be array-like') if len(np.shape(ind)) > 1: raise ValueError('index array must be 1D') # Manually handle empty index array because it will cause the kernel to # fail if processed: N = len(ind) if N == 0: return if not issubclass(ind.dtype.type, numbers.Integral): raise ValueError('index array must contain integers') if not dest_gpu.dtype == src_gpu.dtype: raise ValueError('dest_gpu.dtype != src_gpu.dtype') if ind_which == 'dest': assert N == len(src_gpu) elif ind_which == 'src': assert N == len(dest_gpu) else: raise ValueError('invalid value for `ind_which`') if not isinstance(ind, gpuarray.GPUArray): ind = gpuarray.to_gpu(ind) try: func = set_by_inds.cache[(dest_gpu.dtype, ind.dtype, ind_which)] except KeyError: data_ctype = dtype_to_ctype(dest_gpu.dtype) ind_ctype = dtype_to_ctype(ind.dtype) v = "{data_ctype} *dest, {ind_ctype} *ind, {data_ctype} *src".format(data_ctype=data_ctype, ind_ctype=ind_ctype) if ind_which == 'dest': func = elementwise.ElementwiseKernel(v, "dest[ind[i]] = src[i]") else: func = elementwise.ElementwiseKernel(v, "dest[i] = src[ind[i]]") set_by_inds.cache[(dest_gpu.dtype, ind.dtype, ind_which)] = func func(dest_gpu, ind, src_gpu, range=slice(0, N, 1))
set_by_inds.cache = {}
[docs]def set_by_inds_from_inds(dest_gpu, ind_dest, src_gpu, ind_src): """ Set values in a GPUArray by index from indexed values in another GPUArray. Parameters ---------- dest_gpu : pycuda.gpuarray.GPUArray GPUArray instance to modify. ind_dest : pycuda.gpuarray.GPUArray or numpy.ndarray 1D array of element indices in `dest_gpu` to set. Must have an integer dtype. src_gpu : pycuda.gpuarray.GPUArray GPUArray instance from which to set values. ind_src : pycuda.gpuarray.GPUArray or numpy.ndarray 1D array of element indices in `src_gpu` to copy. Must have an integer dtype and be the same length as `ind_dest`. Examples -------- >>> import pycuda.gpuarray as gpuarray >>> import pycuda.autoinit >>> import numpy as np >>> from nk.tools.gpu import set_by_inds_from_inds >>> dest_gpu = gpuarray.to_gpu(np.zeros(5, dtype=np.float32)) >>> ind_dest = gpuarray.to_gpu(np.array([0, 2, 4])) >>> src_gpu = gpuarray.to_gpu(np.arange(5, 10, dtype=np.float32)) >>> ind_src = gpuarray.to_gpu(np.array([2, 3, 4])) >>> gpu.set_by_inds_from_inds(dest_gpu, ind_dest, src_gpu, ind_src) >>> assert np.allclose(dest_gpu.get(), np.array([7, 0, 8, 0, 9], dtype=np.float32)) True """ if len(np.shape(ind_dest)) > 1: raise ValueError('destination index array must be 1D') if len(np.shape(ind_src)) > 1: raise ValueError('source index array must be 1D') assert dest_gpu.dtype == src_gpu.dtype assert ind_dest.dtype == ind_src.dtype assert issubclass(ind_dest.dtype.type, numbers.Integral) assert issubclass(ind_src.dtype.type, numbers.Integral) # Manually handle empty index array because it will cause the kernel to # fail if processed: N = len(ind_src) if N == 0: return assert N == len(ind_dest) if not isinstance(ind_dest, gpuarray.GPUArray): ind_dest = gpuarray.to_gpu(ind_dest) if not isinstance(ind_src, gpuarray.GPUArray): ind_src = gpuarray.to_gpu(ind_src) try: func = set_by_inds_from_inds.cache[(dest_gpu.dtype, ind_dest.dtype)] except KeyError: data_ctype = dtype_to_ctype(dest_gpu.dtype) ind_ctype = dtype_to_ctype(ind_dest.dtype) v = "{data_ctype} *dest, {ind_ctype} *ind_dest,"\ "{data_ctype} *src, {ind_ctype} *ind_src".format(data_ctype=data_ctype, ind_ctype=ind_ctype) func = elementwise.ElementwiseKernel(v, "dest[ind_dest[i]] = src[ind_src[i]]") set_by_inds_from_inds.cache[(dest_gpu.dtype, ind_dest.dtype)] = func func(dest_gpu, ind_dest, src_gpu, ind_src, range=slice(0, N, 1))
set_by_inds_from_inds.cache = {}