引数とコンスタントメモリ
コンスタントを
使わない場合242.732391(ms)
使う場合242.835258(ms)
両者の実行時間にほとんど差はなく、実行するたびに数値も変動するため、この差は測定誤差の範囲内と考えられる。
#include "cDeviceMemory.h"
#include <windows.h>
#include <cstdio>

// Set to 1 to feed the fractal parameters to the kernel through __constant__
// memory; leave at 0 to pass them as ordinary kernel arguments.
#define USE_CONSTANT 0

// Logs a failed CUDA runtime call to stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool checkCuda(cudaError_t err, const char* what)
{
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Computes the escape-iteration colour of the pixel owned by the calling
// thread and stores it in pBuf. Shared by both kernel variants so that the
// only difference between them is where the parameters come from.
//
//   pBuf            destination image; indexed as pBuf[ix + iy * pitch]
//   pitch           row stride of pBuf, counted in RGBQUAD elements
//   width, height   image size in pixels; threads outside it do nothing
//   threshold       escape bound compared against each component of z
//   limit           maximum number of iterations per pixel
//   startX..endY    rectangle of the complex plane mapped onto the image
static __forceinline__ __device__ void shadeMandelbrotPixel(
    RGBQUAD* pBuf,
    int pitch,
    int width,
    int height,
    float threshold,
    int limit,
    float startX,
    float endX,
    float startY,
    float endY)
{
    const int ix = threadIdx.x + blockIdx.x * blockDim.x;
    const int iy = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so the last row/column of
    // blocks may hang over the image edge.
    if (ix >= width || iy >= height) {
        return;
    }

    const float dx = (endX - startX) / width;
    const float dy = (endY - startY) / height;
    const float x = startX + dx * ix;
    const float y = startY + dy * iy;

    float zr = 0.0f;
    float zi = 0.0f;
    int i = 0;
    for (; i < limit; ++i) {
        // NOTE(review): the escape test compares the real and imaginary parts
        // separately against `threshold` (not |z|); kept exactly as written.
        const float zr0 = zr * zr - zi * zi + x;
        if (threshold < zr0) {
            break;
        }
        const float zi0 = 2.0f * zr * zi + y;
        if (threshold < zi0) {
            break;
        }
        zr = zr0;
        zi = zi0;
    }

    // Scale the iteration count to a 24-bit value. The product is formed in
    // 64 bits because 0xFFFFFF * i does not fit in a 32-bit int once i >= 128.
    // A non-positive limit would divide by zero, so it maps to depth 0.
    DWORD depth = 0;
    if (limit > 0) {
        depth = (DWORD)((0xFFFFFFull * (unsigned long long)i) /
                        (unsigned long long)limit);
    }

    // Split the 24-bit depth into three bytes, lowest byte first.
    RGBQUAD rgb = {(BYTE)(depth & 0xFF),
                   (BYTE)((depth >> 8) & 0xFF),
                   (BYTE)((depth >> 16) & 0xFF),
                   0};

    // 64-bit index so that iy * pitch cannot overflow int on large images.
    pBuf[(long long)iy * pitch + ix] = rgb;
}

#if USE_CONSTANT

// Host-side staging copies of the parameters uploaded to constant memory.
float H_THRESHOLD;
int   H_LIMIT;
float H_START_X;
float H_END_X;
float H_START_Y;
float H_END_Y;

// Device-side parameters, filled with cudaMemcpyToSymbol before each launch.
__constant__ float D_THRESHOLD;
__constant__ int   D_LIMIT;
__constant__ float D_START_X;
__constant__ float D_END_X;
__constant__ float D_START_Y;
__constant__ float D_END_Y;

// Mandelbrot kernel, constant-memory variant.
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads, one thread per pixel. Uses no shared memory.
__global__ void CalculateMandelbrotAsync(
    RGBQUAD* pBuf,
    int pitch,
    int width,
    int height)
{
    shadeMandelbrotPixel(pBuf, pitch, width, height,
                         D_THRESHOLD, D_LIMIT,
                         D_START_X, D_END_X, D_START_Y, D_END_Y);
}

#else

// Mandelbrot kernel, argument-passing variant.
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads, one thread per pixel. Uses no shared memory.
__global__ void CalculateMandelbrotAsync(
    RGBQUAD* pBuf,
    int pitch,
    int width,
    int height,
    float threshold,
    int limit,
    float startX,
    float endX,
    float startY,
    float endY)
{
    shadeMandelbrotPixel(pBuf, pitch, width, height,
                         threshold, limit,
                         startX, endX, startY, endY);
}

#endif

// Launches the Mandelbrot kernel over a width x height image stored in
// deviceMem. The launch is issued on the default stream and this function
// does not wait for it to finish.
//
// CUDA failures (parameter upload or launch) are reported on stderr; when the
// upload fails the kernel is not launched. Nothing is launched for an empty
// image or a null device buffer.
void
CalculateMandelbrot(
    cDeviceMemory& deviceMem,
    int pitch,
    int width,
    int height,
    float threshold,
    int limit,
    float startX,
    float endX,
    float startY,
    float endY)
{
    if (width <= 0 || height <= 0) {
        return;  // nothing to render; a zero-sized grid is an invalid launch
    }

    RGBQUAD* pBuf = (RGBQUAD*)deviceMem.getBuffer();
    if (pBuf == NULL) {
        fprintf(stderr, "CalculateMandelbrot: device buffer is null\n");
        return;
    }

    // Round the grid up so images whose size is not a multiple of the block
    // size are fully covered; the kernel discards the overhanging threads.
    const dim3 block(32, 16);
    const dim3 grid((width  + block.x - 1) / block.x,
                    (height + block.y - 1) / block.y);

#if USE_CONSTANT
    H_THRESHOLD = threshold;
    H_LIMIT     = limit;
    H_START_X   = startX;
    H_END_X     = endX;
    H_START_Y   = startY;
    H_END_Y     = endY;

    const bool uploaded =
        checkCuda(cudaMemcpyToSymbol(D_THRESHOLD, &H_THRESHOLD, sizeof(float)),
                  "cudaMemcpyToSymbol(D_THRESHOLD)") &&
        checkCuda(cudaMemcpyToSymbol(D_LIMIT, &H_LIMIT, sizeof(int)),
                  "cudaMemcpyToSymbol(D_LIMIT)") &&
        checkCuda(cudaMemcpyToSymbol(D_START_X, &H_START_X, sizeof(float)),
                  "cudaMemcpyToSymbol(D_START_X)") &&
        checkCuda(cudaMemcpyToSymbol(D_END_X, &H_END_X, sizeof(float)),
                  "cudaMemcpyToSymbol(D_END_X)") &&
        checkCuda(cudaMemcpyToSymbol(D_START_Y, &H_START_Y, sizeof(float)),
                  "cudaMemcpyToSymbol(D_START_Y)") &&
        checkCuda(cudaMemcpyToSymbol(D_END_Y, &H_END_Y, sizeof(float)),
                  "cudaMemcpyToSymbol(D_END_Y)");
    if (!uploaded) {
        return;  // kernel would run with stale or partial parameters
    }

    CalculateMandelbrotAsync<<< grid, block >>>(pBuf, pitch, width, height);
#else
    CalculateMandelbrotAsync<<< grid, block >>>(pBuf, pitch, width, height,
                                                threshold, limit,
                                                startX, endX, startY, endY);
#endif

    // A kernel launch returns no status itself; configuration errors are
    // only visible through cudaGetLastError().
    checkCuda(cudaGetLastError(), "CalculateMandelbrotAsync launch");
}