/**********************************************************************
Copyright ©2012 Advanced Micro Devices, Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

•	Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
•	Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
 other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include <SDKCommon.hpp>

#ifndef _WIN32
#include <unistd.h>
#endif

namespace streamsdk
{
/* Default constructor. The body is intentionally empty. */
SDKCommon::SDKCommon()
{
}

/*
 * Destructor: deletes every Timer pointer still stored in _timers.
 * Entries are taken from the back one at a time; each pointer is removed
 * from the container before the object it points to is deleted.
 */
SDKCommon::~SDKCommon()
{
    for(;;)
    {
        if(_timers.empty())
            break;

        Timer *newest = _timers.back();
        _timers.pop_back();
        delete newest;
    }
}

/*
 * Returns the directory that contains the running executable, including the
 * trailing path separator ('\\' on Windows, '/' elsewhere).
 *
 * Windows        : the module path is queried with GetModuleFileNameA.
 * Other platforms: the "/proc/self/exe" symbolic link is read with readlink().
 *
 * Returns an empty string when the resolved path contains no separator.
 * Throws std::string when the underlying system call fails.
 */
std::string
SDKCommon::getPath()
{
#ifdef _WIN32
    char buffer[MAX_PATH];
    /*
     * Always use the ANSI entry point, regardless of the UNICODE macro.
     * The buffer is a char array that is converted to std::string below; the
     * wide-character variant would be told the buffer holds MAX_PATH wide
     * characters while it only holds MAX_PATH bytes, and would leave UTF-16
     * data that a narrow std::string cannot represent.
     */
    const DWORD length = GetModuleFileNameA(NULL, buffer, MAX_PATH);
    if(length == 0)
        throw std::string("GetModuleFileName() failed!");
    /* Build from the reported length; it never exceeds the buffer size. */
    std::string str(buffer, length);
    const char separator = '\\';
#else
    char buffer[PATH_MAX + 1];
    /* readlink() does not null-terminate, so the returned length is used. */
    const ssize_t length = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);
    if(length == -1)
        throw std::string("readlink() failed!");
    std::string str(buffer, static_cast<std::string::size_type>(length));
    const char separator = '/';
#endif
    /* Keep everything up to and including the last separator. */
    const std::string::size_type last = str.find_last_of(separator);
    if(last == std::string::npos)
        return std::string();
    return str.substr(0, last + 1);
}

/*
 * Writes a 2D array to standard output.
 *
 * Output layout: a blank line, the header on its own line, then `height`
 * rows of `width` elements each (elements are read in row-major order and
 * each one is followed by a single space), and finally a blank line.
 * Every element is printed; no truncation is applied.
 */
template<typename T> 
void SDKCommon::printArray(
    std::string header, 
    const T * data, 
    const int width,
    const int height) const
{
    std::cout << "\n" << header << "\n";

    for(int row = 0; row < height; ++row)
    {
        int col = 0;
        while(col < width)
        {
            std::cout << data[row * width + col] << " ";
            ++col;
        }
        std::cout << "\n";
    }

    std::cout << "\n";
}

/*
 * Polls an OpenCL event until its command has finished, then releases it.
 *
 * event : pointer to the event to wait on; must not be NULL.
 *
 * Returns SDK_SUCCESS when the command completed and the event was released.
 * Returns SDK_FAILURE when `event` is NULL, when an OpenCL call fails, or
 * when the event reports a negative execution status (the command was
 * terminated abnormally).
 */
int SDKCommon::waitForEventAndRelease(cl_event *event)
{
    if(event == NULL)
    {
        error("Cannot wait on event. NULL pointer.");
        return SDK_FAILURE;
    }

    cl_int status = CL_SUCCESS;
    cl_int eventStatus = CL_QUEUED;

    /*
     * CL_COMPLETE is 0 and the in-flight states (CL_RUNNING, CL_SUBMITTED,
     * CL_QUEUED) are positive. A negative status is an error code, so the
     * loop must also stop for it; waiting for "== CL_COMPLETE" alone would
     * spin forever on a failed command.
     */
    while(eventStatus > CL_COMPLETE)
    {
        status = clGetEventInfo(
                        *event, 
                        CL_EVENT_COMMAND_EXECUTION_STATUS, 
                        sizeof(cl_int),
                        &eventStatus,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetEventInfo Failed with Error Code:");
    }

    status = clReleaseEvent(*event);
    CHECK_OPENCL_ERROR(status, "clReleaseEvent Failed with Error Code:");

    /* Report an abnormally terminated command after the event is released. */
    CHECK_OPENCL_ERROR(eventStatus, "Event terminated abnormally with Error Code:");

    return SDK_SUCCESS;
}

/*
 * Fills a width x height array (row-major) with pseudo-random values.
 *
 * arrayPtr : destination buffer; a NULL pointer is reported and rejected.
 * rangeMin : lower bound added to every generated offset.
 * rangeMax : upper bound used to size the generated range.
 * seed     : value handed to srand(); when 0, the current time is used.
 *
 * Returns SDK_SUCCESS on success, SDK_FAILURE when arrayPtr is NULL.
 */
template<typename T> 
int SDKCommon::fillRandom(
         T * arrayPtr, 
         const int width,
         const int height,
         const T rangeMin,
         const T rangeMax,
         unsigned int seed)
{
    if(arrayPtr == NULL)
    {
        error("Cannot fill array. NULL pointer.");
        return SDK_FAILURE;
    }

    /* A zero seed selects a time-based seed. */
    if(seed == 0)
    {
        seed = (unsigned int)time(NULL);
    }
    srand(seed);

    const double span = double(rangeMax - rangeMin) + 1.0; 

    /* One rand() draw per element, visited in row-major order. */
    for(int row = 0; row < height; ++row)
    {
        for(int col = 0; col < width; ++col)
        {
            arrayPtr[row * width + col] =
                rangeMin + T(span * rand() / (RAND_MAX + 1.0));
        }
    }

    return SDK_SUCCESS;
}

/*
 * Fills a width x height array (row-major) so that every element holds its
 * own linear position, i.e. arrayPtr[k] == T(k).
 *
 * arrayPtr : destination buffer; a NULL pointer is reported and rejected.
 *
 * Returns SDK_SUCCESS on success, SDK_FAILURE when arrayPtr is NULL.
 */
template<typename T> 
int SDKCommon::fillPos(
         T * arrayPtr, 
         const int width,
         const int height)
{
    if(!arrayPtr)
    {
        error("Cannot fill array. NULL pointer.");
        return SDK_FAILURE;
    }

    /*
     * The counters and the subscript are plain ints. Using T for them would
     * wrap (or never terminate) for element types narrower than int and
     * would not compile for floating-point element types. Only the stored
     * value is converted to T.
     */
    for(int i = 0; i < height; i++)
        for(int j = 0; j < width; j++)
        {
            const int index = i*width + j;
            arrayPtr[index] = static_cast<T>(index);
        }

    return SDK_SUCCESS;
}

template<typename T> 
int SDKCommon::fillConstant(
         T * arrayPtr, 
         const int width,
         const int height,
         const T val)
{
    if(!arrayPtr)
    {
        error("Cannot fill array. NULL pointer.");
        return SDK_FAILURE;
    }

    /* initialisation of input with constant value*/
    for(int i = 0; i < height; i++)
        for(int j = 0; j < width; j++)
        {
            int index = i*width + j;
            arrayPtr[index] = val;
        }

    return SDK_SUCCESS;
}

/*
 * Rounds `val` up to the next power of two; a value that already is a power
 * of two is returned unchanged.
 *
 * The value is decremented, every bit below its highest set bit is filled
 * in by OR-ing progressively larger right shifts, and it is incremented
 * again. T must be an integer type.
 */
template<typename T>
T SDKCommon::roundToPowerOf2(T val)
{
    const int bits = static_cast<int>(sizeof(T)) * 8;

    val--;
    /*
     * The shift doubles each step (1, 2, 4, ...) and stops below the bit
     * width of T. Iterating sizeof(T) times instead would skip the largest
     * shifts for 1-, 2- and 4-byte types and would shift by 64 and 128 for
     * 8-byte types, which is undefined.
     */
    for(int shift = 1; shift < bits; shift <<= 1)
        val |= val >> shift;
    val++;

    return val;
}

/*
 * Tests whether `val` is a power of two.
 *
 * The value is widened to long long; it is a power of two when it is
 * non-zero and equal to its own lowest set bit.
 *
 * Returns SDK_SUCCESS when it is, SDK_FAILURE otherwise (including for 0).
 */
template<typename T>
int SDKCommon::isPowerOf2(T val)
{
    const long long widened = val;
    const long long lowestSetBit = widened & (-widened);

    const bool singleBit = (lowestSetBit == widened) && (widened != 0);
    return singleBit ? SDK_SUCCESS : SDK_FAILURE;
}
/*
 * Overload taking a string argument. The argument is not inspected; the
 * function always returns "unknown error code".
 */
const char* 
getOpenCLErrorCodeStr(std::string input)
{
    (void)input;    /* deliberately unused */

    const char *const fallbackName = "unknown error code";
    return fallbackName;
}

/*
 * Maps an OpenCL error code to the name of its constant.
 *
 * input : value that is converted to int and looked up in the table below.
 *
 * Returns the constant's name as a string literal, or "unknown error code"
 * when the value is not one of the listed codes.
 */
template<typename T>
const char* 
getOpenCLErrorCodeStr(T input)
{
    struct CodeName
    {
        int         code;
        const char *name;
    };

    static const CodeName knownCodes[] =
    {
        { CL_DEVICE_NOT_FOUND,                          "CL_DEVICE_NOT_FOUND" },
        { CL_DEVICE_NOT_AVAILABLE,                      "CL_DEVICE_NOT_AVAILABLE" },
        { CL_COMPILER_NOT_AVAILABLE,                    "CL_COMPILER_NOT_AVAILABLE" },
        { CL_MEM_OBJECT_ALLOCATION_FAILURE,             "CL_MEM_OBJECT_ALLOCATION_FAILURE" },
        { CL_OUT_OF_RESOURCES,                          "CL_OUT_OF_RESOURCES" },
        { CL_OUT_OF_HOST_MEMORY,                        "CL_OUT_OF_HOST_MEMORY" },
        { CL_PROFILING_INFO_NOT_AVAILABLE,              "CL_PROFILING_INFO_NOT_AVAILABLE" },
        { CL_MEM_COPY_OVERLAP,                          "CL_MEM_COPY_OVERLAP" },
        { CL_IMAGE_FORMAT_MISMATCH,                     "CL_IMAGE_FORMAT_MISMATCH" },
        { CL_IMAGE_FORMAT_NOT_SUPPORTED,                "CL_IMAGE_FORMAT_NOT_SUPPORTED" },
        { CL_BUILD_PROGRAM_FAILURE,                     "CL_BUILD_PROGRAM_FAILURE" },
        { CL_MAP_FAILURE,                               "CL_MAP_FAILURE" },
        { CL_MISALIGNED_SUB_BUFFER_OFFSET,              "CL_MISALIGNED_SUB_BUFFER_OFFSET" },
        { CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST" },
        { CL_INVALID_VALUE,                             "CL_INVALID_VALUE" },
        { CL_INVALID_DEVICE_TYPE,                       "CL_INVALID_DEVICE_TYPE" },
        { CL_INVALID_PLATFORM,                          "CL_INVALID_PLATFORM" },
        { CL_INVALID_DEVICE,                            "CL_INVALID_DEVICE" },
        { CL_INVALID_CONTEXT,                           "CL_INVALID_CONTEXT" },
        { CL_INVALID_QUEUE_PROPERTIES,                  "CL_INVALID_QUEUE_PROPERTIES" },
        { CL_INVALID_COMMAND_QUEUE,                     "CL_INVALID_COMMAND_QUEUE" },
        { CL_INVALID_HOST_PTR,                          "CL_INVALID_HOST_PTR" },
        { CL_INVALID_MEM_OBJECT,                        "CL_INVALID_MEM_OBJECT" },
        { CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,           "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" },
        { CL_INVALID_IMAGE_SIZE,                        "CL_INVALID_IMAGE_SIZE" },
        { CL_INVALID_SAMPLER,                           "CL_INVALID_SAMPLER" },
        { CL_INVALID_BINARY,                            "CL_INVALID_BINARY" },
        { CL_INVALID_BUILD_OPTIONS,                     "CL_INVALID_BUILD_OPTIONS" },
        { CL_INVALID_PROGRAM,                           "CL_INVALID_PROGRAM" },
        { CL_INVALID_PROGRAM_EXECUTABLE,                "CL_INVALID_PROGRAM_EXECUTABLE" },
        { CL_INVALID_KERNEL_NAME,                       "CL_INVALID_KERNEL_NAME" },
        { CL_INVALID_KERNEL_DEFINITION,                 "CL_INVALID_KERNEL_DEFINITION" },
        { CL_INVALID_KERNEL,                            "CL_INVALID_KERNEL" },
        { CL_INVALID_ARG_INDEX,                         "CL_INVALID_ARG_INDEX" },
        { CL_INVALID_ARG_VALUE,                         "CL_INVALID_ARG_VALUE" },
        { CL_INVALID_ARG_SIZE,                          "CL_INVALID_ARG_SIZE" },
        { CL_INVALID_KERNEL_ARGS,                       "CL_INVALID_KERNEL_ARGS" },
        { CL_INVALID_WORK_DIMENSION,                    "CL_INVALID_WORK_DIMENSION" },
        { CL_INVALID_WORK_GROUP_SIZE,                   "CL_INVALID_WORK_GROUP_SIZE" },
        { CL_INVALID_WORK_ITEM_SIZE,                    "CL_INVALID_WORK_ITEM_SIZE" },
        { CL_INVALID_GLOBAL_OFFSET,                     "CL_INVALID_GLOBAL_OFFSET" },
        { CL_INVALID_EVENT_WAIT_LIST,                   "CL_INVALID_EVENT_WAIT_LIST" },
        { CL_INVALID_EVENT,                             "CL_INVALID_EVENT" },
        { CL_INVALID_OPERATION,                         "CL_INVALID_OPERATION" },
        { CL_INVALID_GL_OBJECT,                         "CL_INVALID_GL_OBJECT" },
        { CL_INVALID_BUFFER_SIZE,                       "CL_INVALID_BUFFER_SIZE" },
        { CL_INVALID_MIP_LEVEL,                         "CL_INVALID_MIP_LEVEL" },
        { CL_INVALID_GLOBAL_WORK_SIZE,                  "CL_INVALID_GLOBAL_WORK_SIZE" },
        { CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR,       "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR" },
        { CL_PLATFORM_NOT_FOUND_KHR,                    "CL_PLATFORM_NOT_FOUND_KHR" },
        /* CL_INVALID_PROPERTY_EXT is intentionally left out (disabled). */
        { CL_DEVICE_PARTITION_FAILED_EXT,               "CL_DEVICE_PARTITION_FAILED_EXT" },
        { CL_INVALID_PARTITION_COUNT_EXT,               "CL_INVALID_PARTITION_COUNT_EXT" }
    };

    const int errorCode = (int)input;
    const size_t entryCount = sizeof(knownCodes) / sizeof(knownCodes[0]);

    for(size_t k = 0; k < entryCount; ++k)
    {
        if(knownCodes[k].code == errorCode)
            return knownCodes[k].name;
    }

    return "unknown error code";
}


/*
 * Compares `input` with `reference` and reports a mismatch on standard output.
 *
 * message    : text printed when the values differ.
 * isAPIerror : when true, the message is prefixed with "Error: " and followed
 *              by the name returned by getOpenCLErrorCodeStr(input);
 *              when false, only the message itself is printed.
 *
 * Returns 0 when the values are equal, 1 otherwise.
 */
template<typename T>
int SDKDeviceInfo::checkVal(
    T input, 
    T reference, 
    std::string message,
    bool isAPIerror) const
{
    /* Matching values: nothing to report. */
    if(input == reference)
        return 0;

    if(!isAPIerror)
    {
        std::cout << message;
        return 1;
    }

    std::cout << "Error: " << message << " Error code : ";
    std::cout << getOpenCLErrorCodeStr(input) << std::endl;
    return 1;
}


/*
 * Compares `input` with `reference` and reports a mismatch on standard output.
 *
 * message    : text printed when the values differ.
 * isAPIerror : when true, the message is prefixed with "Error: " and followed
 *              by the name returned by getOpenCLErrorCodeStr(input);
 *              when false, only the message itself is printed.
 *
 * Returns 0 when the values are equal, 1 otherwise.
 */
template<typename T>
int KernelWorkGroupInfo::checkVal(
    T input, 
    T reference, 
    std::string message,
    bool isAPIerror) const
{
    const bool matches = (input == reference);
    if(matches)
        return 0;

    if(isAPIerror)
    {
        std::cout << "Error: " << message << " Error code : ";
        std::cout << getOpenCLErrorCodeStr(input) << std::endl;
        return 1;
    }

    std::cout << message;
    return 1;
}

/*
 * Queries work-group information for `kernel` on `device` and stores it in
 * this object: kernelWorkGroupSize, localMemoryUsed and compileWorkGroupSize.
 *
 * The three queries are issued in that order; the first one that fails is
 * reported through checkVal() and stops the sequence.
 *
 * Returns SDK_SUCCESS when every query succeeds, SDK_FAILURE otherwise.
 */
int KernelWorkGroupInfo::setKernelWorkGroupInfo(cl_kernel &kernel,cl_device_id &device)
{
    /* One entry per clGetKernelWorkGroupInfo query. */
    struct Query
    {
        cl_kernel_work_group_info param;
        size_t                    size;
        void                     *destination;
        const char               *failureMessage;
    };

    const Query queries[] =
    {
        { CL_KERNEL_WORK_GROUP_SIZE,
          sizeof(size_t),
          &kernelWorkGroupSize,
          "clGetKernelWorkGroupInfo failed(CL_KERNEL_WORK_GROUP_SIZE)" },
        { CL_KERNEL_LOCAL_MEM_SIZE,
          sizeof(cl_ulong),
          &localMemoryUsed,
          "clGetKernelWorkGroupInfo failed(CL_KERNEL_LOCAL_MEM_SIZE)" },
        { CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
          sizeof(size_t) * 3,
          compileWorkGroupSize,
          "clGetKernelWorkGroupInfo failed(CL_KERNEL_COMPILE_WORK_GROUP_SIZE)" }
    };
    const size_t queryCount = sizeof(queries) / sizeof(queries[0]);

    for(size_t q = 0; q < queryCount; ++q)
    {
        cl_int status = clGetKernelWorkGroupInfo(kernel,
                                                 device,
                                                 queries[q].param,
                                                 queries[q].size,
                                                 queries[q].destination,
                                                 NULL);
        if(checkVal(status, CL_SUCCESS, queries[q].failureMessage))
            return SDK_FAILURE;
    }

    return SDK_SUCCESS;
}


// Set all information for a given device id
int 
SDKDeviceInfo::setDeviceInfo(cl_device_id deviceId)
{
    cl_int status = CL_SUCCESS;


    //Get device type
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_TYPE, 
                    sizeof(cl_device_type), 
                    &dType, 
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_TYPE) failed");

    //Get vendor ID
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_VENDOR_ID, 
                    sizeof(cl_uint), 
                    &venderId, 
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_VENDOR_ID) failed");

    //Get max compute units
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_COMPUTE_UNITS, 
                    sizeof(cl_uint), 
                    &maxComputeUnits, 
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_COMPUTE_UNITS) failed");

    //Get max work item dimensions
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                    sizeof(cl_uint),
                    &maxWorkItemDims,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) failed");

    //Get max work item sizes
    delete maxWorkItemSizes;
    maxWorkItemSizes = new size_t[maxWorkItemDims];
    CHECK_ALLOCATION(maxWorkItemSizes, "Failed to allocate memory(maxWorkItemSizes)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_WORK_ITEM_SIZES,
                    maxWorkItemDims * sizeof(size_t),
                    maxWorkItemSizes,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) failed");

    // Maximum work group size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_WORK_GROUP_SIZE,
                    sizeof(size_t),
                    &maxWorkGroupSize,
                    NULL);
   CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    // Preferred vector sizes of all data types
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
                    sizeof(cl_uint),
                    &preferredCharVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
                    sizeof(cl_uint),
                    &preferredShortVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
                    sizeof(cl_uint),
                    &preferredIntVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
                    sizeof(cl_uint),
                    &preferredLongVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
                    sizeof(cl_uint),
                    &preferredFloatVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
                    sizeof(cl_uint),
                    &preferredDoubleVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,
                    sizeof(cl_uint),
                    &preferredHalfVecWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) failed");

    // Clock frequency
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_CLOCK_FREQUENCY,
                    sizeof(cl_uint),
                    &maxClockFrequency,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_CLOCK_FREQUENCY) failed");

    // Address bits
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_ADDRESS_BITS,
                    sizeof(cl_uint),
                    &addressBits,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_ADDRESS_BITS) failed");

    // Maximum memory alloc size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                    sizeof(cl_ulong),
                    &maxMemAllocSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_MEM_ALLOC_SIZE) failed");

    // Image support
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_IMAGE_SUPPORT,
                    sizeof(cl_bool),
                    &imageSupport,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_IMAGE_SUPPORT) failed");

    // Maximum read image arguments
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_READ_IMAGE_ARGS,
                    sizeof(cl_uint),
                    &maxReadImageArgs,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_READ_IMAGE_ARGS) failed");

    // Maximum write image arguments
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
                    sizeof(cl_uint),
                    &maxWriteImageArgs,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) failed");

    // 2D image and 3D dimensions
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_IMAGE2D_MAX_WIDTH,
                    sizeof(size_t),
                    &image2dMaxWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_IMAGE2D_MAX_WIDTH) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_IMAGE2D_MAX_HEIGHT,
                    sizeof(size_t),
                    &image2dMaxHeight,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_IMAGE2D_MAX_HEIGHT) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_IMAGE3D_MAX_WIDTH,
                    sizeof(size_t),
                    &image3dMaxWidth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_IMAGE3D_MAX_WIDTH) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_IMAGE3D_MAX_HEIGHT,
                    sizeof(size_t),
                    &image3dMaxHeight,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_IMAGE3D_MAX_HEIGHT) failed");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_IMAGE3D_MAX_DEPTH,
                    sizeof(size_t),
                    &image3dMaxDepth,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_IMAGE3D_MAX_DEPTH) failed");

    // Maximum samplers
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_SAMPLERS,
                    sizeof(cl_uint),
                    &maxSamplers,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_SAMPLERS) failed");

    // Maximum parameter size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_PARAMETER_SIZE,
                    sizeof(size_t),
                    &maxParameterSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_PARAMETER_SIZE) failed");

    // Memory base address align
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                    sizeof(cl_uint),
                    &memBaseAddressAlign,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MEM_BASE_ADDR_ALIGN) failed");

    // Minimum data type align size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
                    sizeof(cl_uint),
                    &minDataTypeAlignSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE) failed");

    // Single precision floating point configuration
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_SINGLE_FP_CONFIG,
                    sizeof(cl_device_fp_config),
                    &singleFpConfig,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_SINGLE_FP_CONFIG) failed");

    // Double precision floating point configuration
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_DOUBLE_FP_CONFIG,
                    sizeof(cl_device_fp_config),
                    &doubleFpConfig,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_DOUBLE_FP_CONFIG) failed");

    // Global memory cache type
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
                    sizeof(cl_device_mem_cache_type),
                    &globleMemCacheType,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) failed");

    // Global memory cache line size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE,
                    sizeof(cl_uint),
                    &globalMemCachelineSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) failed");

    // Global memory cache size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
                    sizeof(cl_ulong),
                    &globalMemCacheSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) failed");

    // Global memory size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_GLOBAL_MEM_SIZE,
                    sizeof(cl_ulong),
                    &globalMemSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_GLOBAL_MEM_SIZE) failed");

    // Maximum constant buffer size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
                    sizeof(cl_ulong),
                    &maxConstBufSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed");

    // Maximum constant arguments
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_MAX_CONSTANT_ARGS,
                    sizeof(cl_uint),
                    &maxConstArgs,
                    NULL);
   CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_MAX_CONSTANT_ARGS) failed");

    // Local memory type
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_LOCAL_MEM_TYPE,
                    sizeof(cl_device_local_mem_type),
                    &localMemType,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_LOCAL_MEM_TYPE) failed");

    // Local memory size
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_LOCAL_MEM_SIZE,
                    sizeof(cl_ulong),
                    &localMemSize,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_LOCAL_MEM_SIZE) failed");

    // Error correction support
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_ERROR_CORRECTION_SUPPORT,
                    sizeof(cl_bool),
                    &errCorrectionSupport,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_ERROR_CORRECTION_SUPPORT) failed");

    // Profiling timer resolution
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PROFILING_TIMER_RESOLUTION,
                    sizeof(size_t),
                    &timerResolution,
                    NULL);
   CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PROFILING_TIMER_RESOLUTION) failed");

    // Endian little
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_ENDIAN_LITTLE,
                    sizeof(cl_bool),
                    &endianLittle,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_ENDIAN_LITTLE) failed");

    // Device available
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_AVAILABLE,
                    sizeof(cl_bool),
                    &available,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_AVAILABLE) failed");

    // Device compiler available
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_COMPILER_AVAILABLE,
                    sizeof(cl_bool),
                    &compilerAvailable,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_COMPILER_AVAILABLE) failed");

    // Device execution capabilities
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_EXECUTION_CAPABILITIES,
                    sizeof(cl_device_exec_capabilities),
                    &execCapabilities,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_EXECUTION_CAPABILITIES) failed");

    // Device queue properties
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_QUEUE_PROPERTIES,
                    sizeof(cl_command_queue_properties),
                    &queueProperties,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_QUEUE_PROPERTIES) failed");

    // Platform
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PLATFORM,
                    sizeof(cl_platform_id),
                    &platform,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PLATFORM) failed");

    // Device name
    size_t tempSize = 0;
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_NAME,
                    0,
                    NULL,
                    &tempSize);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NAME) failed");

    delete name;
    name = new char[tempSize];
    CHECK_ALLOCATION(name, "Failed to allocate memory(name)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_NAME,
                    sizeof(char) * tempSize,
                    name,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NAME) failed");

    // Vendor name
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_VENDOR,
                    0,
                    NULL,
                    &tempSize);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_VENDOR) failed");

    delete vendorName;
    vendorName = new char[tempSize];
    CHECK_ALLOCATION(vendorName, "Failed to allocate memory(venderName)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_VENDOR,
                    sizeof(char) * tempSize,
                    vendorName,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_VENDOR) failed");

    // Driver version
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DRIVER_VERSION,
                    0,
                    NULL,
                    &tempSize);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DRIVER_VERSION) failed");

    delete driverVersion;
    driverVersion = new char[tempSize];
    CHECK_ALLOCATION(driverVersion, "Failed to allocate memory(driverVersion)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DRIVER_VERSION,
                    sizeof(char) * tempSize,
                    driverVersion,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DRIVER_VERSION) failed");

    // Device profile
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PROFILE,
                    0,
                    NULL,
                    &tempSize);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PROFILE) failed");

    delete profileType;
    profileType = new char[tempSize];
    CHECK_ALLOCATION(profileType, "Failed to allocate memory(profileType)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_PROFILE,
                    sizeof(char) * tempSize,
                    profileType,
                    NULL);
   CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_PROFILE) failed");

    // Device version
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_VERSION,
                    0,
                    NULL,
                    &tempSize);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_VERSION) failed");

    delete deviceVersion;
    deviceVersion = new char[tempSize];
    CHECK_ALLOCATION(deviceVersion, "Failed to allocate memory(deviceVersion)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_VERSION,
                    sizeof(char) * tempSize,
                    deviceVersion,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_VERSION) failed");

    // Device extensions
    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_EXTENSIONS,
                    0,
                    NULL,
                    &tempSize);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_EXTENSIONS) failed");

    delete extensions;
    extensions = new char[tempSize];
    CHECK_ALLOCATION(extensions, "Failed to allocate memory(extensions)");

    status = clGetDeviceInfo(
                    deviceId, 
                    CL_DEVICE_EXTENSIONS,
                    sizeof(char) * tempSize,
                    extensions,
                    NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_EXTENSIONS) failed");

    // Device parameters of OpenCL 1.1 Specification
#ifdef CL_VERSION_1_1
    std::string deviceVerStr(deviceVersion);
    size_t vStart = deviceVerStr.find(" ", 0);
    size_t vEnd = deviceVerStr.find(" ", vStart + 1);
    std::string vStrVal = deviceVerStr.substr(vStart + 1, vEnd - vStart - 1);
    if(vStrVal.compare("1.0") > 0)
    {
        // Native vector sizes of all data types
        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,
                        sizeof(cl_uint),
                        &nativeCharVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) failed");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,
                        sizeof(cl_uint),
                        &nativeShortVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) failed");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,
                        sizeof(cl_uint),
                        &nativeIntVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) failed");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,
                        sizeof(cl_uint),
                        &nativeLongVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) failed");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,
                        sizeof(cl_uint),
                        &nativeFloatVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) failed");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,
                        sizeof(cl_uint),
                        &nativeDoubleVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) failed");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,
                        sizeof(cl_uint),
                        &nativeHalfVecWidth,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) failed");

        // Host unified memory
        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_HOST_UNIFIED_MEMORY,
                        sizeof(cl_bool),
                        &hostUnifiedMem,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_HOST_UNIFIED_MEMORY) failed");

        // Device OpenCL C version
        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_OPENCL_C_VERSION,
                        0,
                        NULL,
                        &tempSize);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_OPENCL_C_VERSION) failed");

        delete openclCVersion;
        openclCVersion = new char[tempSize];
        CHECK_ALLOCATION(openclCVersion, "Failed to allocate memory(openclCVersion)");

        status = clGetDeviceInfo(
                        deviceId, 
                        CL_DEVICE_OPENCL_C_VERSION,
                        sizeof(char) * tempSize,
                        openclCVersion,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceIDs(CL_DEVICE_OPENCL_C_VERSION) failed");
    }
#endif

    return SDK_SUCCESS;
}

/*
 * Compares input against reference and reports a mismatch.
 *
 * input       value to check
 * reference   value that input is expected to equal
 * message     text printed when the two values differ
 * isAPIerror  when true the message is printed together with
 *             getOpenCLErrorCodeStr(input); otherwise the message is
 *             passed to error()
 *
 * Returns SDK_SUCCESS when input == reference, SDK_FAILURE otherwise.
 */
template<typename T>
int SDKCommon::checkVal(
    T input, 
    T reference, 
    std::string message,
    bool isAPIerror)
{
    // Nothing to report when the values agree
    if(input == reference)
        return SDK_SUCCESS;

    // Plain (non OpenCL API) mismatch: only the message is reported
    if(!isAPIerror)
    {
        error(message);
        return SDK_FAILURE;
    }

    // OpenCL API mismatch: report the message and the decoded error code
    std::cout << "Error: " << message << " Error code : ";
    std::cout << getOpenCLErrorCodeStr(input) << std::endl;
    return SDK_FAILURE;
}

/*
 * Converts t to its textual form.
 *
 * t  value to convert
 * r  stream manipulator (std::dec, std::hex, ...) applied to the stream
 *    before t is inserted
 *
 * Returns the characters produced by streaming t.
 */
template<typename T>
std::string SDKCommon::toString(T t, std::ios_base &(*r)(std::ios_base&))
{
    std::ostringstream formatted;
    formatted << r;     // set the requested formatting first
    formatted << t;     // then insert the value
    return formatted.str();
}

/*
 * Displays the platform vendor followed by the index, name and device id of
 * every device of the requested type on the given platform.
 *
 * platform    platform whose devices are listed
 * deviceType  OpenCL device type filter passed to clGetDeviceIDs
 *
 * Returns SDK_SUCCESS on success; an OpenCL or allocation failure is reported
 * through the CHECK_* macros.
 */
int SDKCommon::displayDevices(cl_platform_id platform, cl_device_type deviceType)
{
    /*
     * Owns the device id array so that it is released on every exit path,
     * including the early returns taken by the CHECK_* macros on failure.
     */
    struct DeviceIdList
    {
        cl_device_id* ids;
        DeviceIdList() : ids(NULL) {}
        ~DeviceIdList() { free(ids); }   // free(NULL) is a no-op
    } deviceList;

    cl_int status;

    // Get platform name
    char platformVendor[1024];
    status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(platformVendor), platformVendor, NULL);
    CHECK_OPENCL_ERROR(status, "clGetPlatformInfo failed");
    
    std::cout << "\nSelected Platform Vendor : " << platformVendor << std::endl;

    // Get number of devices available 
    cl_uint deviceCount = 0;
    status = clGetDeviceIDs(platform, deviceType, 0, NULL, &deviceCount);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs failed");

    deviceList.ids = (cl_device_id*)malloc(sizeof(cl_device_id) * deviceCount);
    CHECK_ALLOCATION(deviceList.ids, "Failed to allocate memory(deviceIds)");

    // Get device ids
    status = clGetDeviceIDs(platform, deviceType, deviceCount, deviceList.ids, NULL);
    CHECK_OPENCL_ERROR(status, "clGetDeviceIDs failed");

    // Print device index and device names
    for(cl_uint i = 0; i < deviceCount; ++i)
    {
        char deviceName[1024];
        status = clGetDeviceInfo(deviceList.ids[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceInfo failed");
        
        std::cout << "Device " << i << " : " << deviceName <<" Device ID is "<<deviceList.ids[i]<< std::endl;
    }

    // deviceList releases the id array when it goes out of scope
    return SDK_SUCCESS;
}

/*
 * Prints the vendor of the given platform followed by the index and name of
 * each of the first deviceCount entries of devices.
 *
 * Returns SDK_SUCCESS on success; OpenCL failures are reported through
 * CHECK_OPENCL_ERROR.
 */
int 
SDKCommon::displayPlatformAndDevices(
cl_platform_id platform, const cl_device_id* devices, const int deviceCount)
{
    // Query and print the platform vendor string
    char vendor[1024];
    cl_int status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
    CHECK_OPENCL_ERROR(status, "clGetPlatformInfo failed");

    std::cout << "\nSelected Platform Vendor : " << vendor << std::endl;

    // One output line per device: its index in the array and its name
    cl_int index = 0;
    while(index < deviceCount)
    {
        char name[1024];
        status = clGetDeviceInfo(devices[index], CL_DEVICE_NAME, sizeof(name), name, NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceInfo failed");

        std::cout << "Device " << index << " : " << name << std::endl;
        ++index;
    }

    return SDK_SUCCESS;
}


/*
 * Checks that deviceId is below deviceCount.
 *
 * Returns SDK_SUCCESS when deviceId < deviceCount; otherwise prints the
 * allowed upper bound and returns SDK_FAILURE.
 */
int 
SDKCommon::validateDeviceId(int deviceId, int deviceCount)
{
    const bool belowCount = deviceId < deviceCount;
    if(belowCount)
        return SDK_SUCCESS;

    std::cout << "DeviceId should be < " << deviceCount << std::endl;
    return SDK_FAILURE;
}

int
SDKCommon::generateBinaryImage(const bifData &binaryData)
{
    cl_int status = CL_SUCCESS;

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */
    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    CHECK_OPENCL_ERROR(status, "clGetPlatformIDs failed.");

    if (0 < numPlatforms) 
    {
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        CHECK_OPENCL_ERROR(status, "clGetPlatformIDs failed.");

        char platformName[100];
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            status = clGetPlatformInfo(
                        platforms[i],
                        CL_PLATFORM_VENDOR,
                        sizeof(platformName),
                        platformName,
                        NULL);
			CHECK_OPENCL_ERROR(status, "clGetPlatformInfo failed.");

            platform = platforms[i];
            if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) 
                break;
        }
        std::cout << "Platform found : " << platformName << "\n";
        delete[] platforms;
    }

    if(NULL == platform)
    {
		std::cout << "NULL platform found so Exiting Application.";
        return SDK_FAILURE;
    }

    /*
     * If we could find our platform, use it. Otherwise use just available platform.
     */
    cl_context_properties cps[5] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        CL_CONTEXT_OFFLINE_DEVICES_AMD,
        (cl_context_properties)1,
        0
    };

    cl_context context = clCreateContextFromType(
                            cps,
                            CL_DEVICE_TYPE_ALL,
                            NULL,
                            NULL,
                            &status);
    CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed.");

    /* create a CL program using the kernel source */
    SDKFile kernelFile;
    std::string kernelPath = getPath();
    kernelPath.append(binaryData.kernelName.c_str());
    if(!kernelFile.open(kernelPath.c_str()))
    {
        std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
        return SDK_FAILURE;
    }
    const char * source = kernelFile.source().c_str();
    size_t sourceSize[] = {strlen(source)};
    cl_program program = clCreateProgramWithSource(
                            context,
                            1,
                            &source,
                            sourceSize,
                            &status);
    CHECK_OPENCL_ERROR(status, "clCreateProgramWithSource failed.");

    std::string flagsStr = std::string(binaryData.flagsStr.c_str());

    // Get additional options
    if(binaryData.flagsFileName.size() != 0)
    {
        streamsdk::SDKFile flagsFile;
        std::string flagsPath = getPath();
        flagsPath.append(binaryData.flagsFileName.c_str());
        if(!flagsFile.open(flagsPath.c_str()))
        {
            std::cout << "Failed to load flags file: " << flagsPath << std::endl;
            return SDK_FAILURE;
        }
        flagsFile.replaceNewlineWithSpaces();
        const char * flags = flagsFile.source().c_str();
        flagsStr.append(flags);
    }

    if(flagsStr.size() != 0)
        std::cout << "Build Options are : " << flagsStr.c_str() << std::endl;

    /* create a cl program executable for all the devices specified */
    status = clBuildProgram(
                program,
                0,
                NULL,
                flagsStr.c_str(),
                NULL,
                NULL);
    /* This function is intentionally left without a error check
      as it may not pass if kernels rely on specific properties of devices
      In such cases, binaries for eligible devices are geenrated and dumped
      even wen this function will return an error */

    //CHECK_OPENCL_ERROR(status, "clBuildProgram failed.");

    size_t numDevices;
    status = clGetProgramInfo(
                program, 
                CL_PROGRAM_NUM_DEVICES,
                sizeof(numDevices),
                &numDevices,
                NULL );
    CHECK_OPENCL_ERROR(status, "clGetProgramInfo(CL_PROGRAM_NUM_DEVICES) failed.");

    std::cout << "Number of devices found : " << numDevices << "\n\n";
    cl_device_id *devices = (cl_device_id *)malloc( sizeof(cl_device_id) * numDevices );
    CHECK_ALLOCATION(devices, "Failed to allocate host memory.(devices)");

    /* grab the handles to all of the devices in the program. */
    status = clGetProgramInfo(
                program, 
                CL_PROGRAM_DEVICES, 
                sizeof(cl_device_id) * numDevices,
                devices,
                NULL );
    CHECK_OPENCL_ERROR(status, "clGetProgramInfo(CL_PROGRAM_DEVICES) failed.");

    /* figure out the sizes of each of the binaries. */
    size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
   CHECK_ALLOCATION(binarySizes, "Failed to allocate host memory.(binarySizes)");
    
    status = clGetProgramInfo(
                program, 
                CL_PROGRAM_BINARY_SIZES,
                sizeof(size_t) * numDevices, 
                binarySizes, 
                NULL);
    CHECK_OPENCL_ERROR(status, "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES) failed.");

    size_t i = 0;
    /* copy over all of the generated binaries. */
    char **binaries = (char **)malloc( sizeof(char *) * numDevices );
    CHECK_ALLOCATION(binaries, "Failed to allocate host memory.(binaries)");

    for(i = 0; i < numDevices; i++)
    {
        if(binarySizes[i] != 0)
        {
            binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
            CHECK_ALLOCATION(binaries[i], "Failed to allocate host memory.(binaries[i])");
        }
        else
        {
            binaries[i] = NULL;
        }
    }
    status = clGetProgramInfo(
                program, 
                CL_PROGRAM_BINARIES,
                sizeof(char *) * numDevices, 
                binaries, 
                NULL);
    CHECK_OPENCL_ERROR(status, "clGetProgramInfo(CL_PROGRAM_BINARIES) failed.");

    /* dump out each binary into its own separate file. */
    for(i = 0; i < numDevices; i++)
    {
        char fileName[100];
        sprintf(fileName, "%s.%d", binaryData.binaryName.c_str(), (int)i);
        char deviceName[1024];
        status = clGetDeviceInfo(
                    devices[i], 
                    CL_DEVICE_NAME, 
                    sizeof(deviceName),
                    deviceName, 
                    NULL);
        CHECK_OPENCL_ERROR(status, "clGetDeviceInfo(CL_DEVICE_NAME) failed.");

        if(binarySizes[i] != 0)
        {
            printf( "%s binary kernel: %s\n", deviceName, fileName);
            streamsdk::SDKFile BinaryFile;
            if(BinaryFile.writeBinaryToFile(fileName, 
                                             binaries[i], 
                                             binarySizes[i]))
            {
                std::cout << "Failed to load kernel file : " << fileName << std::endl;
                return SDK_FAILURE;
            }
        }
        else
        {
            printf(
                "%s binary kernel(%s) : %s\n", 
                deviceName, 
                fileName,
                "Skipping as there is no binary data to write!");
        }
    }

    // Release all resouces and memory
    for(i = 0; i < numDevices; i++)
    {
        if(binaries[i] != NULL)
        {
            free(binaries[i]);
            binaries[i] = NULL;
        }
    }

    if(binaries != NULL)
    {
        free(binaries);
        binaries = NULL;
    }

    if(binarySizes != NULL)
    {
        free(binarySizes);
        binarySizes = NULL;
    }

    if(devices != NULL)
    {
        free(devices);
        devices = NULL;
    }

    status = clReleaseProgram(program);
    CHECK_OPENCL_ERROR(status, "clReleaseProgram failed.");

    status = clReleaseContext(context);
    CHECK_OPENCL_ERROR(status, "clReleaseContext failed.");

    return SDK_SUCCESS;
}

/*
 * Retrieves the list of devices attached to context.
 *
 * context          context whose device list is queried
 * devices          receives a malloc'ed array with the context's devices
 * deviceId         device index, checked against the number of devices
 * deviceIdEnabled  not used by this function
 *
 * Returns SDK_SUCCESS on success and SDK_FAILURE otherwise.
 */
int SDKCommon::getDevices(cl_context &context, cl_device_id **devices, cl_int deviceId, bool deviceIdEnabled)
{
    /* First, get the size of device list data */
    size_t deviceListSize = 0;
    int status = 0;
    status = clGetContextInfo(
                 context, 
                 CL_CONTEXT_DEVICES, 
                 0, 
                 NULL, 
                 &deviceListSize);
    CHECK_OPENCL_ERROR(status, "clGetContextInfo failed.");

    int deviceCount = (int)(deviceListSize / sizeof(cl_device_id));
    if(validateDeviceId(deviceId, deviceCount))
    {
        std::cout << "Invalid Device Selected";
        return SDK_FAILURE;
    }

    /**
     * Now allocate memory for device list based on the size we got earlier
     * Note that this memory is allocated to a pointer which is a argument
     * so it must not be deleted inside this function. The Sample implementer
     * has to delete the devices pointer in the host code at clean up.
     * The pointer stays assigned to *devices even when the query below fails.
     */

    (*devices) = (cl_device_id *)malloc(deviceListSize);
    CHECK_ALLOCATION((*devices), "Failed to allocate memory (devices).");

    /* Now, get the device list data */
    status = clGetContextInfo(context, 
                 CL_CONTEXT_DEVICES, 
                 deviceListSize, 
                 (*devices), 
                 NULL);
    CHECK_OPENCL_ERROR(status, "clGetContextInfo failed.");

    return SDK_SUCCESS;
}

/*
 * Selects an OpenCL platform.
 *
 * platform           receives the selected platform; left untouched when no
 *                    platform is available
 * platformId         index into the platform list, used only when
 *                    platformIdEnabled is true
 * platformIdEnabled  true to pick the platform at platformId; false to pick
 *                    the platform whose vendor is "Advanced Micro Devices,
 *                    Inc.", or the last platform listed when there is none
 *
 * Returns SDK_SUCCESS when platform is not NULL on exit, SDK_FAILURE when the
 * platform is NULL, platformId is out of range or an OpenCL call fails.
 */
int
SDKCommon::getPlatform(cl_platform_id &platform, int platformId, bool platformIdEnabled)
{
    cl_uint numPlatforms = 0;
    cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
    CHECK_OPENCL_ERROR(status, "clGetPlatformIDs failed.");

    if (0 < numPlatforms) 
    {
        /*
         * Owns the platform list so that it is released on every exit path,
         * including the early returns taken by CHECK_OPENCL_ERROR.
         */
        struct PlatformList
        {
            cl_platform_id* ids;
            explicit PlatformList(cl_uint count) : ids(new cl_platform_id[count]) {}
            ~PlatformList() { delete[] ids; }
        } platformList(numPlatforms);

        status = clGetPlatformIDs(numPlatforms, platformList.ids, NULL);
        CHECK_OPENCL_ERROR(status, "clGetPlatformIDs failed.");

        if(platformIdEnabled)
        {
            // Reject indices outside the list instead of reading past it
            if(platformId < 0 || (cl_uint)platformId >= numPlatforms)
            {
                std::cout << "PlatformId should be >= 0 and < " << numPlatforms << std::endl;
                return SDK_FAILURE;
            }
            platform = platformList.ids[platformId];
        }
        else
        {
            char platformName[100];
            for (unsigned i = 0; i < numPlatforms; ++i) 
            {
                status = clGetPlatformInfo(platformList.ids[i],
                                           CL_PLATFORM_VENDOR,
                                           sizeof(platformName),
                                           platformName,
                                           NULL);
                CHECK_OPENCL_ERROR(status, "clGetPlatformInfo failed.");

                // Keep the last platform seen unless the AMD one is found
                platform = platformList.ids[i];
                if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) 
                {
                    break;
                }
            }
            std::cout << "Platform found : " << platformName << "\n";
        }
    }

    if(NULL == platform)
    {
        error("NULL platform found so Exiting Application.");
        return SDK_FAILURE;
    }

    return SDK_SUCCESS;
}


/*
 * Creates and builds an OpenCL program for the device
 * buildData.devices[buildData.deviceId].
 *
 * program    receives the created program
 * context    context the program is created in
 * buildData  when binaryName is not empty the program is created from that
 *            binary file, otherwise from the source file kernelName; flagsStr
 *            and, when flagsFileName is not empty, the contents of that file
 *            are passed as build options
 *
 * Returns SDK_SUCCESS on success and SDK_FAILURE otherwise. The build log is
 * printed when the build fails with CL_BUILD_PROGRAM_FAILURE.
 */
int
SDKCommon::buildOpenCLProgram(cl_program &program, const cl_context& context, const buildProgramData &buildData)
{
    cl_int status = CL_SUCCESS;
    SDKFile kernelFile;
    std::string kernelPath = getPath();
    if(buildData.binaryName.size() != 0)
    {
        kernelPath.append(buildData.binaryName.c_str());
        if(kernelFile.readBinaryFromFile(kernelPath.c_str()))
        {
            std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
            return SDK_FAILURE;
        }

        const char * binary = kernelFile.source().c_str();
        size_t binarySize = kernelFile.source().size();
        program = clCreateProgramWithBinary(context,
                                            1,
                                            &buildData.devices[buildData.deviceId], 
                                            (const size_t *)&binarySize,
                                            (const unsigned char**)&binary,
                                            NULL,
                                            &status);
        CHECK_OPENCL_ERROR(status, "clCreateProgramWithBinary failed.");
    }
    else
    {
        kernelPath.append(buildData.kernelName.c_str());
        if(!kernelFile.open(kernelPath.c_str()))//bool
        {
            std::cout << "Failed to load kernel file: " << kernelPath << std::endl;
            return SDK_FAILURE;
        }
        const char * source = kernelFile.source().c_str();
        size_t sourceSize[] = {strlen(source)};
        program = clCreateProgramWithSource(context,
                                            1,
                                            &source,
                                            sourceSize,
                                            &status);
        CHECK_OPENCL_ERROR(status, "clCreateProgramWithSource failed.");
    }

    std::string flagsStr = std::string(buildData.flagsStr.c_str());

    // Get additional options
    if(buildData.flagsFileName.size() != 0)
    {
        streamsdk::SDKFile flagsFile;
        std::string flagsPath = getPath();
        flagsPath.append(buildData.flagsFileName.c_str());
        if(!flagsFile.open(flagsPath.c_str()))
        {
            std::cout << "Failed to load flags file: " << flagsPath << std::endl;
            return SDK_FAILURE;
        }
        flagsFile.replaceNewlineWithSpaces();
        const char * flags = flagsFile.source().c_str();
        // Keep the last inline option and the first file option apart
        if(flagsStr.size() != 0)
            flagsStr.append(" ");
        flagsStr.append(flags);
    }

    if(flagsStr.size() != 0)
        std::cout << "Build Options are : " << flagsStr.c_str() << std::endl;

    /* create a cl program executable for all the devices specified */
    status = clBuildProgram(program, 1, &buildData.devices[buildData.deviceId], flagsStr.c_str(), NULL, NULL);
    if(status != CL_SUCCESS)
    {
        if(status == CL_BUILD_PROGRAM_FAILURE)
        {
            // First query: size of the build log only
            size_t buildLogSize = 0;
            cl_int logStatus = clGetProgramBuildInfo (
                                    program, 
                                    buildData.devices[buildData.deviceId], 
                                    CL_PROGRAM_BUILD_LOG, 
                                    0, 
                                    NULL, 
                                    &buildLogSize);
            CHECK_OPENCL_ERROR(logStatus, "clGetProgramBuildInfo failed.");

            /*
             * One extra zero byte keeps the buffer terminated and non-empty
             * even when the reported log size is 0.
             */
            std::string buildLog(buildLogSize + 1, '\0');

            if(buildLogSize != 0)
            {
                // Second query: the log text itself
                logStatus = clGetProgramBuildInfo (
                                program, 
                                buildData.devices[buildData.deviceId], 
                                CL_PROGRAM_BUILD_LOG, 
                                buildLogSize, 
                                &buildLog[0], 
                                NULL);
                if(checkVal(logStatus, CL_SUCCESS, "clGetProgramBuildInfo failed."))
                {
                    return SDK_FAILURE;
                }
            }

            std::cout << " \n\t\t\tBUILD LOG\n";
            std::cout << " ************************************************\n";
            std::cout << buildLog.c_str() << std::endl;
            std::cout << " ************************************************\n";
        }

        // Reports the build failure itself
        CHECK_OPENCL_ERROR(status, "clBuildProgram failed.");
    }
    return SDK_SUCCESS;
}

/*
 * Compares data against refData using the relative L2 error
 * sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)).
 *
 * refData  reference values
 * data     values to check
 * length   number of elements in both arrays
 * epsilon  relative error must be strictly below this value
 *
 * Returns true when the relative error is below epsilon. Returns false when
 * the sum of squared reference values is below 1e-7, because the relative
 * error cannot be formed then.
 */
bool
SDKCommon::compare(const float *refData, const float *data, 
                        const int length, const float epsilon)
{
    float error = 0.0f;
    float ref = 0.0f;

    // Start at element 0 so the first value takes part in the comparison
    for(int i = 0; i < length; ++i) 
    {
        float diff = refData[i] - data[i];
        error += diff * diff;
        ref += refData[i] * refData[i];
    }

    float normRef =::sqrtf((float) ref);
    if (::fabs((float) ref) < 1e-7f) {
        return false;
    }
    float normError = ::sqrtf((float) error);
    error = normError / normRef;

    return error < epsilon;
}

/*
 * Compares data against refData using the relative L2 error
 * sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2)).
 *
 * refData  reference values
 * data     values to check
 * length   number of elements in both arrays
 * epsilon  relative error must be strictly below this value
 *
 * Returns true when the relative error is below epsilon. Returns false when
 * the sum of squared reference values is below 1e-7, because the relative
 * error cannot be formed then.
 */
bool
SDKCommon::compare(const double *refData, const double *data, 
                        const int length, const double epsilon)
{
    double error = 0.0;
    double ref = 0.0;

    // Start at element 0 so the first value takes part in the comparison
    for(int i = 0; i < length; ++i) 
    {
        double diff = refData[i] - data[i];
        error += diff * diff;
        ref += refData[i] * refData[i];
    }

    double normRef =::sqrt((double) ref);
    if (::fabs((double) ref) < 1e-7) {
        return false;
    }
    double normError = ::sqrt((double) error);
    error = normError / normRef;

    return error < epsilon;
}

/*
 * Picks a local work size for a one-dimensional range.
 *
 * globalThreads    total number of work items
 * maxWorkItemSize  largest local size allowed
 *
 * Returns globalThreads when it does not exceed maxWorkItemSize; otherwise
 * the largest value not above maxWorkItemSize that divides globalThreads
 * evenly. Returns 0 when maxWorkItemSize is 0 and globalThreads is not, as
 * no local size exists in that case.
 */
size_t
SDKCommon::getLocalThreads(const size_t globalThreads,
                           const size_t maxWorkItemSize)
{
    // The whole range fits into a single work group
    if(globalThreads <= maxWorkItemSize)
        return globalThreads;

    // Avoid taking a remainder by zero below
    if(maxWorkItemSize == 0)
        return 0;

    // Walk down from the maximum; 1 divides everything, so this terminates
    size_t localThreads = maxWorkItemSize;
    while(globalThreads % localThreads != 0)
        --localThreads;

    return localThreads;
}

int SDKCommon::createTimer()
{
    Timer* newTimer = new Timer;
    newTimer->_start = 0;
    newTimer->_clocks = 0;

#ifdef _WIN32
    QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->_freq);
#else
    newTimer->_freq = (long long)1.0E3;
#endif
    
    /* Push back the address of new Timer instance created */
    _timers.push_back(newTimer);

    /*if(_numTimers == 1)
    {
        _timers = newTimer; 
    }
    else
    {
        Timer *save = _timers;

        _timers = new Timer[_numTimers];
        memcpy(_timers,save,sizeof(Timer)*(_numTimers-1));
        _timers[_numTimers-1] = *newTimer;
        delete newTimer;
        newTimer = 0;

        if(_numTimers <= 2 )
        {
            delete save; 
        }
        else
        {
            delete[] save;
        }
        save = 0;
    }*/

    return (int)(_timers.size() - 1);
}

/*
 * Clears the start value and the accumulated clocks of a timer.
 *
 * handle  value returned by createTimer()
 *
 * Returns SDK_SUCCESS on success, -1 when handle does not refer to a timer.
 */
int SDKCommon::resetTimer(int handle)
{
    // Negative handles are rejected as well; they would index before _timers
    if(handle < 0 || handle >= (int)_timers.size())
    {
        error("Cannot reset timer. Invalid handle.");
        return -1;
    }

    (_timers[handle]->_start) = 0;
    (_timers[handle]->_clocks) = 0;
    return SDK_SUCCESS;
}

int SDKCommon::startTimer(int handle)
{
    if(handle >= (int)_timers.size())
    {
        error("Cannot reset timer. Invalid handle.");
        return SDK_FAILURE;
    }

#ifdef _WIN32
    QueryPerformanceCounter((LARGE_INTEGER*)&(_timers[handle]->_start));	
#else
    struct timeval s;
    gettimeofday(&s, 0);
    _timers[handle]->_start = (long long)s.tv_sec * (long long)1.0E3 + (long long)s.tv_usec / (long long)1.0E3;
#endif

    return SDK_SUCCESS;
}

int SDKCommon::stopTimer(int handle)
{
    long long n=0;

    if(handle >= (int)_timers.size())
    {
        error("Cannot reset timer. Invalid handle.");
        return SDK_FAILURE;
    }

#ifdef _WIN32
    QueryPerformanceCounter((LARGE_INTEGER*)&(n));	
#else
    struct timeval s;
    gettimeofday(&s, 0);
    n = (long long)s.tv_sec * (long long)1.0E3+ (long long)s.tv_usec / (long long)1.0E3;
#endif

    n -= _timers[handle]->_start;
    _timers[handle]->_start = 0;
    _timers[handle]->_clocks += n;

    return SDK_SUCCESS;
}

/*
 * Reads the time accumulated by a timer.
 *
 * handle  value returned by createTimer()
 *
 * Returns the accumulated clocks divided by the timer frequency, or
 * SDK_FAILURE (converted to double) when handle does not refer to a timer.
 */
double SDKCommon::readTimer(int handle)
{
    // Negative handles are rejected as well; they would index before _timers
    if(handle < 0 || handle >= (int)_timers.size())
    {
        error("Cannot read timer. Invalid handle.");
        return SDK_FAILURE;
    }

    double reading = double(_timers[handle]->_clocks);
    reading = double(reading / _timers[handle]->_freq);

    return reading;
}

/*
 * Prints the items of a table in left aligned columns.
 *
 * t->_dataItems is split at the characters of t->_delim; every token is
 * printed with a field width of t->_columnWidth and a newline is emitted
 * after each t->_numColumns tokens. A NULL table is reported through error().
 */
void SDKCommon::printTable(Table *t)
{
    if(t == NULL)
    {
        error("Cannot print table, NULL pointer.");
        return;
    }

    // Start of the first token: first character that is not a delimiter
    std::string::size_type tokenStart = t->_dataItems.find_first_not_of(t->_delim, 0);
    // End of the first token: next delimiter after its start
    std::string::size_type tokenEnd = t->_dataItems.find_first_of(t->_delim, tokenStart);

    for(int printed = 1;
        std::string::npos != tokenEnd || std::string::npos != tokenStart;
        ++printed)
    {
        // Emit the current token padded to the column width
        std::cout << std::setw(t->_columnWidth) << std::left
                  << t->_dataItems.substr(tokenStart, tokenEnd - tokenStart);

        // Advance over the delimiters to the next token and find its end
        tokenStart = t->_dataItems.find_first_not_of(t->_delim, tokenEnd);
        tokenEnd = t->_dataItems.find_first_of(t->_delim, tokenStart);

        // Finish the row once it holds _numColumns tokens
        if(printed % t->_numColumns == 0)
            std::cout << "\n";
    }
}

int 
SDKCommon::fileToString(std::string &fileName, std::string &str)
{
    size_t      size;
    char*       buf;

    // Open file stream
    std::fstream f(fileName.c_str(), (std::fstream::in | std::fstream::binary));

    // Check if we have opened file stream
    if (f.is_open()) 
    {
        size_t  sizeFile;

        // Find the stream size
        f.seekg(0, std::fstream::end);
        size = sizeFile = (size_t)f.tellg();
        f.seekg(0, std::fstream::beg);

        buf = new char[size + 1];
        if (!buf) {
            f.close();
            return  SDK_FAILURE;
        }

        // Read file
        f.read(buf, sizeFile);
        f.close();
        str[size] = '\0';

        str = buf;

        return SDK_SUCCESS;
    }
    else
    {
        error("Converting file to string. Cannot open file.");
        str = "";	
        return SDK_FAILURE;
    }
}

void 
SDKCommon::error(const char* errorMsg)
{
    std::cout<<"Error: "<<errorMsg<<std::endl;
}

void 
SDKCommon::error(std::string errorMsg)
{
    std::cout<<"Error: "<<errorMsg<<std::endl;
}

void 
SDKCommon::expectedError(const char* errorMsg)
{
    std::cout<<"Expected Error: "<<errorMsg<<std::endl;
}

void 
SDKCommon::expectedError(std::string errorMsg)
{
    std::cout<<"Expected Error: "<<errorMsg<<std::endl;
}

/////////////////////////////////////////////////////////////////
// Template Instantiations 
/////////////////////////////////////////////////////////////////
// Explicit instantiations of SDKCommon::printArray for the element types
// short, unsigned char, unsigned int, int, long, float and double.
template 
void SDKCommon::printArray<short>(const std::string, 
        const short*, int, int)const;
template 
void SDKCommon::printArray<unsigned char>(const std::string, 
        const unsigned char *, int, int)const;
template 
void SDKCommon::printArray<unsigned int>(const std::string, 
        const unsigned int *, int, int)const;
template 
void SDKCommon::printArray<int>(const std::string, 
        const int *, int, int)const;
template 
void SDKCommon::printArray<long>(const std::string, 
        const long*, int, int)const;
template 
void SDKCommon::printArray<float>(const std::string, 
        const float*, int, int)const;
template 
void SDKCommon::printArray<double>(const std::string, 
        const double*, int, int)const;

// Explicit instantiations of SDKCommon::fillRandom for the element types
// unsigned char, unsigned int, int, long, float and double.
template 
int SDKCommon::fillRandom<unsigned char>(unsigned char* arrayPtr, 
        const int width, const int height, 
        unsigned char rangeMin, unsigned char rangeMax, unsigned int seed);	
template 
int SDKCommon::fillRandom<unsigned int>(unsigned int* arrayPtr, 
        const int width, const int height, 
        unsigned int rangeMin, unsigned int rangeMax, unsigned int seed);	
template 
int SDKCommon::fillRandom<int>(int* arrayPtr, 
        const int width, const int height, 
        int rangeMin, int rangeMax, unsigned int seed);	
template 
int SDKCommon::fillRandom<long>(long* arrayPtr, 
        const int width, const int height, 
        long rangeMin, long rangeMax, unsigned int seed);	
template 
int SDKCommon::fillRandom<float>(float* arrayPtr, 
        const int width, const int height, 
        float rangeMin, float rangeMax, unsigned int seed);	
template 
int SDKCommon::fillRandom<double>(double* arrayPtr, 
        const int width, const int height, 
        double rangeMin, double rangeMax, unsigned int seed);	

// Explicit instantiations of SDKCommon::roundToPowerOf2 for the integer
// types short, unsigned int, int and long.
template 
short SDKCommon::roundToPowerOf2<short>(short val);
template 
unsigned int SDKCommon::roundToPowerOf2<unsigned int>(unsigned int val);
template 
int SDKCommon::roundToPowerOf2<int>(int val);
template 
long SDKCommon::roundToPowerOf2<long>(long val);

// Explicit instantiations of SDKCommon::isPowerOf2 for the integer types
// short, unsigned int, int and long.
template
int SDKCommon::isPowerOf2<short>(short val);
template
int SDKCommon::isPowerOf2<unsigned int>(unsigned int val);
template
int SDKCommon::isPowerOf2<int>(int val);
template
int SDKCommon::isPowerOf2<long>(long val);

// Declarations of SDKCommon::fillPos for short, unsigned int, int and long.
// NOTE(review): unlike the `template` lines above, these use `template<>`,
// which declares explicit specializations instead of instantiating the
// primary template - confirm that this is intended.
template<> 
int SDKCommon::fillPos<short>(short * arrayPtr, const int width, const int height);
template<> 
int SDKCommon::fillPos<unsigned int>(unsigned int * arrayPtr, const int width, const int height);
template<> 
int SDKCommon::fillPos<int>(int * arrayPtr, const int width, const int height);
template<> 
int SDKCommon::fillPos<long>(long * arrayPtr, const int width, const int height);

template<> 
int SDKCommon::fillConstant<short>(short * arrayPtr, 
        const int width, const int height, 
        const short val);
template<> 
int SDKCommon::fillConstant(unsigned int * arrayPtr, 
        const int width, const int height, 
        const unsigned int val);
template<> 
int SDKCommon::fillConstant(int * arrayPtr, 
        const int width, const int height, 
        const int val);
template<> 
int SDKCommon::fillConstant(long * arrayPtr, 
        const int width, const int height, 
        const long val);
template<> 
int SDKCommon::fillConstant(long * arrayPtr, 
        const int width, const int height, 
        const long val);
template<> 
int SDKCommon::fillConstant(long * arrayPtr, 
        const int width, const int height, 
        const long val);


/* Explicit instantiation definition of getOpenCLErrorCodeStr for int. */
template const char* getOpenCLErrorCodeStr<int>(int);

/*
 * Explicit instantiation definitions of SDKCommon::checkVal for char, bool,
 * std::string, short, unsigned int, int and long.
 *
 * Parameters, in order: input, reference, message, isAPIerror.
 */
template int SDKCommon::checkVal<char>(
    char, char, std::string, bool);
template int SDKCommon::checkVal<bool>(
    bool, bool, std::string, bool);
template int SDKCommon::checkVal<std::string>(
    std::string, std::string, std::string, bool);
template int SDKCommon::checkVal<short>(
    short, short, std::string, bool);
template int SDKCommon::checkVal<unsigned int>(
    unsigned int, unsigned int, std::string, bool);
template int SDKCommon::checkVal<int>(
    int, int, std::string, bool);
template int SDKCommon::checkVal<long>(
    long, long, std::string, bool);


/*
 * Explicit instantiation definitions of SDKCommon::toString for char, short,
 * unsigned int, int, long, float and double.
 *
 * Parameters, in order: the value, then a pointer to a function taking and
 * returning std::ios_base&. Each instantiation returns std::string.
 */
template std::string SDKCommon::toString<char>(
    char, std::ios_base& (*)(std::ios_base&));
template std::string SDKCommon::toString<short>(
    short, std::ios_base& (*)(std::ios_base&));
template std::string SDKCommon::toString<unsigned int>(
    unsigned int, std::ios_base& (*)(std::ios_base&));
template std::string SDKCommon::toString<int>(
    int, std::ios_base& (*)(std::ios_base&));
template std::string SDKCommon::toString<long>(
    long, std::ios_base& (*)(std::ios_base&));
template std::string SDKCommon::toString<float>(
    float, std::ios_base& (*)(std::ios_base&));
template std::string SDKCommon::toString<double>(
    double, std::ios_base& (*)(std::ios_base&));
}