/********************************************************************** Copyright ©2012 Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: • Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. • Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ********************************************************************/ #include "AESEncrypt.h" #include /*** Global variables***/ cl_uchar sbox[256] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 //0 , 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 //1 , 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 //2 , 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 //3 , 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 //4 , 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf //5 , 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 //6 , 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 //7 , 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 //8 , 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb //9 , 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 //A , 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 //B , 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a //C , 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e //D , 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf //E , 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};//F //0 1 2 3 4 5 6 7 8 9 A B C D E F cl_uchar Rcon[255] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a , 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39 , 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a , 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8 , 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef , 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc , 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b , 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3 , 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94 , 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20 , 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35 , 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f , 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04 , 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63 , 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd , 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb }; //Separator std::string sep = "----------------------------------------------------------"; bool verify = false; // Pointer to list of CPU and GPU devices AESEncrypt *AESEncrypt_cpu; AESEncrypt *AESEncrypt_gpu; // Number of CPU and GPU devices int numCPUDevices; int numGPUDevices; // Size of input data int width; //the time of simple CPU running time double timeCPU; //the time of simple GPU running time double timeGPU; // Input data for all devices cl_uchar *input; cl_uchar *output; // Host Output data for verification cl_uchar *verificationOutput; //to mark the subbuffer of multi GPU computed cl_mem *subbufferInput; cl_mem *subbufferOutput; std::vector gpuId; // Kernel source string std::string sourceStr; const char *source; // Context properties const cl_context_properties* cprops; cl_context_properties cps[3]; cl_platform_id platform = NULL; // Count for verification cl_uint verificationCount = 0; cl_uint requiredCount = 0; //AES key for each device task cl_uchar *global_key; //setup AES AESEncrypt::AESEncrypt() { output = NULL; } int AESEncrypt::setupAESEncryp() { keySizeBits = 128; rounds = 10; // 1 Byte = 8 bits keySize = keySizeBits/8; // due to unknown represenation of cl_uchar keySizeBits = keySize * sizeof(cl_uchar); key = (cl_uchar*)malloc(keySizeBits); if (!key) { std::cout << "Error: Failed to allocate key memory" <>= 1; } return p; } void AESEncrypt::aesRound(cl_uchar * state, cl_uchar * rKey) { subBytes(state); shiftRows(state); mixColumns(state); addRoundKey(state, rKey); } void AESEncrypt::mixColumn(cl_uchar *column) { cl_uchar cpy[4]; for(cl_uint i = 0; i < 4; ++i) { cpy[i] = column[i]; } column[0] = galoisMultiplication(cpy[0], 2)^ galoisMultiplication(cpy[3], 1)^ galoisMultiplication(cpy[2], 1)^ galoisMultiplication(cpy[1], 3); column[1] = galoisMultiplication(cpy[1], 2)^ galoisMultiplication(cpy[0], 1)^ galoisMultiplication(cpy[3], 1)^ galoisMultiplication(cpy[2], 3); column[2] = galoisMultiplication(cpy[2], 2)^ galoisMultiplication(cpy[1], 1)^ galoisMultiplication(cpy[0], 1)^ galoisMultiplication(cpy[3], 3); column[3] = galoisMultiplication(cpy[3], 2)^ galoisMultiplication(cpy[2], 1)^ galoisMultiplication(cpy[1], 1)^ galoisMultiplication(cpy[0], 3); } void AESEncrypt::aesMain(cl_uchar * state, cl_uchar * rKey, cl_uint rounds) { addRoundKey(state, rKey); for(cl_uint i = 1; i < rounds; ++i) { aesRound(state, rKey + keySize*i); } subBytes(state); shiftRows(state); addRoundKey(state, rKey + keySize*rounds); } void AESEncrypt::keyExpansion(cl_uchar * key, cl_uchar * expandedKey, cl_uint keySize, cl_uint explandedKeySize) { cl_uint currentSize = 0; cl_uint rConIteration = 1; cl_uchar temp[4] = {0}; for(cl_uint i = 0; i < keySize; ++i) { expandedKey[i] = key[i]; } currentSize += keySize; while(currentSize < explandedKeySize) { for(cl_uint i = 0; i < 4; ++i) { temp[i] = expandedKey[(currentSize - 4) + i]; } if(currentSize%keySize == 0) { core(temp, rConIteration++); } //XXX: add extra SBOX here if the keySize is 32 Bytes for(cl_uint i = 0; i < 4; ++i) { expandedKey[currentSize] = expandedKey[currentSize - keySize] ^ temp[i]; currentSize++; } } } void AESEncrypt::rotate(cl_uchar * word) { cl_uchar c = word[0]; for(cl_uint i = 0; i < 3; ++i) { word[i] = word[i + 1]; } word[3] = c; } void AESEncrypt::core(cl_uchar * word, cl_uint iter) { rotate(word); for(cl_uint i = 0; i < 4; ++i) { word[i] = getSBoxValue(word[i]); } word[0] = word[0] ^ getRconValue(iter); } int AESEncrypt::createContext() { context = clCreateContext(cprops, 1, &deviceId, 0, 0, &status); CHECK_CL_ERROR(status, "clCreateContext failed."); return SDK_SUCCESS; } //Create Command-Queue int AESEncrypt::createQueue() { queue = clCreateCommandQueue(context, deviceId, CL_QUEUE_PROFILING_ENABLE, &status); CHECK_CL_ERROR(status, "clCreateCommandQueue failed."); return SDK_SUCCESS; } // Create input buffer and output buffer int AESEncrypt::createBuffers() { inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, width * sizeof(cl_uchar), 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer failed.(inputBuffer)"); outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * sizeof(cl_uchar), 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer failed.(outputBuffer)"); rKeyBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_uchar ) * explandedKeySize, roundKey, &status); CHECK_CL_ERROR(status, "clCreateBuffer failed. (rKeyBuffer)"); cl_uchar * sBox; sBox = (cl_uchar *)sbox; sBoxBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_uchar ) * 256, sBox, &status); CHECK_CL_ERROR(status, "clCreateBuffer failed. (sBoxBuffer)"); return SDK_SUCCESS; } // Initialize input buffer int AESEncrypt::enqueueWriteBuffer() { status = clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, width * sizeof(cl_uchar), input, 0, 0, 0); CHECK_CL_ERROR(status, "clEnqueueWriteBuffer failed."); return SDK_SUCCESS; } // Create program with source int AESEncrypt::createProgram(const char **source, const size_t *sourceSize) { program = clCreateProgramWithSource(context, 1, source, sourceSize, &status); CHECK_CL_ERROR(status, "clCreateProgramWithSource failed."); return SDK_SUCCESS; } // Build program source int AESEncrypt::buildProgram() { status = clBuildProgram(program, 1, &deviceId, NULL, 0, 0); // Print build log here if build program failed if(status != CL_SUCCESS) { if(status == CL_BUILD_PROGRAM_FAILURE) { cl_int logStatus; char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(program, deviceId, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); CHECK_CL_ERROR(status, "clGetProgramBuildInfo failed."); buildLog = (char*)malloc(buildLogSize); if(buildLog == NULL) { std::cout<<"Failed to allocate host memory. (buildLog)"< 0) { cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id)); status = clGetPlatformIDs(numPlatforms, platforms, NULL); CHECK_CL_ERROR(status, "clGetPlatformIDs failed."); platform = platforms[0]; free(platforms); } cps[0] = CL_CONTEXT_PLATFORM; cps[1] = (cl_context_properties)platform; cps[2] = 0; cprops = (NULL == platform) ? NULL : cps; // Get Number of CPU devices available status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, 0, (cl_uint*)&numCPUDevices); CHECK_CL_ERROR(status, "clGetDeviceIDs failed.(numCPUDevices)"); // Get Number of GPU devices available status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, 0, (cl_uint*)&numGPUDevices); CHECK_CL_ERROR(status, "clGetDeviceIDs failed.(numGPUDevices)"); // If no GPU is present then exit if(numGPUDevices < 1) { std::cout<<"Only CPU device is present. Exiting!"<::iterator it=gpuId.begin(); for (int j=0; j< i;j++) { it++; } device_id = *it; offsetPtr = &AESEncrypt_gpu[0].output[offset]; status = clEnqueueReadBuffer(AESEncrypt_gpu[device_id].queue, subbufferOutput[i], CL_TRUE, 0, NUM_GROUP_THREADS * sizeof(cl_uchar), offsetPtr, 0, 0, 0); CHECK_CL_ERROR(status, "clEnqueueReadBuffer failed."); } // Verify results std::cout << "Verifying results for multi GPU: "; AESEncrypt_gpu[0].verifyResults(); } //Release the resources on all devices for (int i = 0; i < numGPUDevices; i++) { status = clReleaseContext(AESEncrypt_gpu[i].context); CHECK_CL_ERROR(status, "clCreateContext(multi GPU) failed."); status = clReleaseProgram(AESEncrypt_gpu[i].program); CHECK_CL_ERROR(status, "clReleaseProgram(multi GPU) failed."); status = clReleaseMemObject(AESEncrypt_gpu[i].inputBuffer); CHECK_CL_ERROR(status, "clReleaseMemObject(multi GPU) failed. (inputBuffer)"); status = clReleaseMemObject(AESEncrypt_gpu[i].outputBuffer); CHECK_CL_ERROR(status,"clReleaseMemObject(multi GPU) failed. (outputBuffer)"); status = clReleaseMemObject(AESEncrypt_gpu[i].rKeyBuffer); CHECK_CL_ERROR(status,"clReleaseMemObject failed(CPU). (outputBuffer)"); status = clReleaseMemObject(AESEncrypt_gpu[i].sBoxBuffer); CHECK_CL_ERROR(status,"clReleaseMemObject failed(CPU). (outputBuffer)"); status = clReleaseKernel(AESEncrypt_gpu[i].kernel); CHECK_CL_ERROR(status, "clReleaseCommandQueue(multi GPU) failed."); status = clReleaseCommandQueue(AESEncrypt_gpu[i].queue); CHECK_CL_ERROR(status, "clReleaseCommandQueue(multi GPU) failed."); status = clReleaseEvent(AESEncrypt_gpu[i].eventObject); CHECK_CL_ERROR(status, "clReleaseEvent(multi GPU) failed."); } return SDK_SUCCESS; } //calls runCPU(), runSingleGPU() and runMultiGPU(). int run() { if (numGPUDevices < 2) { std::cout << "Warning : There is only one GPU device detected. \n Use single GPU mode" << std::endl; } //case 1: Use single CPU to compute std::cout << sep<< "\nTest 1 : Single CPU\n"<= 6) { std::cout<<"Too many arguments. Type -h or --help for help.\n"; exit(0); } for(int i = 1; i < argc; i++) { if(!strcmp(argv[i], "-e") || !strcmp(argv[i], "--verify")) verify = true; if(!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) { printf("Usage:\n"); printf("-h, --help\tPrint this help.\n"); printf("-e, --verify\tVerify results against reference implementation.\n"); exit(0); } } // Initialize Host application if (initializeHost() != SDK_SUCCESS) return SDK_FAILURE; // Initialize OpenCL resources if ( initializeCL() != SDK_SUCCESS) return SDK_FAILURE; //calls runCPU(), runSingleGPU() and runMultiGPU() if (run() != SDK_SUCCESS) return SDK_FAILURE; // Release host resources cleanupHost(); if(verify) { if (numGPUDevices >= 2) { requiredCount = numGPUDevices + 1; } else { requiredCount = numGPUDevices; } if(verificationCount != requiredCount) { std::cout << "FAILED!\n"; return SDK_FAILURE; } else { std::cout << "PASSED!\n" ; return SDK_SUCCESS; } } }