gStore/Database/JoinIntersect.cu

217 lines
5.2 KiB
Plaintext

// Author : Pinaggle
// Last Modified: 2019/9/22 14:38
#include <vector>
#include <iostream>
#include <iomanip>
#include <time.h>
using namespace std;
/*
* this function is used to do multi-join on GPU, the lists to be joined is recorded
* one by one in arr[],and the start position of each list is recorded in head[].
* table saves which lists are to be intersected. It's size is join_length*item_size.
* Each column of table represent a multi-way join, so we hacve item_size results at
* last.
*/
/*
void disp(const char* name,unsigned *arr,unsigned length,unsigned line)
{
cout << name << endl;
for(unsigned i = 0;i < length;i++)
{
cout <<arr[i] << ' ';
if(i % line == line-1)
cout << endl;
}
cout << endl;
}
*/
void start_clock(struct timespec &start)
{
clock_gettime(CLOCK_MONOTONIC,&start);
}
void gettime_and_reset_clock(struct timespec &start, const char* info)
{
struct timespec cnt;
clock_gettime(CLOCK_MONOTONIC,&cnt);
double period = 1000*(cnt.tv_sec - start.tv_sec)
+ 0.000001*(cnt.tv_nsec - start.tv_nsec);
cout << info << " used " << setiosflags(ios::fixed)
<< setprecision(2) << period <<"ms"<<endl;
start = cnt;
}
void gettime(struct timespec &start, const char* info)
{
struct timespec cnt;
clock_gettime(CLOCK_MONOTONIC,&cnt);
double period = 1000*(cnt.tv_sec - start.tv_sec)
+ 0.000001*(cnt.tv_nsec - start.tv_nsec);
cout << info << " used " << setiosflags(ios::fixed)
<< setprecision(2) << period <<"ms"<<endl;
}
void cudaInit()
{
unsigned *d_init;
cout << "CudaInitFinished" << endl;
cudaMalloc(&d_init,sizeof(unsigned));
cudaFree(d_init);
}
__global__ void binarySearch(unsigned *d_head,unsigned *d_arr,unsigned join_width,
unsigned table_length,unsigned *d_can_list,unsigned can_list_size)
{
unsigned idx = blockDim.x * blockIdx.x + threadIdx.x;
unsigned gridSize = gridDim.x*blockDim.x;
while(idx < d_head[table_length])
{
unsigned key = d_arr[idx];
unsigned l = 0,r = table_length,m,i;
while(l+1 < r)
{
m = (l+r)/2;
if(d_head[m] <= idx)
l = m;
else
r = m;
}
i = l;
bool found = true;
for(unsigned j = 1;j < join_width;j++)
{
l = d_head[j*table_length+i];
r = d_head[j*table_length+i+1];
while(l+1 < r)
{
m = (l+r)/2;
if(d_arr[m] <= key)
l = m;
else
r = m;
}
if((l == r)||(d_arr[l] != key))
{
found = false;
break;
}
}
if(found && (d_can_list != NULL))
{
l = 0;
r = can_list_size;
while(l+1 < r)
{
m = (l+r)/2;
if(d_can_list[m] <= key)
l = m;
else
r = m;
}
if((l == r)||(d_can_list[l] != key))
found = false;
}
if(!found)
d_arr[idx] = -1;
idx += gridSize;
}
}
void calc(unsigned *head,unsigned *arr, unsigned join_width,unsigned table_length,
unsigned item_num,unsigned *can_list,unsigned can_list_size)
{
bool use_gpu = true;
if(use_gpu)
{
struct timespec t;
start_clock(t);
unsigned *d_head, *d_arr, *d_can_list = NULL;
cudaMalloc(&d_head, sizeof(unsigned)*(join_width*table_length+1));
cudaMalloc(&d_arr, sizeof(unsigned)*item_num);
if(can_list != NULL)
cudaMalloc(&d_can_list, sizeof(unsigned)*can_list_size);
gettime_and_reset_clock(t,"cudaMalloc");
cudaMemcpy(d_head, head, sizeof(unsigned)*(join_width*table_length+1), cudaMemcpyHostToDevice);
cudaMemcpy(d_arr, arr, sizeof(unsigned)*item_num, cudaMemcpyHostToDevice);
if(can_list != NULL)
cudaMemcpy(d_can_list, can_list, sizeof(unsigned)*can_list_size, cudaMemcpyHostToDevice);
gettime_and_reset_clock(t,"cudaMemcpy");
int threadsPerBlock = 256;
int blocksPerGrid = (head[table_length] + threadsPerBlock - 1) / threadsPerBlock;
if(blocksPerGrid > 1024)
blocksPerGrid = 1024;
cudaEvent_t start,stop;
float period;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start,0);
binarySearch <<<blocksPerGrid, threadsPerBlock >>> (d_head,d_arr,
join_width,table_length,d_can_list,can_list_size);
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&period,start,stop);
printf("****Kernel Execution time: %.2f ms\n",period);
gettime_and_reset_clock(t,"kernel execution");
cudaMemcpy(arr, d_arr, sizeof(unsigned)*head[table_length], cudaMemcpyDeviceToHost);
gettime_and_reset_clock(t,"cudaMemcpy");
cudaFree(d_head);
cudaFree(d_arr);
if(can_list != NULL)
cudaFree(d_can_list);
gettime_and_reset_clock(t,"cudaFree");
}
else
{
CPUMODE:
cout << "enter CPU mode" << endl;
struct timespec start;
for (unsigned i = 0; i < table_length; i++)
for(unsigned idx = head[i];idx < head[i+1];idx++)
{
unsigned key = arr[idx];
unsigned l,r,m;
bool found = true;
for (unsigned j = 1; j < join_width; j++)
{
l = head[j*table_length+i];
r = head[j*table_length+i+1];
while (l + 1 < r)
{
m = (l + r) / 2;
if (arr[m] <= key)
l = m;
else
r = m;
}
if ((l == r) || (arr[l] != key))
{
found = false;
break;
}
}
if(found && can_list != NULL)
{
l = 0;
r = can_list_size;
while (l + 1 < r)
{
m = (l + r) / 2;
if (can_list[m] <= key)
l = m;
else
r = m;
}
if ((l == r) || (can_list[l] != key))
{
found = false;
}
}
if(!found)
arr[idx] = -1;
}
}
}