You could use my library for signed/unsigned integer arithmetic operations, as well as some number theory functions such as POW, GCD, LCM:
#include <Aeu.h>
#include <cstdio>
// Demo kernel: the thread whose flat global index is 0 builds an Aeu<128>
// value and prints it through device-side printf; every other thread does
// nothing. Takes no arguments and writes no memory visible to the host.
__global__ void test() {
    // Flat 1D global thread index (only the x dimension is considered).
    const auto globalIdx = threadIdx.x + blockDim.x * blockIdx.x;

    // Restrict the output to a single thread so the line is printed once.
    if (globalIdx == 0) {
        Aeu<128> amp = 1562144106091796071UL; // 128-bit long unsigned integer

        // NOTE(review): the literal does not fit in 32 bits, so printing it via
        // `unsigned long` / %lu shows the full value only where `unsigned long`
        // is 64 bits wide - confirm for the target platform.
        const unsigned long printable = amp.integralCast<unsigned long>();
        printf("Were in kernel thread and number is %lu\n", printable);
    }
}
// Host entry point: launches the demo kernel on a 32x32 (blocks x threads)
// configuration, waits for it to finish, and reports any CUDA failure.
// Returns 0 on success and 1 if the launch or the kernel execution failed.
int main() {
    test<<<32, 32>>>();

    // A kernel launch yields no status of its own; launch-time failures
    // (invalid configuration, no kernel image for this device, ...) have to
    // be fetched explicitly with cudaGetLastError().
    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        std::fprintf(stderr, "Kernel launch failed: %s\n",
                     cudaGetErrorString(launchStatus));
        return 1;
    }

    // Block until the kernel has finished: this surfaces errors raised during
    // execution and makes sure the device-side printf output is flushed
    // before the process exits.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    if (syncStatus != cudaSuccess) {
        std::fprintf(stderr, "Kernel execution failed: %s\n",
                     cudaGetErrorString(syncStatus));
        return 1;
    }

    return 0;
}