原始代码
// Type your code here, or load an example.
// Kernel: squares the first n elements of `array` in place, at most one
// element per thread.
//   array - buffer of ints that is read and overwritten element-wise
//   n     - number of elements to process
// Only the x components of the block/thread coordinates are used, so a
// one-dimensional launch is expected.
// NOTE(review): there is no loop over the grid; an element whose index is not
// covered by a launched thread is left unchanged.
// NOTE(review): the product is plain int arithmetic and can overflow for large
// values - confirm the expected value range with callers.
extern "C" __global__ void square(int* array, int n) {
int tid = blockDim.x * blockIdx.x + threadIdx.x; // flat thread index along x
if (tid < n) // threads whose index is not below n do nothing
array[tid] = array[tid] * array[tid];
}
// Host-side launcher: starts the `square` kernel on `array` with a fixed
// configuration of 92 blocks of 128 threads (11776 threads in total), no
// dynamic shared memory and no explicit stream.
//   array - passed through to the kernel; presumably must be device-accessible
//           memory - confirm with callers
//   n     - element count passed through to the kernel
// NOTE(review): because the configuration is fixed and the kernel handles one
// element per thread, elements at index 11776 and above are not processed.
// NOTE(review): the outcome of the launch is not checked here.
void square_do(int *array, int n) {
square<<<92, 128>>>(array, n);
}
对应汇编
// Stores its single pointer argument in the global
// __nv_fatbinhandle_for_managed_rt. Called from
// __nv_cudaEntityRegisterCallback with the fat-binary handle.
__nv_save_fatbinhandle_for_managed_rt(void**):
pushq %rbp
movq %rsp, %rbp
movq %rdi, -8(%rbp) // spill the argument to the stack
movq -8(%rbp), %rax // reload it
movq %rax, __nv_fatbinhandle_for_managed_rt(%rip) // save it in the global
nop
popq %rbp
ret
// dim3 constructor: writes the three unsigned arguments into the object at
// byte offsets 0, 4 and 8 (the x, y and z members of dim3).
//   %rdi - this
//   %esi, %edx, %ecx - the three unsigned int arguments, in order
dim3::dim3(unsigned int, unsigned int, unsigned int):
pushq %rbp
movq %rsp, %rbp
movq %rdi, -8(%rbp) // this
movl %esi, -12(%rbp) // first argument
movl %edx, -16(%rbp) // second argument
movl %ecx, -20(%rbp) // third argument
movq -8(%rbp), %rax
movl -12(%rbp), %edx
movl %edx, (%rax) // this + 0 = first argument
movq -8(%rbp), %rax
movl -16(%rbp), %edx
movl %edx, 4(%rax) // this + 4 = second argument
movq -8(%rbp), %rax
movl -20(%rbp), %edx
movl %edx, 8(%rax) // this + 8 = third argument
nop
popq %rbp
ret
// Host code generated for square_do(array, n). It builds the two dim3 launch
// dimensions on the stack, pushes the launch configuration with
// __cudaPushCallConfiguration, and calls the host-side `square` stub only when
// that push returned 0.
// Each 12-byte dim3 is passed by value in a pair of registers: an 8-byte
// register holding x and y, and a 4-byte register holding z.
square_do(int*, int):
pushq %rbp
movq %rsp, %rbp
subq $48, %rsp
movq %rdi, -40(%rbp) // save array
movl %esi, -44(%rbp) // save n
leaq -24(%rbp), %rax // dim3 at [-24, -12): block dimensions
movl $1, %ecx // z = 1
movl $1, %edx // y = 1
movl $128, %esi // x = 128 (second <<<...>>> argument)
movq %rax, %rdi
call dim3::dim3(unsigned int, unsigned int, unsigned int)
leaq -12(%rbp), %rax // dim3 at [-12, 0): grid dimensions
movl $1, %ecx // z = 1
movl $1, %edx // y = 1
movl $92, %esi // x = 92 (first <<<...>>> argument)
movq %rax, %rdi
call dim3::dim3(unsigned int, unsigned int, unsigned int)
movq -24(%rbp), %rax // blockDim.x and blockDim.y
movl -16(%rbp), %ecx // blockDim.z
movq %rcx, %rdx // park blockDim.z in %rdx for the moment
movq -12(%rbp), %rdi // gridDim.x and gridDim.y -> 1st argument register
movl -4(%rbp), %esi // gridDim.z -> 2nd argument register
movl $0, %r9d // stream = 0
movl $0, %r8d // shared memory size = 0
movq %rdx, %rcx // blockDim.z -> 4th argument register
movq %rax, %rdx // blockDim.x/y -> 3rd argument register
call __cudaPushCallConfiguration // four parameters: (dim3 gridDim, dim3 blockDim, sharedMemSize, stream)
testl %eax, %eax
jne .L5 // non-zero return: skip the launch and return
movl -44(%rbp), %edx
movq -40(%rbp), %rax
movl %edx, %esi // second parameter: n
movq %rax, %rdi // first parameter: array
call square // host-side function named after the kernel, not the device code
.L5:
nop
leave
ret
// Stores its pointer argument in the function-local static __ref and does
// nothing else. Called from __cudaUnregisterBinaryUtil.
____nv_dummy_param_ref(void*):
pushq %rbp
movq %rsp, %rbp
movq %rdi, -8(%rbp) // spill the argument
movq -8(%rbp), %rax
movq %rax, ____nv_dummy_param_ref(void*)::__ref(%rip) // __ref = argument
nop
popq %rbp
ret
// Exit-time cleanup (registered with atexit by __sti____cudaRegisterAll):
// passes the address of __cudaFatCubinHandle to ____nv_dummy_param_ref, then
// hands the handle stored in that variable to __cudaUnregisterFatBinary.
__cudaUnregisterBinaryUtil():
pushq %rbp
movq %rsp, %rbp
movl $__cudaFatCubinHandle, %edi // address of the handle variable
call ____nv_dummy_param_ref(void*)
movq __cudaFatCubinHandle(%rip), %rax // value of the handle
movq %rax, %rdi
call __cudaUnregisterFatBinary
nop
popq %rbp
ret
// Forwards its pointer argument to __cudaInitModule and returns whatever that
// call leaves in %rax. No caller is visible in this listing.
__nv_init_managed_rt_with_module(void**):
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movq %rdi, -8(%rbp) // spill the argument
movq -8(%rbp), %rax
movq %rax, %rdi // pass it on unchanged
call __cudaInitModule
leave
ret
fatbinData:
.quad 0x00100001ba55ed50,0x00000000000010a8,0x0000005001010002,0x0000000000000e08
.quad 0x0000000000000000,0x0000003400010007,0x0000000f00000040,0x0000000000000011
.quad 0x0000000000000000,0x0000000000000000,0x6178652f7070612f,0x0075632e656c706d
.quad 0x33010102464c457f,0x0000000000000007,0x0000007d00be0002,0x0000000000000000
.quad 0x0000000000000d60,0x00000000000009a0,0x0038004000340534,0x0001000f00400003
.quad 0x7472747368732e00,0x747274732e006261,0x746d79732e006261,0x746d79732e006261
.quad 0x78646e68735f6261,0x666e692e766e2e00,0x2e747865742e006f,0x2e00657261757173
.quad 0x2e6f666e692e766e,0x2e00657261757173,0x65726168732e766e,0x6572617571732e64
.quad 0x6e6f632e766e2e00,0x732e30746e617473,0x722e006572617571,0x6f632e766e2e6c65
.quad 0x2e30746e6174736e,0x2e00657261757173,0x696c5f6775626564,0x2e6c65722e00656e
.quad 0x696c5f6775626564,0x645f766e2e00656e,0x6e696c5f67756265,0x2e00737361735f65
.quad 0x645f766e2e6c6572,0x6e696c5f67756265,0x2e00737361735f65,0x67756265645f766e
.quad 0x7478745f7874705f,0x6c61632e766e2e00,0x2e0068706172676c,0x6f746f72702e766e
.quad 0x766e2e0065707974,0x7463612e6c65722e,0x68732e00006e6f69,0x2e00626174727473
.quad 0x2e00626174727473,0x2e006261746d7973,0x735f6261746d7973,0x766e2e0078646e68
.quad 0x742e006f666e692e,0x617571732e747865,0x692e766e2e006572,0x617571732e6f666e
.quad 0x732e766e2e006572,0x71732e6465726168,0x65722e0065726175,0x6e6f632e766e2e6c
.quad 0x732e30746e617473,0x6e2e006572617571,0x6174736e6f632e76,0x617571732e30746e
.quad 0x756265642e006572,0x2e00656e696c5f67,0x756265642e6c6572,0x2e00656e696c5f67
.quad 0x67756265645f766e,0x61735f656e696c5f,0x2e6c65722e007373,0x67756265645f766e
.quad 0x61735f656e696c5f,0x645f766e2e007373,0x7874705f67756265,0x766e2e007478745f
.quad 0x6172676c6c61632e,0x702e766e2e006870,0x657079746f746f72,0x6c65722e766e2e00
.quad 0x006e6f697463612e,0x0000657261757173,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x000e000300000032,0x0000000000000000,0x0000000000000000
.quad 0x000d00030000007a,0x0000000000000000,0x0000000000000000,0x000400030000008f
.quad 0x0000000000000000,0x0000000000000000,0x00050003000000ab,0x0000000000000000
.quad 0x0000000000000000,0x00060003000000d7,0x0000000000000000,0x0000000000000000
.quad 0x00090003000000e9,0x0000000000000000,0x0000000000000000,0x000a000300000105
.quad 0x0000000000000000,0x0000000000000000,0x000e101200000114,0x0000000000000000
.quad 0x0000000000000100,0x0028000200000054,0x000a0efb01010000,0x0100000001010101
.quad 0x786500007070612f,0x75632e656c706d61,0xa206bbe7c3cd0100,0x0000000209000002
.quad 0x0301040000000000,0x0138020103f00101,0x0201030120020103,0x01010030020100e8
.quad 0x001000020000004c,0x000a0efb01010000,0x0100000001010101,0x0000000209000000
.quad 0x0300040000000000,0x08020b0308020112,0x0120020203f08001,0x011802010380f4f0
.quad 0x0403012002010380,0x0101003002012002,0x0000000000000000,0x6e6f69737265762e
.quad 0x61742e00352e3820,0x5f6d732074656772,0x726464612e003235,0x657a69735f737365
.quad 0x2e00000000343620,0x20656c6269736976,0x73207972746e652e,0x2e00286572617571
.quad 0x752e206d61726170,0x7261757173203436,0x5f6d617261705f65,0x617261702e002c30
.quad 0x73203233752e206d,0x61705f6572617571,0x002900315f6d6172,0x2e206765722e007b
.quad 0x7025092064657270,0x65722e003b3e323c,0x09203233622e2067,0x2e003b3e383c7225
.quad 0x3436622e20676572,0x3e353c6472250920,0x2e646c000000003b,0x36752e6d61726170
.quad 0x2c31647225092034,0x6572617571735b20,0x305f6d617261705f,0x61702e646c003b5d
.quad 0x203233752e6d6172,0x735b202c32722509,0x61705f6572617571,0x003b5d315f6d6172
.quad 0x3233752e766f6d00,0x25202c3372250920,0x003b782e6469746e,0x203233752e766f6d
.quad 0x6325202c34722509,0x003b782e64696174,0x203233752e766f6d,0x7425202c35722509
.quad 0x616d003b782e6469,0x3233732e6f6c2e64,0x25202c3172250920,0x2c347225202c3372
.quad 0x7300003b35722520,0x732e65672e707465,0x2c31702509203233,0x7225202c31722520
.quad 0x2031702540003b32,0x5f4c240920617262,0x003b325f3042425f,0x742e617476630000
.quad 0x6c61626f6c672e6f,0x722509203436752e,0x31647225202c3264,0x772e6c756d00003b
.quad 0x203233732e656469,0x25202c3364722509,0x61003b34202c3172,0x09203436732e6464
.quad 0x7225202c34647225,0x33647225202c3264,0x6f6c672e646c003b,0x203233752e6c6162
.quad 0x255b202c36722509,0x756d003b5d346472,0x3233732e6f6c2e6c,0x25202c3772250920
.quad 0x3b367225202c3672,0x626f6c672e747300,0x09203233752e6c61,0x202c5d346472255b
.quad 0x4c2400003b377225,0x3a325f3042425f5f,0x00003b7465720000,0x00082f040000007d
.quad 0x0000000700000008,0x0000000800081204,0x0008110400000000,0x0000000000000008
.quad 0x0000000800081204,0x0004370400000000,0x000030010000007d,0x00080a0400002a01
.quad 0x000c014000000002,0x000c1704000c1903,0x0008000100000000,0x000c17040011f000
.quad 0x0000000000000000,0x00ff1b030021f000,0x0000001000041d04,0x0000005800081c04
.quad 0x00000000000000d0,0x00000000ffffffff,0x00000000fffffffe,0x00000000fffffffd
.quad 0x00000000fffffffc,0x0000000000000073,0x3605002511000000,0x0000000000000035
.quad 0x0000000800000002,0x000000000000001d,0x0000000800000002,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x001cfc00e22007f6,0x4c98078000870001,0xf0c8000002570002,0xf0c8000002170000
.quad 0x001fd842fec20ff1,0x4f107f8000270203,0x4e00000000270200,0x5b30001800370202
.quad 0x001ff400fd4007ed,0x4b6d038005270207,0x50b0000000070f00,0xe30000000000000f
.quad 0x001fc800fec207f1,0x3829000001e70200,0x4c18810005070202,0x4c10080005170003
.quad 0x001f98e0fe2007b5,0xeed4200000070200,0x5b007f8000070005,0x5b007fa800070006
.quad 0x001fbc00fe2007f2,0x5b30029800670000,0xeedc200000070200,0x50b0000000070f00
.quad 0x001ffc00ffe007ea,0x50b0000000070f00,0xe30000000007000f,0xe2400fffff87000f
.quad 0x001f8000fc0007e0,0x50b0000000070f00,0x50b0000000070f00,0x50b0000000070f00
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000300000001,0x0000000000000000,0x0000000000000000,0x0000000000000040
.quad 0x0000000000000114,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x000000030000000b,0x0000000000000000,0x0000000000000000,0x0000000000000154
.quad 0x000000000000011b,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x0000000200000013,0x0000000000000000,0x0000000000000000,0x0000000000000270
.quad 0x00000000000000d8,0x0000000800000002,0x0000000000000008,0x0000000000000018
.quad 0x000000010000008f,0x0000000000000000,0x0000000000000000,0x0000000000000348
.quad 0x0000000000000058,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x00000001000000ab,0x0000000000000000,0x0000000000000000,0x00000000000003a0
.quad 0x0000000000000050,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x00000001000000d7,0x0000000000000000,0x0000000000000000,0x00000000000003f0
.quad 0x000000000000026a,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x7000000000000029,0x0000000000000000,0x0000000000000000,0x000000000000065c
.quad 0x0000000000000030,0x0000000000000003,0x0000000000000004,0x0000000000000000
.quad 0x700000000000003f,0x0000000000000040,0x0000000000000000,0x000000000000068c
.quad 0x0000000000000058,0x0000000e00000003,0x0000000000000004,0x0000000000000000
.quad 0x70000001000000e9,0x0000000000000000,0x0000000000000000,0x00000000000006e4
.quad 0x0000000000000020,0x0000000000000003,0x0000000000000004,0x0000000000000008
.quad 0x7000000b00000105,0x0000000000000000,0x0000000000000000,0x0000000000000708
.quad 0x0000000000000010,0x0000000000000000,0x0000000000000008,0x0000000000000008
.quad 0x000000090000009b,0x0000000000000040,0x0000000000000000,0x0000000000000718
.quad 0x0000000000000010,0x0000000400000003,0x0000000000000008,0x0000000000000010
.quad 0x00000009000000bf,0x0000000000000040,0x0000000000000000,0x0000000000000728
.quad 0x0000000000000010,0x0000000500000003,0x0000000000000008,0x0000000000000010
.quad 0x0000000100000061,0x0000000000000042,0x0000000000000000,0x0000000000000738
.quad 0x000000000000014c,0x0000000e00000000,0x0000000000000004,0x0000000000000000
.quad 0x0000000100000032,0x0000000000000006,0x0000000000000000,0x00000000000008a0
.quad 0x0000000000000100,0x0700000800000003,0x0000000000000020,0x0000000000000000
.quad 0x0000000500000006,0x0000000000000d60,0x0000000000000000,0x0000000000000000
.quad 0x00000000000000a8,0x00000000000000a8,0x0000000000000008,0x0000000500000001
.quad 0x0000000000000738,0x0000000000000000,0x0000000000000000,0x0000000000000268
.quad 0x0000000000000268,0x0000000000000008,0x0000000500000001,0x0000000000000d60
.quad 0x0000000000000000,0x0000000000000000,0x00000000000000a8,0x00000000000000a8
.quad 0x0000000000000008,0x0000007001010001,0x00000000000001e0,0x00000050000001d9
.quad 0x0000003400080005,0x0000000f00000040,0x0000000000002011,0x0000000000000000
.quad 0x00000000000002bf,0x6178652f7070612f,0x0075632e656c706d,0x0000001600000058
.quad 0x72656e65672d2d20,0x656e696c2d657461,0x0000206f666e692d,0x1ef300032f2f0a3c
.quad 0x6f69737265762e0a,0x742e0a352e38206e,0x6d73207465677261,0x6464612e0a32355f
.quad 0x7a69735f73736572,0xf300310a34362065,0x20656c6269736912,0x73207972746e652e
.quad 0x2e0a286572617571,0x752e206d61726170,0x00125f1100143436,0x332a001c2c305f36
.quad 0x290a3108f3001c32,0x206765722e0a7b0a,0x702520646572702e,0x628600123b3e323c
.quad 0x11383c7225203233,0x6472252034368000,0x636f6ce20012353c,0x0a0a302032203109
.quad 0x752e220063646c0a,0x855b202c314a0022,0x580100253b5d2700,0x5d31250024321c00
.quad 0x6d0a352033730056,0x202c33b7002a766f,0x3b782e6469746e25,0x6325202c347c0016
.quad 0x202c354400176174,0x6c2e646171002c25,0x2c31230018732e6f,0x357225003901004a
.quad 0x730a352034b2006c,0x002b65672e707465,0x08f800302c317034,0x20317025400a3b32
.quad 0x5f5f4c2420617262,0xa70a3b325f304242,0x742e61747663e400,0x6c61626f6c672e6f
.quad 0x0501072c32210101,0x6d0a392035b30065,0x00666469772e6c75,0x3482006733647233
.quad 0x3a732e6464610a3b,0x331100402c342600,0x003601005a040135,0x2100265b202c3640
.quad 0x2200df0500503b5d,0x36722578001d2c37,0x3002003574730a3b,0x0a3b3745001b0000
.quad 0xf0009c3a1400c70a,0x7465720a31203619,0x662e0a0a7d0a0a3b,0x2f22203109656c69
.quad 0x6d6178652f707061, 0x0a2275632e656c70, 0x0000000000000000
// Descriptor whose address is passed to __cudaRegisterFatBinary in
// __sti____cudaRegisterAll: two 32-bit words, a pointer to the fatbinData
// blob, and a zero quad.
__fatDeviceText:
.long 1180844977 // 0x466243B1; presumably the descriptor's magic number - confirm
.long 1 // presumably a version field - confirm
.quad fatbinData // address of the embedded fat binary data
.quad 0
// Host-side launch stub for the kernel. It
//   1. stores pointers to its two arguments in an array at [-32, -16),
//   2. pops the launch configuration pushed by square_do
//      (__cudaPopCallConfiguration), and
//   3. forwards function address, grid/block dimensions, argument array,
//      shared memory size and stream to the cudaLaunchKernel<char> wrapper.
// Returns without launching when the pop reports a non-zero result.
// Each dim3 is passed by value as an 8-byte register (x, y) plus a 4-byte
// register (z); shared memory size and stream go on the stack.
__device_stub__Z6squarePii(int*, int):
pushq %rbp
movq %rsp, %rbp
subq $96, %rsp
movq %rdi, -88(%rbp) // local copy of array
movl %esi, -92(%rbp) // local copy of n
movl $0, -4(%rbp) // argument counter = 0
movl -4(%rbp), %eax
cltq
leaq -88(%rbp), %rdx
movq %rdx, -32(%rbp,%rax,8) // args[0] = &array, stored at %rbp-32
addl $1, -4(%rbp)
movl -4(%rbp), %eax
cltq
leaq -92(%rbp), %rdx
movq %rdx, -32(%rbp,%rax,8) // args[1] = &n, stored at %rbp-24
addl $1, -4(%rbp) // %rbp-4 now holds the number of arguments (2)
movq $square, __device_stub__Z6squarePii(int*, int)::__f(%rip) // store the address of square in the function-local static __f
movl $1, -44(%rbp)
movl $1, -40(%rbp)
movl $1, -36(%rbp) // [-44, -32): dim3 initialised to (1, 1, 1); receives gridDim
movl $1, -56(%rbp)
movl $1, -52(%rbp)
movl $1, -48(%rbp) // [-56, -44): dim3 initialised to (1, 1, 1); receives blockDim
leaq -72(%rbp), %rcx // 4th argument: where the stream is stored
leaq -64(%rbp), %rdx // 3rd argument: where sharedMemSize is stored
leaq -56(%rbp), %rsi // 2nd argument: where blockDim is stored
leaq -44(%rbp), %rax // 1st argument: where gridDim is stored
movq %rax, %rdi
call __cudaPopCallConfiguration // fetch the pushed configuration; returns 0 on success
testl %eax, %eax
setne %al // %al = 1 when the return value is non-zero (failure)
testb %al, %al
jne .L10 // on failure, return without launching
cmpl $0, -4(%rbp) // is the argument count 0?
jne .L13 // count is non-zero: go to .L13
movq -72(%rbp), %rdi // stream obtained from __cudaPopCallConfiguration
movq -64(%rbp), %rsi // sharedMemSize obtained from __cudaPopCallConfiguration
leaq -32(%rbp), %rdx // base of the argument-pointer array
movl -4(%rbp), %eax
cltq
salq $3, %rax // count * 8 bytes per pointer
leaq (%rdx,%rax), %r9 // &args[count] -> args parameter (count is 0 on this path)
movq -56(%rbp), %rcx // blockDim.x and blockDim.y
movl -48(%rbp), %r8d // blockDim.z
movq -44(%rbp), %rdx // gridDim.x and gridDim.y
movl -36(%rbp), %eax // gridDim.z
pushq %rdi // stack argument: stream
pushq %rsi // stack argument: sharedMemSize
movq %rdx, %rsi // gridDim.x/y -> 2nd argument register
movl %eax, %edx // gridDim.z -> 3rd argument register (not a block dimension)
movl $square, %edi // 1st argument: address of square
call cudaError cudaLaunchKernel<char>(char const*, dim3, dim3, void**, unsigned long, CUstream_st*)
addq $16, %rsp // drop the two stack arguments
jmp .L10
.L13: // argument count is non-zero; this is the path taken here, since two arguments were stored above
movq -72(%rbp), %rdi // stream
movq -64(%rbp), %rsi // sharedMemSize
leaq -32(%rbp), %r9 // &args[0] -> args parameter
movq -56(%rbp), %rcx // blockDim.x and blockDim.y
movl -48(%rbp), %r8d // blockDim.z
movq -44(%rbp), %rdx // gridDim.x and gridDim.y
movl -36(%rbp), %eax // gridDim.z
pushq %rdi // stack argument: stream
pushq %rsi // stack argument: sharedMemSize
movq %rdx, %rsi // gridDim.x/y -> 2nd argument register
movl %eax, %edx // gridDim.z -> 3rd argument register
movl $square, %edi // 1st argument: address of square
call cudaError cudaLaunchKernel<char>(char const*, dim3, dim3, void**, unsigned long, CUstream_st*)
addq $16, %rsp // drop the two stack arguments
.L10:
leave
ret
// Host-side function carrying the kernel's name. It only forwards its two
// arguments (array in %rdi, n in %esi) to __device_stub__Z6squarePii. Its
// address is also the value the stub and the registration callback pass along
// to identify the kernel.
square:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movq %rdi, -8(%rbp) // array
movl %esi, -12(%rbp) // n
movl -12(%rbp), %edx
movq -8(%rbp), %rax
movl %edx, %esi // second argument: n
movq %rax, %rdi // first argument: array
call __device_stub__Z6squarePii(int*, int)
nop
leave
ret
// Name string "square"; its address is passed twice to __cudaRegisterFunction
// by __nv_cudaEntityRegisterCallback.
.LC0:
.string "square"
// Registration callback, invoked by __sti____cudaRegisterAll with the handle
// returned from __cudaRegisterFatBinary. It records the handle in two places
// and then calls __cudaRegisterFunction with the handle, the address of the
// host function `square`, and the name string "square".
// NOTE(review): the meaning of the trailing -1 / 0 arguments is not visible in
// this listing.
__nv_cudaEntityRegisterCallback(void**):
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movq %rdi, -8(%rbp) // handle argument
movq -8(%rbp), %rax
movq %rax, __nv_cudaEntityRegisterCallback(void**)::__ref(%rip) // keep a copy in the function-local static __ref
movq -8(%rbp), %rax
movq %rax, %rdi
call __nv_save_fatbinhandle_for_managed_rt(void**) // also store it in the managed-runtime global
movq -8(%rbp), %rax
pushq $0
pushq $0
pushq $0
pushq $0 // four zero stack arguments (arguments 7 to 10)
movl $0, %r9d // 6th argument: 0
movl $-1, %r8d // 5th argument: -1
movl $.LC0, %ecx // 4th argument: the string "square"
movl $.LC0, %edx // 3rd argument: the same string
movl $square, %esi // 2nd argument: address of the host function square
movq %rax, %rdi // 1st argument: the handle
call __cudaRegisterFunction
addq $32, %rsp // drop the four stack arguments
nop
leave
ret
// Registration routine for this translation unit:
//   1. registers the fat binary described by __fatDeviceText and stores the
//      returned handle in __cudaFatCubinHandle,
//   2. calls __nv_cudaEntityRegisterCallback on that handle through a local
//      function pointer,
//   3. calls __cudaRegisterFatBinaryEnd with the handle,
//   4. schedules __cudaUnregisterBinaryUtil with atexit.
// NOTE(review): the __sti__ prefix suggests this runs as a static initializer;
// the listing does not show what invokes it.
__sti____cudaRegisterAll():
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movl $__fatDeviceText, %edi // address of the fat binary descriptor
call __cudaRegisterFatBinary
movq %rax, __cudaFatCubinHandle(%rip) // save the returned handle
movq $__nv_cudaEntityRegisterCallback(void**), -8(%rbp) // local function pointer to the callback
movq __cudaFatCubinHandle(%rip), %rax
movq -8(%rbp), %rdx
movq %rax, %rdi // argument: the handle
call *%rdx // indirect call to the callback
movq __cudaFatCubinHandle(%rip), %rax
movq %rax, %rdi
call __cudaRegisterFatBinaryEnd
movl $__cudaUnregisterBinaryUtil(), %edi // cleanup function to run at exit
call atexit
nop
leave
ret
// Out-of-line instance of the cudaLaunchKernel<char> template wrapper. It
// receives (func, dim3, dim3, args, unsigned long, CUstream_st*), copies the
// register arguments through stack slots, re-pushes the two stack arguments,
// and calls cudaLaunchKernel with the same six values in the same order. The
// callee's return value is returned unchanged.
// Incoming registers: %rdi = func, %rsi:%edx = first dim3 (x/y, z),
// %rcx:%r8d = second dim3 (x/y, z), %r9 = args; 16(%rbp) and 24(%rbp) hold the
// two stack arguments.
cudaError cudaLaunchKernel<char>(char const*, dim3, dim3, void**, unsigned long, CUstream_st*):
pushq %rbp
movq %rsp, %rbp
subq $48, %rsp
movq %rdi, -8(%rbp) // func -> %rbp-8
movq %rcx, %rax // second dim3, x/y words
movl %r8d, %ecx // second dim3, z word
movq %r9, -48(%rbp) // args pointer
movq %rsi, -24(%rbp) // first dim3, x/y words
movl %edx, -16(%rbp) // first dim3, z word
movq %rax, -40(%rbp) // second dim3, x/y words
movl %ecx, -32(%rbp) // second dim3, z word
movq -48(%rbp), %r8 // args (moved to %r9 below)
movq -40(%rbp), %rcx // second dim3 x/y -> 4th argument register
movl -32(%rbp), %edi // second dim3 z (moved to %r8d below)
movq -24(%rbp), %rsi // first dim3 x/y -> 2nd argument register
movl -16(%rbp), %edx // first dim3 z -> 3rd argument register
movq -8(%rbp), %rax // func (moved to %rdi below)
pushq 24(%rbp) // re-push the caller's second stack argument (CUstream_st*)
pushq 16(%rbp) // re-push the caller's first stack argument (unsigned long)
movq %r8, %r9 // args -> 6th argument register
movl %edi, %r8d // second dim3 z -> 5th argument register
movq %rax, %rdi // func -> 1st argument register
call cudaLaunchKernel
addq $16, %rsp // drop the two stack arguments
leave
ret
标签:kernel,movl,rbp,反汇编,quad,0x0000000000000000,movq,rax,cuda
From: https://www.cnblogs.com/zwlwf/p/18653990