-
Notifications
You must be signed in to change notification settings - Fork 226
Expand file tree
/
Copy pathhip_simple_check.cpp
More file actions
95 lines (82 loc) · 2.64 KB
/
hip_simple_check.cpp
File metadata and controls
95 lines (82 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Copyright Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT
// Minimal HIP kernel WITHOUT printf, to isolate whether the gfx1150
// sanity-test hang (TheRock#3199) is caused by printf buffer handling
// or by kernel dispatch itself.
//
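// Includes a SIGALRM watchdog that fires if the process has not finished its
// HIP calls within 20 seconds. It dumps a backtrace, /proc/self/maps and
// (if readable) /proc/self/stack to stderr, then exits with status 2.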
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <execinfo.h>
#include <fcntl.h>
#include <hip/hip_runtime.h>
#include <unistd.h>
// Best-effort write of `len` bytes to stderr using write(2) only, so it is
// usable from a signal handler. Retries on short writes and silently gives up
// if write() reports an error or makes no progress.
static void watchdog_write(const char *data, size_t len) {
  while (len > 0) {
    ssize_t written = write(STDERR_FILENO, data, len);
    if (written <= 0)
      return;
    data += written;
    len -= static_cast<size_t>(written);
  }
}

// Writes a NUL-terminated string to stderr without going through stdio.
static void watchdog_puts(const char *text) {
  size_t len = 0;
  while (text[len] != '\0')
    ++len;
  watchdog_write(text, len);
}

// Writes `value` in decimal to stderr without printf-style formatting.
static void watchdog_put_int(int value) {
  char digits[16];
  size_t pos = sizeof(digits);
  // Work on the unsigned magnitude so the most negative int does not overflow.
  unsigned int magnitude = value < 0 ? 0u - static_cast<unsigned int>(value)
                                     : static_cast<unsigned int>(value);
  do {
    digits[--pos] = static_cast<char>('0' + magnitude % 10);
    magnitude /= 10;
  } while (magnitude != 0);
  if (value < 0)
    digits[--pos] = '-';
  watchdog_write(digits + pos, sizeof(digits) - pos);
}

// Copies the file at `path` to stderr with open/read/close.
// Returns false if the file cannot be opened, true otherwise.
static bool watchdog_dump_file(const char *path) {
  int fd = open(path, O_RDONLY);
  if (fd < 0)
    return false;
  char chunk[512];
  ssize_t got;
  while ((got = read(fd, chunk, sizeof(chunk))) > 0)
    watchdog_write(chunk, static_cast<size_t>(got));
  close(fd);
  return true;
}

// SIGALRM watchdog handler: reports that the process hung, dumps diagnostic
// state to stderr, and terminates the process with exit status 2.
//
// `sig` is the signal number delivered to the handler; it is only echoed in
// the banner line.
//
// The handler deliberately avoids stdio (fprintf/fopen/fgets): it runs
// precisely when the process is stuck, and stdio may need a lock or the
// allocator that the interrupted code already holds, which would hang the
// watchdog itself. Only write/open/read/close/_exit are used for output.
// NOTE(review): backtrace() is a glibc extension and is not guaranteed to be
// async-signal-safe; it is kept because the backtrace is the point of the dump.
static void dump_backtrace_and_maps(int sig) {
  watchdog_puts("\n=== WATCHDOG: process hung for 20s (signal ");
  watchdog_put_int(sig);
  watchdog_puts(") ===\n");

  // Dump raw backtrace addresses; backtrace_symbols_fd writes straight to the
  // file descriptor.
  void *frames[64];
  int n = backtrace(frames, 64);
  watchdog_puts("=== backtrace (");
  watchdog_put_int(n);
  watchdog_puts(" frames) ===\n");
  backtrace_symbols_fd(frames, n, STDERR_FILENO);

  // Dump /proc/self/maps so the addresses above can be resolved offline.
  // Nothing is printed after the banner if the file cannot be opened.
  watchdog_puts("\n=== /proc/self/maps ===\n");
  watchdog_dump_file("/proc/self/maps");

  // Dump /proc/self/stack (kernel stack, if readable).
  watchdog_puts("\n=== /proc/self/stack ===\n");
  if (!watchdog_dump_file("/proc/self/stack"))
    watchdog_puts("(not readable)\n");

  // _exit skips atexit handlers and stdio flushing, which could block.
  _exit(2);
}
// Kernel: each thread stores the square of its flat 1D global index into buf.
//
// Launch layout: 1D grid of 1D blocks. There is no bounds check, so `buf` must
// hold at least gridDim.x * blockDim.x ints. Uses no shared memory and no
// device-side printf.
__global__ void squares_no_printf(int *buf) {
  const int tid = threadIdx.x + blockDim.x * blockIdx.x;
  const int squared = tid * tid;
  buf[tid] = squared;
}
int main() {
// Set up watchdog: SIGALRM after 20s
signal(SIGALRM, dump_backtrace_and_maps);
alarm(20);
constexpr int gridsize = 1;
constexpr int blocksize = 64;
constexpr int size = gridsize * blocksize;
int *d_buf;
fprintf(stderr, "hip_simple_check: hipHostMalloc\n");
hipHostMalloc(&d_buf, size * sizeof(int));
fprintf(stderr, "hip_simple_check: hipLaunchKernelGGL\n");
hipLaunchKernelGGL(squares_no_printf, gridsize, blocksize, 0, 0, d_buf);
fprintf(stderr, "hip_simple_check: hipDeviceSynchronize\n");
hipDeviceSynchronize();
// Cancel watchdog — we didn't hang
alarm(0);
fprintf(stderr, "hip_simple_check: checking results\n");
int mismatches_count = 0;
for (int i = 0; i < size; ++i) {
int square = i * i;
if (d_buf[i] != square) {
fprintf(stderr,
"Element at index %d expected value %d, actual value: %d\n", i,
square, d_buf[i]);
++mismatches_count;
}
}
if (mismatches_count > 0) {
fprintf(stderr, "There were %d mismatches\n", mismatches_count);
return 1;
}
fprintf(stderr, "hip_simple_check: PASSED\n");
return 0;
}