1
// This file is part of BOINC.
2
// http://boinc.berkeley.edu
3
// Copyright (C) 2009 University of California
5
// BOINC is free software; you can redistribute it and/or modify it
6
// under the terms of the GNU Lesser General Public License
7
// as published by the Free Software Foundation,
8
// either version 3 of the License, or (at your option) any later version.
10
// BOINC is distributed in the hope that it will be useful,
11
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13
// See the GNU Lesser General Public License for more details.
15
// You should have received a copy of the GNU Lesser General Public License
16
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
19
// Client-specific GPU code, mostly GPU detection.
24
#include "boinc_win.h"
34
// Suppress obsolete warning when building for OS 10.3.9
35
#define DLOPEN_NO_WARN
46
#include "client_state.h"
47
#include "client_msgs.h"
52
//#define MEASURE_AVAILABLE_RAM
54
// Linear membership test: scans v from the front and reports true
// as soon as an element equal to n is found.
static bool in_vector(int n, vector<int>& v) {
55
for (unsigned int i=0; i<v.size(); i++) {
56
if (v[i] == n) return true;
64
// Handler that the GPU detection code installs for SIGSEGV via signal().
// The signal-number parameter is left unnamed.
void segv_handler(int) {
69
// Log, for each of the `count` devices, either the amount of available RAM
// or the fact that it is unknown.
// The logging code is compiled only when MEASURE_AVAILABLE_RAM is defined,
// and each message is emitted only if log_flags.coproc_debug is set.
void COPROC::print_available_ram() {
70
#ifdef MEASURE_AVAILABLE_RAM
71
// rate limit: skip if less than 60 (presumably seconds) has passed
// on gstate.now since the previous print
if (gstate.now - last_print_time < 60) return;
72
last_print_time = gstate.now;
74
for (int i=0; i<count; i++) {
75
// case 1: the amount could not be determined for this device
if (available_ram_unknown[i]) {
76
if (log_flags.coproc_debug) {
77
msg_printf(0, MSG_INFO,
78
"[coproc_debug] %s device %d: available RAM unknown",
83
// case 2: the amount is known; it is reported in MB (value divided by MEGA)
if (log_flags.coproc_debug) {
84
msg_printf(0, MSG_INFO,
85
"[coproc_debug] %s device %d: available RAM %d MB",
87
(int)(available_ram[i]/MEGA)
96
// Parameters of the top-level detection routine:
//   use_all          forwarded to COPROC_CUDA::get()
//   descs            receives GPU descriptions
//   warnings         receives diagnostic messages
//   ignore_cuda_dev  NVIDIA device numbers passed on as devices to ignore
//   ignore_ati_dev   ATI device numbers passed on as devices to ignore
bool use_all, vector<string>&descs, vector<string>&warnings,
97
vector<int>& ignore_cuda_dev,
98
vector<int>& ignore_ati_dev
103
// NVIDIA detection; a fault during it is reported through `warnings`
COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev);
106
warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection");
109
// ATI detection; a fault during it is reported through `warnings`
COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev);
112
warnings.push_back("Caught SIGSEGV in ATI GPU detection");
115
// Signal-based variant: install segv_handler for SIGSEGV and remember
// the previous handler so it can be restored at the end.
void (*old_sig)(int) = signal(SIGSEGV, segv_handler);
116
// setjmp() yields nonzero when control returns through `resume`
// (presumably via longjmp from segv_handler); record a warning in that case.
if (setjmp(resume)) {
117
warnings.push_back("Caught SIGSEGV in NVIDIA GPU detection");
119
COPROC_CUDA::get(*this, use_all, descs, warnings, ignore_cuda_dev);
121
#ifndef __APPLE__ // ATI does not yet support CAL on Macs
122
// same setjmp guard for the ATI probe
if (setjmp(resume)) {
123
warnings.push_back("Caught SIGSEGV in ATI GPU detection");
125
COPROC_ATI::get(*this, descs, warnings, ignore_ati_dev);
128
// put back whatever SIGSEGV handler was in place before detection
signal(SIGSEGV, old_sig);
132
// Return 1, -1 or 0 if device 1 is respectively more, less, or equally capable compared with device 2.
133
// If "loose", ignore FLOPS and tolerate small memory diff
135
// Rank two NVIDIA devices. Criteria are applied in order and the first
// one that differs decides the result (1 if c1 wins, -1 if c2 wins):
//   1. compute capability major number
//   2. compute capability minor number
//   3. CUDA driver version
//   4. total global memory
//   5. peak FLOPS
int cuda_compare(COPROC_CUDA& c1, COPROC_CUDA& c2, bool loose) {
136
if (c1.prop.major > c2.prop.major) return 1;
137
if (c1.prop.major < c2.prop.major) return -1;
138
if (c1.prop.minor > c2.prop.minor) return 1;
139
if (c1.prop.minor < c2.prop.minor) return -1;
140
if (c1.cuda_version > c2.cuda_version) return 1;
141
if (c1.cuda_version < c2.cuda_version) return -1;
143
// tolerant memory comparison (presumably the "loose" case): c1 counts as
// larger only above 1.4x of c2's memory, and as smaller only below 0.7x
if (c1.prop.totalGlobalMem > 1.4*c2.prop.totalGlobalMem) return 1;
144
if (c1.prop.totalGlobalMem < .7* c2.prop.totalGlobalMem) return -1;
147
// exact memory comparison
if (c1.prop.totalGlobalMem > c2.prop.totalGlobalMem) return 1;
148
if (c1.prop.totalGlobalMem < c2.prop.totalGlobalMem) return -1;
149
// last criterion: peak floating-point speed as computed by peak_flops()
double s1 = c1.peak_flops();
150
double s2 = c2.peak_flops();
151
if (s1 > s2) return 1;
152
if (s1 < s2) return -1;
157
// Function-pointer types (using the __stdcall calling convention) for the
// CUDA driver entry points that are resolved by name at run time.
// Each type is used for the like-named global pointer:
//   CUDA_GDC  cuDeviceGetCount          CUDA_GDV  cuDriverGetVersion
//   CUDA_GDI  cuInit                    CUDA_GDG  cuDeviceGet
//   CUDA_GDA  cuDeviceGetAttribute      CUDA_GDN  cuDeviceGetName
//   CUDA_GDM  cuDeviceTotalMem          CUDA_GDCC cuDeviceComputeCapability
//   CUDA_CC   cuCtxCreate               CUDA_CD   cuCtxDestroy
//   CUDA_MA   cuMemAlloc                CUDA_MF   cuMemFree
//   CUDA_MGI  cuMemGetInfo
typedef int (__stdcall *CUDA_GDC)(int *count);
158
typedef int (__stdcall *CUDA_GDV)(int* version);
159
typedef int (__stdcall *CUDA_GDI)(int);
160
typedef int (__stdcall *CUDA_GDG)(int*, int);
161
typedef int (__stdcall *CUDA_GDA)(int*, int, int);
162
typedef int (__stdcall *CUDA_GDN)(char*, int, int);
163
typedef int (__stdcall *CUDA_GDM)(unsigned int*, int);
164
typedef int (__stdcall *CUDA_GDCC)(int*, int*, int);
165
typedef int (__stdcall *CUDA_CC)(unsigned int*, unsigned int, unsigned int);
166
typedef int (__stdcall *CUDA_CD)(unsigned int);
167
typedef int (__stdcall *CUDA_MA)(unsigned int*, unsigned int);
168
typedef int (__stdcall *CUDA_MF)(unsigned int);
169
typedef int (__stdcall *CUDA_MGI)(unsigned int*, unsigned int*);
171
// Global pointers to the CUDA driver entry points. They start out NULL and
// are filled in by COPROC_CUDA::get() using GetProcAddress(); a pointer
// that is still NULL means the symbol was not resolved.
CUDA_GDC __cuDeviceGetCount = NULL;
172
CUDA_GDV __cuDriverGetVersion = NULL;
173
CUDA_GDI __cuInit = NULL;
174
CUDA_GDG __cuDeviceGet = NULL;
175
CUDA_GDA __cuDeviceGetAttribute = NULL;
176
CUDA_GDN __cuDeviceGetName = NULL;
177
CUDA_GDM __cuDeviceTotalMem = NULL;
178
CUDA_GDCC __cuDeviceComputeCapability = NULL;
179
CUDA_CC __cuCtxCreate = NULL;
180
CUDA_CD __cuCtxDestroy = NULL;
181
CUDA_MA __cuMemAlloc = NULL;
182
CUDA_MF __cuMemFree = NULL;
183
CUDA_MGI __cuMemGetInfo = NULL;
186
// The same set of CUDA driver entry points declared as plain function
// pointers (no __stdcall, no explicit initializer). COPROC_CUDA::get()
// assigns them from dlsym() lookups.
int (*__cuInit)(int);
187
int (*__cuDeviceGetCount)(int*);
188
int (*__cuDriverGetVersion)(int*);
189
int (*__cuDeviceGet)(int*, int);
190
int (*__cuDeviceGetAttribute)(int*, int, int);
191
int (*__cuDeviceGetName)(char*, int, int);
192
int (*__cuDeviceTotalMem)(unsigned int*, int);
193
int (*__cuDeviceComputeCapability)(int*, int*, int);
194
int (*__cuCtxCreate)(unsigned int*, unsigned int, unsigned int);
195
int (*__cuCtxDestroy)(unsigned int);
196
int (*__cuMemAlloc)(unsigned int*, unsigned int);
197
int (*__cuMemFree)(unsigned int);
198
int (*__cuMemGetInfo)(unsigned int*, unsigned int*);
201
// NVIDIA interfaces are documented here:
202
// http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/online/index.html
204
// Detect NVIDIA GPUs through the CUDA driver library, which is loaded
// dynamically and whose entry points are looked up by name.
// One description line per GPU found is appended to descs; problems are
// appended to warnings. Devices whose number is in ignore_devs are
// described as "ignored by config". A COPROC_CUDA object is allocated and
// appended to coprocs.coprocs near the end.
void COPROC_CUDA::get(
206
bool use_all, // if false, use only those equivalent to most capable
207
vector<string>& descs,
208
vector<string>& warnings,
209
vector<int>& ignore_devs
215
// --- load the driver library with LoadLibrary/GetProcAddress ---
HMODULE cudalib = LoadLibrary("nvcuda.dll");
217
warnings.push_back("No NVIDIA library found");
220
// resolve each entry point; a failed lookup leaves the pointer NULL
__cuDeviceGetCount = (CUDA_GDC)GetProcAddress( cudalib, "cuDeviceGetCount" );
221
__cuDriverGetVersion = (CUDA_GDV)GetProcAddress( cudalib, "cuDriverGetVersion" );
222
__cuInit = (CUDA_GDI)GetProcAddress( cudalib, "cuInit" );
223
__cuDeviceGet = (CUDA_GDG)GetProcAddress( cudalib, "cuDeviceGet" );
224
__cuDeviceGetAttribute = (CUDA_GDA)GetProcAddress( cudalib, "cuDeviceGetAttribute" );
225
__cuDeviceGetName = (CUDA_GDN)GetProcAddress( cudalib, "cuDeviceGetName" );
226
__cuDeviceTotalMem = (CUDA_GDM)GetProcAddress( cudalib, "cuDeviceTotalMem" );
227
__cuDeviceComputeCapability = (CUDA_GDCC)GetProcAddress( cudalib, "cuDeviceComputeCapability" );
228
__cuCtxCreate = (CUDA_CC)GetProcAddress( cudalib, "cuCtxCreate" );
229
__cuCtxDestroy = (CUDA_CD)GetProcAddress( cudalib, "cuCtxDestroy" );
230
__cuMemAlloc = (CUDA_MA)GetProcAddress( cudalib, "cuMemAlloc" );
231
__cuMemFree = (CUDA_MF)GetProcAddress( cudalib, "cuMemFree" );
232
__cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" );
235
// --- obtain the display driver version through NvAPI ---
NvAPI_Status nvapiStatus;
236
NvDisplayHandle hDisplay;
237
NV_DISPLAY_DRIVER_VERSION Version;
238
// zero the struct, then set its version field as NvAPI expects
memset(&Version, 0, sizeof(Version));
239
Version.version = NV_DISPLAY_DRIVER_VERSION_VER;
242
// walk the display handles; stop when enumeration fails or when one
// handle yields a driver version
for (int i=0; ; i++) {
243
nvapiStatus = NvAPI_EnumNvidiaDisplayHandle(i, &hDisplay);
244
if (nvapiStatus != NVAPI_OK) break;
245
nvapiStatus = NvAPI_GetDisplayDriverVersion(hDisplay, &Version);
246
if (nvapiStatus == NVAPI_OK) break;
252
// --- load the driver library with dlopen/dlsym ---
// two alternative library names (presumably chosen per platform)
cudalib = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_NOW);
254
cudalib = dlopen("libcuda.so", RTLD_NOW);
257
warnings.push_back("No NVIDIA library found");
260
__cuDeviceGetCount = (int(*)(int*)) dlsym(cudalib, "cuDeviceGetCount");
261
__cuDriverGetVersion = (int(*)(int*)) dlsym( cudalib, "cuDriverGetVersion" );
262
__cuInit = (int(*)(int)) dlsym( cudalib, "cuInit" );
263
__cuDeviceGet = (int(*)(int*, int)) dlsym( cudalib, "cuDeviceGet" );
264
__cuDeviceGetAttribute = (int(*)(int*, int, int)) dlsym( cudalib, "cuDeviceGetAttribute" );
265
__cuDeviceGetName = (int(*)(char*, int, int)) dlsym( cudalib, "cuDeviceGetName" );
266
__cuDeviceTotalMem = (int(*)(unsigned int*, int)) dlsym( cudalib, "cuDeviceTotalMem" );
267
__cuDeviceComputeCapability = (int(*)(int*, int*, int)) dlsym( cudalib, "cuDeviceComputeCapability" );
268
__cuCtxCreate = (int(*)(unsigned int*, unsigned int, unsigned int)) dlsym( cudalib, "cuCtxCreate" );
269
__cuCtxDestroy = (int(*)(unsigned int)) dlsym( cudalib, "cuCtxDestroy" );
270
__cuMemAlloc = (int(*)(unsigned int*, unsigned int)) dlsym( cudalib, "cuMemAlloc" );
271
__cuMemFree = (int(*)(unsigned int)) dlsym( cudalib, "cuMemFree" );
272
__cuMemGetInfo = (int(*)(unsigned int*, unsigned int*)) dlsym( cudalib, "cuMemGetInfo" );
275
// --- verify that the needed entry points were resolved ---
// NOTE(review): no matching check for __cuDeviceGetName appears in this
// sequence, although that pointer is called in the device loop below.
if (!__cuDriverGetVersion) {
276
warnings.push_back("cuDriverGetVersion() missing from NVIDIA library");
280
warnings.push_back("cuInit() missing from NVIDIA library");
283
if (!__cuDeviceGetCount) {
284
warnings.push_back("cuDeviceGetCount() missing from NVIDIA library");
287
if (!__cuDeviceGet) {
288
warnings.push_back("cuDeviceGet() missing from NVIDIA library");
291
if (!__cuDeviceGetAttribute) {
292
warnings.push_back("cuDeviceGetAttribute() missing from NVIDIA library");
295
if (!__cuDeviceTotalMem) {
296
warnings.push_back("cuDeviceTotalMem() missing from NVIDIA library");
299
if (!__cuDeviceComputeCapability) {
300
warnings.push_back("cuDeviceComputeCapability() missing from NVIDIA library");
303
if (!__cuCtxCreate) {
304
warnings.push_back("cuCtxCreate() missing from NVIDIA library");
307
if (!__cuCtxDestroy) {
308
warnings.push_back("cuCtxDestroy() missing from NVIDIA library");
312
warnings.push_back("cuMemAlloc() missing from NVIDIA library");
316
warnings.push_back("cuMemFree() missing from NVIDIA library");
319
if (!__cuMemGetInfo) {
320
warnings.push_back("cuMemGetInfo() missing from NVIDIA library");
324
// --- initialize the driver and query global information ---
// NOTE(review): the sprintf() calls in this function write into buf/buf2
// without a length bound.
retval = (*__cuInit)(0);
326
sprintf(buf, "NVIDIA drivers present but no GPUs found");
327
warnings.push_back(buf);
332
retval = (*__cuDriverGetVersion)(&cuda_version);
334
sprintf(buf, "cuDriverGetVersion() returned %d", retval);
335
warnings.push_back(buf);
339
// candidate devices collected by the loop below
vector<COPROC_CUDA> gpus;
340
retval = (*__cuDeviceGetCount)(&count);
342
sprintf(buf, "cuDeviceGetCount() returned %d", retval);
343
warnings.push_back(buf);
346
// the device count is reported through the warnings list
sprintf(buf, "NVIDIA library reports %d GPU%s", count, (count==1)?"":"s");
347
warnings.push_back(buf);
353
// --- collect the properties of every device ---
for (j=0; j<count; j++) {
354
// start from a zeroed property block for each device
memset(&cc.prop, 0, sizeof(cc.prop));
356
retval = (*__cuDeviceGet)(&device, j);
358
sprintf(buf, "cuDeviceGet(%d) returned %d", j, retval);
359
warnings.push_back(buf);
362
// NOTE(review): the result of this call is not stored in retval, so the
// message below would print an earlier value of retval.
(*__cuDeviceGetName)(cc.prop.name, 256, device);
364
sprintf(buf, "cuDeviceGetName(%d) returned %d", j, retval);
365
warnings.push_back(buf);
368
// fill cc.prop one attribute at a time; most return codes are not examined
(*__cuDeviceComputeCapability)(&cc.prop.major, &cc.prop.minor, device);
369
(*__cuDeviceTotalMem)(&cc.prop.totalGlobalMem, device);
370
(*__cuDeviceGetAttribute)(&cc.prop.sharedMemPerBlock, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK, device);
371
(*__cuDeviceGetAttribute)(&cc.prop.regsPerBlock, CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK, device);
372
(*__cuDeviceGetAttribute)(&cc.prop.warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device);
373
(*__cuDeviceGetAttribute)(&cc.prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, device);
374
retval = (*__cuDeviceGetAttribute)(&cc.prop.maxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device);
375
retval = (*__cuDeviceGetAttribute)(&cc.prop.maxThreadsDim[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, device);
376
(*__cuDeviceGetAttribute)(&cc.prop.maxThreadsDim[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, device);
377
(*__cuDeviceGetAttribute)(&cc.prop.maxThreadsDim[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, device);
378
(*__cuDeviceGetAttribute)(&cc.prop.maxGridSize[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, device);
379
(*__cuDeviceGetAttribute)(&cc.prop.maxGridSize[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, device);
380
(*__cuDeviceGetAttribute)(&cc.prop.maxGridSize[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, device);
381
(*__cuDeviceGetAttribute)(&cc.prop.clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device);
382
(*__cuDeviceGetAttribute)(&cc.prop.totalConstMem, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device);
383
(*__cuDeviceGetAttribute)(&cc.prop.textureAlignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, device);
384
(*__cuDeviceGetAttribute)(&cc.prop.deviceOverlap, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, device);
385
retval = (*__cuDeviceGetAttribute)(&cc.prop.multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device);
386
//retval = (*__cuDeviceGetProperties)(&cc.prop, device);
387
// skip devices whose reported compute capability is not plausible
if (cc.prop.major <= 0) continue; // major == 0 means emulation
388
if (cc.prop.major > 100) continue; // e.g. 9999 is an error
389
// the display driver version comes from the NvAPI query on Windows
// (non-SIM) builds
#if defined(_WIN32) && !defined(SIM)
390
cc.display_driver_version = Version.drvVersion;
392
cc.display_driver_version = 0;
394
cc.cuda_version = cuda_version;
400
warnings.push_back("No CUDA-capable NVIDIA GPUs found");
404
// identify the most capable non-ignored instance
408
for (i=0; i<gpus.size(); i++) {
409
if (in_vector(gpus[i].device_num, ignore_devs)) continue;
413
// strict comparison (loose == false): a later device replaces `best`
// only if it ranks strictly higher
} else if (cuda_compare(gpus[i], best, false) > 0) {
418
// see which other instances are equivalent,
419
// and set the "count" and "device_nums" fields
422
for (i=0; i<gpus.size(); i++) {
424
gpus[i].description(buf);
425
if (in_vector(gpus[i].device_num, ignore_devs)) {
426
sprintf(buf2, "NVIDIA GPU %d (ignored by config): %s", gpus[i].device_num, buf);
427
// a device is used if use_all is set or if the loose comparison
// against `best` reports no difference (cuda_compare() == 0)
} else if (use_all || !cuda_compare(gpus[i], best, true)) {
428
best.device_nums[best.count] = gpus[i].device_num;
430
sprintf(buf2, "NVIDIA GPU %d: %s", gpus[i].device_num, buf);
432
sprintf(buf2, "NVIDIA GPU %d (not used): %s", gpus[i].device_num, buf);
434
descs.push_back(string(buf2));
438
// hand a heap-allocated COPROC_CUDA to the coprocessor list
COPROC_CUDA* ccp = new COPROC_CUDA;
440
coprocs.coprocs.push_back(ccp);
444
// fake an NVIDIA GPU (for debugging)
446
// Build a COPROC_CUDA filled with made-up values and append it to
// coprocs.coprocs.
//   coprocs  list that receives the new object
//   ram      value stored as the device's total global memory
//   count    number of device_nums[] slots to fill (with 0..count-1)
COPROC_CUDA* fake_cuda(COPROCS& coprocs, double ram, int count) {
447
COPROC_CUDA* cc = new COPROC_CUDA;
448
strcpy(cc->type, "CUDA");
450
// device numbers are simply 0..count-1
for (int i=0; i<count; i++) {
451
cc->device_nums[i] = i;
453
// arbitrary placeholder driver and CUDA versions
cc->display_driver_version = 18000;
454
cc->cuda_version = 2020;
455
strcpy(cc->prop.name, "Fake NVIDIA GPU");
456
// NOTE(review): the cast to unsigned int cannot represent values of `ram`
// above UINT_MAX.
cc->prop.totalGlobalMem = (unsigned int)ram;
457
// the remaining properties are arbitrary placeholder values
cc->prop.sharedMemPerBlock = 100;
458
cc->prop.regsPerBlock = 8;
459
cc->prop.warpSize = 10;
460
cc->prop.memPitch = 10;
461
cc->prop.maxThreadsPerBlock = 20;
462
cc->prop.maxThreadsDim[0] = 2;
463
cc->prop.maxThreadsDim[1] = 2;
464
cc->prop.maxThreadsDim[2] = 2;
465
cc->prop.maxGridSize[0] = 10;
466
cc->prop.maxGridSize[1] = 10;
467
cc->prop.maxGridSize[2] = 10;
468
cc->prop.totalConstMem = 10;
471
cc->prop.clockRate = 1250000;
472
cc->prop.textureAlignment = 1000;
473
cc->prop.multiProcessorCount = 14;
474
coprocs.coprocs.push_back(cc);
478
// See how much RAM is available on each GPU.
479
// If this fails, set "available_ram_unknown"
481
// Fill available_ram[] and available_ram_unknown[] for each of the `count`
// device instances.
// With MEASURE_AVAILABLE_RAM defined, free memory is queried from the
// driver by creating a context on each device; the final loop instead
// reports the device's total global memory as available.
void COPROC_CUDA::get_available_ram() {
482
#ifdef MEASURE_AVAILABLE_RAM
483
int device, i, retval;
484
unsigned int memfree, memtotal;
487
// avoid crash if faked GPU
489
// a NULL __cuDeviceGet means the driver entry points were never resolved;
// use the stored fake values instead of calling into the driver
if (!__cuDeviceGet) {
490
for (i=0; i<count; i++) {
491
available_ram[i] = available_ram_fake[i];
492
available_ram_unknown[i] = false;
496
for (i=0; i<count; i++) {
497
int devnum = device_nums[i];
498
// pessimistic defaults; overwritten once the query has succeeded
available_ram[i] = 0;
499
available_ram_unknown[i] = true;
500
// step 1: get a device handle for this device number
retval = (*__cuDeviceGet)(&device, devnum);
502
if (log_flags.coproc_debug) {
503
msg_printf(0, MSG_INFO,
504
"[coproc] cuDeviceGet(%d) returned %d", devnum, retval
509
// step 2: create a context on the device (cuMemGetInfo takes no device argument)
retval = (*__cuCtxCreate)(&ctx, 0, device);
511
if (log_flags.coproc_debug) {
512
msg_printf(0, MSG_INFO,
513
"[coproc] cuCtxCreate(%d) returned %d", devnum, retval
518
// step 3: read free and total memory
retval = (*__cuMemGetInfo)(&memfree, &memtotal);
520
if (log_flags.coproc_debug) {
521
msg_printf(0, MSG_INFO,
522
"[coproc] cuMemGetInfo(%d) returned %d", devnum, retval
525
// the context is destroyed on both paths that follow cuMemGetInfo
(*__cuCtxDestroy)(ctx);
528
(*__cuCtxDestroy)(ctx);
529
available_ram[i] = (double) memfree;
530
available_ram_unknown[i] = false;
533
// without measurement: treat all of the device's global memory as available
for (int i=0; i<count; i++) {
534
available_ram_unknown[i] = false;
535
available_ram[i] = prop.totalGlobalMem;
540
// check whether each GPU is running a graphics app (assume yes)
541
// return true if there's been a change since last time
543
// For each of the `count` device instances, query the driver's
// kernel-execution-timeout attribute and update running_graphics_app[]
// when the derived value differs from the stored one.
bool COPROC_CUDA::check_running_graphics_app() {
546
for (j=0; j<count; j++) {
548
int device, kernel_timeout;
549
// NOTE(review): the loop index j is passed as the device ordinal here,
// whereas get_available_ram() passes device_nums[i] - confirm which is intended.
retval = (*__cuDeviceGet)(&device, j);
551
retval = (*__cuDeviceGetAttribute)(&kernel_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, device);
552
// the query succeeded (retval == 0) and the device reports no kernel timeout
if (!retval && !kernel_timeout) {
556
// store the new value only when it differs from the previous one
if (new_val != running_graphics_app[j]) {
559
running_graphics_app[j] = new_val;
564
////////////////// ATI STARTS HERE /////////////////
567
// http://developer.amd.com/gpu_assets/Stream_Computing_User_Guide.pdf
568
// ?? why don't they have HTML docs??
571
// Function-pointer types (using the __stdcall calling convention) for the
// CAL runtime entry points that are resolved by name at run time.
// Each type is used for one global pointer:
//   ATI_ATTRIBS     calDeviceGetAttribs   ATI_CLOSE       calShutdown
//   ATI_GDC         calDeviceGetCount     ATI_GDI         calInit
//   ATI_INFO        calDeviceGetInfo      ATI_VER         calGetVersion
//   ATI_STATUS      calDeviceGetStatus    ATI_DEVICEOPEN  calDeviceOpen
//   ATI_DEVICECLOSE calDeviceClose
typedef int (__stdcall *ATI_ATTRIBS) (CALdeviceattribs *attribs, CALuint ordinal);
572
typedef int (__stdcall *ATI_CLOSE)(void);
573
typedef int (__stdcall *ATI_GDC)(CALuint *numDevices);
574
typedef int (__stdcall *ATI_GDI)(void);
575
typedef int (__stdcall *ATI_INFO) (CALdeviceinfo *info, CALuint ordinal);
576
typedef int (__stdcall *ATI_VER) (CALuint *cal_major, CALuint *cal_minor, CALuint *cal_imp);
577
typedef int (__stdcall *ATI_STATUS) (CALdevicestatus*, CALdevice);
578
typedef int (__stdcall *ATI_DEVICEOPEN) (CALdevice*, CALuint);
579
typedef int (__stdcall *ATI_DEVICECLOSE) (CALdevice);
581
// Global pointers to the CAL runtime entry points. They start out NULL and
// are filled in by COPROC_ATI::get() using GetProcAddress(); a pointer
// that is still NULL means the symbol was not resolved.
ATI_ATTRIBS __calDeviceGetAttribs = NULL;
582
ATI_CLOSE __calShutdown = NULL;
583
ATI_GDC __calDeviceGetCount = NULL;
584
ATI_GDI __calInit = NULL;
585
ATI_INFO __calDeviceGetInfo = NULL;
586
ATI_VER __calGetVersion = NULL;
587
ATI_STATUS __calDeviceGetStatus = NULL;
588
ATI_DEVICEOPEN __calDeviceOpen = NULL;
589
ATI_DEVICECLOSE __calDeviceClose = NULL;
594
// CAL runtime entry points declared as plain function pointers
// (no __stdcall, no explicit initializer). COPROC_ATI::get() assigns them
// from dlsym() lookups.
int (*__calGetVersion)(CALuint*, CALuint*, CALuint*);
595
int (*__calDeviceGetCount)(CALuint*);
596
int (*__calDeviceGetAttribs)(CALdeviceattribs*, CALuint);
597
int (*__calShutdown)();
598
int (*__calDeviceGetInfo)(CALdeviceinfo*, CALuint);
599
int (*__calDeviceGetStatus)(CALdevicestatus*, CALdevice);
600
int (*__calDeviceOpen)(CALdevice*, CALuint);
601
int (*__calDeviceClose)(CALdevice);
605
// Detect ATI/AMD GPUs through the CAL runtime library, which is loaded
// dynamically and whose entry points are looked up by name.
//   coprocs      list that receives the resulting COPROC_ATI object
//   descs        receives one description line per GPU found
//   warnings     receives diagnostic messages
//   ignore_devs  device numbers to describe as ignored and to leave out
//                of the device_nums[] list
void COPROC_ATI::get(COPROCS& coprocs,
606
vector<string>& descs, vector<string>& warnings, vector<int>& ignore_devs
608
CALuint numDevices, cal_major, cal_minor, cal_imp;
611
CALdeviceattribs attribs;
613
// record which of the two runtime libraries was loaded
bool amdrt_detected = false;
614
bool atirt_detected = false;
617
// the attribs struct carries its own size, set before it is passed to CAL
attribs.struct_size = sizeof(CALdeviceattribs);
624
// library names: a "64" pair and a plain pair (presumably chosen by
// target word size)
const char* atilib_name = "aticalrt64.dll";
625
const char* amdlib_name = "amdcalrt64.dll";
627
const char* atilib_name = "aticalrt.dll";
628
const char* amdlib_name = "amdcalrt.dll";
631
// try the "ati" library name first, then the "amd" one
HINSTANCE callib = LoadLibrary(atilib_name);
633
atirt_detected = true;
635
callib = LoadLibrary(amdlib_name);
637
amdrt_detected = true;
642
warnings.push_back("No ATI library found.");
646
// resolve each entry point; a failed lookup leaves the pointer NULL
__calInit = (ATI_GDI)GetProcAddress(callib, "calInit" );
647
__calGetVersion = (ATI_VER)GetProcAddress(callib, "calGetVersion" );
648
__calDeviceGetCount = (ATI_GDC)GetProcAddress(callib, "calDeviceGetCount" );
649
__calDeviceGetAttribs =(ATI_ATTRIBS)GetProcAddress(callib, "calDeviceGetAttribs" );
650
__calShutdown = (ATI_CLOSE)GetProcAddress(callib, "calShutdown" );
651
__calDeviceGetInfo = (ATI_INFO)GetProcAddress(callib, "calDeviceGetInfo" );
652
__calDeviceGetStatus = (ATI_STATUS)GetProcAddress(callib, "calDeviceGetStatus" );
653
__calDeviceOpen = (ATI_DEVICEOPEN)GetProcAddress(callib, "calDeviceOpen" );
654
__calDeviceClose = (ATI_DEVICECLOSE)GetProcAddress(callib, "calDeviceClose" );
660
// --- load the runtime library with dlopen/dlsym ---
callib = dlopen("libaticalrt.so", RTLD_NOW);
662
warnings.push_back("No ATI library found");
666
atirt_detected = true;
668
__calInit = (int(*)()) dlsym(callib, "calInit");
669
__calGetVersion = (int(*)(CALuint*, CALuint*, CALuint*)) dlsym(callib, "calGetVersion");
670
__calDeviceGetCount = (int(*)(CALuint*)) dlsym(callib, "calDeviceGetCount");
671
__calDeviceGetAttribs = (int(*)(CALdeviceattribs*, CALuint)) dlsym(callib, "calDeviceGetAttribs");
672
__calShutdown = (int(*)()) dlsym(callib, "calShutdown");
673
__calDeviceGetInfo = (int(*)(CALdeviceinfo*, CALuint)) dlsym(callib, "calDeviceGetInfo");
674
__calDeviceGetStatus = (int(*)(CALdevicestatus*, CALdevice)) dlsym(callib, "calDeviceGetStatus");
675
__calDeviceOpen = (int(*)(CALdevice*, CALuint)) dlsym(callib, "calDeviceOpen");
676
__calDeviceClose = (int(*)(CALdevice)) dlsym(callib, "calDeviceClose");
681
// --- verify that the needed entry points were resolved ---
warnings.push_back("calInit() missing from CAL library");
684
if (!__calGetVersion) {
685
warnings.push_back("calGetVersion() missing from CAL library");
688
if (!__calDeviceGetCount) {
689
warnings.push_back("calDeviceGetCount() missing from CAL library");
692
if (!__calDeviceGetAttribs) {
693
warnings.push_back("calDeviceGetAttribs() missing from CAL library");
696
if (!__calDeviceGetInfo) {
697
warnings.push_back("calDeviceGetInfo() missing from CAL library");
700
if (!__calDeviceGetStatus) {
701
warnings.push_back("calDeviceGetStatus() missing from CAL library");
704
if (!__calDeviceOpen) {
705
warnings.push_back("calDeviceOpen() missing from CAL library");
708
if (!__calDeviceClose) {
709
warnings.push_back("calDeviceClose() missing from CAL library");
713
// --- initialize CAL and query global information ---
// each call is compared against CAL_RESULT_OK; failures are reported
// with the numeric return code
retval = (*__calInit)();
714
if (retval != CAL_RESULT_OK) {
715
sprintf(buf, "calInit() returned %d", retval);
716
warnings.push_back(buf);
719
retval = (*__calDeviceGetCount)(&numDevices);
720
if (retval != CAL_RESULT_OK) {
721
sprintf(buf, "calDeviceGetCount() returned %d", retval);
722
warnings.push_back(buf);
725
retval = (*__calGetVersion)(&cal_major, &cal_minor, &cal_imp);
726
if (retval != CAL_RESULT_OK) {
727
sprintf(buf, "calGetVersion() returned %d", retval);
728
warnings.push_back(buf);
733
warnings.push_back("No usable CAL devices found");
739
// --- collect info and attributes for every device ---
vector<COPROC_ATI> gpus;
740
for (CALuint i=0; i<numDevices; i++) {
741
retval = (*__calDeviceGetInfo)(&info, i);
742
if (retval != CAL_RESULT_OK) {
743
sprintf(buf, "calDeviceGetInfo() returned %d", retval);
744
warnings.push_back(buf);
747
retval = (*__calDeviceGetAttribs)(&attribs, i);
748
if (retval != CAL_RESULT_OK) {
749
sprintf(buf, "calDeviceGetAttribs() returned %d", retval);
750
warnings.push_back(buf);
753
// map the CAL target code to a marketing name; for two targets the
// SIMD count and wavefront size are overridden as well
switch ((int)attribs.target) {
755
gpu_name="ATI Radeon HD 2900 (RV600)";
758
gpu_name="ATI Radeon HD 2300/2400/3200 (RV610)";
759
attribs.numberOfSIMD=1; // set correct values (reported wrong by driver)
760
attribs.wavefrontSize=32;
763
gpu_name="ATI Radeon HD 2600 (RV630)";
764
// set correct values (reported wrong by driver)
765
attribs.numberOfSIMD=3;
766
attribs.wavefrontSize=32;
769
gpu_name="ATI Radeon HD 3800 (RV670)";
772
gpu_name="ATI Radeon HD 4350/4550 (R710)";
775
gpu_name="ATI Radeon HD 4600 series (R730)";
778
gpu_name="ATI Radeon (RV700 class)";
781
gpu_name="ATI Radeon HD 4700/4800 (RV740/RV770)";
784
gpu_name="ATI Radeon HD5800 series (Cypress)";
787
gpu_name="ATI Radeon HD5700 series (Juniper)";
790
gpu_name="ATI Radeon HD5x00 series (Redwood)";
793
gpu_name="ATI Radeon HD5x00 series (Cedar)";
796
gpu_name="ATI unknown";
799
// copy the (possibly corrected) attributes and identification into the
// per-device record
cc.attribs = attribs;
801
strcpy(cc.name, gpu_name.c_str());
802
// the version string is "major.minor.imp" as returned by calGetVersion
sprintf(cc.version, "%d.%d.%d", cal_major, cal_minor, cal_imp);
803
cc.amdrt_detected = amdrt_detected;
804
cc.atirt_detected = atirt_detected;
809
// TODO: count only GPUs with as much memory as fastest one,
810
// same as for NVIDIA
814
// --- describe every device and pick the one with the highest peak FLOPS ---
for (unsigned int i=0; i<gpus.size(); i++) {
815
char buf[256], buf2[256];
816
gpus[i].description(buf);
817
if (in_vector(gpus[i].device_num, ignore_devs)) {
818
sprintf(buf2, "ATI GPU %d (ignored by config): %s", gpus[i].device_num, buf);
823
} else if (gpus[i].peak_flops() > best.peak_flops()) {
826
sprintf(buf2, "ATI GPU %d: %s", gpus[i].device_num, buf);
828
descs.push_back(buf2);
831
// --- list every non-ignored device in best.device_nums ---
for (unsigned int i=0; i<gpus.size(); i++) {
832
if (in_vector(gpus[i].device_num, ignore_devs)) continue;
833
// NOTE(review): this stores the loop index i, whereas the NVIDIA code
// stores gpus[i].device_num - equivalent only if device_num == index.
best.device_nums[best.count] = i;
837
// hand a heap-allocated COPROC_ATI to the coprocessor list
COPROC_ATI* ccp = new COPROC_ATI;
839
strcpy(ccp->type, "ATI");
840
coprocs.coprocs.push_back(ccp);
842
// shut down, otherwise Lenovo won't be able to switch to low-power GPU
844
retval = (*__calShutdown)();
847
// Build a COPROC_ATI filled with made-up values and append it to
// coprocs.coprocs.
//   coprocs  list that receives the new object
//   ram      amount of RAM; stored in attribs.localRAM after dividing by MEGA
//   count    number of device_nums[] slots to fill (with 0..count-1)
COPROC_ATI* fake_ati(COPROCS& coprocs, double ram, int count) {
848
COPROC_ATI* cc = new COPROC_ATI;
849
strcpy(cc->type, "ATI");
850
// placeholder version and name strings
strcpy(cc->version, "1.4.3");
851
strcpy(cc->name, "foobar");
853
// zero both CAL structs, then set only the fields given values below
memset(&cc->attribs, 0, sizeof(cc->attribs));
854
memset(&cc->info, 0, sizeof(cc->info));
855
cc->attribs.localRAM = (int)(ram/MEGA);
856
cc->attribs.numberOfSIMD = 32;
857
cc->attribs.wavefrontSize = 32;
858
cc->attribs.engineClock = 50;
859
// device numbers are simply 0..count-1
for (int i=0; i<count; i++) {
860
cc->device_nums[i] = i;
862
coprocs.coprocs.push_back(cc);
866
// Fill available_ram[] and available_ram_unknown[] for each of the `count`
// device instances.
// With MEASURE_AVAILABLE_RAM defined, each device is opened through CAL
// and its status is queried; the final loop instead reports the device's
// local RAM (attribs.localRAM scaled by MEGA) as available.
void COPROC_ATI::get_available_ram() {
867
#ifdef MEASURE_AVAILABLE_RAM
872
// the status struct carries its own size, set before it is passed to CAL
st.struct_size = sizeof(CALdevicestatus);
874
// avoid crash if faked GPU
876
for (i=0; i<count; i++) {
877
available_ram[i] = available_ram_fake[i];
878
available_ram_unknown[i] = false;
882
// pessimistic defaults for every device; overwritten once its query succeeds
for (i=0; i<count; i++) {
883
available_ram[i] = 0;
884
available_ram_unknown[i] = true;
886
// CAL is initialized again here; COPROC_ATI::get() calls calShutdown()
// when it finishes
retval = (*__calInit)();
888
if (log_flags.coproc_debug) {
889
msg_printf(0, MSG_INFO,
890
"[coproc] calInit() returned %d", retval
896
for (i=0; i<count; i++) {
897
int devnum = device_nums[i];
898
// step 1: open the device
retval = (*__calDeviceOpen)(&dev, devnum);
900
if (log_flags.coproc_debug) {
901
msg_printf(0, MSG_INFO,
902
"[coproc] calDeviceOpen(%d) returned %d", devnum, retval
907
// step 2: query its status
retval = (*__calDeviceGetStatus)(&st, dev);
909
if (log_flags.coproc_debug) {
910
msg_printf(0, MSG_INFO,
911
"[coproc] calDeviceGetStatus(%d) returned %d",
915
// the device is closed on both paths that follow calDeviceGetStatus
(*__calDeviceClose)(dev);
918
// availLocalRAM is scaled by MEGA (presumably reported in MB - confirm)
available_ram[i] = st.availLocalRAM*MEGA;
919
available_ram_unknown[i] = false;
920
(*__calDeviceClose)(dev);
924
// without measurement: treat all of the device's local RAM as available
for (int i=0; i<count; i++) {
925
available_ram_unknown[i] = false;
926
available_ram[i] = attribs.localRAM*MEGA;