/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 */
28
#include <linux/seq_file.h>
29
#include <linux/slab.h>
32
#include "radeon_drm.h"
33
#include "radeon_reg.h"
35
#include "radeon_asic.h"
42
#include <linux/firmware.h>
43
#include <linux/platform_device.h>
44
#include <linux/module.h>
46
#include "r100_reg_safe.h"
47
#include "rn50_reg_safe.h"
50
#define FIRMWARE_R100 "radeon/R100_cp.bin"
51
#define FIRMWARE_R200 "radeon/R200_cp.bin"
52
#define FIRMWARE_R300 "radeon/R300_cp.bin"
53
#define FIRMWARE_R420 "radeon/R420_cp.bin"
54
#define FIRMWARE_RS690 "radeon/RS690_cp.bin"
55
#define FIRMWARE_RS600 "radeon/RS600_cp.bin"
56
#define FIRMWARE_R520 "radeon/R520_cp.bin"
58
MODULE_FIRMWARE(FIRMWARE_R100);
59
MODULE_FIRMWARE(FIRMWARE_R200);
60
MODULE_FIRMWARE(FIRMWARE_R300);
61
MODULE_FIRMWARE(FIRMWARE_R420);
62
MODULE_FIRMWARE(FIRMWARE_RS690);
63
MODULE_FIRMWARE(FIRMWARE_RS600);
64
MODULE_FIRMWARE(FIRMWARE_R520);
66
#include "r100_track.h"
68
/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 */
72
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
73
struct radeon_cs_packet *pkt,
80
struct radeon_cs_reloc *reloc;
83
r = r100_cs_packet_next_reloc(p, &reloc);
85
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
87
r100_cs_dump_packet(p, pkt);
90
value = radeon_get_ib_value(p, idx);
91
tmp = value & 0x003fffff;
92
tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
94
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
95
tile_flags |= RADEON_DST_TILE_MACRO;
96
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
97
if (reg == RADEON_SRC_PITCH_OFFSET) {
98
DRM_ERROR("Cannot src blit from microtiled surface\n");
99
r100_cs_dump_packet(p, pkt);
102
tile_flags |= RADEON_DST_TILE_MICRO;
106
p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
110
int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
111
struct radeon_cs_packet *pkt,
115
struct radeon_cs_reloc *reloc;
116
struct r100_cs_track *track;
118
volatile uint32_t *ib;
122
track = (struct r100_cs_track *)p->track;
123
c = radeon_get_ib_value(p, idx++) & 0x1F;
125
DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
127
r100_cs_dump_packet(p, pkt);
130
track->num_arrays = c;
131
for (i = 0; i < (c - 1); i+=2, idx+=3) {
132
r = r100_cs_packet_next_reloc(p, &reloc);
134
DRM_ERROR("No reloc for packet3 %d\n",
136
r100_cs_dump_packet(p, pkt);
139
idx_value = radeon_get_ib_value(p, idx);
140
ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
142
track->arrays[i + 0].esize = idx_value >> 8;
143
track->arrays[i + 0].robj = reloc->robj;
144
track->arrays[i + 0].esize &= 0x7F;
145
r = r100_cs_packet_next_reloc(p, &reloc);
147
DRM_ERROR("No reloc for packet3 %d\n",
149
r100_cs_dump_packet(p, pkt);
152
ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
153
track->arrays[i + 1].robj = reloc->robj;
154
track->arrays[i + 1].esize = idx_value >> 24;
155
track->arrays[i + 1].esize &= 0x7F;
158
r = r100_cs_packet_next_reloc(p, &reloc);
160
DRM_ERROR("No reloc for packet3 %d\n",
162
r100_cs_dump_packet(p, pkt);
165
idx_value = radeon_get_ib_value(p, idx);
166
ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
167
track->arrays[i + 0].robj = reloc->robj;
168
track->arrays[i + 0].esize = idx_value >> 8;
169
track->arrays[i + 0].esize &= 0x7F;
174
/* Arm the pageflip interrupt for @crtc before a flip is queued. */
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
	/* enable the pflip int */
	radeon_irq_kms_pflip_irq_get(rdev, crtc);
}
180
/* Release the pageflip interrupt for @crtc once the flip completed. */
void r100_post_page_flip(struct radeon_device *rdev, int crtc)
{
	/* disable the pflip int */
	radeon_irq_kms_pflip_irq_put(rdev, crtc);
}
186
/* Program a new scanout base for @crtc_id and wait (bounded by
 * rdev->usec_timeout) for the hardware to latch it.
 * Returns the current GUI_TRIG_OFFSET (update pending) status bit.
 */
u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
	int i;

	/* Lock the graphics update lock */
	/* update the scanout addresses */
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);

	/* Return current update_pending status: */
	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
}
212
void r100_pm_get_dynpm_state(struct radeon_device *rdev)
215
rdev->pm.dynpm_can_upclock = true;
216
rdev->pm.dynpm_can_downclock = true;
218
switch (rdev->pm.dynpm_planned_action) {
219
case DYNPM_ACTION_MINIMUM:
220
rdev->pm.requested_power_state_index = 0;
221
rdev->pm.dynpm_can_downclock = false;
223
case DYNPM_ACTION_DOWNCLOCK:
224
if (rdev->pm.current_power_state_index == 0) {
225
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
226
rdev->pm.dynpm_can_downclock = false;
228
if (rdev->pm.active_crtc_count > 1) {
229
for (i = 0; i < rdev->pm.num_power_states; i++) {
230
if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
232
else if (i >= rdev->pm.current_power_state_index) {
233
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
236
rdev->pm.requested_power_state_index = i;
241
rdev->pm.requested_power_state_index =
242
rdev->pm.current_power_state_index - 1;
244
/* don't use the power state if crtcs are active and no display flag is set */
245
if ((rdev->pm.active_crtc_count > 0) &&
246
(rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
247
RADEON_PM_MODE_NO_DISPLAY)) {
248
rdev->pm.requested_power_state_index++;
251
case DYNPM_ACTION_UPCLOCK:
252
if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
253
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
254
rdev->pm.dynpm_can_upclock = false;
256
if (rdev->pm.active_crtc_count > 1) {
257
for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
258
if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
260
else if (i <= rdev->pm.current_power_state_index) {
261
rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
264
rdev->pm.requested_power_state_index = i;
269
rdev->pm.requested_power_state_index =
270
rdev->pm.current_power_state_index + 1;
273
case DYNPM_ACTION_DEFAULT:
274
rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
275
rdev->pm.dynpm_can_upclock = false;
277
case DYNPM_ACTION_NONE:
279
DRM_ERROR("Requested mode for not defined action\n");
282
/* only one clock mode per power state */
283
rdev->pm.requested_clock_mode_index = 0;
285
DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
286
rdev->pm.power_state[rdev->pm.requested_power_state_index].
287
clock_info[rdev->pm.requested_clock_mode_index].sclk,
288
rdev->pm.power_state[rdev->pm.requested_power_state_index].
289
clock_info[rdev->pm.requested_clock_mode_index].mclk,
290
rdev->pm.power_state[rdev->pm.requested_power_state_index].
294
void r100_pm_init_profile(struct radeon_device *rdev)
297
rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
298
rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
299
rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
300
rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
302
rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
303
rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
304
rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
305
rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
307
rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
308
rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
309
rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
310
rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
312
rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
313
rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
314
rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
315
rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
317
rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
318
rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
319
rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
320
rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
322
rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
323
rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
324
rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
325
rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
327
rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
328
rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
329
rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
330
rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
333
void r100_pm_misc(struct radeon_device *rdev)
335
int requested_index = rdev->pm.requested_power_state_index;
336
struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
337
struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
338
u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
340
if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
341
if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
342
tmp = RREG32(voltage->gpio.reg);
343
if (voltage->active_high)
344
tmp |= voltage->gpio.mask;
346
tmp &= ~(voltage->gpio.mask);
347
WREG32(voltage->gpio.reg, tmp);
349
udelay(voltage->delay);
351
tmp = RREG32(voltage->gpio.reg);
352
if (voltage->active_high)
353
tmp &= ~voltage->gpio.mask;
355
tmp |= voltage->gpio.mask;
356
WREG32(voltage->gpio.reg, tmp);
358
udelay(voltage->delay);
362
sclk_cntl = RREG32_PLL(SCLK_CNTL);
363
sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
364
sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
365
sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
366
sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
367
if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
368
sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
369
if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
370
sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
372
sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
373
if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
374
sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
375
else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
376
sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
378
sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
380
if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
381
sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
382
if (voltage->delay) {
383
sclk_more_cntl |= VOLTAGE_DROP_SYNC;
384
switch (voltage->delay) {
386
sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
389
sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
392
sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
395
sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
399
sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
401
sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
403
if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
404
sclk_cntl &= ~FORCE_HDP;
406
sclk_cntl |= FORCE_HDP;
408
WREG32_PLL(SCLK_CNTL, sclk_cntl);
409
WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
410
WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
413
if ((rdev->flags & RADEON_IS_PCIE) &&
414
!(rdev->flags & RADEON_IS_IGP) &&
415
rdev->asic->set_pcie_lanes &&
417
rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
418
radeon_set_pcie_lanes(rdev,
420
DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
424
void r100_pm_prepare(struct radeon_device *rdev)
426
struct drm_device *ddev = rdev->ddev;
427
struct drm_crtc *crtc;
428
struct radeon_crtc *radeon_crtc;
431
/* disable any active CRTCs */
432
list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
433
radeon_crtc = to_radeon_crtc(crtc);
434
if (radeon_crtc->enabled) {
435
if (radeon_crtc->crtc_id) {
436
tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
437
tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
438
WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
440
tmp = RREG32(RADEON_CRTC_GEN_CNTL);
441
tmp |= RADEON_CRTC_DISP_REQ_EN_B;
442
WREG32(RADEON_CRTC_GEN_CNTL, tmp);
448
void r100_pm_finish(struct radeon_device *rdev)
450
struct drm_device *ddev = rdev->ddev;
451
struct drm_crtc *crtc;
452
struct radeon_crtc *radeon_crtc;
455
/* enable any active CRTCs */
456
list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
457
radeon_crtc = to_radeon_crtc(crtc);
458
if (radeon_crtc->enabled) {
459
if (radeon_crtc->crtc_id) {
460
tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
461
tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
462
WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
464
tmp = RREG32(RADEON_CRTC_GEN_CNTL);
465
tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
466
WREG32(RADEON_CRTC_GEN_CNTL, tmp);
472
bool r100_gui_idle(struct radeon_device *rdev)
474
if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
480
/* hpd for digital panel detect/disconnect */
481
bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
483
bool connected = false;
487
if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
491
if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
500
void r100_hpd_set_polarity(struct radeon_device *rdev,
501
enum radeon_hpd_id hpd)
504
bool connected = r100_hpd_sense(rdev, hpd);
508
tmp = RREG32(RADEON_FP_GEN_CNTL);
510
tmp &= ~RADEON_FP_DETECT_INT_POL;
512
tmp |= RADEON_FP_DETECT_INT_POL;
513
WREG32(RADEON_FP_GEN_CNTL, tmp);
516
tmp = RREG32(RADEON_FP2_GEN_CNTL);
518
tmp &= ~RADEON_FP2_DETECT_INT_POL;
520
tmp |= RADEON_FP2_DETECT_INT_POL;
521
WREG32(RADEON_FP2_GEN_CNTL, tmp);
528
void r100_hpd_init(struct radeon_device *rdev)
530
struct drm_device *dev = rdev->ddev;
531
struct drm_connector *connector;
533
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
534
struct radeon_connector *radeon_connector = to_radeon_connector(connector);
535
switch (radeon_connector->hpd.hpd) {
537
rdev->irq.hpd[0] = true;
540
rdev->irq.hpd[1] = true;
545
radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
547
if (rdev->irq.installed)
551
void r100_hpd_fini(struct radeon_device *rdev)
553
struct drm_device *dev = rdev->ddev;
554
struct drm_connector *connector;
556
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
557
struct radeon_connector *radeon_connector = to_radeon_connector(connector);
558
switch (radeon_connector->hpd.hpd) {
560
rdev->irq.hpd[0] = false;
563
rdev->irq.hpd[1] = false;
574
/* PCI GART TLB flush — intentionally empty on r100-class hardware. */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do somethings here ? */
	/* It seems hw only cache one entry so we should discard this
	 * entry otherwise if first GPU GART read hit this entry it
	 * could end up in wrong address. */
}
582
int r100_pci_gart_init(struct radeon_device *rdev)
586
if (rdev->gart.ptr) {
587
WARN(1, "R100 PCI GART already initialized\n");
590
/* Initialize common gart structure */
591
r = radeon_gart_init(rdev);
594
rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
595
rdev->asic->gart_tlb_flush = &r100_pci_gart_tlb_flush;
596
rdev->asic->gart_set_page = &r100_pci_gart_set_page;
597
return radeon_gart_table_ram_alloc(rdev);
600
/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
601
void r100_enable_bm(struct radeon_device *rdev)
604
/* Enable bus mastering */
605
tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
606
WREG32(RADEON_BUS_CNTL, tmp);
609
int r100_pci_gart_enable(struct radeon_device *rdev)
613
radeon_gart_restore(rdev);
614
/* discard memory request outside of configured range */
615
tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
616
WREG32(RADEON_AIC_CNTL, tmp);
617
/* set address range for PCI address translate */
618
WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
619
WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
620
/* set PCI GART page-table base address */
621
WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
622
tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
623
WREG32(RADEON_AIC_CNTL, tmp);
624
r100_pci_gart_tlb_flush(rdev);
625
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
626
(unsigned)(rdev->mc.gtt_size >> 20),
627
(unsigned long long)rdev->gart.table_addr);
628
rdev->gart.ready = true;
632
void r100_pci_gart_disable(struct radeon_device *rdev)
636
/* discard memory request outside of configured range */
637
tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
638
WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
639
WREG32(RADEON_AIC_LO_ADDR, 0);
640
WREG32(RADEON_AIC_HI_ADDR, 0);
643
/* Write one GART page-table entry (little-endian low 32 bits of @addr).
 * Fix: the bound check must reject i == num_gpu_pages too — the table
 * holds exactly num_gpu_pages u32 entries, so the old "i >" test allowed
 * a one-past-the-end write.
 */
int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	u32 *gtt = rdev->gart.ptr;

	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	gtt[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}
654
/* Tear down the PCI GART: release entries, disable translation, free
 * the system-RAM page table.
 */
void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}
661
int r100_irq_set(struct radeon_device *rdev)
665
if (!rdev->irq.installed) {
666
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
667
WREG32(R_000040_GEN_INT_CNTL, 0);
670
if (rdev->irq.sw_int) {
671
tmp |= RADEON_SW_INT_ENABLE;
673
if (rdev->irq.gui_idle) {
674
tmp |= RADEON_GUI_IDLE_MASK;
676
if (rdev->irq.crtc_vblank_int[0] ||
677
rdev->irq.pflip[0]) {
678
tmp |= RADEON_CRTC_VBLANK_MASK;
680
if (rdev->irq.crtc_vblank_int[1] ||
681
rdev->irq.pflip[1]) {
682
tmp |= RADEON_CRTC2_VBLANK_MASK;
684
if (rdev->irq.hpd[0]) {
685
tmp |= RADEON_FP_DETECT_MASK;
687
if (rdev->irq.hpd[1]) {
688
tmp |= RADEON_FP2_DETECT_MASK;
690
WREG32(RADEON_GEN_INT_CNTL, tmp);
694
void r100_irq_disable(struct radeon_device *rdev)
698
WREG32(R_000040_GEN_INT_CNTL, 0);
699
/* Wait and acknowledge irq */
701
tmp = RREG32(R_000044_GEN_INT_STATUS);
702
WREG32(R_000044_GEN_INT_STATUS, tmp);
705
static uint32_t r100_irq_ack(struct radeon_device *rdev)
707
uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
708
uint32_t irq_mask = RADEON_SW_INT_TEST |
709
RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
710
RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
712
/* the interrupt works, but the status bit is permanently asserted */
713
if (rdev->irq.gui_idle && radeon_gui_idle(rdev)) {
714
if (!rdev->irq.gui_idle_acked)
715
irq_mask |= RADEON_GUI_IDLE_STAT;
719
WREG32(RADEON_GEN_INT_STATUS, irqs);
721
return irqs & irq_mask;
724
int r100_irq_process(struct radeon_device *rdev)
726
uint32_t status, msi_rearm;
727
bool queue_hotplug = false;
729
/* reset gui idle ack. the status bit is broken */
730
rdev->irq.gui_idle_acked = false;
732
status = r100_irq_ack(rdev);
736
if (rdev->shutdown) {
741
if (status & RADEON_SW_INT_TEST) {
742
radeon_fence_process(rdev);
744
/* gui idle interrupt */
745
if (status & RADEON_GUI_IDLE_STAT) {
746
rdev->irq.gui_idle_acked = true;
747
rdev->pm.gui_idle = true;
748
wake_up(&rdev->irq.idle_queue);
750
/* Vertical blank interrupts */
751
if (status & RADEON_CRTC_VBLANK_STAT) {
752
if (rdev->irq.crtc_vblank_int[0]) {
753
drm_handle_vblank(rdev->ddev, 0);
754
rdev->pm.vblank_sync = true;
755
wake_up(&rdev->irq.vblank_queue);
757
if (rdev->irq.pflip[0])
758
radeon_crtc_handle_flip(rdev, 0);
760
if (status & RADEON_CRTC2_VBLANK_STAT) {
761
if (rdev->irq.crtc_vblank_int[1]) {
762
drm_handle_vblank(rdev->ddev, 1);
763
rdev->pm.vblank_sync = true;
764
wake_up(&rdev->irq.vblank_queue);
766
if (rdev->irq.pflip[1])
767
radeon_crtc_handle_flip(rdev, 1);
769
if (status & RADEON_FP_DETECT_STAT) {
770
queue_hotplug = true;
773
if (status & RADEON_FP2_DETECT_STAT) {
774
queue_hotplug = true;
777
status = r100_irq_ack(rdev);
779
/* reset gui idle ack. the status bit is broken */
780
rdev->irq.gui_idle_acked = false;
782
schedule_work(&rdev->hotplug_work);
783
if (rdev->msi_enabled) {
784
switch (rdev->family) {
787
msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
788
WREG32(RADEON_AIC_CNTL, msi_rearm);
789
WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
792
msi_rearm = RREG32(RADEON_MSI_REARM_EN) & ~RV370_MSI_REARM_EN;
793
WREG32(RADEON_MSI_REARM_EN, msi_rearm);
794
WREG32(RADEON_MSI_REARM_EN, msi_rearm | RV370_MSI_REARM_EN);
801
u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
804
return RREG32(RADEON_CRTC_CRNT_FRAME);
806
return RREG32(RADEON_CRTC2_CRNT_FRAME);
809
/* Who ever call radeon_fence_emit should call ring_lock and ask
810
* for enough space (today caller are ib schedule and buffer move) */
811
void r100_fence_ring_emit(struct radeon_device *rdev,
812
struct radeon_fence *fence)
814
/* We have to make sure that caches are flushed before
815
* CPU might read something from VRAM. */
816
radeon_ring_write(rdev, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
817
radeon_ring_write(rdev, RADEON_RB3D_DC_FLUSH_ALL);
818
radeon_ring_write(rdev, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
819
radeon_ring_write(rdev, RADEON_RB3D_ZC_FLUSH_ALL);
820
/* Wait until IDLE & CLEAN */
821
radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
822
radeon_ring_write(rdev, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
823
radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
824
radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
825
RADEON_HDP_READ_BUFFER_INVALIDATE);
826
radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
827
radeon_ring_write(rdev, rdev->config.r100.hdp_cntl);
828
/* Emit fence sequence & fire IRQ */
829
radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
830
radeon_ring_write(rdev, fence->seq);
831
radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
832
radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
835
int r100_copy_blit(struct radeon_device *rdev,
838
unsigned num_gpu_pages,
839
struct radeon_fence *fence)
842
uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
844
uint32_t stride_pixels;
849
/* radeon limited to 16k stride */
850
stride_bytes &= 0x3fff;
851
/* radeon pitch is /64 */
852
pitch = stride_bytes / 64;
853
stride_pixels = stride_bytes / 4;
854
num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
856
/* Ask for enough room for blit + flush + fence */
857
ndw = 64 + (10 * num_loops);
858
r = radeon_ring_lock(rdev, ndw);
860
DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
863
while (num_gpu_pages > 0) {
864
cur_pages = num_gpu_pages;
865
if (cur_pages > 8191) {
868
num_gpu_pages -= cur_pages;
870
/* pages are in Y direction - height
871
page width in X direction - width */
872
radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
873
radeon_ring_write(rdev,
874
RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
875
RADEON_GMC_DST_PITCH_OFFSET_CNTL |
876
RADEON_GMC_SRC_CLIPPING |
877
RADEON_GMC_DST_CLIPPING |
878
RADEON_GMC_BRUSH_NONE |
879
(RADEON_COLOR_FORMAT_ARGB8888 << 8) |
880
RADEON_GMC_SRC_DATATYPE_COLOR |
882
RADEON_DP_SRC_SOURCE_MEMORY |
883
RADEON_GMC_CLR_CMP_CNTL_DIS |
884
RADEON_GMC_WR_MSK_DIS);
885
radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
886
radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
887
radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
888
radeon_ring_write(rdev, 0);
889
radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
890
radeon_ring_write(rdev, num_gpu_pages);
891
radeon_ring_write(rdev, num_gpu_pages);
892
radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
894
radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
895
radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
896
radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
897
radeon_ring_write(rdev,
898
RADEON_WAIT_2D_IDLECLEAN |
899
RADEON_WAIT_HOST_IDLECLEAN |
900
RADEON_WAIT_DMA_GUI_IDLE);
902
r = radeon_fence_emit(rdev, fence);
904
radeon_ring_unlock_commit(rdev);
908
static int r100_cp_wait_for_idle(struct radeon_device *rdev)
913
for (i = 0; i < rdev->usec_timeout; i++) {
914
tmp = RREG32(R_000E40_RBBM_STATUS);
915
if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
923
void r100_ring_start(struct radeon_device *rdev)
927
r = radeon_ring_lock(rdev, 2);
931
radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
932
radeon_ring_write(rdev,
933
RADEON_ISYNC_ANY2D_IDLE3D |
934
RADEON_ISYNC_ANY3D_IDLE2D |
935
RADEON_ISYNC_WAIT_IDLEGUI |
936
RADEON_ISYNC_CPSCRATCH_IDLEGUI);
937
radeon_ring_unlock_commit(rdev);
941
/* Load the microcode for the CP */
942
static int r100_cp_init_microcode(struct radeon_device *rdev)
944
struct platform_device *pdev;
945
const char *fw_name = NULL;
950
pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
953
printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
956
if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
957
(rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
958
(rdev->family == CHIP_RS200)) {
959
DRM_INFO("Loading R100 Microcode\n");
960
fw_name = FIRMWARE_R100;
961
} else if ((rdev->family == CHIP_R200) ||
962
(rdev->family == CHIP_RV250) ||
963
(rdev->family == CHIP_RV280) ||
964
(rdev->family == CHIP_RS300)) {
965
DRM_INFO("Loading R200 Microcode\n");
966
fw_name = FIRMWARE_R200;
967
} else if ((rdev->family == CHIP_R300) ||
968
(rdev->family == CHIP_R350) ||
969
(rdev->family == CHIP_RV350) ||
970
(rdev->family == CHIP_RV380) ||
971
(rdev->family == CHIP_RS400) ||
972
(rdev->family == CHIP_RS480)) {
973
DRM_INFO("Loading R300 Microcode\n");
974
fw_name = FIRMWARE_R300;
975
} else if ((rdev->family == CHIP_R420) ||
976
(rdev->family == CHIP_R423) ||
977
(rdev->family == CHIP_RV410)) {
978
DRM_INFO("Loading R400 Microcode\n");
979
fw_name = FIRMWARE_R420;
980
} else if ((rdev->family == CHIP_RS690) ||
981
(rdev->family == CHIP_RS740)) {
982
DRM_INFO("Loading RS690/RS740 Microcode\n");
983
fw_name = FIRMWARE_RS690;
984
} else if (rdev->family == CHIP_RS600) {
985
DRM_INFO("Loading RS600 Microcode\n");
986
fw_name = FIRMWARE_RS600;
987
} else if ((rdev->family == CHIP_RV515) ||
988
(rdev->family == CHIP_R520) ||
989
(rdev->family == CHIP_RV530) ||
990
(rdev->family == CHIP_R580) ||
991
(rdev->family == CHIP_RV560) ||
992
(rdev->family == CHIP_RV570)) {
993
DRM_INFO("Loading R500 Microcode\n");
994
fw_name = FIRMWARE_R520;
997
err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
998
platform_device_unregister(pdev);
1000
printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
1002
} else if (rdev->me_fw->size % 8) {
1004
"radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1005
rdev->me_fw->size, fw_name);
1007
release_firmware(rdev->me_fw);
1013
static void r100_cp_load_microcode(struct radeon_device *rdev)
1015
const __be32 *fw_data;
1018
if (r100_gui_wait_for_idle(rdev)) {
1019
printk(KERN_WARNING "Failed to wait GUI idle while "
1020
"programming pipes. Bad things might happen.\n");
1024
size = rdev->me_fw->size / 4;
1025
fw_data = (const __be32 *)&rdev->me_fw->data[0];
1026
WREG32(RADEON_CP_ME_RAM_ADDR, 0);
1027
for (i = 0; i < size; i += 2) {
1028
WREG32(RADEON_CP_ME_RAM_DATAH,
1029
be32_to_cpup(&fw_data[i]));
1030
WREG32(RADEON_CP_ME_RAM_DATAL,
1031
be32_to_cpup(&fw_data[i + 1]));
1036
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1041
unsigned pre_write_timer;
1042
unsigned pre_write_limit;
1043
unsigned indirect2_start;
1044
unsigned indirect1_start;
1048
if (r100_debugfs_cp_init(rdev)) {
1049
DRM_ERROR("Failed to register debugfs file for CP !\n");
1052
r = r100_cp_init_microcode(rdev);
1054
DRM_ERROR("Failed to load firmware!\n");
1059
/* Align ring size */
1060
rb_bufsz = drm_order(ring_size / 8);
1061
ring_size = (1 << (rb_bufsz + 1)) * 4;
1062
r100_cp_load_microcode(rdev);
1063
r = radeon_ring_init(rdev, ring_size);
1067
/* Each time the cp read 1024 bytes (16 dword/quadword) update
1068
* the rptr copy in system ram */
1070
/* cp will read 128bytes at a time (4 dwords) */
1072
rdev->cp.align_mask = 16 - 1;
1073
/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
1074
pre_write_timer = 64;
1075
/* Force CP_RB_WPTR write if written more than one time before the
1078
pre_write_limit = 0;
1079
/* Setup the cp cache like this (cache size is 96 dwords) :
1081
* INDIRECT1 16 to 79
1082
* INDIRECT2 80 to 95
1083
* So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1084
* indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
1085
* indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1086
* Idea being that most of the gpu cmd will be through indirect1 buffer
1087
* so it gets the bigger cache.
1089
indirect2_start = 80;
1090
indirect1_start = 16;
1092
WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
1093
tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
1094
REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
1095
REG_SET(RADEON_MAX_FETCH, max_fetch));
1097
tmp |= RADEON_BUF_SWAP_32BIT;
1099
WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
1101
/* Set ring address */
1102
DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
1103
WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
1104
/* Force read & write ptr to 0 */
1105
WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
1106
WREG32(RADEON_CP_RB_RPTR_WR, 0);
1108
WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
1110
/* set the wb address whether it's enabled or not */
1111
WREG32(R_00070C_CP_RB_RPTR_ADDR,
1112
S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
1113
WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
1115
if (rdev->wb.enabled)
1116
WREG32(R_000770_SCRATCH_UMSK, 0xff);
1118
tmp |= RADEON_RB_NO_UPDATE;
1119
WREG32(R_000770_SCRATCH_UMSK, 0);
1122
WREG32(RADEON_CP_RB_CNTL, tmp);
1124
rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
1125
/* Set cp mode to bus mastering & enable cp*/
1126
WREG32(RADEON_CP_CSQ_MODE,
1127
REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
1128
REG_SET(RADEON_INDIRECT1_START, indirect1_start));
1129
WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
1130
WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
1131
WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
1132
radeon_ring_start(rdev);
1133
r = radeon_ring_test(rdev);
1135
DRM_ERROR("radeon: cp isn't working (%d).\n", r);
1138
rdev->cp.ready = true;
1139
radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1143
/* Shut down the command processor: wait for idle (best effort), disable
 * the CP, and free the ring.
 */
void r100_cp_fini(struct radeon_device *rdev)
{
	if (r100_cp_wait_for_idle(rdev)) {
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
	}
	/* Disable ring */
	r100_cp_disable(rdev);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}
1154
void r100_cp_disable(struct radeon_device *rdev)
1157
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1158
rdev->cp.ready = false;
1159
WREG32(RADEON_CP_CSQ_MODE, 0);
1160
WREG32(RADEON_CP_CSQ_CNTL, 0);
1161
WREG32(R_000770_SCRATCH_UMSK, 0);
1162
if (r100_gui_wait_for_idle(rdev)) {
1163
printk(KERN_WARNING "Failed to wait GUI idle while "
1164
"programming pipes. Bad things might happen.\n");
1168
void r100_cp_commit(struct radeon_device *rdev)
1170
WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
1171
(void)RREG32(RADEON_CP_RB_WPTR);
1178
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
1179
struct radeon_cs_packet *pkt,
1180
const unsigned *auth, unsigned n,
1181
radeon_packet0_check_t check)
1190
/* Check that register fall into register range
1191
* determined by the number of entry (n) in the
1192
* safe register bitmap.
1194
if (pkt->one_reg_wr) {
1195
if ((reg >> 7) > n) {
1199
if (((reg + (pkt->count << 2)) >> 7) > n) {
1203
for (i = 0; i <= pkt->count; i++, idx++) {
1205
m = 1 << ((reg >> 2) & 31);
1207
r = check(p, pkt, idx, reg);
1212
if (pkt->one_reg_wr) {
1213
if (!(auth[j] & m)) {
1223
void r100_cs_dump_packet(struct radeon_cs_parser *p,
1224
struct radeon_cs_packet *pkt)
1226
volatile uint32_t *ib;
1232
for (i = 0; i <= (pkt->count + 1); i++, idx++) {
1233
DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
1238
* r100_cs_packet_parse() - parse cp packet and point ib index to next packet
1239
* @parser: parser structure holding parsing context.
1240
* @pkt: where to store packet informations
1242
* Assume that chunk_ib_index is properly set. Will return -EINVAL
1243
* if packet is bigger than remaining ib size. or if packets is unknown.
1245
int r100_cs_packet_parse(struct radeon_cs_parser *p,
1246
struct radeon_cs_packet *pkt,
1249
struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
1252
if (idx >= ib_chunk->length_dw) {
1253
DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
1254
idx, ib_chunk->length_dw);
1257
header = radeon_get_ib_value(p, idx);
1259
pkt->type = CP_PACKET_GET_TYPE(header);
1260
pkt->count = CP_PACKET_GET_COUNT(header);
1261
switch (pkt->type) {
1263
pkt->reg = CP_PACKET0_GET_REG(header);
1264
pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
1267
pkt->opcode = CP_PACKET3_GET_OPCODE(header);
1273
DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
1276
if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
1277
DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
1278
pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
1285
* r100_cs_packet_next_vline() - parse userspace VLINE packet
1286
* @parser: parser structure holding parsing context.
1288
* Userspace sends a special sequence for VLINE waits.
1289
* PACKET0 - VLINE_START_END + value
1290
* PACKET0 - WAIT_UNTIL +_value
1291
* RELOC (P3) - crtc_id in reloc.
1293
* This function parses this and relocates the VLINE START END
1294
* and WAIT UNTIL packets to the correct crtc.
1295
* It also detects a switched off crtc and nulls out the
1296
* wait in that case.
1298
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
1300
struct drm_mode_object *obj;
1301
struct drm_crtc *crtc;
1302
struct radeon_crtc *radeon_crtc;
1303
struct radeon_cs_packet p3reloc, waitreloc;
1306
uint32_t header, h_idx, reg;
1307
volatile uint32_t *ib;
1311
/* parse the wait until */
1312
r = r100_cs_packet_parse(p, &waitreloc, p->idx);
1316
/* check its a wait until and only 1 count */
1317
if (waitreloc.reg != RADEON_WAIT_UNTIL ||
1318
waitreloc.count != 0) {
1319
DRM_ERROR("vline wait had illegal wait until segment\n");
1323
if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
1324
DRM_ERROR("vline wait had illegal wait until\n");
1328
/* jump over the NOP */
1329
r = r100_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
1334
p->idx += waitreloc.count + 2;
1335
p->idx += p3reloc.count + 2;
1337
header = radeon_get_ib_value(p, h_idx);
1338
crtc_id = radeon_get_ib_value(p, h_idx + 5);
1339
reg = CP_PACKET0_GET_REG(header);
1340
obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
1342
DRM_ERROR("cannot find crtc %d\n", crtc_id);
1345
crtc = obj_to_crtc(obj);
1346
radeon_crtc = to_radeon_crtc(crtc);
1347
crtc_id = radeon_crtc->crtc_id;
1349
if (!crtc->enabled) {
1350
/* if the CRTC isn't enabled - we need to nop out the wait until */
1351
ib[h_idx + 2] = PACKET2(0);
1352
ib[h_idx + 3] = PACKET2(0);
1353
} else if (crtc_id == 1) {
1355
case AVIVO_D1MODE_VLINE_START_END:
1356
header &= ~R300_CP_PACKET0_REG_MASK;
1357
header |= AVIVO_D2MODE_VLINE_START_END >> 2;
1359
case RADEON_CRTC_GUI_TRIG_VLINE:
1360
header &= ~R300_CP_PACKET0_REG_MASK;
1361
header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
1364
DRM_ERROR("unknown crtc reloc\n");
1368
ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
1375
* r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
1376
* @parser: parser structure holding parsing context.
1377
* @data: pointer to relocation data
1378
* @offset_start: starting offset
1379
* @offset_mask: offset mask (to align start offset on)
1380
* @reloc: reloc informations
1382
* Check next packet is relocation packet3, do bo validation and compute
1383
* GPU offset using the provided start.
1385
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
1386
struct radeon_cs_reloc **cs_reloc)
1388
struct radeon_cs_chunk *relocs_chunk;
1389
struct radeon_cs_packet p3reloc;
1393
if (p->chunk_relocs_idx == -1) {
1394
DRM_ERROR("No relocation chunk !\n");
1398
relocs_chunk = &p->chunks[p->chunk_relocs_idx];
1399
r = r100_cs_packet_parse(p, &p3reloc, p->idx);
1403
p->idx += p3reloc.count + 2;
1404
if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1405
DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1407
r100_cs_dump_packet(p, &p3reloc);
1410
idx = radeon_get_ib_value(p, p3reloc.idx + 1);
1411
if (idx >= relocs_chunk->length_dw) {
1412
DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1413
idx, relocs_chunk->length_dw);
1414
r100_cs_dump_packet(p, &p3reloc);
1417
/* FIXME: we assume reloc size is 4 dwords */
1418
*cs_reloc = p->relocs_ptr[(idx / 4)];
1422
static int r100_get_vtx_size(uint32_t vtx_fmt)
1426
/* ordered according to bits in spec */
1427
if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1429
if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1431
if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1433
if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1435
if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1437
if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1439
if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1441
if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1443
if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1445
if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1447
if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1449
if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1451
if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1453
if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1455
if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1458
if (vtx_fmt & (0x7 << 15))
1459
vtx_size += (vtx_fmt >> 15) & 0x7;
1460
if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1462
if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1464
if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1466
if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1468
if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1470
if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1475
static int r100_packet0_check(struct radeon_cs_parser *p,
1476
struct radeon_cs_packet *pkt,
1477
unsigned idx, unsigned reg)
1479
struct radeon_cs_reloc *reloc;
1480
struct r100_cs_track *track;
1481
volatile uint32_t *ib;
1489
track = (struct r100_cs_track *)p->track;
1491
idx_value = radeon_get_ib_value(p, idx);
1494
case RADEON_CRTC_GUI_TRIG_VLINE:
1495
r = r100_cs_packet_parse_vline(p);
1497
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1499
r100_cs_dump_packet(p, pkt);
1503
/* FIXME: only allow PACKET3 blit? easier to check for out of
1505
case RADEON_DST_PITCH_OFFSET:
1506
case RADEON_SRC_PITCH_OFFSET:
1507
r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1511
case RADEON_RB3D_DEPTHOFFSET:
1512
r = r100_cs_packet_next_reloc(p, &reloc);
1514
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1516
r100_cs_dump_packet(p, pkt);
1519
track->zb.robj = reloc->robj;
1520
track->zb.offset = idx_value;
1521
track->zb_dirty = true;
1522
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1524
case RADEON_RB3D_COLOROFFSET:
1525
r = r100_cs_packet_next_reloc(p, &reloc);
1527
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1529
r100_cs_dump_packet(p, pkt);
1532
track->cb[0].robj = reloc->robj;
1533
track->cb[0].offset = idx_value;
1534
track->cb_dirty = true;
1535
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1537
case RADEON_PP_TXOFFSET_0:
1538
case RADEON_PP_TXOFFSET_1:
1539
case RADEON_PP_TXOFFSET_2:
1540
i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1541
r = r100_cs_packet_next_reloc(p, &reloc);
1543
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1545
r100_cs_dump_packet(p, pkt);
1548
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1549
track->textures[i].robj = reloc->robj;
1550
track->tex_dirty = true;
1552
case RADEON_PP_CUBIC_OFFSET_T0_0:
1553
case RADEON_PP_CUBIC_OFFSET_T0_1:
1554
case RADEON_PP_CUBIC_OFFSET_T0_2:
1555
case RADEON_PP_CUBIC_OFFSET_T0_3:
1556
case RADEON_PP_CUBIC_OFFSET_T0_4:
1557
i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1558
r = r100_cs_packet_next_reloc(p, &reloc);
1560
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1562
r100_cs_dump_packet(p, pkt);
1565
track->textures[0].cube_info[i].offset = idx_value;
1566
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1567
track->textures[0].cube_info[i].robj = reloc->robj;
1568
track->tex_dirty = true;
1570
case RADEON_PP_CUBIC_OFFSET_T1_0:
1571
case RADEON_PP_CUBIC_OFFSET_T1_1:
1572
case RADEON_PP_CUBIC_OFFSET_T1_2:
1573
case RADEON_PP_CUBIC_OFFSET_T1_3:
1574
case RADEON_PP_CUBIC_OFFSET_T1_4:
1575
i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1576
r = r100_cs_packet_next_reloc(p, &reloc);
1578
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1580
r100_cs_dump_packet(p, pkt);
1583
track->textures[1].cube_info[i].offset = idx_value;
1584
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1585
track->textures[1].cube_info[i].robj = reloc->robj;
1586
track->tex_dirty = true;
1588
case RADEON_PP_CUBIC_OFFSET_T2_0:
1589
case RADEON_PP_CUBIC_OFFSET_T2_1:
1590
case RADEON_PP_CUBIC_OFFSET_T2_2:
1591
case RADEON_PP_CUBIC_OFFSET_T2_3:
1592
case RADEON_PP_CUBIC_OFFSET_T2_4:
1593
i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1594
r = r100_cs_packet_next_reloc(p, &reloc);
1596
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1598
r100_cs_dump_packet(p, pkt);
1601
track->textures[2].cube_info[i].offset = idx_value;
1602
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1603
track->textures[2].cube_info[i].robj = reloc->robj;
1604
track->tex_dirty = true;
1606
case RADEON_RE_WIDTH_HEIGHT:
1607
track->maxy = ((idx_value >> 16) & 0x7FF);
1608
track->cb_dirty = true;
1609
track->zb_dirty = true;
1611
case RADEON_RB3D_COLORPITCH:
1612
r = r100_cs_packet_next_reloc(p, &reloc);
1614
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1616
r100_cs_dump_packet(p, pkt);
1620
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1621
tile_flags |= RADEON_COLOR_TILE_ENABLE;
1622
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1623
tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1625
tmp = idx_value & ~(0x7 << 16);
1629
track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1630
track->cb_dirty = true;
1632
case RADEON_RB3D_DEPTHPITCH:
1633
track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1634
track->zb_dirty = true;
1636
case RADEON_RB3D_CNTL:
1637
switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1643
track->cb[0].cpp = 1;
1648
track->cb[0].cpp = 2;
1651
track->cb[0].cpp = 4;
1654
DRM_ERROR("Invalid color buffer format (%d) !\n",
1655
((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1658
track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1659
track->cb_dirty = true;
1660
track->zb_dirty = true;
1662
case RADEON_RB3D_ZSTENCILCNTL:
1663
switch (idx_value & 0xf) {
1678
track->zb_dirty = true;
1680
case RADEON_RB3D_ZPASS_ADDR:
1681
r = r100_cs_packet_next_reloc(p, &reloc);
1683
DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1685
r100_cs_dump_packet(p, pkt);
1688
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1690
case RADEON_PP_CNTL:
1692
uint32_t temp = idx_value >> 4;
1693
for (i = 0; i < track->num_texture; i++)
1694
track->textures[i].enabled = !!(temp & (1 << i));
1695
track->tex_dirty = true;
1698
case RADEON_SE_VF_CNTL:
1699
track->vap_vf_cntl = idx_value;
1701
case RADEON_SE_VTX_FMT:
1702
track->vtx_size = r100_get_vtx_size(idx_value);
1704
case RADEON_PP_TEX_SIZE_0:
1705
case RADEON_PP_TEX_SIZE_1:
1706
case RADEON_PP_TEX_SIZE_2:
1707
i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
1708
track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1709
track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1710
track->tex_dirty = true;
1712
case RADEON_PP_TEX_PITCH_0:
1713
case RADEON_PP_TEX_PITCH_1:
1714
case RADEON_PP_TEX_PITCH_2:
1715
i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1716
track->textures[i].pitch = idx_value + 32;
1717
track->tex_dirty = true;
1719
case RADEON_PP_TXFILTER_0:
1720
case RADEON_PP_TXFILTER_1:
1721
case RADEON_PP_TXFILTER_2:
1722
i = (reg - RADEON_PP_TXFILTER_0) / 24;
1723
track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
1724
>> RADEON_MAX_MIP_LEVEL_SHIFT);
1725
tmp = (idx_value >> 23) & 0x7;
1726
if (tmp == 2 || tmp == 6)
1727
track->textures[i].roundup_w = false;
1728
tmp = (idx_value >> 27) & 0x7;
1729
if (tmp == 2 || tmp == 6)
1730
track->textures[i].roundup_h = false;
1731
track->tex_dirty = true;
1733
case RADEON_PP_TXFORMAT_0:
1734
case RADEON_PP_TXFORMAT_1:
1735
case RADEON_PP_TXFORMAT_2:
1736
i = (reg - RADEON_PP_TXFORMAT_0) / 24;
1737
if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1738
track->textures[i].use_pitch = 1;
1740
track->textures[i].use_pitch = 0;
1741
track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
1742
track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
1744
if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1745
track->textures[i].tex_coord_type = 2;
1746
switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1747
case RADEON_TXFORMAT_I8:
1748
case RADEON_TXFORMAT_RGB332:
1749
case RADEON_TXFORMAT_Y8:
1750
track->textures[i].cpp = 1;
1751
track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1753
case RADEON_TXFORMAT_AI88:
1754
case RADEON_TXFORMAT_ARGB1555:
1755
case RADEON_TXFORMAT_RGB565:
1756
case RADEON_TXFORMAT_ARGB4444:
1757
case RADEON_TXFORMAT_VYUY422:
1758
case RADEON_TXFORMAT_YVYU422:
1759
case RADEON_TXFORMAT_SHADOW16:
1760
case RADEON_TXFORMAT_LDUDV655:
1761
case RADEON_TXFORMAT_DUDV88:
1762
track->textures[i].cpp = 2;
1763
track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1765
case RADEON_TXFORMAT_ARGB8888:
1766
case RADEON_TXFORMAT_RGBA8888:
1767
case RADEON_TXFORMAT_SHADOW32:
1768
case RADEON_TXFORMAT_LDUDUV8888:
1769
track->textures[i].cpp = 4;
1770
track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1772
case RADEON_TXFORMAT_DXT1:
1773
track->textures[i].cpp = 1;
1774
track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1776
case RADEON_TXFORMAT_DXT23:
1777
case RADEON_TXFORMAT_DXT45:
1778
track->textures[i].cpp = 1;
1779
track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1782
track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1783
track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1784
track->tex_dirty = true;
1786
case RADEON_PP_CUBIC_FACES_0:
1787
case RADEON_PP_CUBIC_FACES_1:
1788
case RADEON_PP_CUBIC_FACES_2:
1790
i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1791
for (face = 0; face < 4; face++) {
1792
track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1793
track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1795
track->tex_dirty = true;
1798
printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1805
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1806
struct radeon_cs_packet *pkt,
1807
struct radeon_bo *robj)
1812
value = radeon_get_ib_value(p, idx + 2);
1813
if ((value + 1) > radeon_bo_size(robj)) {
1814
DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1815
"(need %u have %lu) !\n",
1817
radeon_bo_size(robj));
1823
static int r100_packet3_check(struct radeon_cs_parser *p,
1824
struct radeon_cs_packet *pkt)
1826
struct radeon_cs_reloc *reloc;
1827
struct r100_cs_track *track;
1829
volatile uint32_t *ib;
1834
track = (struct r100_cs_track *)p->track;
1835
switch (pkt->opcode) {
1836
case PACKET3_3D_LOAD_VBPNTR:
1837
r = r100_packet3_load_vbpntr(p, pkt, idx);
1841
case PACKET3_INDX_BUFFER:
1842
r = r100_cs_packet_next_reloc(p, &reloc);
1844
DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1845
r100_cs_dump_packet(p, pkt);
1848
ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
1849
r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1855
/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
1856
r = r100_cs_packet_next_reloc(p, &reloc);
1858
DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1859
r100_cs_dump_packet(p, pkt);
1862
ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
1863
track->num_arrays = 1;
1864
track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
1866
track->arrays[0].robj = reloc->robj;
1867
track->arrays[0].esize = track->vtx_size;
1869
track->max_indx = radeon_get_ib_value(p, idx+1);
1871
track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
1872
track->immd_dwords = pkt->count - 1;
1873
r = r100_cs_track_check(p->rdev, track);
1877
case PACKET3_3D_DRAW_IMMD:
1878
if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
1879
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1882
track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
1883
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1884
track->immd_dwords = pkt->count - 1;
1885
r = r100_cs_track_check(p->rdev, track);
1889
/* triggers drawing using in-packet vertex data */
1890
case PACKET3_3D_DRAW_IMMD_2:
1891
if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
1892
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1895
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1896
track->immd_dwords = pkt->count;
1897
r = r100_cs_track_check(p->rdev, track);
1901
/* triggers drawing using in-packet vertex data */
1902
case PACKET3_3D_DRAW_VBUF_2:
1903
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1904
r = r100_cs_track_check(p->rdev, track);
1908
/* triggers drawing of vertex buffers setup elsewhere */
1909
case PACKET3_3D_DRAW_INDX_2:
1910
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
1911
r = r100_cs_track_check(p->rdev, track);
1915
/* triggers drawing using indices to vertex buffer */
1916
case PACKET3_3D_DRAW_VBUF:
1917
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1918
r = r100_cs_track_check(p->rdev, track);
1922
/* triggers drawing of vertex buffers setup elsewhere */
1923
case PACKET3_3D_DRAW_INDX:
1924
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
1925
r = r100_cs_track_check(p->rdev, track);
1929
/* triggers drawing using indices to vertex buffer */
1930
case PACKET3_3D_CLEAR_HIZ:
1931
case PACKET3_3D_CLEAR_ZMASK:
1932
if (p->rdev->hyperz_filp != p->filp)
1938
DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1944
int r100_cs_parse(struct radeon_cs_parser *p)
1946
struct radeon_cs_packet pkt;
1947
struct r100_cs_track *track;
1950
track = kzalloc(sizeof(*track), GFP_KERNEL);
1951
r100_cs_track_clear(p->rdev, track);
1954
r = r100_cs_packet_parse(p, &pkt, p->idx);
1958
p->idx += pkt.count + 2;
1961
if (p->rdev->family >= CHIP_R200)
1962
r = r100_cs_parse_packet0(p, &pkt,
1963
p->rdev->config.r100.reg_safe_bm,
1964
p->rdev->config.r100.reg_safe_bm_size,
1965
&r200_packet0_check);
1967
r = r100_cs_parse_packet0(p, &pkt,
1968
p->rdev->config.r100.reg_safe_bm,
1969
p->rdev->config.r100.reg_safe_bm_size,
1970
&r100_packet0_check);
1975
r = r100_packet3_check(p, &pkt);
1978
DRM_ERROR("Unknown packet type %d !\n",
1985
} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1991
* Global GPU functions
1993
void r100_errata(struct radeon_device *rdev)
1995
rdev->pll_errata = 0;
1997
if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
1998
rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2001
if (rdev->family == CHIP_RV100 ||
2002
rdev->family == CHIP_RS100 ||
2003
rdev->family == CHIP_RS200) {
2004
rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2008
/* Wait for vertical sync on primary CRTC */
2009
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
2011
uint32_t crtc_gen_cntl, tmp;
2014
crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
2015
if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
2016
!(crtc_gen_cntl & RADEON_CRTC_EN)) {
2019
/* Clear the CRTC_VBLANK_SAVE bit */
2020
WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
2021
for (i = 0; i < rdev->usec_timeout; i++) {
2022
tmp = RREG32(RADEON_CRTC_STATUS);
2023
if (tmp & RADEON_CRTC_VBLANK_SAVE) {
2030
/* Wait for vertical sync on secondary CRTC */
2031
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
2033
uint32_t crtc2_gen_cntl, tmp;
2036
crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
2037
if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
2038
!(crtc2_gen_cntl & RADEON_CRTC2_EN))
2041
/* Clear the CRTC_VBLANK_SAVE bit */
2042
WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
2043
for (i = 0; i < rdev->usec_timeout; i++) {
2044
tmp = RREG32(RADEON_CRTC2_STATUS);
2045
if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
2052
int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2057
for (i = 0; i < rdev->usec_timeout; i++) {
2058
tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2067
int r100_gui_wait_for_idle(struct radeon_device *rdev)
2072
if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2073
printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
2074
" Bad things might happen.\n");
2076
for (i = 0; i < rdev->usec_timeout; i++) {
2077
tmp = RREG32(RADEON_RBBM_STATUS);
2078
if (!(tmp & RADEON_RBBM_ACTIVE)) {
2086
int r100_mc_wait_for_idle(struct radeon_device *rdev)
2091
for (i = 0; i < rdev->usec_timeout; i++) {
2092
/* read MC_STATUS */
2093
tmp = RREG32(RADEON_MC_STATUS);
2094
if (tmp & RADEON_MC_IDLE) {
2102
void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
2104
lockup->last_cp_rptr = cp->rptr;
2105
lockup->last_jiffies = jiffies;
2109
* r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
2110
* @rdev: radeon device structure
2111
* @lockup: r100_gpu_lockup structure holding CP lockup tracking informations
2112
* @cp: radeon_cp structure holding CP information
2114
* We don't need to initialize the lockup tracking information as we will either
2115
* have CP rptr to a different value of jiffies wrap around which will force
2116
* initialization of the lockup tracking informations.
2118
* A possible false positivie is if we get call after while and last_cp_rptr ==
2119
* the current CP rptr, even if it's unlikely it might happen. To avoid this
2120
* if the elapsed time since last call is bigger than 2 second than we return
2121
* false and update the tracking information. Due to this the caller must call
2122
* r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be reported
2123
* the fencing code should be cautious about that.
2125
* Caller should write to the ring to force CP to do something so we don't get
2126
* false positive when CP is just gived nothing to do.
2129
bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
2131
unsigned long cjiffies, elapsed;
2134
if (!time_after(cjiffies, lockup->last_jiffies)) {
2135
/* likely a wrap around */
2136
lockup->last_cp_rptr = cp->rptr;
2137
lockup->last_jiffies = jiffies;
2140
if (cp->rptr != lockup->last_cp_rptr) {
2141
/* CP is still working no lockup */
2142
lockup->last_cp_rptr = cp->rptr;
2143
lockup->last_jiffies = jiffies;
2146
elapsed = jiffies_to_msecs(cjiffies - lockup->last_jiffies);
2147
if (elapsed >= 10000) {
2148
dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed);
2151
/* give a chance to the GPU ... */
2155
bool r100_gpu_is_lockup(struct radeon_device *rdev)
2160
rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2161
if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2162
r100_gpu_lockup_update(&rdev->config.r100.lockup, &rdev->cp);
2165
/* force CP activities */
2166
r = radeon_ring_lock(rdev, 2);
2169
radeon_ring_write(rdev, 0x80000000);
2170
radeon_ring_write(rdev, 0x80000000);
2171
radeon_ring_unlock_commit(rdev);
2173
rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
2174
return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp);
2177
void r100_bm_disable(struct radeon_device *rdev)
2181
/* disable bus mastering */
2182
tmp = RREG32(R_000030_BUS_CNTL);
2183
WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2185
WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2187
WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2188
tmp = RREG32(RADEON_BUS_CNTL);
2190
pci_read_config_word(rdev->pdev, 0x4, (u16*)&tmp);
2191
pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB);
2195
int r100_asic_reset(struct radeon_device *rdev)
2197
struct r100_mc_save save;
2201
status = RREG32(R_000E40_RBBM_STATUS);
2202
if (!G_000E40_GUI_ACTIVE(status)) {
2205
r100_mc_stop(rdev, &save);
2206
status = RREG32(R_000E40_RBBM_STATUS);
2207
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2209
WREG32(RADEON_CP_CSQ_CNTL, 0);
2210
tmp = RREG32(RADEON_CP_RB_CNTL);
2211
WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
2212
WREG32(RADEON_CP_RB_RPTR_WR, 0);
2213
WREG32(RADEON_CP_RB_WPTR, 0);
2214
WREG32(RADEON_CP_RB_CNTL, tmp);
2215
/* save PCI state */
2216
pci_save_state(rdev->pdev);
2217
/* disable bus mastering */
2218
r100_bm_disable(rdev);
2219
WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
2220
S_0000F0_SOFT_RESET_RE(1) |
2221
S_0000F0_SOFT_RESET_PP(1) |
2222
S_0000F0_SOFT_RESET_RB(1));
2223
RREG32(R_0000F0_RBBM_SOFT_RESET);
2225
WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2227
status = RREG32(R_000E40_RBBM_STATUS);
2228
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2230
WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
2231
RREG32(R_0000F0_RBBM_SOFT_RESET);
2233
WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
2235
status = RREG32(R_000E40_RBBM_STATUS);
2236
dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2237
/* restore PCI & busmastering */
2238
pci_restore_state(rdev->pdev);
2239
r100_enable_bm(rdev);
2240
/* Check if GPU is idle */
2241
if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2242
G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2243
dev_err(rdev->dev, "failed to reset GPU\n");
2244
rdev->gpu_lockup = true;
2247
dev_info(rdev->dev, "GPU reset succeed\n");
2248
r100_mc_resume(rdev, &save);
2252
void r100_set_common_regs(struct radeon_device *rdev)
2254
struct drm_device *dev = rdev->ddev;
2255
bool force_dac2 = false;
2258
/* set these so they don't interfere with anything */
2259
WREG32(RADEON_OV0_SCALE_CNTL, 0);
2260
WREG32(RADEON_SUBPIC_CNTL, 0);
2261
WREG32(RADEON_VIPH_CONTROL, 0);
2262
WREG32(RADEON_I2C_CNTL_1, 0);
2263
WREG32(RADEON_DVI_I2C_CNTL_1, 0);
2264
WREG32(RADEON_CAP0_TRIG_CNTL, 0);
2265
WREG32(RADEON_CAP1_TRIG_CNTL, 0);
2267
/* always set up dac2 on rn50 and some rv100 as lots
2268
* of servers seem to wire it up to a VGA port but
2269
* don't report it in the bios connector
2272
switch (dev->pdev->device) {
2281
/* DELL triple head servers */
2282
if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2283
((dev->pdev->subsystem_device == 0x016c) ||
2284
(dev->pdev->subsystem_device == 0x016d) ||
2285
(dev->pdev->subsystem_device == 0x016e) ||
2286
(dev->pdev->subsystem_device == 0x016f) ||
2287
(dev->pdev->subsystem_device == 0x0170) ||
2288
(dev->pdev->subsystem_device == 0x017d) ||
2289
(dev->pdev->subsystem_device == 0x017e) ||
2290
(dev->pdev->subsystem_device == 0x0183) ||
2291
(dev->pdev->subsystem_device == 0x018a) ||
2292
(dev->pdev->subsystem_device == 0x019a)))
2298
u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2299
u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2300
u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2302
/* For CRT on DAC2, don't turn it on if BIOS didn't
2303
enable it, even it's detected.
2306
/* force it to crtc0 */
2307
dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2308
dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2309
disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2311
/* set up the TV DAC */
2312
tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2313
RADEON_TV_DAC_STD_MASK |
2314
RADEON_TV_DAC_RDACPD |
2315
RADEON_TV_DAC_GDACPD |
2316
RADEON_TV_DAC_BDACPD |
2317
RADEON_TV_DAC_BGADJ_MASK |
2318
RADEON_TV_DAC_DACADJ_MASK);
2319
tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2320
RADEON_TV_DAC_NHOLD |
2321
RADEON_TV_DAC_STD_PS2 |
2324
WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2325
WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2326
WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2329
/* switch PM block to ACPI mode */
2330
tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
2331
tmp &= ~RADEON_PM_MODE_SEL;
2332
WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
2339
static void r100_vram_get_type(struct radeon_device *rdev)
2343
rdev->mc.vram_is_ddr = false;
2344
if (rdev->flags & RADEON_IS_IGP)
2345
rdev->mc.vram_is_ddr = true;
2346
else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
2347
rdev->mc.vram_is_ddr = true;
2348
if ((rdev->family == CHIP_RV100) ||
2349
(rdev->family == CHIP_RS100) ||
2350
(rdev->family == CHIP_RS200)) {
2351
tmp = RREG32(RADEON_MEM_CNTL);
2352
if (tmp & RV100_HALF_MODE) {
2353
rdev->mc.vram_width = 32;
2355
rdev->mc.vram_width = 64;
2357
if (rdev->flags & RADEON_SINGLE_CRTC) {
2358
rdev->mc.vram_width /= 4;
2359
rdev->mc.vram_is_ddr = true;
2361
} else if (rdev->family <= CHIP_RV280) {
2362
tmp = RREG32(RADEON_MEM_CNTL);
2363
if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2364
rdev->mc.vram_width = 128;
2366
rdev->mc.vram_width = 64;
2370
rdev->mc.vram_width = 128;
2374
static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2379
aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2381
/* Set HDP_APER_CNTL only on cards that are known not to be broken,
2382
* that is has the 2nd generation multifunction PCI interface
2384
if (rdev->family == CHIP_RV280 ||
2385
rdev->family >= CHIP_RV350) {
2386
WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
2387
~RADEON_HDP_APER_CNTL);
2388
DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2389
return aper_size * 2;
2392
/* Older cards have all sorts of funny issues to deal with. First
2393
* check if it's a multifunction card by reading the PCI config
2394
* header type... Limit those to one aperture size
2396
pci_read_config_byte(rdev->pdev, 0xe, &byte);
2398
DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2399
DRM_INFO("Limiting VRAM to one aperture\n");
2403
/* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2404
* have set it up. We don't write this as it's broken on some ASICs but
2405
* we expect the BIOS to have done the right thing (might be too optimistic...)
2407
if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
2408
return aper_size * 2;
2412
void r100_vram_init_sizes(struct radeon_device *rdev)
2414
u64 config_aper_size;
2416
/* work out accessible VRAM */
2417
rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2418
rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2419
rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2420
/* FIXME we don't use the second aperture yet when we could use it */
2421
if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2422
rdev->mc.visible_vram_size = rdev->mc.aper_size;
2423
config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2424
if (rdev->flags & RADEON_IS_IGP) {
2426
/* read NB_TOM to get the amount of ram stolen for the GPU */
2427
tom = RREG32(RADEON_NB_TOM);
2428
rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2429
WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2430
rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2432
rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2433
/* Some production boards of m6 will report 0
2436
if (rdev->mc.real_vram_size == 0) {
2437
rdev->mc.real_vram_size = 8192 * 1024;
2438
WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2440
/* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
2441
* Novell bug 204882 + along with lots of ubuntu ones
2443
if (rdev->mc.aper_size > config_aper_size)
2444
config_aper_size = rdev->mc.aper_size;
2446
if (config_aper_size > rdev->mc.real_vram_size)
2447
rdev->mc.mc_vram_size = config_aper_size;
2449
rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2453
/**
 * r100_vga_set_state - enable/disable legacy VGA access
 * @rdev: radeon device
 * @state: true to allow VGA RAM/IO access, false to lock it out
 *
 * Toggles the VGA aperture RAM enable and IO disable bits in
 * CONFIG_CNTL so the VGA aperture cannot stomp on driver-managed VRAM.
 */
void r100_vga_set_state(struct radeon_device *rdev, bool state)
{
	uint32_t temp;

	/* Lockout access through VGA aperture */
	temp = RREG32(RADEON_CONFIG_CNTL);
	if (state == false) {
		temp &= ~RADEON_CFG_VGA_RAM_EN;
		temp |= RADEON_CFG_VGA_IO_DIS;
	} else {
		temp &= ~RADEON_CFG_VGA_IO_DIS;
	}
	WREG32(RADEON_CONFIG_CNTL, temp);
}
void r100_mc_init(struct radeon_device *rdev)
2471
r100_vram_get_type(rdev);
2472
r100_vram_init_sizes(rdev);
2473
base = rdev->mc.aper_base;
2474
if (rdev->flags & RADEON_IS_IGP)
2475
base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2476
radeon_vram_location(rdev, &rdev->mc, base);
2477
rdev->mc.gtt_base_align = 0;
2478
if (!(rdev->flags & RADEON_IS_AGP))
2479
radeon_gtt_location(rdev, &rdev->mc);
2480
radeon_update_bandwidth_info(rdev);
/*
 * Indirect registers accessor
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
2489
if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2490
(void)RREG32(RADEON_CLOCK_CNTL_DATA);
2491
(void)RREG32(RADEON_CRTC_GEN_CNTL);
2495
static void r100_pll_errata_after_data(struct radeon_device *rdev)
2497
/* This workarounds is necessary on RV100, RS100 and RS200 chips
2498
* or the chip could hang on a subsequent access
2500
if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2504
/* This function is required to workaround a hardware bug in some (all?)
2505
* revisions of the R300. This workaround should be called after every
2506
* CLOCK_CNTL_INDEX register access. If not, register reads afterward
2507
* may not be correct.
2509
if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2512
save = RREG32(RADEON_CLOCK_CNTL_INDEX);
2513
tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2514
WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
2515
tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
2516
WREG32(RADEON_CLOCK_CNTL_INDEX, save);
2520
/**
 * r100_pll_rreg - read a PLL register through the indirect interface
 * @rdev: radeon device
 * @reg: PLL register index (6 bits)
 *
 * Returns the PLL register value, applying the errata workarounds
 * around the index and data accesses.
 */
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}
/**
 * r100_pll_wreg - write a PLL register through the indirect interface
 * @rdev: radeon device
 * @reg: PLL register index (6 bits)
 * @v: value to write
 *
 * Sets PLL_WR_EN with the index, writes the data, and applies the
 * errata workarounds around both accesses.
 */
void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}
void r100_set_safe_registers(struct radeon_device *rdev)
2541
if (ASIC_IS_RN50(rdev)) {
2542
rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2543
rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2544
} else if (rdev->family < CHIP_R200) {
2545
rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2546
rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2548
r200_set_safe_registers(rdev);
2555
/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
/* Dump RBBM status and the 64-entry RBBM command fifo. */
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

/* Dump CP ring pointers and the dwords between rptr and wptr. */
static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & rdev->cp.ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
	}
	return 0;
}

/* Dump CP command-stream queue state and the ring/indirect fifos. */
static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

/* Dump the memory-controller and AGP/AIC related registers. */
static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

/* Register the RBBM debugfs file (no-op without CONFIG_DEBUG_FS). */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

/* Register the CP debugfs files (no-op without CONFIG_DEBUG_FS). */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

/* Register the MC debugfs file (no-op without CONFIG_DEBUG_FS). */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}
/**
 * r100_set_surface_reg - program one surface register set
 * @rdev: radeon device
 * @reg: surface register slot (0..7)
 * @tiling_flags: RADEON_TILING_* flags
 * @pitch: surface pitch in bytes
 * @offset: surface start offset in VRAM
 * @obj_size: surface size in bytes
 *
 * Translates generic tiling/swap flags into the per-generation surface
 * register encoding and writes the INFO/LOWER_BOUND/UPPER_BOUND
 * registers for the slot.  Returns 0.
 */
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	int surf_index = reg * 16;
	uint32_t flags = 0;

	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & (RADEON_TILING_MACRO))
			flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R300_SURF_TILE_MICRO;
	}

	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
		flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
		flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;

	/* when we aren't tiling the pitch seems to needs to be furtherdivided down. - tested on power5 + rn50 server */
	if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) {
		if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO)))
			if (ASIC_IS_RN50(rdev))
				pitch /= 16;
	}

	/* r100/r200 divide by 16 */
	if (rdev->family < CHIP_R300)
		flags |= pitch / 16;
	else
		flags |= pitch / 8;

	DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}
void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
2776
int surf_index = reg * 16;
2777
WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
2780
void r100_bandwidth_update(struct radeon_device *rdev)
2782
fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
2783
fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
2784
fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
2785
uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
2786
fixed20_12 memtcas_ff[8] = {
2791
dfixed_init_half(1),
2792
dfixed_init_half(2),
2795
fixed20_12 memtcas_rs480_ff[8] = {
2801
dfixed_init_half(1),
2802
dfixed_init_half(2),
2803
dfixed_init_half(3),
2805
fixed20_12 memtcas2_ff[8] = {
2815
fixed20_12 memtrbs[8] = {
2817
dfixed_init_half(1),
2819
dfixed_init_half(2),
2821
dfixed_init_half(3),
2825
fixed20_12 memtrbs_r4xx[8] = {
2835
fixed20_12 min_mem_eff;
2836
fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
2837
fixed20_12 cur_latency_mclk, cur_latency_sclk;
2838
fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
2839
disp_drain_rate2, read_return_rate;
2840
fixed20_12 time_disp1_drop_priority;
2842
int cur_size = 16; /* in octawords */
2843
int critical_point = 0, critical_point2;
2844
/* uint32_t read_return_rate, time_disp1_drop_priority; */
2845
int stop_req, max_stop_req;
2846
struct drm_display_mode *mode1 = NULL;
2847
struct drm_display_mode *mode2 = NULL;
2848
uint32_t pixel_bytes1 = 0;
2849
uint32_t pixel_bytes2 = 0;
2851
radeon_update_display_priority(rdev);
2853
if (rdev->mode_info.crtcs[0]->base.enabled) {
2854
mode1 = &rdev->mode_info.crtcs[0]->base.mode;
2855
pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
2857
if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
2858
if (rdev->mode_info.crtcs[1]->base.enabled) {
2859
mode2 = &rdev->mode_info.crtcs[1]->base.mode;
2860
pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
2864
min_mem_eff.full = dfixed_const_8(0);
2866
if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
2867
uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
2868
mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
2869
mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
2870
/* check crtc enables */
2872
mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
2874
mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
2875
WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
2879
* determine is there is enough bw for current mode
2881
sclk_ff = rdev->pm.sclk;
2882
mclk_ff = rdev->pm.mclk;
2884
temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
2885
temp_ff.full = dfixed_const(temp);
2886
mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
2890
peak_disp_bw.full = 0;
2892
temp_ff.full = dfixed_const(1000);
2893
pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
2894
pix_clk.full = dfixed_div(pix_clk, temp_ff);
2895
temp_ff.full = dfixed_const(pixel_bytes1);
2896
peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
2899
temp_ff.full = dfixed_const(1000);
2900
pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
2901
pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
2902
temp_ff.full = dfixed_const(pixel_bytes2);
2903
peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
2906
mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
2907
if (peak_disp_bw.full >= mem_bw.full) {
2908
DRM_ERROR("You may not have enough display bandwidth for current mode\n"
2909
"If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
2912
/* Get values from the EXT_MEM_CNTL register...converting its contents. */
2913
temp = RREG32(RADEON_MEM_TIMING_CNTL);
2914
if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
2915
mem_trcd = ((temp >> 2) & 0x3) + 1;
2916
mem_trp = ((temp & 0x3)) + 1;
2917
mem_tras = ((temp & 0x70) >> 4) + 1;
2918
} else if (rdev->family == CHIP_R300 ||
2919
rdev->family == CHIP_R350) { /* r300, r350 */
2920
mem_trcd = (temp & 0x7) + 1;
2921
mem_trp = ((temp >> 8) & 0x7) + 1;
2922
mem_tras = ((temp >> 11) & 0xf) + 4;
2923
} else if (rdev->family == CHIP_RV350 ||
2924
rdev->family <= CHIP_RV380) {
2926
mem_trcd = (temp & 0x7) + 3;
2927
mem_trp = ((temp >> 8) & 0x7) + 3;
2928
mem_tras = ((temp >> 11) & 0xf) + 6;
2929
} else if (rdev->family == CHIP_R420 ||
2930
rdev->family == CHIP_R423 ||
2931
rdev->family == CHIP_RV410) {
2933
mem_trcd = (temp & 0xf) + 3;
2936
mem_trp = ((temp >> 8) & 0xf) + 3;
2939
mem_tras = ((temp >> 12) & 0x1f) + 6;
2942
} else { /* RV200, R200 */
2943
mem_trcd = (temp & 0x7) + 1;
2944
mem_trp = ((temp >> 8) & 0x7) + 1;
2945
mem_tras = ((temp >> 12) & 0xf) + 4;
2948
trcd_ff.full = dfixed_const(mem_trcd);
2949
trp_ff.full = dfixed_const(mem_trp);
2950
tras_ff.full = dfixed_const(mem_tras);
2952
/* Get values from the MEM_SDRAM_MODE_REG register...converting its */
2953
temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
2954
data = (temp & (7 << 20)) >> 20;
2955
if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
2956
if (rdev->family == CHIP_RS480) /* don't think rs400 */
2957
tcas_ff = memtcas_rs480_ff[data];
2959
tcas_ff = memtcas_ff[data];
2961
tcas_ff = memtcas2_ff[data];
2963
if (rdev->family == CHIP_RS400 ||
2964
rdev->family == CHIP_RS480) {
2965
/* extra cas latency stored in bits 23-25 0-4 clocks */
2966
data = (temp >> 23) & 0x7;
2968
tcas_ff.full += dfixed_const(data);
2971
if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
2972
/* on the R300, Tcas is included in Trbs.
2974
temp = RREG32(RADEON_MEM_CNTL);
2975
data = (R300_MEM_NUM_CHANNELS_MASK & temp);
2977
if (R300_MEM_USE_CD_CH_ONLY & temp) {
2978
temp = RREG32(R300_MC_IND_INDEX);
2979
temp &= ~R300_MC_IND_ADDR_MASK;
2980
temp |= R300_MC_READ_CNTL_CD_mcind;
2981
WREG32(R300_MC_IND_INDEX, temp);
2982
temp = RREG32(R300_MC_IND_DATA);
2983
data = (R300_MEM_RBS_POSITION_C_MASK & temp);
2985
temp = RREG32(R300_MC_READ_CNTL_AB);
2986
data = (R300_MEM_RBS_POSITION_A_MASK & temp);
2989
temp = RREG32(R300_MC_READ_CNTL_AB);
2990
data = (R300_MEM_RBS_POSITION_A_MASK & temp);
2992
if (rdev->family == CHIP_RV410 ||
2993
rdev->family == CHIP_R420 ||
2994
rdev->family == CHIP_R423)
2995
trbs_ff = memtrbs_r4xx[data];
2997
trbs_ff = memtrbs[data];
2998
tcas_ff.full += trbs_ff.full;
3001
sclk_eff_ff.full = sclk_ff.full;
3003
if (rdev->flags & RADEON_IS_AGP) {
3004
fixed20_12 agpmode_ff;
3005
agpmode_ff.full = dfixed_const(radeon_agpmode);
3006
temp_ff.full = dfixed_const_666(16);
3007
sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
3009
/* TODO PCIE lanes may affect this - agpmode == 16?? */
3011
if (ASIC_IS_R300(rdev)) {
3012
sclk_delay_ff.full = dfixed_const(250);
3014
if ((rdev->family == CHIP_RV100) ||
3015
rdev->flags & RADEON_IS_IGP) {
3016
if (rdev->mc.vram_is_ddr)
3017
sclk_delay_ff.full = dfixed_const(41);
3019
sclk_delay_ff.full = dfixed_const(33);
3021
if (rdev->mc.vram_width == 128)
3022
sclk_delay_ff.full = dfixed_const(57);
3024
sclk_delay_ff.full = dfixed_const(41);
3028
mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
3030
if (rdev->mc.vram_is_ddr) {
3031
if (rdev->mc.vram_width == 32) {
3032
k1.full = dfixed_const(40);
3035
k1.full = dfixed_const(20);
3039
k1.full = dfixed_const(40);
3043
temp_ff.full = dfixed_const(2);
3044
mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
3045
temp_ff.full = dfixed_const(c);
3046
mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
3047
temp_ff.full = dfixed_const(4);
3048
mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
3049
mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
3050
mc_latency_mclk.full += k1.full;
3052
mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
3053
mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
3056
HW cursor time assuming worst case of full size colour cursor.
3058
temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
3059
temp_ff.full += trcd_ff.full;
3060
if (temp_ff.full < tras_ff.full)
3061
temp_ff.full = tras_ff.full;
3062
cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
3064
temp_ff.full = dfixed_const(cur_size);
3065
cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
3067
Find the total latency for the display data.
3069
disp_latency_overhead.full = dfixed_const(8);
3070
disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
3071
mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
3072
mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
3074
if (mc_latency_mclk.full > mc_latency_sclk.full)
3075
disp_latency.full = mc_latency_mclk.full;
3077
disp_latency.full = mc_latency_sclk.full;
3079
/* setup Max GRPH_STOP_REQ default value */
3080
if (ASIC_IS_RV100(rdev))
3081
max_stop_req = 0x5c;
3083
max_stop_req = 0x7c;
3087
Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
3088
GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
3090
stop_req = mode1->hdisplay * pixel_bytes1 / 16;
3092
if (stop_req > max_stop_req)
3093
stop_req = max_stop_req;
3096
Find the drain rate of the display buffer.
3098
temp_ff.full = dfixed_const((16/pixel_bytes1));
3099
disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
3102
Find the critical point of the display buffer.
3104
crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
3105
crit_point_ff.full += dfixed_const_half(0);
3107
critical_point = dfixed_trunc(crit_point_ff);
3109
if (rdev->disp_priority == 2) {
3114
The critical point should never be above max_stop_req-4. Setting
3115
GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
3117
if (max_stop_req - critical_point < 4)
3120
if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
3121
/* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
3122
critical_point = 0x10;
3125
temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
3126
temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
3127
temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3128
temp &= ~(RADEON_GRPH_START_REQ_MASK);
3129
if ((rdev->family == CHIP_R350) &&
3130
(stop_req > 0x15)) {
3133
temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3134
temp |= RADEON_GRPH_BUFFER_SIZE;
3135
temp &= ~(RADEON_GRPH_CRITICAL_CNTL |
3136
RADEON_GRPH_CRITICAL_AT_SOF |
3137
RADEON_GRPH_STOP_CNTL);
3139
Write the result into the register.
3141
WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3142
(critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3145
if ((rdev->family == CHIP_RS400) ||
3146
(rdev->family == CHIP_RS480)) {
3147
/* attempt to program RS400 disp regs correctly ??? */
3148
temp = RREG32(RS400_DISP1_REG_CNTL);
3149
temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
3150
RS400_DISP1_STOP_REQ_LEVEL_MASK);
3151
WREG32(RS400_DISP1_REQ_CNTL1, (temp |
3152
(critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3153
(critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3154
temp = RREG32(RS400_DMIF_MEM_CNTL1);
3155
temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
3156
RS400_DISP1_CRITICAL_POINT_STOP_MASK);
3157
WREG32(RS400_DMIF_MEM_CNTL1, (temp |
3158
(critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
3159
(critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
3163
DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
3164
/* (unsigned int)info->SavedReg->grph_buffer_cntl, */
3165
(unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
3170
stop_req = mode2->hdisplay * pixel_bytes2 / 16;
3172
if (stop_req > max_stop_req)
3173
stop_req = max_stop_req;
3176
Find the drain rate of the display buffer.
3178
temp_ff.full = dfixed_const((16/pixel_bytes2));
3179
disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
3181
grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
3182
grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
3183
grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3184
grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
3185
if ((rdev->family == CHIP_R350) &&
3186
(stop_req > 0x15)) {
3189
grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3190
grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
3191
grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL |
3192
RADEON_GRPH_CRITICAL_AT_SOF |
3193
RADEON_GRPH_STOP_CNTL);
3195
if ((rdev->family == CHIP_RS100) ||
3196
(rdev->family == CHIP_RS200))
3197
critical_point2 = 0;
3199
temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
3200
temp_ff.full = dfixed_const(temp);
3201
temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
3202
if (sclk_ff.full < temp_ff.full)
3203
temp_ff.full = sclk_ff.full;
3205
read_return_rate.full = temp_ff.full;
3208
temp_ff.full = read_return_rate.full - disp_drain_rate.full;
3209
time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
3211
time_disp1_drop_priority.full = 0;
3213
crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
3214
crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
3215
crit_point_ff.full += dfixed_const_half(0);
3217
critical_point2 = dfixed_trunc(crit_point_ff);
3219
if (rdev->disp_priority == 2) {
3220
critical_point2 = 0;
3223
if (max_stop_req - critical_point2 < 4)
3224
critical_point2 = 0;
3228
if (critical_point2 == 0 && rdev->family == CHIP_R300) {
3229
/* some R300 cards have problem with this set to 0 */
3230
critical_point2 = 0x10;
3233
WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
3234
(critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3236
if ((rdev->family == CHIP_RS400) ||
3237
(rdev->family == CHIP_RS480)) {
3239
/* attempt to program RS400 disp2 regs correctly ??? */
3240
temp = RREG32(RS400_DISP2_REQ_CNTL1);
3241
temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
3242
RS400_DISP2_STOP_REQ_LEVEL_MASK);
3243
WREG32(RS400_DISP2_REQ_CNTL1, (temp |
3244
(critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3245
(critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3246
temp = RREG32(RS400_DISP2_REQ_CNTL2);
3247
temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
3248
RS400_DISP2_CRITICAL_POINT_STOP_MASK);
3249
WREG32(RS400_DISP2_REQ_CNTL2, (temp |
3250
(critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
3251
(critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
3253
WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
3254
WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
3255
WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC);
3256
WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
3259
DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
3260
(unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
3264
static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
3266
DRM_ERROR("pitch %d\n", t->pitch);
3267
DRM_ERROR("use_pitch %d\n", t->use_pitch);
3268
DRM_ERROR("width %d\n", t->width);
3269
DRM_ERROR("width_11 %d\n", t->width_11);
3270
DRM_ERROR("height %d\n", t->height);
3271
DRM_ERROR("height_11 %d\n", t->height_11);
3272
DRM_ERROR("num levels %d\n", t->num_levels);
3273
DRM_ERROR("depth %d\n", t->txdepth);
3274
DRM_ERROR("bpp %d\n", t->cpp);
3275
DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
3276
DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
3277
DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
3278
DRM_ERROR("compress format %d\n", t->compress_format);
3281
static int r100_track_compress_size(int compress_format, int w, int h)
3283
int block_width, block_height, block_bytes;
3284
int wblocks, hblocks;
3291
switch (compress_format) {
3292
case R100_TRACK_COMP_DXT1:
3297
case R100_TRACK_COMP_DXT35:
3303
hblocks = (h + block_height - 1) / block_height;
3304
wblocks = (w + block_width - 1) / block_width;
3305
if (wblocks < min_wblocks)
3306
wblocks = min_wblocks;
3307
sz = wblocks * hblocks * block_bytes;
3311
static int r100_cs_track_cube(struct radeon_device *rdev,
3312
struct r100_cs_track *track, unsigned idx)
3314
unsigned face, w, h;
3315
struct radeon_bo *cube_robj;
3317
unsigned compress_format = track->textures[idx].compress_format;
3319
for (face = 0; face < 5; face++) {
3320
cube_robj = track->textures[idx].cube_info[face].robj;
3321
w = track->textures[idx].cube_info[face].width;
3322
h = track->textures[idx].cube_info[face].height;
3324
if (compress_format) {
3325
size = r100_track_compress_size(compress_format, w, h);
3328
size *= track->textures[idx].cpp;
3330
size += track->textures[idx].cube_info[face].offset;
3332
if (size > radeon_bo_size(cube_robj)) {
3333
DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
3334
size, radeon_bo_size(cube_robj));
3335
r100_cs_track_texture_print(&track->textures[idx]);
3342
static int r100_cs_track_texture_check(struct radeon_device *rdev,
3343
struct r100_cs_track *track)
3345
struct radeon_bo *robj;
3347
unsigned u, i, w, h, d;
3350
for (u = 0; u < track->num_texture; u++) {
3351
if (!track->textures[u].enabled)
3353
if (track->textures[u].lookup_disable)
3355
robj = track->textures[u].robj;
3357
DRM_ERROR("No texture bound to unit %u\n", u);
3361
for (i = 0; i <= track->textures[u].num_levels; i++) {
3362
if (track->textures[u].use_pitch) {
3363
if (rdev->family < CHIP_R300)
3364
w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
3366
w = track->textures[u].pitch / (1 << i);
3368
w = track->textures[u].width;
3369
if (rdev->family >= CHIP_RV515)
3370
w |= track->textures[u].width_11;
3372
if (track->textures[u].roundup_w)
3373
w = roundup_pow_of_two(w);
3375
h = track->textures[u].height;
3376
if (rdev->family >= CHIP_RV515)
3377
h |= track->textures[u].height_11;
3379
if (track->textures[u].roundup_h)
3380
h = roundup_pow_of_two(h);
3381
if (track->textures[u].tex_coord_type == 1) {
3382
d = (1 << track->textures[u].txdepth) / (1 << i);
3388
if (track->textures[u].compress_format) {
3390
size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
3391
/* compressed textures are block based */
3395
size *= track->textures[u].cpp;
3397
switch (track->textures[u].tex_coord_type) {
3402
if (track->separate_cube) {
3403
ret = r100_cs_track_cube(rdev, track, u);
3410
DRM_ERROR("Invalid texture coordinate type %u for unit "
3411
"%u\n", track->textures[u].tex_coord_type, u);
3414
if (size > radeon_bo_size(robj)) {
3415
DRM_ERROR("Texture of unit %u needs %lu bytes but is "
3416
"%lu\n", u, size, radeon_bo_size(robj));
3417
r100_cs_track_texture_print(&track->textures[u]);
3424
int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
3430
unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
3432
if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
3433
!track->blend_read_enable)
3436
for (i = 0; i < num_cb; i++) {
3437
if (track->cb[i].robj == NULL) {
3438
DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
3441
size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
3442
size += track->cb[i].offset;
3443
if (size > radeon_bo_size(track->cb[i].robj)) {
3444
DRM_ERROR("[drm] Buffer too small for color buffer %d "
3445
"(need %lu have %lu) !\n", i, size,
3446
radeon_bo_size(track->cb[i].robj));
3447
DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
3448
i, track->cb[i].pitch, track->cb[i].cpp,
3449
track->cb[i].offset, track->maxy);
3453
track->cb_dirty = false;
3455
if (track->zb_dirty && track->z_enabled) {
3456
if (track->zb.robj == NULL) {
3457
DRM_ERROR("[drm] No buffer for z buffer !\n");
3460
size = track->zb.pitch * track->zb.cpp * track->maxy;
3461
size += track->zb.offset;
3462
if (size > radeon_bo_size(track->zb.robj)) {
3463
DRM_ERROR("[drm] Buffer too small for z buffer "
3464
"(need %lu have %lu) !\n", size,
3465
radeon_bo_size(track->zb.robj));
3466
DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
3467
track->zb.pitch, track->zb.cpp,
3468
track->zb.offset, track->maxy);
3472
track->zb_dirty = false;
3474
if (track->aa_dirty && track->aaresolve) {
3475
if (track->aa.robj == NULL) {
3476
DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
3479
/* I believe the format comes from colorbuffer0. */
3480
size = track->aa.pitch * track->cb[0].cpp * track->maxy;
3481
size += track->aa.offset;
3482
if (size > radeon_bo_size(track->aa.robj)) {
3483
DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
3484
"(need %lu have %lu) !\n", i, size,
3485
radeon_bo_size(track->aa.robj));
3486
DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
3487
i, track->aa.pitch, track->cb[0].cpp,
3488
track->aa.offset, track->maxy);
3492
track->aa_dirty = false;
3494
prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
3495
if (track->vap_vf_cntl & (1 << 14)) {
3496
nverts = track->vap_alt_nverts;
3498
nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
3500
switch (prim_walk) {
3502
for (i = 0; i < track->num_arrays; i++) {
3503
size = track->arrays[i].esize * track->max_indx * 4;
3504
if (track->arrays[i].robj == NULL) {
3505
DRM_ERROR("(PW %u) Vertex array %u no buffer "
3506
"bound\n", prim_walk, i);
3509
if (size > radeon_bo_size(track->arrays[i].robj)) {
3510
dev_err(rdev->dev, "(PW %u) Vertex array %u "
3511
"need %lu dwords have %lu dwords\n",
3512
prim_walk, i, size >> 2,
3513
radeon_bo_size(track->arrays[i].robj)
3515
DRM_ERROR("Max indices %u\n", track->max_indx);
3521
for (i = 0; i < track->num_arrays; i++) {
3522
size = track->arrays[i].esize * (nverts - 1) * 4;
3523
if (track->arrays[i].robj == NULL) {
3524
DRM_ERROR("(PW %u) Vertex array %u no buffer "
3525
"bound\n", prim_walk, i);
3528
if (size > radeon_bo_size(track->arrays[i].robj)) {
3529
dev_err(rdev->dev, "(PW %u) Vertex array %u "
3530
"need %lu dwords have %lu dwords\n",
3531
prim_walk, i, size >> 2,
3532
radeon_bo_size(track->arrays[i].robj)
3539
size = track->vtx_size * nverts;
3540
if (size != track->immd_dwords) {
3541
DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
3542
track->immd_dwords, size);
3543
DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
3544
nverts, track->vtx_size);
3549
DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
3554
if (track->tex_dirty) {
3555
track->tex_dirty = false;
3556
return r100_cs_track_texture_check(rdev, track);
3561
void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
3565
track->cb_dirty = true;
3566
track->zb_dirty = true;
3567
track->tex_dirty = true;
3568
track->aa_dirty = true;
3570
if (rdev->family < CHIP_R300) {
3572
if (rdev->family <= CHIP_RS200)
3573
track->num_texture = 3;
3575
track->num_texture = 6;
3577
track->separate_cube = 1;
3580
track->num_texture = 16;
3582
track->separate_cube = 0;
3583
track->aaresolve = false;
3584
track->aa.robj = NULL;
3587
for (i = 0; i < track->num_cb; i++) {
3588
track->cb[i].robj = NULL;
3589
track->cb[i].pitch = 8192;
3590
track->cb[i].cpp = 16;
3591
track->cb[i].offset = 0;
3593
track->z_enabled = true;
3594
track->zb.robj = NULL;
3595
track->zb.pitch = 8192;
3597
track->zb.offset = 0;
3598
track->vtx_size = 0x7F;
3599
track->immd_dwords = 0xFFFFFFFFUL;
3600
track->num_arrays = 11;
3601
track->max_indx = 0x00FFFFFFUL;
3602
for (i = 0; i < track->num_arrays; i++) {
3603
track->arrays[i].robj = NULL;
3604
track->arrays[i].esize = 0x7F;
3606
for (i = 0; i < track->num_texture; i++) {
3607
track->textures[i].compress_format = R100_TRACK_COMP_NONE;
3608
track->textures[i].pitch = 16536;
3609
track->textures[i].width = 16536;
3610
track->textures[i].height = 16536;
3611
track->textures[i].width_11 = 1 << 11;
3612
track->textures[i].height_11 = 1 << 11;
3613
track->textures[i].num_levels = 12;
3614
if (rdev->family <= CHIP_RS200) {
3615
track->textures[i].tex_coord_type = 0;
3616
track->textures[i].txdepth = 0;
3618
track->textures[i].txdepth = 16;
3619
track->textures[i].tex_coord_type = 1;
3621
track->textures[i].cpp = 64;
3622
track->textures[i].robj = NULL;
3623
/* CS IB emission code makes sure texture unit are disabled */
3624
track->textures[i].enabled = false;
3625
track->textures[i].lookup_disable = false;
3626
track->textures[i].roundup_w = true;
3627
track->textures[i].roundup_h = true;
3628
if (track->separate_cube)
3629
for (face = 0; face < 5; face++) {
3630
track->textures[i].cube_info[face].robj = NULL;
3631
track->textures[i].cube_info[face].width = 16536;
3632
track->textures[i].cube_info[face].height = 16536;
3633
track->textures[i].cube_info[face].offset = 0;
3638
int r100_ring_test(struct radeon_device *rdev)
3645
r = radeon_scratch_get(rdev, &scratch);
3647
DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3650
WREG32(scratch, 0xCAFEDEAD);
3651
r = radeon_ring_lock(rdev, 2);
3653
DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3654
radeon_scratch_free(rdev, scratch);
3657
radeon_ring_write(rdev, PACKET0(scratch, 0));
3658
radeon_ring_write(rdev, 0xDEADBEEF);
3659
radeon_ring_unlock_commit(rdev);
3660
for (i = 0; i < rdev->usec_timeout; i++) {
3661
tmp = RREG32(scratch);
3662
if (tmp == 0xDEADBEEF) {
3667
if (i < rdev->usec_timeout) {
3668
DRM_INFO("ring test succeeded in %d usecs\n", i);
3670
DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3674
radeon_scratch_free(rdev, scratch);
3678
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3680
radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
3681
radeon_ring_write(rdev, ib->gpu_addr);
3682
radeon_ring_write(rdev, ib->length_dw);
3685
int r100_ib_test(struct radeon_device *rdev)
3687
struct radeon_ib *ib;
3693
r = radeon_scratch_get(rdev, &scratch);
3695
DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3698
WREG32(scratch, 0xCAFEDEAD);
3699
r = radeon_ib_get(rdev, &ib);
3703
ib->ptr[0] = PACKET0(scratch, 0);
3704
ib->ptr[1] = 0xDEADBEEF;
3705
ib->ptr[2] = PACKET2(0);
3706
ib->ptr[3] = PACKET2(0);
3707
ib->ptr[4] = PACKET2(0);
3708
ib->ptr[5] = PACKET2(0);
3709
ib->ptr[6] = PACKET2(0);
3710
ib->ptr[7] = PACKET2(0);
3712
r = radeon_ib_schedule(rdev, ib);
3714
radeon_scratch_free(rdev, scratch);
3715
radeon_ib_free(rdev, &ib);
3718
r = radeon_fence_wait(ib->fence, false);
3722
for (i = 0; i < rdev->usec_timeout; i++) {
3723
tmp = RREG32(scratch);
3724
if (tmp == 0xDEADBEEF) {
3729
if (i < rdev->usec_timeout) {
3730
DRM_INFO("ib test succeeded in %u usecs\n", i);
3732
DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3736
radeon_scratch_free(rdev, scratch);
3737
radeon_ib_free(rdev, &ib);
3741
void r100_ib_fini(struct radeon_device *rdev)
3743
radeon_ib_pool_fini(rdev);
3746
int r100_ib_init(struct radeon_device *rdev)
3750
r = radeon_ib_pool_init(rdev);
3752
dev_err(rdev->dev, "failed initializing IB pool (%d).\n", r);
3756
r = r100_ib_test(rdev);
3758
dev_err(rdev->dev, "failed testing IB (%d).\n", r);
3765
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3767
/* Shutdown CP we shouldn't need to do that but better be safe than
3770
rdev->cp.ready = false;
3771
WREG32(R_000740_CP_CSQ_CNTL, 0);
3773
/* Save few CRTC registers */
3774
save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
3775
save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
3776
save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
3777
save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
3778
if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3779
save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
3780
save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
3783
/* Disable VGA aperture access */
3784
WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
3785
/* Disable cursor, overlay, crtc */
3786
WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
3787
WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
3788
S_000054_CRTC_DISPLAY_DIS(1));
3789
WREG32(R_000050_CRTC_GEN_CNTL,
3790
(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
3791
S_000050_CRTC_DISP_REQ_EN_B(1));
3792
WREG32(R_000420_OV0_SCALE_CNTL,
3793
C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
3794
WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
3795
if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3796
WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
3797
S_000360_CUR2_LOCK(1));
3798
WREG32(R_0003F8_CRTC2_GEN_CNTL,
3799
(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
3800
S_0003F8_CRTC2_DISPLAY_DIS(1) |
3801
S_0003F8_CRTC2_DISP_REQ_EN_B(1));
3802
WREG32(R_000360_CUR2_OFFSET,
3803
C_000360_CUR2_LOCK & save->CUR2_OFFSET);
3807
void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3809
/* Update base address for crtc */
3810
WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3811
if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3812
WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3814
/* Restore CRTC registers */
3815
WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
3816
WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
3817
WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
3818
if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3819
WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
3823
void r100_vga_render_disable(struct radeon_device *rdev)
3827
tmp = RREG8(R_0003C2_GENMO_WT);
3828
WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3831
static void r100_debugfs(struct radeon_device *rdev)
3835
r = r100_debugfs_mc_info_init(rdev);
3837
dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3840
static void r100_mc_program(struct radeon_device *rdev)
3842
struct r100_mc_save save;
3844
/* Stops all mc clients */
3845
r100_mc_stop(rdev, &save);
3846
if (rdev->flags & RADEON_IS_AGP) {
3847
WREG32(R_00014C_MC_AGP_LOCATION,
3848
S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3849
S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3850
WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
3851
if (rdev->family > CHIP_RV200)
3852
WREG32(R_00015C_AGP_BASE_2,
3853
upper_32_bits(rdev->mc.agp_base) & 0xff);
3855
WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3856
WREG32(R_000170_AGP_BASE, 0);
3857
if (rdev->family > CHIP_RV200)
3858
WREG32(R_00015C_AGP_BASE_2, 0);
3860
/* Wait for mc idle */
3861
if (r100_mc_wait_for_idle(rdev))
3862
dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3863
/* Program MC, should be a 32bits limited address space */
3864
WREG32(R_000148_MC_FB_LOCATION,
3865
S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3866
S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3867
r100_mc_resume(rdev, &save);
3870
void r100_clock_startup(struct radeon_device *rdev)
3874
if (radeon_dynclks != -1 && radeon_dynclks)
3875
radeon_legacy_set_clock_gating(rdev, 1);
3876
/* We need to force on some of the block */
3877
tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3878
tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3879
if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3880
tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3881
WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3884
static int r100_startup(struct radeon_device *rdev)
3888
/* set common regs */
3889
r100_set_common_regs(rdev);
3891
r100_mc_program(rdev);
3893
r100_clock_startup(rdev);
3894
/* Initialize GART (initialize after TTM so we can allocate
3895
* memory through TTM but finalize after TTM) */
3896
r100_enable_bm(rdev);
3897
if (rdev->flags & RADEON_IS_PCI) {
3898
r = r100_pci_gart_enable(rdev);
3903
/* allocate wb buffer */
3904
r = radeon_wb_init(rdev);
3910
rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3911
/* 1M ring buffer */
3912
r = r100_cp_init(rdev, 1024 * 1024);
3914
dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3917
r = r100_ib_init(rdev);
3919
dev_err(rdev->dev, "failed initializing IB (%d).\n", r);
3925
int r100_resume(struct radeon_device *rdev)
3927
/* Make sur GART are not working */
3928
if (rdev->flags & RADEON_IS_PCI)
3929
r100_pci_gart_disable(rdev);
3930
/* Resume clock before doing reset */
3931
r100_clock_startup(rdev);
3932
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
3933
if (radeon_asic_reset(rdev)) {
3934
dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3935
RREG32(R_000E40_RBBM_STATUS),
3936
RREG32(R_0007C0_CP_STAT));
3939
radeon_combios_asic_init(rdev->ddev);
3940
/* Resume clock after posting */
3941
r100_clock_startup(rdev);
3942
/* Initialize surface registers */
3943
radeon_surface_init(rdev);
3944
return r100_startup(rdev);
3947
int r100_suspend(struct radeon_device *rdev)
3949
r100_cp_disable(rdev);
3950
radeon_wb_disable(rdev);
3951
r100_irq_disable(rdev);
3952
if (rdev->flags & RADEON_IS_PCI)
3953
r100_pci_gart_disable(rdev);
3957
void r100_fini(struct radeon_device *rdev)
3960
radeon_wb_fini(rdev);
3962
radeon_gem_fini(rdev);
3963
if (rdev->flags & RADEON_IS_PCI)
3964
r100_pci_gart_fini(rdev);
3965
radeon_agp_fini(rdev);
3966
radeon_irq_kms_fini(rdev);
3967
radeon_fence_driver_fini(rdev);
3968
radeon_bo_fini(rdev);
3969
radeon_atombios_fini(rdev);
3975
* Due to how kexec works, it can leave the hw fully initialised when it
3976
* boots the new kernel. However doing our init sequence with the CP and
3977
* WB stuff setup causes GPU hangs on the RN50 at least. So at startup
3978
* do some quick sanity checks and restore sane values to avoid this
3981
void r100_restore_sanity(struct radeon_device *rdev)
3985
tmp = RREG32(RADEON_CP_CSQ_CNTL);
3987
WREG32(RADEON_CP_CSQ_CNTL, 0);
3989
tmp = RREG32(RADEON_CP_RB_CNTL);
3991
WREG32(RADEON_CP_RB_CNTL, 0);
3993
tmp = RREG32(RADEON_SCRATCH_UMSK);
3995
WREG32(RADEON_SCRATCH_UMSK, 0);
3999
int r100_init(struct radeon_device *rdev)
4003
/* Register debugfs file specific to this group of asics */
4006
r100_vga_render_disable(rdev);
4007
/* Initialize scratch registers */
4008
radeon_scratch_init(rdev);
4009
/* Initialize surface registers */
4010
radeon_surface_init(rdev);
4011
/* sanity check some register to avoid hangs like after kexec */
4012
r100_restore_sanity(rdev);
4013
/* TODO: disable VGA need to use VGA request */
4015
if (!radeon_get_bios(rdev)) {
4016
if (ASIC_IS_AVIVO(rdev))
4019
if (rdev->is_atom_bios) {
4020
dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4023
r = radeon_combios_init(rdev);
4027
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
4028
if (radeon_asic_reset(rdev)) {
4030
"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4031
RREG32(R_000E40_RBBM_STATUS),
4032
RREG32(R_0007C0_CP_STAT));
4034
/* check if cards are posted or not */
4035
if (radeon_boot_test_post_card(rdev) == false)
4037
/* Set asic errata */
4039
/* Initialize clocks */
4040
radeon_get_clock_info(rdev->ddev);
4041
/* initialize AGP */
4042
if (rdev->flags & RADEON_IS_AGP) {
4043
r = radeon_agp_init(rdev);
4045
radeon_agp_disable(rdev);
4048
/* initialize VRAM */
4051
r = radeon_fence_driver_init(rdev);
4054
r = radeon_irq_kms_init(rdev);
4057
/* Memory manager */
4058
r = radeon_bo_init(rdev);
4061
if (rdev->flags & RADEON_IS_PCI) {
4062
r = r100_pci_gart_init(rdev);
4066
r100_set_safe_registers(rdev);
4067
rdev->accel_working = true;
4068
r = r100_startup(rdev);
4070
/* Somethings want wront with the accel init stop accel */
4071
dev_err(rdev->dev, "Disabling GPU acceleration\n");
4073
radeon_wb_fini(rdev);
4075
radeon_irq_kms_fini(rdev);
4076
if (rdev->flags & RADEON_IS_PCI)
4077
r100_pci_gart_fini(rdev);
4078
rdev->accel_working = false;
4083
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
4085
if (reg < rdev->rmmio_size)
4086
return readl(((void __iomem *)rdev->rmmio) + reg);
4088
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4089
return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4093
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
4095
if (reg < rdev->rmmio_size)
4096
writel(v, ((void __iomem *)rdev->rmmio) + reg);
4098
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4099
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4103
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4105
if (reg < rdev->rio_mem_size)
4106
return ioread32(rdev->rio_mem + reg);
4108
iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4109
return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4113
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4115
if (reg < rdev->rio_mem_size)
4116
iowrite32(v, rdev->rio_mem + reg);
4118
iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4119
iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);