2
2
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3
3
* Copyright 2015-2021 Advanced Micro Devices, Inc.
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the "Software"),
8
* to deal in the Software without restriction, including without limitation
9
* on the rights to use, copy, modify, merge, publish, distribute, sub
10
* license, and/or sell copies of the Software, and to permit persons to whom
11
* the Software is furnished to do so, subject to the following conditions:
13
* The above copyright notice and this permission notice (including the next
14
* paragraph) shall be included in all copies or substantial portions of the
17
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23
* USE OR OTHER DEALINGS IN THE SOFTWARE.
5
* SPDX-License-Identifier: MIT
26
8
#include "si_build_pm4.h"
70
52
(G_009910_PIPE_CONFIG(tile_mode) << 26);
74
bool si_translate_format_to_hw(struct si_context *sctx, enum pipe_format format, unsigned *hw_fmt, unsigned *hw_type)
76
const struct util_format_description *desc = util_format_description(format);
77
*hw_fmt = si_translate_colorformat(sctx->gfx_level, format);
79
int firstchan = util_format_get_first_non_void_channel(format);
80
if (firstchan == -1 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
81
*hw_type = V_028C70_NUMBER_FLOAT;
83
*hw_type = V_028C70_NUMBER_UNORM;
84
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
85
*hw_type = V_028C70_NUMBER_SRGB;
86
else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
87
if (desc->channel[firstchan].pure_integer) {
88
*hw_type = V_028C70_NUMBER_SINT;
90
assert(desc->channel[firstchan].normalized);
91
*hw_type = V_028C70_NUMBER_SNORM;
93
} else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
94
if (desc->channel[firstchan].pure_integer) {
95
*hw_type = V_028C70_NUMBER_UINT;
97
assert(desc->channel[firstchan].normalized);
98
*hw_type = V_028C70_NUMBER_UNORM;
108
bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_texture *sdst, struct si_texture *ssrc, bool is_v5)
55
static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_texture *sdst,
56
struct si_texture *ssrc)
58
bool is_v5 = sctx->gfx_level >= GFX10;
59
bool is_v5_2 = sctx->gfx_level >= GFX10_3;
110
60
unsigned bpp = sdst->surface.bpe;
111
61
uint64_t dst_address = sdst->buffer.gpu_address + sdst->surface.u.gfx9.surf_offset;
112
62
uint64_t src_address = ssrc->buffer.gpu_address + ssrc->surface.u.gfx9.surf_offset;
122
72
if (ssrc->surface.is_linear && sdst->surface.is_linear) {
123
73
struct radeon_cmdbuf *cs = sctx->sdma_cs;
125
unsigned bytes = src_pitch * copy_height * bpp;
75
uint64_t bytes = (uint64_t)src_pitch * copy_height * bpp;
127
if (!(bytes < (1u << 22)))
77
if (!(bytes <= (1u << (is_v5_2 ? 30 : 22))))
130
80
src_address += ssrc->surface.u.gfx9.offset[0];
151
101
unsigned tiled_width = DIV_ROUND_UP(tiled->buffer.b.b.width0, tiled->surface.blk_w);
152
102
unsigned tiled_height = DIV_ROUND_UP(tiled->buffer.b.b.height0, tiled->surface.blk_h);
153
103
unsigned linear_pitch = linear == ssrc ? src_pitch : dst_pitch;
154
unsigned linear_slice_pitch = ((uint64_t)linear->surface.u.gfx9.surf_slice_size) / bpp;
104
uint64_t linear_slice_pitch = linear->surface.u.gfx9.surf_slice_size / bpp;
155
105
uint64_t tiled_address = tiled == ssrc ? src_address : dst_address;
156
106
uint64_t linear_address = linear == ssrc ? src_address : dst_address;
157
107
struct radeon_cmdbuf *cs = sctx->sdma_cs;
162
112
linear_address += linear->surface.u.gfx9.offset[0];
164
114
/* Check if everything fits into the bitfields */
165
if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) &&
166
linear_pitch < (1 << 14) && linear_slice_pitch < (1 << 28) &&
167
copy_width < (1 << 14) && copy_height < (1 << 14)))
115
if (!(tiled_width <= (1 << 14) && tiled_height <= (1 << 14) &&
116
linear_pitch <= (1 << 14) && linear_slice_pitch <= (1 << 28) &&
117
copy_width <= (1 << 14) && copy_height <= (1 << 14)))
170
120
radeon_begin(cs);
196
unsigned hw_fmt, hw_type;
146
unsigned hw_fmt = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format);
147
unsigned hw_type = ac_get_cb_number_type(tiled->buffer.b.b.format);
197
148
uint64_t md_address = tiled_address + tiled->surface.meta_offset;
199
si_translate_format_to_hw(sctx, tiled->buffer.b.b.format, &hw_fmt, &hw_type);
201
150
/* Add metadata */
202
151
radeon_emit((uint32_t)md_address);
203
152
radeon_emit((uint32_t)(md_address >> 32));
358
307
* starts reading from an address preceding linear_address!!!
360
309
start_linear_address =
361
linear->surface.u.legacy.level[0].offset_256B * 256;
310
(uint64_t)linear->surface.u.legacy.level[0].offset_256B * 256;
363
312
end_linear_address =
364
linear->surface.u.legacy.level[0].offset_256B * 256 +
365
bpp * ((copy_height - 1) * linear_pitch + copy_width);
313
(uint64_t)linear->surface.u.legacy.level[0].offset_256B * 256 +
314
bpp * ((copy_height - 1) * (uint64_t)linear_pitch + copy_width);
367
316
if ((0 + copy_width) % granularity)
368
317
end_linear_address += granularity - (0 + copy_width) % granularity;
429
378
if (!si_prepare_for_sdma_copy(sctx, dst, src))
432
/* Decompress DCC on older chips */
433
if (vi_dcc_enabled(src, 0) && sctx->gfx_level < GFX10)
434
si_decompress_dcc(sctx, src);
435
381
/* TODO: DCC compression is possible on GFX10+. See si_set_mutable_tex_desc_fields for
436
382
* additional constraints.
437
* For now, the only use-case of SDMA is DRI_PRIME tiled->linear copy, so this is not
383
* For now, the only use-case of SDMA is DRI_PRIME tiled->linear copy, and linear dst
439
386
if (vi_dcc_enabled(dst, 0))
389
/* Decompress DCC on older chips where SDMA can't read it. */
390
if (vi_dcc_enabled(src, 0) && sctx->gfx_level < GFX10)
391
si_decompress_dcc(sctx, src);
442
393
/* Always flush the gfx queue to get the winsys to handle the dependencies for us. */
443
394
si_flush_gfx_cs(sctx, 0, NULL);
455
if (!si_sdma_v4_v5_copy_texture(sctx, dst, src, sctx->gfx_level >= GFX10))
406
if (!si_sdma_v4_v5_copy_texture(sctx, dst, src))