/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "codegen/nv50_ir_target_gm107.h"
#include "codegen/nv50_ir_lowering_gm107.h"

// NOTE(review): the matching "} // namespace nv50_ir" exists at the end of
// this file, so the namespace opener restored here is required for balance.
namespace nv50_ir {
// Factory entry point: build the GM107 (first-gen Maxwell) codegen target
// for the given chipset id. Caller takes ownership of the returned Target.
Target *getTargetGM107(unsigned int chipset)
{
   return new TargetGM107(chipset);
}
// BUILTINS / LIBRARY FUNCTIONS:

// lazyness -> will just hardcode everything for the time being

#include "lib/gm107.asm.h"
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
43
*code = (const uint32_t *)&gm107_builtin_code[0];
44
*size = sizeof(gm107_builtin_code);
48
TargetGM107::getBuiltinOffset(int builtin) const
50
assert(builtin < NVC0_BUILTIN_COUNT);
51
return gm107_builtin_offsets[builtin];
55
// Report whether operation `op` on data type `ty` is supported natively.
// NOTE(review): the extraction is truncated here — the return type, opening
// brace and the dispatch on `op` are missing (original line numbers jump
// 55 -> 66 -> 78); only this chipset-gated return survives. Recover the full
// body from upstream before editing.
TargetGM107::isOpSupported(operation op, DataType ty) const
66
// (the bare "66" above is extraction residue — an original line number)
return chipset >= NVISA_GM200_CHIPSET;
// Return true when an instruction supports the reuse flag. When supported, the
// hardware will use the operand reuse cache introduced since Maxwell, which
// should try to reduce bank conflicts by caching values for the subsequent
// instructions. Note that the next instructions have to use the same GPR id in
// the same operand slot.
// Decide whether `insn` may set the Maxwell operand reuse-cache flag.
// NOTE(review): heavily truncated by extraction — the return type, the switch
// that apparently dispatches on `cl`, and all return statements are missing
// (original line numbers jump 88 -> 96, 97 -> 106). Only the
// OPCLASS_BITFIELD arm's guard survives; the bare number lines are
// extraction residue. Recover the full body from upstream before editing.
TargetGM107::isReuseSupported(const Instruction *insn) const
86
const OpClass cl = getOpClass(insn->op);
88
// TODO: double-check!
96
case OPCLASS_BITFIELD:
97
// INSBF/EXTBF look like they are excluded from reuse — the consequent of
// this `if` is missing; confirm against upstream.
if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
// Return true when an instruction requires setting up a barrier because it
// doesn't operate at a fixed latency. Variable-latency instructions are memory
// operations, double precision operations, special function unit operations
// and other low-throughput instructions.
// Decide whether `insn` needs a scoreboard barrier (i.e. is variable latency).
// NOTE(review): heavily truncated by extraction — the return type, the switch
// on `cl`, most case bodies and every `return`/`break` are missing (original
// line numbers jump 115 -> 122, 123 -> 141, 141 -> 150, 150 -> 167, ...).
// Surviving lines are kept verbatim, including the bare number residue.
// Recover the full body from upstream before editing.
TargetGM107::isBarrierRequired(const Instruction *insn) const
113
const OpClass cl = getOpClass(insn->op);
115
// F64 work is variable latency per the header comment; the consequent of
// this `if` (presumably `return true;`) is missing.
if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
122
case OPCLASS_SURFACE:
123
case OPCLASS_TEXTURE:
141
case OPCLASS_BITFIELD:
150
case OPCLASS_CONTROL:
167
// Reading a special register that CS2R cannot handle is variable latency.
return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
173
// Non-float multiply/mad — consequent missing; presumably needs a barrier.
if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
174
!isFloatType(insn->dType))
177
case OPCLASS_CONVERT:
178
// Conversions not involving predicates — consequent missing here too.
if (insn->def(0).getFile() != FILE_PREDICATE &&
179
insn->src(0).getFile() != FILE_PREDICATE)
189
// Decide whether instructions `a` and `b` can be dual-issued.
// NOTE(review): only the signature survived extraction — the return type and
// the entire body (original lines ~188..194) are missing. Recover from
// upstream before editing.
TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
// Return the number of stall counts needed to complete a single instruction.
// On Maxwell GPUs, the pipeline depth is 6, but some instructions require
// a different number of stall counts, like memory operations.
// Return the stall counts needed for `insn` to complete.
// NOTE(review): heavily truncated by extraction — the return type, opening
// brace and most of the op dispatch are missing (original line numbers jump
// 201 -> 241, 245 -> 253, 254 -> 272). Bare number lines are extraction
// residue. Recover the full body from upstream before editing.
TargetGM107::getLatency(const Instruction *insn) const
201
// TODO: better values! This should be good enough for now though.
241
if (insn->dType != TYPE_F64)
245
// Special registers readable via CS2R are cheap (6); others cost 15.
return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
253
// CVT to/from a predicate — the consequent of this condition is missing.
if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
254
insn->src(0).getFile() == FILE_PREDICATE))
272
// Use the maximum number of stall counts for other instructions.
// Return the operand read latency, which is the number of stall counts before
// an instruction can read its sources. For memory operations like ATOM, LOAD
// and STORE, the memory access has to be indirect.
// Return the operand read latency for `insn` (see comment above).
// NOTE(review): heavily truncated by extraction — the return type, opening
// brace, every `return` and the tail of the switch are missing (original
// line numbers jump ~280 -> 306, 307 -> 313, 316 -> 318, 319 -> end). The
// visible shape: a predicate special case, then extra latency for indirect
// accesses keyed by the source's memory file. Bare number lines are
// extraction residue. Recover the full body from upstream before editing.
TargetGM107::getReadLatency(const Instruction *insn) const
306
if (insn->def(0).getFile() != FILE_PREDICATE &&
307
insn->src(0).getFile() != FILE_PREDICATE)
313
if (insn->src(0).isIndirect(0)) {
314
switch (insn->src(0).getFile()) {
315
case FILE_MEMORY_SHARED:
316
case FILE_MEMORY_CONST:
318
case FILE_MEMORY_GLOBAL:
319
case FILE_MEMORY_LOCAL:
338
TargetGM107::isCS2RSV(SVSemantic sv) const
340
return sv == SV_CLOCK;
344
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
346
if (stage == CG_STAGE_PRE_SSA) {
347
GM107LoweringPass pass(prog);
348
return pass.run(prog, false, true);
350
if (stage == CG_STAGE_POST_RA) {
351
NVC0LegalizePostRA pass(prog);
352
return pass.run(prog, false, true);
354
if (stage == CG_STAGE_SSA) {
355
GM107LegalizeSSA pass;
356
return pass.run(prog, false, true);
362
TargetGM107::getCodeEmitter(Program::Type type)
364
return createCodeEmitterGM107(type);
367
} // namespace nv50_ir