~mmach/netext73/mesa-ryzen


Viewing changes to src/panfrost/compiler/bifrost_compile.c

  • Committer: mmach
  • Date: 2023-11-02 21:31:35 UTC
  • Revision ID: netbit73@gmail.com-20231102213135-18d4tzh7tj0uz752

@@ old 1219, new 1219 @@
    bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32);
 }

-/* Extracts an atomic opcode */
-
 static enum bi_atom_opc
-bi_atom_opc_for_nir(nir_intrinsic_op op)
+bi_atom_opc_for_nir(nir_atomic_op op)
 {
+   /* clang-format off */
    switch (op) {
-   case nir_intrinsic_global_atomic_add:
-   case nir_intrinsic_shared_atomic_add:
-   case nir_intrinsic_image_atomic_add:
-      return BI_ATOM_OPC_AADD;
-
-   case nir_intrinsic_global_atomic_imin:
-   case nir_intrinsic_shared_atomic_imin:
-   case nir_intrinsic_image_atomic_imin:
-      return BI_ATOM_OPC_ASMIN;
-
-   case nir_intrinsic_global_atomic_umin:
-   case nir_intrinsic_shared_atomic_umin:
-   case nir_intrinsic_image_atomic_umin:
-      return BI_ATOM_OPC_AUMIN;
-
-   case nir_intrinsic_global_atomic_imax:
-   case nir_intrinsic_shared_atomic_imax:
-   case nir_intrinsic_image_atomic_imax:
-      return BI_ATOM_OPC_ASMAX;
-
-   case nir_intrinsic_global_atomic_umax:
-   case nir_intrinsic_shared_atomic_umax:
-   case nir_intrinsic_image_atomic_umax:
-      return BI_ATOM_OPC_AUMAX;
-
-   case nir_intrinsic_global_atomic_and:
-   case nir_intrinsic_shared_atomic_and:
-   case nir_intrinsic_image_atomic_and:
-      return BI_ATOM_OPC_AAND;
-
-   case nir_intrinsic_global_atomic_or:
-   case nir_intrinsic_shared_atomic_or:
-   case nir_intrinsic_image_atomic_or:
-      return BI_ATOM_OPC_AOR;
-
-   case nir_intrinsic_global_atomic_xor:
-   case nir_intrinsic_shared_atomic_xor:
-   case nir_intrinsic_image_atomic_xor:
-      return BI_ATOM_OPC_AXOR;
-
-   default:
-      unreachable("Unexpected computational atomic");
+   case nir_atomic_op_iadd: return BI_ATOM_OPC_AADD;
+   case nir_atomic_op_imin: return BI_ATOM_OPC_ASMIN;
+   case nir_atomic_op_umin: return BI_ATOM_OPC_AUMIN;
+   case nir_atomic_op_imax: return BI_ATOM_OPC_ASMAX;
+   case nir_atomic_op_umax: return BI_ATOM_OPC_AUMAX;
+   case nir_atomic_op_iand: return BI_ATOM_OPC_AAND;
+   case nir_atomic_op_ior:  return BI_ATOM_OPC_AOR;
+   case nir_atomic_op_ixor: return BI_ATOM_OPC_AXOR;
+   default: unreachable("Unexpected computational atomic");
    }
+   /* clang-format on */
 }

 /* Optimized unary atomics are available with an implied #1 argument */
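
For reference, a minimal sketch (not part of this change) of how the rewritten helper is expected to be fed: with NIR's unified atomics the operation is carried as an index on the intrinsic rather than in the intrinsic opcode, and nir_intrinsic_atomic_op() together with the nir_atomic_op_* enum used above are the upstream names this mapping relies on. The function name below is hypothetical.

/* Hypothetical helper, shown only to illustrate how the unified atomic op
 * reaches bi_atom_opc_for_nir(); the real call sites appear later in this
 * change (nir_intrinsic_shared_atomic / nir_intrinsic_global_atomic). */
static enum bi_atom_opc
example_atom_opc_for_instr(const nir_intrinsic_instr *instr)
{
   assert(instr->intrinsic == nir_intrinsic_shared_atomic ||
          instr->intrinsic == nir_intrinsic_global_atomic);

   nir_atomic_op op = nir_intrinsic_atomic_op(instr);
   return bi_atom_opc_for_nir(op);
}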
@@ old 1392, new 1359 @@
    bi_split_dest(b, instr->dest);
 }

-static bi_index
-bi_emit_lea_image(bi_builder *b, nir_intrinsic_instr *instr)
+static void
+bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr)
 {
    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
    bool array = nir_intrinsic_image_array(instr);
@@ old 1411, new 1378 @@
    bi_index coords = bi_src_index(&instr->src[1]);
    bi_index xy = bi_emit_image_coord(b, coords, 0, coord_comps, array);
    bi_index zw = bi_emit_image_coord(b, coords, 1, coord_comps, array);
-   bi_index dest = bi_temp(b->shader);

    if (b->shader->arch >= 9 && nir_src_is_const(instr->src[0])) {
       bi_instr *I = bi_lea_tex_imm_to(b, dest, xy, zw, false,
@@ old 1431, new 1397 @@
    }

    bi_emit_cached_split(b, dest, 3 * 32);
+}
+
+static bi_index
+bi_emit_lea_image(bi_builder *b, nir_intrinsic_instr *instr)
+{
+   bi_index dest = bi_temp(b->shader);
+   bi_emit_lea_image_to(b, dest, instr);
    return dest;
 }

@@ old 1455, new 1428 @@

 static void
 bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg,
-                      nir_intrinsic_op intrinsic)
+                      nir_atomic_op op)
 {
-   enum bi_atom_opc opc = bi_atom_opc_for_nir(intrinsic);
+   enum bi_atom_opc opc = bi_atom_opc_for_nir(op);
    enum bi_atom_opc post_opc = opc;
    bool bifrost = b->shader->arch <= 8;

@@ old 1483, new 1456 @@
    }
 }

-/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5
- * gl_FragCoord.z = ld_vary(fragz)
- * gl_FragCoord.w = ld_vary(fragw)
- */
-
 static void
-bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr)
+bi_emit_load_frag_coord_zw(bi_builder *b, bi_index dst, unsigned channel)
 {
-   bi_index src[4] = {};
-
-   for (unsigned i = 0; i < 2; ++i) {
-      src[i] = bi_fadd_f32(b, bi_u16_to_f32(b, bi_half(bi_preload(b, 59), i)),
-                           bi_imm_f32(0.5f));
-   }
-
-   for (unsigned i = 0; i < 2; ++i) {
-      src[2 + i] = bi_ld_var_special(
-         b, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
-         BI_UPDATE_CLOBBER,
-         (i == 0) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
-         BI_VECSIZE_NONE);
-   }
-
-   bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32);
+   bi_ld_var_special_to(
+      b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
+      BI_UPDATE_CLOBBER,
+      (channel == 2) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
+      BI_VECSIZE_NONE);
 }

 static void
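
A brief usage sketch, based on the intrinsic handling further down in this change: gl_FragCoord.xy is now produced from load_pixel_coord, while z and w arrive through load_frag_coord_zw, whose component index (2 for z, 3 for w) selects the varying fetched by the new helper. The wrapper name below is hypothetical.

/* Hypothetical wrapper illustrating how the new helper is driven from the
 * load_frag_coord_zw intrinsic; the actual case lives in the big intrinsic
 * switch below. */
static void
example_emit_frag_coord_zw(bi_builder *b, bi_index dst,
                           nir_intrinsic_instr *instr)
{
   assert(instr->intrinsic == nir_intrinsic_load_frag_coord_zw);

   /* component 2 selects FRAG_Z, component 3 selects FRAG_W */
   bi_emit_load_frag_coord_zw(b, dst, nir_intrinsic_component(instr));
}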
@@ old 1605, new 1562 @@
       break;

    case nir_intrinsic_scoped_barrier:
-      if (nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE) {
+      if (nir_intrinsic_execution_scope(instr) != SCOPE_NONE) {
          assert(b->shader->stage != MESA_SHADER_FRAGMENT);
-         assert(nir_intrinsic_execution_scope(instr) > NIR_SCOPE_SUBGROUP &&
+         assert(nir_intrinsic_execution_scope(instr) > SCOPE_SUBGROUP &&
                 "todo: subgroup barriers (different divergence rules)");
          bi_barrier(b);
       }
@@ old 1616, new 1573 @@
        */
       break;

-   case nir_intrinsic_shared_atomic_add:
-   case nir_intrinsic_shared_atomic_imin:
-   case nir_intrinsic_shared_atomic_umin:
-   case nir_intrinsic_shared_atomic_imax:
-   case nir_intrinsic_shared_atomic_umax:
-   case nir_intrinsic_shared_atomic_and:
-   case nir_intrinsic_shared_atomic_or:
-   case nir_intrinsic_shared_atomic_xor: {
-      assert(nir_src_bit_size(instr->src[1]) == 32);
-
-      bi_index addr = bi_src_index(&instr->src[0]);
-      bi_index addr_hi;
-
-      if (b->shader->arch >= 9) {
-         bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
-         addr = bi_collect_v2i32(b, addr, addr_hi);
-      } else {
-         addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
-         bi_emit_cached_split(b, addr, 64);
-      }
-
-      bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]),
-                            instr->intrinsic);
-      bi_split_dest(b, instr->dest);
-      break;
-   }
-
-   case nir_intrinsic_image_atomic_add:
-   case nir_intrinsic_image_atomic_imin:
-   case nir_intrinsic_image_atomic_umin:
-   case nir_intrinsic_image_atomic_imax:
-   case nir_intrinsic_image_atomic_umax:
-   case nir_intrinsic_image_atomic_and:
-   case nir_intrinsic_image_atomic_or:
-   case nir_intrinsic_image_atomic_xor:
-      assert(nir_src_bit_size(instr->src[3]) == 32);
-
-      bi_emit_atomic_i32_to(b, dst, bi_emit_lea_image(b, instr),
-                            bi_src_index(&instr->src[3]), instr->intrinsic);
-      bi_split_dest(b, instr->dest);
-      break;
-
-   case nir_intrinsic_global_atomic_add:
-   case nir_intrinsic_global_atomic_imin:
-   case nir_intrinsic_global_atomic_umin:
-   case nir_intrinsic_global_atomic_imax:
-   case nir_intrinsic_global_atomic_umax:
-   case nir_intrinsic_global_atomic_and:
-   case nir_intrinsic_global_atomic_or:
-   case nir_intrinsic_global_atomic_xor:
-      assert(nir_src_bit_size(instr->src[1]) == 32);
-
-      bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
-                            bi_src_index(&instr->src[1]), instr->intrinsic);
-
-      bi_split_dest(b, instr->dest);
+   case nir_intrinsic_shared_atomic: {
+      nir_atomic_op op = nir_intrinsic_atomic_op(instr);
+
+      if (op == nir_atomic_op_xchg) {
+         bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
+                          BI_SEG_WLS);
+      } else {
+         assert(nir_src_bit_size(instr->src[1]) == 32);
+
+         bi_index addr = bi_src_index(&instr->src[0]);
+         bi_index addr_hi;
+
+         if (b->shader->arch >= 9) {
+            bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
+            addr = bi_collect_v2i32(b, addr, addr_hi);
+         } else {
+            addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
+            bi_emit_cached_split(b, addr, 64);
+         }
+
+         bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), op);
+      }
+
+      bi_split_dest(b, instr->dest);
+      break;
+   }
+
+   case nir_intrinsic_global_atomic: {
+      nir_atomic_op op = nir_intrinsic_atomic_op(instr);
+
+      if (op == nir_atomic_op_xchg) {
+         bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
+                          BI_SEG_NONE);
+      } else {
+         assert(nir_src_bit_size(instr->src[1]) == 32);
+
+         bi_emit_atomic_i32_to(b, dst, bi_src_index(&instr->src[0]),
+                               bi_src_index(&instr->src[1]), op);
+      }
+
+      bi_split_dest(b, instr->dest);
+      break;
+   }
+
+   case nir_intrinsic_image_texel_address:
+      bi_emit_lea_image_to(b, dst, instr);
       break;

    case nir_intrinsic_image_load:
@@ old 1682, new 1629 @@
       bi_emit_image_store(b, instr);
       break;

-   case nir_intrinsic_global_atomic_exchange:
-      bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
-                       BI_SEG_NONE);
-      bi_split_dest(b, instr->dest);
-      break;
-
-   case nir_intrinsic_image_atomic_exchange:
-      bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
-                       BI_SEG_NONE);
-      bi_split_dest(b, instr->dest);
-      break;
-
-   case nir_intrinsic_shared_atomic_exchange:
-      bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
-                       BI_SEG_WLS);
-      bi_split_dest(b, instr->dest);
-      break;
-
-   case nir_intrinsic_global_atomic_comp_swap:
+   case nir_intrinsic_global_atomic_swap:
       bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
                           &instr->src[2], BI_SEG_NONE);
       bi_split_dest(b, instr->dest);
       break;

-   case nir_intrinsic_image_atomic_comp_swap:
-      bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), &instr->src[3],
-                          &instr->src[4], BI_SEG_NONE);
-      bi_split_dest(b, instr->dest);
-      break;
-
-   case nir_intrinsic_shared_atomic_comp_swap:
+   case nir_intrinsic_shared_atomic_swap:
       bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), &instr->src[1],
                           &instr->src[2], BI_SEG_WLS);
       bi_split_dest(b, instr->dest);
       break;

-   case nir_intrinsic_load_frag_coord:
-      bi_emit_load_frag_coord(b, instr);
+   case nir_intrinsic_load_pixel_coord:
+      /* Vectorized load of the preloaded i16vec2 */
+      bi_mov_i32_to(b, dst, bi_preload(b, 59));
+      break;
+
+   case nir_intrinsic_load_frag_coord_zw:
+      bi_emit_load_frag_coord_zw(b, dst, nir_intrinsic_component(instr));
       break;

    case nir_intrinsic_load_converted_output_pan:
@@ old 2426, new 2354 @@
       bi_fma_to(b, sz, dst, s0, s1, bi_negzero());
       break;

-   case nir_op_fsub:
-      s1 = bi_neg(s1);
-      FALLTHROUGH;
    case nir_op_fadd:
       bi_fadd_to(b, sz, dst, s0, s1);
       break;
@@ old 2525, new 2450 @@
    case nir_op_extract_i8: {
       assert(comps == 1 && "should be scalarized");
       assert((src_sz == 16 || src_sz == 32) && "should be lowered");
-      unsigned byte = nir_src_as_uint(instr->src[1].src);
+      unsigned byte = nir_alu_src_as_uint(instr->src[1]);

       if (s0.swizzle == BI_SWIZZLE_H11) {
          assert(byte < 2);
@@ old 2549, new 2474 @@
    case nir_op_extract_i16: {
       assert(comps == 1 && "should be scalarized");
       assert(src_sz == 32 && "should be lowered");
-      unsigned half = nir_src_as_uint(instr->src[1].src);
+      unsigned half = nir_alu_src_as_uint(instr->src[1]);
       assert(half == 0 || half == 1);

       if (instr->op == nir_op_extract_i16)
@@ old 2561, new 2486 @@

    case nir_op_insert_u16: {
       assert(comps == 1 && "should be scalarized");
-      unsigned half = nir_src_as_uint(instr->src[1].src);
+      unsigned half = nir_alu_src_as_uint(instr->src[1]);
       assert(half == 0 || half == 1);

       if (half == 0)
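
The switch from nir_src_as_uint(instr->src[1].src) to nir_alu_src_as_uint(instr->src[1]) reads the constant operand through the ALU source, so the operand's swizzle is honoured rather than ignored. A rough sketch of the assumed semantics for the scalar operands used here (the helper name is hypothetical):

/* Assumed behaviour of nir_alu_src_as_uint() in the scalar case: pick the
 * swizzled component of the source before interpreting it as a constant. */
static uint64_t
example_alu_src_as_uint(nir_alu_src src)
{
   return nir_src_comp_as_uint(src.src, src.swizzle[0]);
}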
@@ old 4394, new 4319 @@
 }

 static nir_mem_access_size_align
-mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, uint32_t align,
+mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
+                         uint8_t input_bit_size, uint32_t align,
                          uint32_t align_offset, bool offset_is_const,
                          const void *cb_data)
 {
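
For context, a minimal sketch of a callback matching the updated prototype; the extra input_bit_size parameter presumably reports the bit size of the access before lowering. The policy below is a toy placeholder rather than the Bifrost rules implemented in this file, and it assumes the usual num_components/bit_size/align fields of nir_mem_access_size_align.

/* Toy callback with the new signature: clamp every access to 32-bit
 * components, at most a vec4.  Purely illustrative. */
static nir_mem_access_size_align
example_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes,
                      uint8_t input_bit_size, uint32_t align,
                      uint32_t align_offset, bool offset_is_const,
                      const void *cb_data)
{
   unsigned comps = CLAMP(bytes / 4, 1, 4);

   return (nir_mem_access_size_align){
      .num_components = comps,
      .bit_size = 32,
      .align = 4,
   };
}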
@@ old 4789, new 4715 @@
       NIR_PASS_V(nir, pan_nir_lower_store_component);
    }

-   NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes,
-              nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_constant |
-                 nir_var_mem_task_payload | nir_var_shader_temp |
-                 nir_var_function_temp | nir_var_mem_global |
-                 nir_var_mem_shared,
-              mem_access_size_align_cb, NULL);
+   nir_lower_mem_access_bit_sizes_options mem_size_options = {
+      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_constant |
+               nir_var_mem_task_payload | nir_var_shader_temp |
+               nir_var_function_temp | nir_var_mem_global | nir_var_mem_shared,
+      .callback = mem_access_size_align_cb,
+   };
+   NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &mem_size_options);

    NIR_PASS_V(nir, nir_lower_ssbo);
    NIR_PASS_V(nir, pan_lower_sample_pos);
    NIR_PASS_V(nir, nir_lower_bit_size, bi_lower_bit_size, NULL);
    NIR_PASS_V(nir, nir_lower_64bit_phis);
-   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
    NIR_PASS_V(nir, pan_nir_lower_64bit_intrin);
    NIR_PASS_V(nir, pan_lower_helper_invocation);
    NIR_PASS_V(nir, nir_lower_int64);
@@ old 4819, new 4745 @@
                  .lower_index_to_offset = true,
               });

+   NIR_PASS_V(nir, nir_lower_image_atomics_to_global);
    NIR_PASS_V(nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
    NIR_PASS_V(nir, nir_lower_phis_to_scalar, true);
    NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
    NIR_PASS_V(nir, nir_lower_var_copies);
    NIR_PASS_V(nir, nir_lower_alu);
+   NIR_PASS_V(nir, nir_lower_frag_coord_to_pixel_coord);
 }

 static bi_context *
@@ old 4883, new 4811 @@

    ctx->allocated_vec = _mesa_hash_table_u64_create(ctx);

-   nir_foreach_function(func, nir) {
-      if (!func->impl)
-         continue;
-
-      nir_index_blocks(func->impl);
+   nir_foreach_function_impl(impl, nir) {
+      nir_index_blocks(impl);

       ctx->indexed_nir_blocks =
-         rzalloc_array(ctx, bi_block *, func->impl->num_blocks);
-
-      ctx->ssa_alloc += func->impl->ssa_alloc;
-      ctx->reg_alloc += func->impl->reg_alloc;
-
-      emit_cf_list(ctx, &func->impl->body);
+         rzalloc_array(ctx, bi_block *, impl->num_blocks);
+
+      ctx->ssa_alloc += impl->ssa_alloc;
+      ctx->reg_alloc += impl->reg_alloc;
+
+      emit_cf_list(ctx, &impl->body);
       bi_emit_phis_deferred(ctx);
       break; /* TODO: Multi-function shaders */
    }