34
35
template <typename Closure>
35
36
launch_calculator<Closure>::launch_calculator(void)
36
: properties(arch::device_properties()),
37
: properties(device_properties()),
37
38
attributes(closure_attributes<Closure>())
40
41
template <typename Closure>
41
launch_calculator<Closure>::launch_calculator(const arch::device_properties_t& properties, const arch::function_attributes_t& attributes)
42
launch_calculator<Closure>::launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes)
42
43
: properties(properties),
43
44
attributes(attributes)
46
47
template <typename Closure>
47
thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size(void)
49
thrust::pair<size_t, size_t> config = arch::default_block_configuration(properties, attributes);
48
template <typename UnaryFunction>
49
thrust::pair<size_t, size_t> launch_calculator<Closure>::default_block_configuration(UnaryFunction block_size_to_smem_size) const
51
// choose a block size
52
std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties, block_size_to_smem_size);
54
// choose a subscription rate
55
std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block;
57
return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor);
61
template <typename Closure>
62
thrust::pair<size_t, size_t> launch_calculator<Closure>::default_block_configuration(void) const
64
// choose a block size
65
std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties);
67
// choose a subscription rate
68
std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block;
70
return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor);
73
template <typename Closure>
74
thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size(void) const
76
thrust::pair<size_t, size_t> config = default_block_configuration();
50
77
return thrust::tuple<size_t,size_t,size_t>(config.second * properties.multiProcessorCount, config.first, 0);
53
80
template <typename Closure>
54
81
template <typename UnaryFunction>
55
thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size(UnaryFunction block_size_to_smem_size)
82
thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size(UnaryFunction block_size_to_smem_size) const
57
thrust::pair<size_t, size_t> config = arch::default_block_configuration(properties, attributes, block_size_to_smem_size);
84
thrust::pair<size_t, size_t> config = default_block_configuration(block_size_to_smem_size);
58
85
return thrust::tuple<size_t,size_t,size_t>(config.second * properties.multiProcessorCount, config.first, block_size_to_smem_size(config.first));
61
88
template <typename Closure>
62
thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size_available_smem(void)
89
thrust::tuple<size_t,size_t,size_t> launch_calculator<Closure>::with_variable_block_size_available_smem(void) const
64
thrust::pair<size_t, size_t> config = arch::default_block_configuration(properties, attributes);
65
size_t smem_per_block = arch::proportional_smem_allocation(properties, attributes, config.second);
91
thrust::pair<size_t, size_t> config = default_block_configuration();
92
size_t smem_per_block = proportional_smem_allocation(properties, attributes, config.second);
66
93
return thrust::tuple<size_t,size_t,size_t>(config.second * properties.multiProcessorCount, config.first, smem_per_block);