3
// ************************************************************************
6
// Manycore Performance-Portable Multidimensional Arrays
8
// Copyright (2012) Sandia Corporation
10
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
11
// the U.S. Government retains certain rights in this software.
13
// Redistribution and use in source and binary forms, with or without
14
// modification, are permitted provided that the following conditions are
17
// 1. Redistributions of source code must retain the above copyright
18
// notice, this list of conditions and the following disclaimer.
20
// 2. Redistributions in binary form must reproduce the above copyright
21
// notice, this list of conditions and the following disclaimer in the
22
// documentation and/or other materials provided with the distribution.
24
// 3. Neither the name of the Corporation nor the names of the
25
// contributors may be used to endorse or promote products derived from
26
// this software without specific prior written permission.
28
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
29
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
32
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
42
// ************************************************************************
47
#ifndef KOKKOS_SORT_HPP_
48
#define KOKKOS_SORT_HPP_
50
#include <Kokkos_Core.hpp>
58
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
61
template<class ValuesViewType>
62
struct CopyOp<ValuesViewType,1> {
63
template<class DstType, class SrcType>
64
KOKKOS_INLINE_FUNCTION
65
static void copy(DstType& dst, size_t i_dst,
66
SrcType& src, size_t i_src ) {
67
dst(i_dst) = src(i_src);
71
template<class ValuesViewType>
72
struct CopyOp<ValuesViewType,2> {
73
template<class DstType, class SrcType>
74
KOKKOS_INLINE_FUNCTION
75
static void copy(DstType& dst, size_t i_dst,
76
SrcType& src, size_t i_src ) {
77
for(int j = 0;j< (int) dst.dimension_1(); j++)
78
dst(i_dst,j) = src(i_src,j);
82
template<class ValuesViewType>
83
struct CopyOp<ValuesViewType,3> {
84
template<class DstType, class SrcType>
85
KOKKOS_INLINE_FUNCTION
86
static void copy(DstType& dst, size_t i_dst,
87
SrcType& src, size_t i_src ) {
88
for(int j = 0; j<dst.dimension_1(); j++)
89
for(int k = 0; k<dst.dimension_2(); k++)
90
dst(i_dst,j,k) = src(i_src,j,k);
95
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
96
class SizeType = typename KeyViewType::memory_space::size_type>
101
template<class ValuesViewType, class PermuteViewType, class CopyOp>
102
struct bin_sort_sort_functor {
103
typedef ExecutionSpace execution_space;
104
typedef typename ValuesViewType::non_const_type values_view_type;
105
typedef typename ValuesViewType::const_type const_values_view_type;
106
Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
107
typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
108
values_view_type sorted_values;
109
typename PermuteViewType::const_type sort_order;
110
bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
111
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}
113
KOKKOS_INLINE_FUNCTION
114
void operator() (const int& i) const {
115
//printf("Sort: %i %i\n",i,sort_order(i));
116
CopyOp::copy(sorted_values,i,values,sort_order(i));
120
typedef ExecutionSpace execution_space;
121
typedef BinSortOp bin_op_type;
123
struct bin_count_tag {};
124
struct bin_offset_tag {};
125
struct bin_binning_tag {};
126
struct bin_sort_bins_tag {};
129
typedef SizeType size_type;
130
typedef size_type value_type;
132
typedef Kokkos::View<size_type*, execution_space> offset_type;
133
typedef Kokkos::View<const int*, execution_space> bin_count_type;
136
typedef Kokkos::View<typename KeyViewType::const_data_type,
137
typename KeyViewType::array_layout,
138
typename KeyViewType::memory_space> const_key_view_type;
139
typedef Kokkos::View<typename KeyViewType::const_data_type,
140
typename KeyViewType::array_layout,
141
typename KeyViewType::memory_space,
142
Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;
144
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
145
typedef typename KeyViewType::const_value_type const_key_scalar;
148
const_key_view_type keys;
149
const_rnd_key_view_type keys_rnd;
154
offset_type bin_offsets;
156
Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
157
bin_count_type bin_count_const;
159
offset_type sort_order;
161
bool sort_within_bins;
165
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
166
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
167
bool sort_within_bins_ = false)
168
:keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {
170
bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
171
bin_count_const = bin_count_atomic;
172
bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
173
sort_order = offset_type("PermutationVector",keys.dimension_0());
174
sort_within_bins = sort_within_bins_;
177
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
178
void create_permute_vector() {
179
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
180
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
182
Kokkos::deep_copy(bin_count_atomic,0);
183
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);
186
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
189
// Sort a view with respect ot the first dimension using the permutation array
190
template<class ValuesViewType>
191
void sort(ValuesViewType values) {
192
ValuesViewType sorted_values = ValuesViewType("Copy",
193
values.dimension_0(),
194
values.dimension_1(),
195
values.dimension_2(),
196
values.dimension_3(),
197
values.dimension_4(),
198
values.dimension_5(),
199
values.dimension_6(),
200
values.dimension_7());
202
parallel_for(values.dimension_0(),
203
bin_sort_sort_functor<ValuesViewType, offset_type,
204
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
206
deep_copy(values,sorted_values);
209
// Get the permutation vector
210
KOKKOS_INLINE_FUNCTION
211
offset_type get_permute_vector() const { return sort_order;}
213
// Get the start offsets for each bin
214
KOKKOS_INLINE_FUNCTION
215
offset_type get_bin_offsets() const { return bin_offsets;}
217
// Get the count for each bin
218
KOKKOS_INLINE_FUNCTION
219
bin_count_type get_bin_count() const {return bin_count_const;}
222
KOKKOS_INLINE_FUNCTION
223
void operator() (const bin_count_tag& tag, const int& i) const {
224
bin_count_atomic(bin_op.bin(keys,i))++;
227
KOKKOS_INLINE_FUNCTION
228
void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const {
230
bin_offsets(i) = offset;
232
offset+=bin_count_const(i);
235
KOKKOS_INLINE_FUNCTION
236
void operator() (const bin_binning_tag& tag, const int& i) const {
237
const int bin = bin_op.bin(keys,i);
238
const int count = bin_count_atomic(bin)++;
240
sort_order(bin_offsets(bin) + count) = i;
243
KOKKOS_INLINE_FUNCTION
244
void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
246
int upper_bound = bin_offsets(i)+bin_count_const(i);
249
int old_idx = sort_order(bin_offsets(i));
251
for(int k=bin_offsets(i)+1; k<upper_bound; k++) {
252
new_idx = sort_order(k);
254
if(!bin_op(keys_rnd,old_idx,new_idx)) {
255
sort_order(k-1) = new_idx;
256
sort_order(k) = old_idx;
269
template<class KeyViewType>
270
struct DefaultBinOp1D {
273
typename KeyViewType::const_value_type range_;
274
typename KeyViewType::const_value_type min_;
276
//Construct BinOp with number of bins, minimum value and maxuimum value
277
DefaultBinOp1D(int max_bins, typename KeyViewType::const_value_type min,
278
typename KeyViewType::const_value_type max )
279
:max_bins_(max_bins+1),mul_(1.0*max_bins/(max-min)),range_(max-min),min_(min) {}
281
//Determine bin index from key value
282
template<class ViewType>
283
KOKKOS_INLINE_FUNCTION
284
int bin(ViewType& keys, const int& i) const {
285
return int(mul_*(keys(i)-min_));
288
//Return maximum bin index + 1
289
KOKKOS_INLINE_FUNCTION
290
int max_bins() const {
294
//Compare to keys within a bin if true new_val will be put before old_val
295
template<class ViewType, typename iType1, typename iType2>
296
KOKKOS_INLINE_FUNCTION
297
bool operator()(ViewType& keys, iType1& i1, iType2& i2) const {
298
return keys(i1)<keys(i2);
302
template<class KeyViewType>
303
struct DefaultBinOp3D {
306
typename KeyViewType::non_const_value_type range_[3];
307
typename KeyViewType::non_const_value_type min_[3];
309
DefaultBinOp3D(int max_bins[], typename KeyViewType::const_value_type min[],
310
typename KeyViewType::const_value_type max[] )
312
max_bins_[0] = max_bins[0]+1;
313
max_bins_[1] = max_bins[1]+1;
314
max_bins_[2] = max_bins[2]+1;
315
mul_[0] = 1.0*max_bins[0]/(max[0]-min[0]);
316
mul_[1] = 1.0*max_bins[1]/(max[1]-min[1]);
317
mul_[2] = 1.0*max_bins[2]/(max[2]-min[2]);
318
range_[0] = max[0]-min[0];
319
range_[1] = max[1]-min[1];
320
range_[2] = max[2]-min[2];
326
template<class ViewType>
327
KOKKOS_INLINE_FUNCTION
328
int bin(ViewType& keys, const int& i) const {
329
return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) +
330
int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) +
331
int(mul_[2]*(keys(i,2)-min_[2])));
334
KOKKOS_INLINE_FUNCTION
335
int max_bins() const {
336
return max_bins_[0]*max_bins_[1]*max_bins_[2];
339
template<class ViewType, typename iType1, typename iType2>
340
KOKKOS_INLINE_FUNCTION
341
bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const {
342
if (keys(i1,0)>keys(i2,0)) return true;
343
else if (keys(i1,0)==keys(i2,0)) {
344
if (keys(i1,1)>keys(i2,1)) return true;
345
else if (keys(i1,1)==keys(i2,2)) {
346
if (keys(i1,2)>keys(i2,2)) return true;
353
template<typename Scalar>
359
KOKKOS_INLINE_FUNCTION
366
KOKKOS_INLINE_FUNCTION
367
min_max (const min_max& val) {
373
KOKKOS_INLINE_FUNCTION
374
min_max operator = (const min_max& val) {
381
KOKKOS_INLINE_FUNCTION
382
void operator+= (const Scalar& val) {
384
min = min<val?min:val;
385
max = max>val?max:val;
393
KOKKOS_INLINE_FUNCTION
394
void operator+= (const min_max& val) {
395
if(init && val.init) {
396
min = min<val.min?min:val.min;
397
max = max>val.max?max:val.max;
407
KOKKOS_INLINE_FUNCTION
408
void operator+= (volatile const Scalar& val) volatile {
410
min = min<val?min:val;
411
max = max>val?max:val;
419
KOKKOS_INLINE_FUNCTION
420
void operator+= (volatile const min_max& val) volatile {
421
if(init && val.init) {
422
min = min<val.min?min:val.min;
423
max = max>val.max?max:val.max;
435
template<class ViewType>
436
struct min_max_functor {
437
typedef typename ViewType::execution_space execution_space;
439
typedef min_max<typename ViewType::non_const_value_type> value_type;
440
min_max_functor (const ViewType view_):view(view_) {
443
KOKKOS_INLINE_FUNCTION
444
void operator()(const size_t& i, value_type& val) const {
449
template<class ViewType>
450
bool try_std_sort(ViewType view) {
451
bool possible = true;
454
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
455
possible = possible && (ViewType::Rank == 1);
456
possible = possible && (stride[0] == 1);
458
std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
465
template<class ViewType>
466
void sort(ViewType view, bool always_use_kokkos_sort = false) {
467
if(!always_use_kokkos_sort) {
468
if(SortImpl::try_std_sort(view)) return;
471
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
472
SortImpl::min_max<typename ViewType::non_const_value_type> val;
473
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
474
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
475
bin_sort.create_permute_vector();
479
/*template<class ViewType, class Comparator>
480
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {