1
/* -----------------------------------------------------------------------------
3
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4
Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
14
The above copyright notice and this permission notice shall be included
15
in all copies or substantial portions of the Software.
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
-------------------------------------------------------------------------- */
27
#include "clusterfit.h"
28
#include "colourset.h"
29
#include "colourblock.h"
34
ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
35
: ColourFit( colours, flags )
37
// set the iteration count
38
m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
40
// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
42
m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
44
m_metric = VEC4_CONST( 1.0f );
46
// initialise the best error
47
m_besterror = VEC4_CONST( FLT_MAX );
50
int const count = m_colours->GetCount();
51
Vec3 const* values = m_colours->GetPoints();
53
// get the covariance matrix
54
Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
56
// compute the principle component
57
m_principle = ComputePrincipleComponent( covariance );
60
bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
63
int const count = m_colours->GetCount();
64
Vec3 const* values = m_colours->GetPoints();
66
// build the list of dot products
68
u8* order = ( u8* )m_order + 16*iteration;
69
for( int i = 0; i < count; ++i )
71
dps[i] = Dot( values[i], axis );
75
// stable sort using them
76
for( int i = 0; i < count; ++i )
78
for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
80
std::swap( dps[j], dps[j - 1] );
81
std::swap( order[j], order[j - 1] );
85
// check this ordering is unique
86
for( int it = 0; it < iteration; ++it )
88
u8 const* prev = ( u8* )m_order + 16*it;
90
for( int i = 0; i < count; ++i )
92
if( order[i] != prev[i] )
102
// copy the ordering and weight all the points
103
Vec3 const* unweighted = m_colours->GetPoints();
104
float const* weights = m_colours->GetWeights();
105
m_xsum_wsum = VEC4_CONST( 0.0f );
106
for( int i = 0; i < count; ++i )
109
Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
110
Vec4 w( weights[j] );
112
m_points_weights[i] = x;
118
void ClusterFit::Compress3( void* block )
121
int const count = m_colours->GetCount();
122
Vec4 const two = VEC4_CONST( 2.0 );
123
Vec4 const one = VEC4_CONST( 1.0f );
124
Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
125
Vec4 const zero = VEC4_CONST( 0.0f );
126
Vec4 const half = VEC4_CONST( 0.5f );
127
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
128
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
130
// prepare an ordering using the principle axis
131
ConstructOrdering( m_principle, 0 );
133
// check all possible clusters and iterate on the total order
134
Vec4 beststart = VEC4_CONST( 0.0f );
135
Vec4 bestend = VEC4_CONST( 0.0f );
136
Vec4 besterror = m_besterror;
138
int bestiteration = 0;
139
int besti = 0, bestj = 0;
141
// loop over iterations (we avoid the case that all points in first or last cluster)
142
for( int iterationIndex = 0;; )
144
// first cluster [0,i) is at the start
145
Vec4 part0 = VEC4_CONST( 0.0f );
146
for( int i = 0; i < count; ++i )
148
// second cluster [i,j) is half along
149
Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
150
int jmin = ( i == 0 ) ? 1 : i;
151
for( int j = jmin;; )
153
// last cluster [j,count) is at the end
154
Vec4 part2 = m_xsum_wsum - part1 - part0;
156
// compute least squares terms directly
157
Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
158
Vec4 alpha2_sum = alphax_sum.SplatW();
160
Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
161
Vec4 beta2_sum = betax_sum.SplatW();
163
Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
165
// compute the least-squares optimal points
166
Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
167
Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
168
Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
171
a = Min( one, Max( zero, a ) );
172
b = Min( one, Max( zero, b ) );
173
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
174
b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
176
// compute the error (we skip the constant xxsum)
177
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
178
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
179
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
180
Vec4 e4 = MultiplyAdd( two, e3, e1 );
182
// apply the metric to the error term
183
Vec4 e5 = e4*m_metric;
184
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
186
// keep the solution if it wins
187
if( CompareAnyLessThan( error, besterror ) )
194
bestiteration = iterationIndex;
200
part1 += m_points_weights[j];
205
part0 += m_points_weights[i];
208
// stop if we didn't improve in this iteration
209
if( bestiteration != iterationIndex )
212
// advance if possible
214
if( iterationIndex == m_iterationCount )
217
// stop if a new iteration is an ordering that has already been tried
218
Vec3 axis = ( bestend - beststart ).GetVec3();
219
if( !ConstructOrdering( axis, iterationIndex ) )
223
// save the block if necessary
224
if( CompareAnyLessThan( besterror, m_besterror ) )
227
u8 const* order = ( u8* )m_order + 16*bestiteration;
230
for( int m = 0; m < besti; ++m )
231
unordered[order[m]] = 0;
232
for( int m = besti; m < bestj; ++m )
233
unordered[order[m]] = 2;
234
for( int m = bestj; m < count; ++m )
235
unordered[order[m]] = 1;
237
m_colours->RemapIndices( unordered, bestindices );
240
WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
243
m_besterror = besterror;
247
void ClusterFit::Compress4( void* block )
250
int const count = m_colours->GetCount();
251
Vec4 const two = VEC4_CONST( 2.0f );
252
Vec4 const one = VEC4_CONST( 1.0f );
253
Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
254
Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
255
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
256
Vec4 const zero = VEC4_CONST( 0.0f );
257
Vec4 const half = VEC4_CONST( 0.5f );
258
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
259
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
261
// prepare an ordering using the principle axis
262
ConstructOrdering( m_principle, 0 );
264
// check all possible clusters and iterate on the total order
265
Vec4 beststart = VEC4_CONST( 0.0f );
266
Vec4 bestend = VEC4_CONST( 0.0f );
267
Vec4 besterror = m_besterror;
269
int bestiteration = 0;
270
int besti = 0, bestj = 0, bestk = 0;
272
// loop over iterations (we avoid the case that all points in first or last cluster)
273
for( int iterationIndex = 0;; )
275
// first cluster [0,i) is at the start
276
Vec4 part0 = VEC4_CONST( 0.0f );
277
for( int i = 0; i < count; ++i )
279
// second cluster [i,j) is one third along
280
Vec4 part1 = VEC4_CONST( 0.0f );
283
// third cluster [j,k) is two thirds along
284
Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
285
int kmin = ( j == 0 ) ? 1 : j;
286
for( int k = kmin;; )
288
// last cluster [k,count) is at the end
289
Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
291
// compute least squares terms directly
292
Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
293
Vec4 const alpha2_sum = alphax_sum.SplatW();
295
Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
296
Vec4 const beta2_sum = betax_sum.SplatW();
298
Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
300
// compute the least-squares optimal points
301
Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
302
Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
303
Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
306
a = Min( one, Max( zero, a ) );
307
b = Min( one, Max( zero, b ) );
308
a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
309
b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
311
// compute the error (we skip the constant xxsum)
312
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
313
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
314
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
315
Vec4 e4 = MultiplyAdd( two, e3, e1 );
317
// apply the metric to the error term
318
Vec4 e5 = e4*m_metric;
319
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
321
// keep the solution if it wins
322
if( CompareAnyLessThan( error, besterror ) )
330
bestiteration = iterationIndex;
336
part2 += m_points_weights[k];
343
part1 += m_points_weights[j];
348
part0 += m_points_weights[i];
351
// stop if we didn't improve in this iteration
352
if( bestiteration != iterationIndex )
355
// advance if possible
357
if( iterationIndex == m_iterationCount )
360
// stop if a new iteration is an ordering that has already been tried
361
Vec3 axis = ( bestend - beststart ).GetVec3();
362
if( !ConstructOrdering( axis, iterationIndex ) )
366
// save the block if necessary
367
if( CompareAnyLessThan( besterror, m_besterror ) )
370
u8 const* order = ( u8* )m_order + 16*bestiteration;
373
for( int m = 0; m < besti; ++m )
374
unordered[order[m]] = 0;
375
for( int m = besti; m < bestj; ++m )
376
unordered[order[m]] = 2;
377
for( int m = bestj; m < bestk; ++m )
378
unordered[order[m]] = 3;
379
for( int m = bestk; m < count; ++m )
380
unordered[order[m]] = 1;
382
m_colours->RemapIndices( unordered, bestindices );
385
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
388
m_besterror = besterror;
392
} // namespace squish