53
53
<a name="l00026"></a>00026 <span class="comment"></span>
54
54
<a name="l00027"></a>00027 <span class="comment">*/</span>
55
55
<a name="l00028"></a>00028
56
<a name="l00029"></a>00029 <span class="keyword">namespace </span>bm
57
<a name="l00030"></a>00030 {
58
<a name="l00031"></a>00031
59
<a name="l00032"></a>00032
60
<a name="l00033"></a><a class="code" href="a00099.html#ae064a0440f9730f9661e51e6089dd853">00033</a> <span class="preprocessor">#define BM_MINISET_GAPLEN (bm::gap_len_table<true>::_len[0])</span>
61
<a name="l00034"></a><a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">00034</a> <span class="preprocessor"></span><span class="preprocessor">#define BM_MINISET_ARRSIZE(x) ((x / 32) + ( (x % 32) && 1 ))</span>
62
<a name="l00035"></a>00035 <span class="preprocessor"></span><span class="comment"></span>
63
<a name="l00036"></a>00036 <span class="comment">/*! @defgroup mset Small sets functionality</span>
64
<a name="l00037"></a>00037 <span class="comment"> * Templates in this group are used to keep block types in BM library.</span>
65
<a name="l00038"></a>00038 <span class="comment"> * Classes of this group can tune bvector template (MS parameter)</span>
66
<a name="l00039"></a>00039 <span class="comment"> * for best performance or minimal memory usage.</span>
67
<a name="l00040"></a>00040 <span class="comment"> * @ingroup bmagic</span>
68
<a name="l00041"></a>00041 <span class="comment"> * @{</span>
69
<a name="l00042"></a>00042 <span class="comment"> */</span>
70
<a name="l00043"></a>00043
71
<a name="l00044"></a>00044 <span class="comment"></span>
72
<a name="l00045"></a>00045 <span class="comment">/*!</span>
73
<a name="l00046"></a>00046 <span class="comment"> @brief Template class implements memory saving set functionality</span>
74
<a name="l00047"></a>00047 <span class="comment"> </span>
75
<a name="l00048"></a>00048 <span class="comment"> Template can be used as template parameter for bvector if we </span>
76
<a name="l00049"></a>00049 <span class="comment"> want to tune bvector for minimal memory consumption.</span>
77
<a name="l00050"></a>00050 <span class="comment"></span>
78
<a name="l00051"></a>00051 <span class="comment"> @sa bvmini</span>
79
<a name="l00052"></a>00052 <span class="comment">*/</span>
80
<a name="l00053"></a><a class="code" href="a00074.html">00053</a> <span class="keyword">template</span> <<span class="keyword">class</span> A, <span class="keywordtype">size_t</span> N> <span class="keyword">class </span><a class="code" href="a00074.html" title="Template class implements memory saving set functionality.">miniset</a>
81
<a name="l00054"></a>00054 {
82
<a name="l00055"></a>00055 <span class="keyword">public</span>:
56
<a name="l00029"></a>00029
57
<a name="l00030"></a>00030
58
<a name="l00031"></a>00031 <span class="keyword">namespace </span>bm
59
<a name="l00032"></a>00032 {
60
<a name="l00033"></a>00033 <span class="comment"></span>
61
<a name="l00034"></a>00034 <span class="comment">/** @defgroup SSE2 Processor specific optimizations for SSE2 instructions</span>
62
<a name="l00035"></a>00035 <span class="comment"> * @ingroup bmagic</span>
63
<a name="l00036"></a>00036 <span class="comment"> */</span>
64
<a name="l00037"></a>00037
65
<a name="l00038"></a>00038 <span class="comment"></span>
66
<a name="l00039"></a>00039 <span class="comment">/*! </span>
67
<a name="l00040"></a>00040 <span class="comment"> @brief SSE2 reinitialization guard class</span>
68
<a name="l00041"></a>00041 <span class="comment"></span>
69
<a name="l00042"></a>00042 <span class="comment"> SSE2 requires to call _mm_empty() if we are intermixing</span>
70
<a name="l00043"></a>00043 <span class="comment"> MMX integer commands with floating point arithmetics.</span>
71
<a name="l00044"></a>00044 <span class="comment"> This class guards critical code fragments where SSE2 integer</span>
72
<a name="l00045"></a>00045 <span class="comment"> is used.</span>
73
<a name="l00046"></a>00046 <span class="comment"></span>
74
<a name="l00047"></a>00047 <span class="comment"> @ingroup SSE2</span>
75
<a name="l00048"></a>00048 <span class="comment">*/</span>
76
<a name="l00049"></a><a class="code" href="a00084.html">00049</a> <span class="keyword">class </span><a class="code" href="a00084.html" title="SSE2 reinitialization guard class.">sse_empty_guard</a>
77
<a name="l00050"></a>00050 {
78
<a name="l00051"></a>00051 <span class="keyword">public</span>:
79
<a name="l00052"></a><a class="code" href="a00084.html#a231af2137d8bd3aefc374982804ace24">00052</a> <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a> <a class="code" href="a00084.html#a231af2137d8bd3aefc374982804ace24">sse_empty_guard</a>()
80
<a name="l00053"></a>00053 {
81
<a name="l00054"></a>00054 _mm_empty();
82
<a name="l00055"></a>00055 }
83
83
<a name="l00056"></a>00056
84
<a name="l00057"></a><a class="code" href="a00074.html#a22e5b5c7d702203c5fa5b0480cd85c25">00057</a> <a class="code" href="a00074.html#a22e5b5c7d702203c5fa5b0480cd85c25">miniset</a>()
85
<a name="l00058"></a>00058 : m_buf(0),
86
<a name="l00059"></a>00059 m_type(1)
87
<a name="l00060"></a>00060 {}
88
<a name="l00061"></a>00061
89
<a name="l00062"></a><a class="code" href="a00074.html#a61e5961df48b9521392397a0863319ee">00062</a> <a class="code" href="a00074.html#a22e5b5c7d702203c5fa5b0480cd85c25">miniset</a>(<span class="keyword">const</span> <a class="code" href="a00074.html" title="Template class implements memory saving set functionality.">miniset</a>& mset)
90
<a name="l00063"></a>00063 {
91
<a name="l00064"></a>00064 <span class="keywordflow">if</span> (mset.m_buf)
92
<a name="l00065"></a>00065 {
93
<a name="l00066"></a>00066 <span class="keywordflow">if</span> (mset.m_type)
94
<a name="l00067"></a>00067 init_gapbuf(mset.m_buf);
95
<a name="l00068"></a>00068 <span class="keywordflow">else</span>
96
<a name="l00069"></a>00069 init_bitbuf(mset.m_buf);
97
<a name="l00070"></a>00070 }
98
<a name="l00071"></a>00071 <span class="keywordflow">else</span>
99
<a name="l00072"></a>00072 {
100
<a name="l00073"></a>00073 m_type = mset.m_type;
101
<a name="l00074"></a>00074 m_buf = 0;
102
<a name="l00075"></a>00075 }
103
<a name="l00076"></a>00076 }
104
<a name="l00077"></a>00077
105
<a name="l00078"></a><a class="code" href="a00074.html#a6e5569bf59dc0e96fd3637888f5e5fd1">00078</a> <a class="code" href="a00074.html#a6e5569bf59dc0e96fd3637888f5e5fd1">~miniset</a>()
106
<a name="l00079"></a>00079 {
107
<a name="l00080"></a>00080 <span class="keywordflow">if</span> (m_buf)
108
<a name="l00081"></a>00081 {
109
<a name="l00082"></a>00082 A::deallocate(m_buf, m_type ?
110
<a name="l00083"></a>00083 (<a class="code" href="a00099.html#ae064a0440f9730f9661e51e6089dd853">BM_MINISET_GAPLEN</a> / (<span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>) / <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">bm::gap_word_t</a>)))
111
<a name="l00084"></a>00084 :
112
<a name="l00085"></a>00085 (<a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">BM_MINISET_ARRSIZE</a>(N)));
113
<a name="l00086"></a>00086 }
114
<a name="l00087"></a>00087 }
115
<a name="l00088"></a>00088 <span class="comment"></span>
116
<a name="l00089"></a>00089 <span class="comment"> /// Checks if bit pos 1 or 0. Returns 0 if 0 and non zero otherwise.</span>
117
<a name="l00090"></a><a class="code" href="a00074.html#a187012a7ef52ff62748c5b3937e4578c">00090</a> <span class="comment"></span> <span class="keywordtype">unsigned</span> <a class="code" href="a00074.html#a187012a7ef52ff62748c5b3937e4578c" title="Checks if bit pos 1 or 0. Returns 0 if 0 and non zero otherwise.">test</a>(<a class="code" href="a00110.html#aa3824d882a037396370b16f2f0a8bf37">bm::id_t</a> n)<span class="keyword"> const </span>
118
<a name="l00091"></a>00091 <span class="keyword"> </span>{
119
<a name="l00092"></a>00092 <span class="keywordflow">return</span>
120
<a name="l00093"></a>00093 !m_buf ? 0
121
<a name="l00094"></a>00094 :
122
<a name="l00095"></a>00095 m_type ?
123
<a name="l00096"></a>00096 <a class="code" href="a00114.html#ga866352ef5986a7d2b709795d127b666b" title="Tests if bit = pos is true.">gap_test</a>((<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">gap_word_t</a>*)m_buf, n)
124
<a name="l00097"></a>00097 :
125
<a name="l00098"></a>00098 m_buf[n>><a class="code" href="a00110.html#a83d76bccf6fe3770f32d5ba11d2a37ad">bm::set_word_shift</a>] & (1<<(n & <a class="code" href="a00110.html#addbf345be3733d5e4575d71733ed1da8">bm::set_word_mask</a>));
126
<a name="l00099"></a>00099 }
127
<a name="l00100"></a>00100
128
<a name="l00101"></a><a class="code" href="a00074.html#a34d569e459720cb037a01d74bc6cccac">00101</a> <span class="keywordtype">void</span> <span class="keyword">set</span>(<a class="code" href="a00110.html#aa3824d882a037396370b16f2f0a8bf37">bm::id_t</a> n, <span class="keywordtype">bool</span> val=<span class="keyword">true</span>)
129
<a name="l00102"></a>00102 {
130
<a name="l00103"></a>00103 <span class="keywordflow">if</span> (m_type == 0)
131
<a name="l00104"></a>00104 {
132
<a name="l00105"></a>00105 <span class="keywordflow">if</span> (!m_buf)
133
<a name="l00106"></a>00106 {
134
<a name="l00107"></a>00107 <span class="keywordflow">if</span> (!val) <span class="keywordflow">return</span>;
135
<a name="l00108"></a>00108 init_bitbuf(0);
136
<a name="l00109"></a>00109 }
84
<a name="l00057"></a><a class="code" href="a00084.html#a5d197a685ce1f87a1cc01b047960377b">00057</a> <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a> <a class="code" href="a00084.html#a5d197a685ce1f87a1cc01b047960377b">~sse_empty_guard</a>()
85
<a name="l00058"></a>00058 {
86
<a name="l00059"></a>00059 _mm_empty();
87
<a name="l00060"></a>00060 }
88
<a name="l00061"></a>00061 };
89
<a name="l00062"></a>00062
90
<a name="l00063"></a>00063
91
<a name="l00064"></a>00064 <span class="comment"></span>
92
<a name="l00065"></a>00065 <span class="comment">/*! </span>
93
<a name="l00066"></a>00066 <span class="comment"> @brief XOR array elements to specified mask</span>
94
<a name="l00067"></a>00067 <span class="comment"> *dst = *src ^ mask</span>
95
<a name="l00068"></a>00068 <span class="comment"></span>
96
<a name="l00069"></a>00069 <span class="comment"> @ingroup SSE2</span>
97
<a name="l00070"></a>00070 <span class="comment">*/</span>
98
<a name="l00071"></a>00071 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
99
<a name="l00072"></a><a class="code" href="a00122.html#ga75c6ddeb0d8a279caa92341878309b50">00072</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#ga75c6ddeb0d8a279caa92341878309b50" title="XOR array elements to specified mask dst = *src ^ mask.">sse2_xor_arr_2_mask</a>(__m128i* BMRESTRICT dst,
100
<a name="l00073"></a>00073 <span class="keyword">const</span> __m128i* BMRESTRICT src,
101
<a name="l00074"></a>00074 <span class="keyword">const</span> __m128i* BMRESTRICT src_end,
102
<a name="l00075"></a>00075 <a class="code" href="a00115.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a> mask)
103
<a name="l00076"></a>00076 {
104
<a name="l00077"></a>00077 __m128i xmm2 = _mm_set_epi32(mask, mask, mask, mask);
105
<a name="l00078"></a>00078 <span class="keywordflow">do</span>
106
<a name="l00079"></a>00079 {
107
<a name="l00080"></a>00080 __m128i xmm1 = _mm_load_si128(src);
108
<a name="l00081"></a>00081
109
<a name="l00082"></a>00082 xmm1 = _mm_xor_si128(xmm1, xmm2);
110
<a name="l00083"></a>00083 _mm_store_si128(dst, xmm1);
111
<a name="l00084"></a>00084 ++dst;
112
<a name="l00085"></a>00085 ++src;
113
<a name="l00086"></a>00086
114
<a name="l00087"></a>00087 } <span class="keywordflow">while</span> (src < src_end);
115
<a name="l00088"></a>00088 }
116
<a name="l00089"></a>00089
117
<a name="l00090"></a>00090 <span class="comment"></span>
118
<a name="l00091"></a>00091 <span class="comment">/*! </span>
119
<a name="l00092"></a>00092 <span class="comment"> @brief Inverts array elements and NOT them to specified mask</span>
120
<a name="l00093"></a>00093 <span class="comment"> *dst = ~*src & mask</span>
121
<a name="l00094"></a>00094 <span class="comment"></span>
122
<a name="l00095"></a>00095 <span class="comment"> @ingroup SSE2</span>
123
<a name="l00096"></a>00096 <span class="comment">*/</span>
124
<a name="l00097"></a>00097 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
125
<a name="l00098"></a><a class="code" href="a00122.html#gab7b21f448684c4d84927792661e67ed5">00098</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#gab7b21f448684c4d84927792661e67ed5" title="Inverts array elements and NOT them to specified mask dst = ~*src &amp; mask.">sse2_andnot_arr_2_mask</a>(__m128i* BMRESTRICT dst,
126
<a name="l00099"></a>00099 <span class="keyword">const</span> __m128i* BMRESTRICT src,
127
<a name="l00100"></a>00100 <span class="keyword">const</span> __m128i* BMRESTRICT src_end,
128
<a name="l00101"></a>00101 <a class="code" href="a00115.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a> mask)
129
<a name="l00102"></a>00102 {
130
<a name="l00103"></a>00103 __m128i xmm2 = _mm_set_epi32(mask, mask, mask, mask);
131
<a name="l00104"></a>00104 <span class="keywordflow">do</span>
132
<a name="l00105"></a>00105 {
133
<a name="l00106"></a>00106 <span class="comment">//_mm_prefetch((const char*)(src)+1024, _MM_HINT_NTA);</span>
134
<a name="l00107"></a>00107 <span class="comment">//_mm_prefetch((const char*)(src)+1088, _MM_HINT_NTA);</span>
135
<a name="l00108"></a>00108
136
<a name="l00109"></a>00109 __m128i xmm1 = _mm_load_si128(src);
137
137
<a name="l00110"></a>00110
138
<a name="l00111"></a>00111 <span class="keywordtype">unsigned</span> nword = n >> <a class="code" href="a00110.html#a83d76bccf6fe3770f32d5ba11d2a37ad">bm::set_word_shift</a>;
139
<a name="l00112"></a>00112 <span class="keywordtype">unsigned</span> mask = unsigned(1) << (n & <a class="code" href="a00110.html#addbf345be3733d5e4575d71733ed1da8">bm::set_word_mask</a>);
140
<a name="l00113"></a>00113
141
<a name="l00114"></a>00114 val ? (m_buf[nword] |= mask) : (m_buf[nword] &= ~mask);
142
<a name="l00115"></a>00115 }
143
<a name="l00116"></a>00116 <span class="keywordflow">else</span>
144
<a name="l00117"></a>00117 {
145
<a name="l00118"></a>00118 <span class="keywordflow">if</span> (!m_buf)
146
<a name="l00119"></a>00119 {
147
<a name="l00120"></a>00120 <span class="keywordflow">if</span> (!val) <span class="keywordflow">return</span>;
148
<a name="l00121"></a>00121 init_gapbuf(0);
149
<a name="l00122"></a>00122 }
150
<a name="l00123"></a>00123
151
<a name="l00124"></a>00124 <span class="keywordtype">unsigned</span> is_set;
152
<a name="l00125"></a>00125 <span class="keywordtype">unsigned</span> new_block_len =
153
<a name="l00126"></a>00126 <a class="code" href="a00114.html#ga69186bd13bda27e04b3e33683ff884c2" title="Abstract distance(similarity) operation for GAP buffers. Receives functor F as a...">gap_set_value</a>(val, (<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">gap_word_t</a>*)m_buf, n, &is_set);
154
<a name="l00127"></a>00127
155
<a name="l00128"></a>00128 <span class="keywordflow">if</span> (new_block_len > <span class="keywordtype">unsigned</span>(<a class="code" href="a00099.html#ae064a0440f9730f9661e51e6089dd853">BM_MINISET_GAPLEN</a>-4))
156
<a name="l00129"></a>00129 {
157
<a name="l00130"></a>00130 convert_buf();
158
<a name="l00131"></a>00131 }
159
<a name="l00132"></a>00132 }
160
<a name="l00133"></a>00133 }
161
<a name="l00134"></a>00134
162
<a name="l00135"></a><a class="code" href="a00074.html#a2b74fe02080d4d515893e749624e7797">00135</a> <span class="keywordtype">unsigned</span> <a class="code" href="a00074.html#a2b74fe02080d4d515893e749624e7797">mem_used</a>()<span class="keyword"> const</span>
163
<a name="l00136"></a>00136 <span class="keyword"> </span>{
164
<a name="l00137"></a>00137 <span class="keywordflow">return</span> <span class="keyword">sizeof</span>(*this) +
165
<a name="l00138"></a>00138 m_buf ?
166
<a name="l00139"></a>00139 (m_type ? (<a class="code" href="a00099.html#ae064a0440f9730f9661e51e6089dd853">BM_MINISET_GAPLEN</a> * <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">gap_word_t</a>))
167
<a name="l00140"></a>00140 : (<a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">BM_MINISET_ARRSIZE</a>(N) * <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>)))
168
<a name="l00141"></a>00141 : 0;
169
<a name="l00142"></a>00142 }
170
<a name="l00143"></a>00143
171
<a name="l00144"></a><a class="code" href="a00074.html#af94007643783b752eed263ac72ca8a5c">00144</a> <span class="keywordtype">void</span> <a class="code" href="a00074.html#af94007643783b752eed263ac72ca8a5c">swap</a>(<a class="code" href="a00074.html" title="Template class implements memory saving set functionality.">miniset</a>& mset)
172
<a name="l00145"></a>00145 {
173
<a name="l00146"></a>00146 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* buftmp = m_buf;
174
<a name="l00147"></a>00147 m_buf = mset.m_buf;
175
<a name="l00148"></a>00148 mset.m_buf = buftmp;
176
<a name="l00149"></a>00149 <span class="keywordtype">unsigned</span> typetmp = m_type;
177
<a name="l00150"></a>00150 m_type = mset.m_type;
178
<a name="l00151"></a>00151 mset.m_type = typetmp;
179
<a name="l00152"></a>00152 }
180
<a name="l00153"></a>00153
138
<a name="l00111"></a>00111 xmm1 = _mm_andnot_si128(xmm1, xmm2); <span class="comment">// xmm1 = (~xmm1) & xmm2 </span>
139
<a name="l00112"></a>00112 _mm_store_si128(dst, xmm1);
140
<a name="l00113"></a>00113 ++dst;
141
<a name="l00114"></a>00114 ++src;
142
<a name="l00115"></a>00115
143
<a name="l00116"></a>00116 } <span class="keywordflow">while</span> (src < src_end);
144
<a name="l00117"></a>00117 }
145
<a name="l00118"></a>00118 <span class="comment"></span>
146
<a name="l00119"></a>00119 <span class="comment">/*! </span>
147
<a name="l00120"></a>00120 <span class="comment"> @brief AND array elements against another array</span>
148
<a name="l00121"></a>00121 <span class="comment"> *dst &= *src</span>
149
<a name="l00122"></a>00122 <span class="comment"></span>
150
<a name="l00123"></a>00123 <span class="comment"> @ingroup SSE2</span>
151
<a name="l00124"></a>00124 <span class="comment">*/</span>
152
<a name="l00125"></a>00125 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
153
<a name="l00126"></a><a class="code" href="a00122.html#ga795b544f311409a55da4ee61a3cd939a">00126</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#ga795b544f311409a55da4ee61a3cd939a" title="AND array elements against another array dst &amp;= *src.">sse2_and_arr</a>(__m128i* BMRESTRICT dst,
154
<a name="l00127"></a>00127 <span class="keyword">const</span> __m128i* BMRESTRICT src,
155
<a name="l00128"></a>00128 <span class="keyword">const</span> __m128i* BMRESTRICT src_end)
156
<a name="l00129"></a>00129 {
157
<a name="l00130"></a>00130 __m128i xmm1, xmm2;
158
<a name="l00131"></a>00131 <span class="keywordflow">do</span>
159
<a name="l00132"></a>00132 {
160
<a name="l00133"></a>00133 _mm_prefetch((<span class="keyword">const</span> <span class="keywordtype">char</span>*)(src)+512, _MM_HINT_NTA);
161
<a name="l00134"></a>00134
162
<a name="l00135"></a>00135 xmm1 = _mm_load_si128(src++);
163
<a name="l00136"></a>00136 xmm2 = _mm_load_si128(dst);
164
<a name="l00137"></a>00137 xmm1 = _mm_and_si128(xmm1, xmm2);
165
<a name="l00138"></a>00138 _mm_store_si128(dst++, xmm1);
166
<a name="l00139"></a>00139
167
<a name="l00140"></a>00140 xmm1 = _mm_load_si128(src++);
168
<a name="l00141"></a>00141 xmm2 = _mm_load_si128(dst);
169
<a name="l00142"></a>00142 xmm1 = _mm_and_si128(xmm1, xmm2);
170
<a name="l00143"></a>00143 _mm_store_si128(dst++, xmm1);
171
<a name="l00144"></a>00144
172
<a name="l00145"></a>00145 xmm1 = _mm_load_si128(src++);
173
<a name="l00146"></a>00146 xmm2 = _mm_load_si128(dst);
174
<a name="l00147"></a>00147 xmm1 = _mm_and_si128(xmm1, xmm2);
175
<a name="l00148"></a>00148 _mm_store_si128(dst++, xmm1);
176
<a name="l00149"></a>00149
177
<a name="l00150"></a>00150 xmm1 = _mm_load_si128(src++);
178
<a name="l00151"></a>00151 xmm2 = _mm_load_si128(dst);
179
<a name="l00152"></a>00152 xmm1 = _mm_and_si128(xmm1, xmm2);
180
<a name="l00153"></a>00153 _mm_store_si128(dst++, xmm1);
181
181
<a name="l00154"></a>00154
182
<a name="l00155"></a>00155 <span class="keyword">private</span>:
182
<a name="l00155"></a>00155 } <span class="keywordflow">while</span> (src < src_end);
183
183
<a name="l00156"></a>00156
184
<a name="l00157"></a>00157 <span class="keywordtype">void</span> init_bitbuf(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* buf)
185
<a name="l00158"></a>00158 {
186
<a name="l00159"></a>00159 <span class="keywordtype">unsigned</span> arr_size = <a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">BM_MINISET_ARRSIZE</a>(N);
187
<a name="l00160"></a>00160 m_buf = A::allocate(arr_size, 0);
188
<a name="l00161"></a>00161 <span class="keywordflow">if</span> (buf)
189
<a name="l00162"></a>00162 {
190
<a name="l00163"></a>00163 ::memcpy(m_buf, buf, arr_size * <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>));
191
<a name="l00164"></a>00164 }
192
<a name="l00165"></a>00165 <span class="keywordflow">else</span>
193
<a name="l00166"></a>00166 {
194
<a name="l00167"></a>00167 ::memset(m_buf, 0, arr_size * <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>));
195
<a name="l00168"></a>00168 }
196
<a name="l00169"></a>00169 m_type = 0;
197
<a name="l00170"></a>00170 }
198
<a name="l00171"></a>00171
199
<a name="l00172"></a>00172 <span class="keywordtype">void</span> init_gapbuf(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* buf)
184
<a name="l00157"></a>00157 }
185
<a name="l00158"></a>00158
186
<a name="l00159"></a>00159 <span class="comment"></span>
187
<a name="l00160"></a>00160 <span class="comment">/*! </span>
188
<a name="l00161"></a>00161 <span class="comment"> @brief OR array elements against another array</span>
189
<a name="l00162"></a>00162 <span class="comment"> *dst |= *src</span>
190
<a name="l00163"></a>00163 <span class="comment"></span>
191
<a name="l00164"></a>00164 <span class="comment"> @ingroup SSE2</span>
192
<a name="l00165"></a>00165 <span class="comment">*/</span>
193
<a name="l00166"></a>00166 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
194
<a name="l00167"></a><a class="code" href="a00122.html#ga3a7d61e4e8ad8791ab38fd1c3436aa67">00167</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#ga3a7d61e4e8ad8791ab38fd1c3436aa67" title="OR array elements against another array dst |= *src.">sse2_or_arr</a>(__m128i* BMRESTRICT dst,
195
<a name="l00168"></a>00168 <span class="keyword">const</span> __m128i* BMRESTRICT src,
196
<a name="l00169"></a>00169 <span class="keyword">const</span> __m128i* BMRESTRICT src_end)
197
<a name="l00170"></a>00170 {
198
<a name="l00171"></a>00171 __m128i xmm1, xmm2;
199
<a name="l00172"></a>00172 <span class="keywordflow">do</span>
200
200
<a name="l00173"></a>00173 {
201
<a name="l00174"></a>00174 <span class="keywordtype">unsigned</span> arr_size =
202
<a name="l00175"></a>00175 <a class="code" href="a00099.html#ae064a0440f9730f9661e51e6089dd853">BM_MINISET_GAPLEN</a> / (<span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>) / <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">bm::gap_word_t</a>));
203
<a name="l00176"></a>00176 m_buf = A::allocate(arr_size, 0);
204
<a name="l00177"></a>00177 <span class="keywordflow">if</span> (buf)
205
<a name="l00178"></a>00178 {
206
<a name="l00179"></a>00179 ::memcpy(m_buf, buf, arr_size * <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>));
207
<a name="l00180"></a>00180 }
208
<a name="l00181"></a>00181 <span class="keywordflow">else</span>
209
<a name="l00182"></a>00182 {
210
<a name="l00183"></a>00183 *m_buf = 0;
211
<a name="l00184"></a>00184 <a class="code" href="a00114.html#gaef53b2877ff369badd7bb25b26bb9029" title="Sets all bits to 0 or 1 (GAP).">gap_set_all</a>((<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">gap_word_t</a>*)m_buf, <a class="code" href="a00110.html#ad0b8714080144ac70197840ff96752b7">bm::gap_max_bits</a>, 0);
212
<a name="l00185"></a>00185 }
213
<a name="l00186"></a>00186 m_type = 1;
214
<a name="l00187"></a>00187 }
215
<a name="l00188"></a>00188
216
<a name="l00189"></a>00189 <span class="keywordtype">void</span> convert_buf()
217
<a name="l00190"></a>00190 {
218
<a name="l00191"></a>00191 <span class="keywordtype">unsigned</span> arr_size = <a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">BM_MINISET_ARRSIZE</a>(N);
219
<a name="l00192"></a>00192 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* buf = A::allocate(arr_size, 0);
220
<a name="l00193"></a>00193
221
<a name="l00194"></a>00194 <a class="code" href="a00114.html#ga4862f4dcdcb7c0575e2e2db9e5f2a849" title="GAP block to bitblock conversion.">gap_convert_to_bitset</a>(buf, (<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">gap_word_t</a>*) m_buf, arr_size);
222
<a name="l00195"></a>00195 arr_size =
223
<a name="l00196"></a>00196 <a class="code" href="a00099.html#ae064a0440f9730f9661e51e6089dd853">BM_MINISET_GAPLEN</a> / (<span class="keyword">sizeof</span>(<a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>) / <span class="keyword">sizeof</span>(<a class="code" href="a00110.html#ac654d6319039a86546d235a236fc7cf6">bm::gap_word_t</a>));
224
<a name="l00197"></a>00197 A::deallocate(m_buf, arr_size);
225
<a name="l00198"></a>00198 m_buf = buf;
226
<a name="l00199"></a>00199 m_type = 0;
227
<a name="l00200"></a>00200 }
228
<a name="l00201"></a>00201
229
<a name="l00202"></a>00202 <span class="keyword">private</span>:
230
<a name="l00203"></a>00203 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* m_buf; <span class="comment">//!< Buffer pointer</span>
231
<a name="l00204"></a>00204 <span class="comment"></span> <span class="keywordtype">unsigned</span> m_type; <span class="comment">//!< buffer type (0-bit, 1-gap)</span>
232
<a name="l00205"></a>00205 <span class="comment"></span>};
233
<a name="l00206"></a>00206
234
<a name="l00207"></a>00207 <span class="comment"></span>
235
<a name="l00208"></a>00208 <span class="comment">/*!</span>
236
<a name="l00209"></a>00209 <span class="comment"> @brief Mini bitvector used in bvector template to keep block type flags</span>
237
<a name="l00210"></a>00210 <span class="comment"> </span>
238
<a name="l00211"></a>00211 <span class="comment"> Template is used as a default template parameter MS for bvector </span>
239
<a name="l00212"></a>00212 <span class="comment"> Offers maximum performance comparing to miniset.</span>
240
<a name="l00213"></a>00213 <span class="comment"></span>
241
<a name="l00214"></a>00214 <span class="comment"> @sa miniset</span>
242
<a name="l00215"></a>00215 <span class="comment">*/</span>
243
<a name="l00216"></a><a class="code" href="a00045.html">00216</a> <span class="keyword">template</span><<span class="keywordtype">size_t</span> N> <span class="keyword">class </span><a class="code" href="a00045.html" title="Mini bitvector used in bvector template to keep block type flags.">bvmini</a>
244
<a name="l00217"></a>00217 {
245
<a name="l00218"></a>00218 <span class="keyword">public</span>:
246
<a name="l00219"></a>00219
247
<a name="l00220"></a><a class="code" href="a00045.html#a06c2a8c6253e47e107acf75f3f9dfb38">00220</a> <a class="code" href="a00045.html#a06c2a8c6253e47e107acf75f3f9dfb38">bvmini</a>(<span class="keywordtype">int</span> start_strategy = 0)
248
<a name="l00221"></a>00221 {
249
<a name="l00222"></a>00222 ::memset(m_buf, 0, <span class="keyword">sizeof</span>(m_buf));
250
<a name="l00223"></a>00223 }
251
<a name="l00224"></a>00224
252
<a name="l00225"></a><a class="code" href="a00045.html#aaca8fc1df0be6bf5e0dbb78f399c793d">00225</a> <a class="code" href="a00045.html#a06c2a8c6253e47e107acf75f3f9dfb38">bvmini</a>(<span class="keyword">const</span> <a class="code" href="a00045.html" title="Mini bitvector used in bvector template to keep block type flags.">bvmini</a>& mset)
253
<a name="l00226"></a>00226 {
254
<a name="l00227"></a>00227 ::memcpy(m_buf, mset.m_buf, <span class="keyword">sizeof</span>(m_buf));
255
<a name="l00228"></a>00228 }
256
<a name="l00229"></a>00229
257
<a name="l00230"></a>00230 <span class="comment"></span>
258
<a name="l00231"></a>00231 <span class="comment"> /// Checks if bit pos 1 or 0. Returns 0 if 0 and non zero otherwise.</span>
259
<a name="l00232"></a><a class="code" href="a00045.html#a6c5950cd2043fe1de37aa7eeba2ad208">00232</a> <span class="comment"></span> <span class="keywordtype">unsigned</span> <a class="code" href="a00045.html#a6c5950cd2043fe1de37aa7eeba2ad208" title="Checks if bit pos 1 or 0. Returns 0 if 0 and non zero otherwise.">test</a>(<a class="code" href="a00110.html#aa3824d882a037396370b16f2f0a8bf37">bm::id_t</a> n)<span class="keyword"> const </span>
260
<a name="l00233"></a>00233 <span class="keyword"> </span>{
261
<a name="l00234"></a>00234 <span class="keywordflow">return</span> m_buf[n>><a class="code" href="a00110.html#a83d76bccf6fe3770f32d5ba11d2a37ad">bm::set_word_shift</a>] & (1<<(n & <a class="code" href="a00110.html#addbf345be3733d5e4575d71733ed1da8">bm::set_word_mask</a>));
262
<a name="l00235"></a>00235 }
263
<a name="l00236"></a>00236
264
<a name="l00237"></a><a class="code" href="a00045.html#a74c7834f34f41e6042243cdbe2b7b93e">00237</a> <span class="keywordtype">void</span> <span class="keyword">set</span>(<a class="code" href="a00110.html#aa3824d882a037396370b16f2f0a8bf37">bm::id_t</a> n, <span class="keywordtype">bool</span> val=<span class="keyword">true</span>)
265
<a name="l00238"></a>00238 {
266
<a name="l00239"></a>00239 <span class="keywordtype">unsigned</span> nword = n >> <a class="code" href="a00110.html#a83d76bccf6fe3770f32d5ba11d2a37ad">bm::set_word_shift</a>;
267
<a name="l00240"></a>00240 <span class="keywordtype">unsigned</span> mask = unsigned(1) << (n & <a class="code" href="a00110.html#addbf345be3733d5e4575d71733ed1da8">bm::set_word_mask</a>);
268
<a name="l00241"></a>00241
269
<a name="l00242"></a>00242 val ? (m_buf[nword] |= mask) : (m_buf[nword] &= ~mask);
270
<a name="l00243"></a>00243 }
271
<a name="l00244"></a>00244
272
<a name="l00245"></a><a class="code" href="a00045.html#a3253569551b1ce6fb6b1e1d21f318f70">00245</a> <span class="keywordtype">unsigned</span> <a class="code" href="a00045.html#a3253569551b1ce6fb6b1e1d21f318f70">mem_used</a>()<span class="keyword"> const</span>
273
<a name="l00246"></a>00246 <span class="keyword"> </span>{
274
<a name="l00247"></a>00247 <span class="keywordflow">return</span> <span class="keyword">sizeof</span>(*this);
275
<a name="l00248"></a>00248 }
276
<a name="l00249"></a>00249
277
<a name="l00250"></a><a class="code" href="a00045.html#af5f33b8936925bec92522af3c44dd677">00250</a> <span class="keywordtype">void</span> <a class="code" href="a00045.html#af5f33b8936925bec92522af3c44dd677">swap</a>(<a class="code" href="a00045.html" title="Mini bitvector used in bvector template to keep block type flags.">bvmini</a>& mset)
278
<a name="l00251"></a>00251 {
279
<a name="l00252"></a>00252 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> i = 0; i < <a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">BM_MINISET_ARRSIZE</a>(N); ++i)
280
<a name="l00253"></a>00253 {
281
<a name="l00254"></a>00254 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a> tmp = m_buf[i];
282
<a name="l00255"></a>00255 m_buf[i] = mset.m_buf[i];
283
<a name="l00256"></a>00256 mset.m_buf[i] = tmp;
284
<a name="l00257"></a>00257 }
285
<a name="l00258"></a>00258 }
286
<a name="l00259"></a>00259
287
<a name="l00260"></a>00260 <span class="keyword">private</span>:
288
<a name="l00261"></a>00261 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a> m_buf[<a class="code" href="a00099.html#a4c52fe8ea11bf34fcaafb71253846fea">BM_MINISET_ARRSIZE</a>(N)];
289
<a name="l00262"></a>00262 };
290
<a name="l00263"></a>00263
291
<a name="l00264"></a>00264 <span class="comment"></span>
292
<a name="l00265"></a>00265 <span class="comment">/*!@} */</span>
293
<a name="l00266"></a>00266 <span class="comment"></span>
294
<a name="l00267"></a>00267 <span class="comment">/*!</span>
295
<a name="l00268"></a>00268 <span class="comment"> @brief Bitvector class with very limited functionality.</span>
296
<a name="l00269"></a>00269 <span class="comment"></span>
297
<a name="l00270"></a>00270 <span class="comment"> Class implements a simple bitset and is used for internal </span>
298
<a name="l00271"></a>00271 <span class="comment"> and testing purposes. </span>
299
<a name="l00272"></a>00272 <span class="comment">*/</span>
300
<a name="l00273"></a><a class="code" href="a00044.html">00273</a> <span class="keyword">template</span><<span class="keyword">class</span> A> <span class="keyword">class </span><a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>
301
<a name="l00274"></a>00274 {
302
<a name="l00275"></a>00275 <span class="keyword">public</span>:
303
<a name="l00276"></a><a class="code" href="a00044.html#a6a19b7eb95001b59818314ab36af98f8">00276</a> <a class="code" href="a00044.html#a6a19b7eb95001b59818314ab36af98f8">bvector_mini</a>(<span class="keywordtype">unsigned</span> size)
304
<a name="l00277"></a>00277 : m_buf(0),
305
<a name="l00278"></a>00278 m_size(size)
306
<a name="l00279"></a>00279 {
307
<a name="l00280"></a>00280 <span class="keywordtype">unsigned</span> arr_size = (size / 32) + 1;
308
<a name="l00281"></a>00281 m_buf = A::allocate(arr_size, 0);
309
<a name="l00282"></a>00282 ::memset(m_buf, 0, arr_size * <span class="keyword">sizeof</span>(unsigned));
310
<a name="l00283"></a>00283 }
311
<a name="l00284"></a><a class="code" href="a00044.html#adf04b08db38bfd6445c26cdbbf9dc296">00284</a> <a class="code" href="a00044.html#a6a19b7eb95001b59818314ab36af98f8">bvector_mini</a>(<span class="keyword">const</span> <a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& <a class="code" href="a00043.html" title="bitvector with runtime compression of bits.">bvect</a>)
312
<a name="l00285"></a>00285 : m_size(bvect.m_size)
313
<a name="l00286"></a>00286 {
314
<a name="l00287"></a>00287 <span class="keywordtype">unsigned</span> arr_size = (m_size / 32) + 1;
315
<a name="l00288"></a>00288 m_buf = A::allocate(arr_size, 0);
316
<a name="l00289"></a>00289 ::memcpy(m_buf, bvect.m_buf, arr_size * <span class="keyword">sizeof</span>(unsigned));
317
<a name="l00290"></a>00290 }
318
<a name="l00291"></a>00291
319
<a name="l00292"></a><a class="code" href="a00044.html#ad8ab729a8aafd44e623bd251b9f9c1fa">00292</a> <a class="code" href="a00044.html#ad8ab729a8aafd44e623bd251b9f9c1fa">~bvector_mini</a>()
320
<a name="l00293"></a>00293 {
321
<a name="l00294"></a>00294 A::deallocate(m_buf, (m_size / 32) + 1);
322
<a name="l00295"></a>00295 }
323
<a name="l00296"></a>00296 <span class="comment"></span>
324
<a name="l00297"></a>00297 <span class="comment"> /// Checks whether bit pos is 1 or 0. Returns 0 if the bit is 0 and non-zero otherwise.</span>
325
<a name="l00298"></a><a class="code" href="a00044.html#a6cfcb4606f6fbb52ae78943fcb85b97e">00298</a> <span class="comment"></span> <span class="keywordtype">int</span> <a class="code" href="a00044.html#a6cfcb4606f6fbb52ae78943fcb85b97e" title="Checks if bit pos 1 or 0. Returns 0 if 0 and non zero otherwise.">is_bit_true</a>(<span class="keywordtype">unsigned</span> pos)<span class="keyword"> const</span>
326
<a name="l00299"></a>00299 <span class="keyword"> </span>{
327
<a name="l00300"></a>00300 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> mask = (<span class="keywordtype">unsigned</span> char)((<span class="keywordtype">char</span>)0x1 << (pos & 7));
328
<a name="l00301"></a>00301 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>* offs = (<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>*)m_buf + (pos >> 3); <span class="comment">// m_buf + (pos/8)</span>
329
<a name="l00302"></a>00302
330
<a name="l00303"></a>00303 <span class="keywordflow">return</span> (*offs) & mask;
331
<a name="l00304"></a>00304 }
201
<a name="l00174"></a>00174 _mm_prefetch((<span class="keyword">const</span> <span class="keywordtype">char</span>*)(src)+512, _MM_HINT_NTA);
202
<a name="l00175"></a>00175
203
<a name="l00176"></a>00176 xmm1 = _mm_load_si128(src++);
204
<a name="l00177"></a>00177 xmm2 = _mm_load_si128(dst);
205
<a name="l00178"></a>00178 xmm1 = _mm_or_si128(xmm1, xmm2);
206
<a name="l00179"></a>00179 _mm_store_si128(dst++, xmm1);
207
<a name="l00180"></a>00180
208
<a name="l00181"></a>00181 xmm1 = _mm_load_si128(src++);
209
<a name="l00182"></a>00182 xmm2 = _mm_load_si128(dst);
210
<a name="l00183"></a>00183 xmm1 = _mm_or_si128(xmm1, xmm2);
211
<a name="l00184"></a>00184 _mm_store_si128(dst++, xmm1);
212
<a name="l00185"></a>00185
213
<a name="l00186"></a>00186 xmm1 = _mm_load_si128(src++);
214
<a name="l00187"></a>00187 xmm2 = _mm_load_si128(dst);
215
<a name="l00188"></a>00188 xmm1 = _mm_or_si128(xmm1, xmm2);
216
<a name="l00189"></a>00189 _mm_store_si128(dst++, xmm1);
217
<a name="l00190"></a>00190
218
<a name="l00191"></a>00191 xmm1 = _mm_load_si128(src++);
219
<a name="l00192"></a>00192 xmm2 = _mm_load_si128(dst);
220
<a name="l00193"></a>00193 xmm1 = _mm_or_si128(xmm1, xmm2);
221
<a name="l00194"></a>00194 _mm_store_si128(dst++, xmm1);
222
<a name="l00195"></a>00195
223
<a name="l00196"></a>00196 } <span class="keywordflow">while</span> (src < src_end);
224
<a name="l00197"></a>00197 }
225
<a name="l00198"></a>00198
226
<a name="l00199"></a>00199 <span class="comment"></span>
227
<a name="l00200"></a>00200 <span class="comment">/*! </span>
228
<a name="l00201"></a>00201 <span class="comment"> @brief XOR array elements against another array</span>
229
<a name="l00202"></a>00202 <span class="comment"> *dst ^= *src</span>
230
<a name="l00203"></a>00203 <span class="comment"></span>
231
<a name="l00204"></a>00204 <span class="comment"> @ingroup SSE2</span>
232
<a name="l00205"></a>00205 <span class="comment">*/</span>
233
<a name="l00206"></a>00206 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
234
<a name="l00207"></a><a class="code" href="a00122.html#gaf1a5ad26557cc4d71d7421c35a8445fe">00207</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#gaf1a5ad26557cc4d71d7421c35a8445fe" title="XOR array elements against another array dst ^= *src.">sse2_xor_arr</a>(__m128i* BMRESTRICT dst,
235
<a name="l00208"></a>00208 <span class="keyword">const</span> __m128i* BMRESTRICT src,
236
<a name="l00209"></a>00209 <span class="keyword">const</span> __m128i* BMRESTRICT src_end)
237
<a name="l00210"></a>00210 {
238
<a name="l00211"></a>00211 __m128i xmm1, xmm2;
239
<a name="l00212"></a>00212 <span class="keywordflow">do</span>
240
<a name="l00213"></a>00213 {
241
<a name="l00214"></a>00214 _mm_prefetch((<span class="keyword">const</span> <span class="keywordtype">char</span>*)(src)+512, _MM_HINT_NTA);
242
<a name="l00215"></a>00215
243
<a name="l00216"></a>00216 xmm1 = _mm_load_si128(src++);
244
<a name="l00217"></a>00217 xmm2 = _mm_load_si128(dst);
245
<a name="l00218"></a>00218 xmm1 = _mm_xor_si128(xmm1, xmm2);
246
<a name="l00219"></a>00219 _mm_store_si128(dst++, xmm1);
247
<a name="l00220"></a>00220
248
<a name="l00221"></a>00221 xmm1 = _mm_load_si128(src++);
249
<a name="l00222"></a>00222 xmm2 = _mm_load_si128(dst);
250
<a name="l00223"></a>00223 xmm1 = _mm_xor_si128(xmm1, xmm2);
251
<a name="l00224"></a>00224 _mm_store_si128(dst++, xmm1);
252
<a name="l00225"></a>00225
253
<a name="l00226"></a>00226 xmm1 = _mm_load_si128(src++);
254
<a name="l00227"></a>00227 xmm2 = _mm_load_si128(dst);
255
<a name="l00228"></a>00228 xmm1 = _mm_xor_si128(xmm1, xmm2);
256
<a name="l00229"></a>00229 _mm_store_si128(dst++, xmm1);
257
<a name="l00230"></a>00230
258
<a name="l00231"></a>00231 xmm1 = _mm_load_si128(src++);
259
<a name="l00232"></a>00232 xmm2 = _mm_load_si128(dst);
260
<a name="l00233"></a>00233 xmm1 = _mm_xor_si128(xmm1, xmm2);
261
<a name="l00234"></a>00234 _mm_store_si128(dst++, xmm1);
262
<a name="l00235"></a>00235
263
<a name="l00236"></a>00236 } <span class="keywordflow">while</span> (src < src_end);
264
<a name="l00237"></a>00237 }
265
<a name="l00238"></a>00238
266
<a name="l00239"></a>00239
267
<a name="l00240"></a>00240 <span class="comment"></span>
268
<a name="l00241"></a>00241 <span class="comment">/*! </span>
269
<a name="l00242"></a>00242 <span class="comment"> @brief AND-NOT (SUB) array elements against another array</span>
270
<a name="l00243"></a>00243 <span class="comment"> *dst &= ~*src</span>
271
<a name="l00244"></a>00244 <span class="comment"></span>
272
<a name="l00245"></a>00245 <span class="comment"> @ingroup SSE2</span>
273
<a name="l00246"></a>00246 <span class="comment">*/</span>
274
<a name="l00247"></a>00247 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
275
<a name="l00248"></a><a class="code" href="a00122.html#gac99f3b138f8a5e8ffb1296b129f618f0">00248</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#gac99f3b138f8a5e8ffb1296b129f618f0" title="AND-NOT (SUB) array elements against another array dst &amp;= ~*src.">sse2_sub_arr</a>(__m128i* BMRESTRICT dst,
276
<a name="l00249"></a>00249 <span class="keyword">const</span> __m128i* BMRESTRICT src,
277
<a name="l00250"></a>00250 <span class="keyword">const</span> __m128i* BMRESTRICT src_end)
278
<a name="l00251"></a>00251 {
279
<a name="l00252"></a>00252 __m128i xmm1, xmm2;
280
<a name="l00253"></a>00253 <span class="keywordflow">do</span>
281
<a name="l00254"></a>00254 {
282
<a name="l00255"></a>00255 _mm_prefetch((<span class="keyword">const</span> <span class="keywordtype">char</span>*)(src)+512, _MM_HINT_NTA);
283
<a name="l00256"></a>00256
284
<a name="l00257"></a>00257 xmm1 = _mm_load_si128(src++);
285
<a name="l00258"></a>00258 xmm2 = _mm_load_si128(dst);
286
<a name="l00259"></a>00259 xmm1 = _mm_andnot_si128(xmm1, xmm2);
287
<a name="l00260"></a>00260 _mm_store_si128(dst++, xmm1);
288
<a name="l00261"></a>00261
289
<a name="l00262"></a>00262 xmm1 = _mm_load_si128(src++);
290
<a name="l00263"></a>00263 xmm2 = _mm_load_si128(dst);
291
<a name="l00264"></a>00264 xmm1 = _mm_andnot_si128(xmm1, xmm2);
292
<a name="l00265"></a>00265 _mm_store_si128(dst++, xmm1);
293
<a name="l00266"></a>00266
294
<a name="l00267"></a>00267 xmm1 = _mm_load_si128(src++);
295
<a name="l00268"></a>00268 xmm2 = _mm_load_si128(dst);
296
<a name="l00269"></a>00269 xmm1 = _mm_andnot_si128(xmm1, xmm2);
297
<a name="l00270"></a>00270 _mm_store_si128(dst++, xmm1);
298
<a name="l00271"></a>00271
299
<a name="l00272"></a>00272 xmm1 = _mm_load_si128(src++);
300
<a name="l00273"></a>00273 xmm2 = _mm_load_si128(dst);
301
<a name="l00274"></a>00274 xmm1 = _mm_andnot_si128(xmm1, xmm2);
302
<a name="l00275"></a>00275 _mm_store_si128(dst++, xmm1);
303
<a name="l00276"></a>00276
304
<a name="l00277"></a>00277 } <span class="keywordflow">while</span> (src < src_end);
305
<a name="l00278"></a>00278 }
306
<a name="l00279"></a>00279 <span class="comment"></span>
307
<a name="l00280"></a>00280 <span class="comment">/*! </span>
308
<a name="l00281"></a>00281 <span class="comment"> @brief SSE2 block memset</span>
309
<a name="l00282"></a>00282 <span class="comment"> *dst = value</span>
310
<a name="l00283"></a>00283 <span class="comment"></span>
311
<a name="l00284"></a>00284 <span class="comment"> @ingroup SSE2</span>
312
<a name="l00285"></a>00285 <span class="comment">*/</span>
313
<a name="l00286"></a>00286
314
<a name="l00287"></a>00287 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
315
<a name="l00288"></a><a class="code" href="a00122.html#ga302f4fcd0abf355957b305d16d04f452">00288</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#ga302f4fcd0abf355957b305d16d04f452" title="SSE2 block memset dst = value.">sse2_set_block</a>(__m128i* BMRESTRICT dst,
316
<a name="l00289"></a>00289 __m128i* BMRESTRICT dst_end,
317
<a name="l00290"></a>00290 <a class="code" href="a00115.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a> value)
318
<a name="l00291"></a>00291 {
319
<a name="l00292"></a>00292 __m128i xmm0 = _mm_set_epi32 (value, value, value, value);
320
<a name="l00293"></a>00293 <span class="keywordflow">do</span>
321
<a name="l00294"></a>00294 {
322
<a name="l00295"></a>00295 _mm_store_si128(dst, xmm0);
323
<a name="l00296"></a>00296 <span class="comment">/* </span>
324
<a name="l00297"></a>00297 <span class="comment"> _mm_store_si128(dst+1, xmm0);</span>
325
<a name="l00298"></a>00298 <span class="comment"> _mm_store_si128(dst+2, xmm0);</span>
326
<a name="l00299"></a>00299 <span class="comment"> _mm_store_si128(dst+3, xmm0);</span>
327
<a name="l00300"></a>00300 <span class="comment"></span>
328
<a name="l00301"></a>00301 <span class="comment"> _mm_store_si128(dst+4, xmm0);</span>
329
<a name="l00302"></a>00302 <span class="comment"> _mm_store_si128(dst+5, xmm0);</span>
330
<a name="l00303"></a>00303 <span class="comment"> _mm_store_si128(dst+6, xmm0);</span>
331
<a name="l00304"></a>00304 <span class="comment"> _mm_store_si128(dst+7, xmm0);</span>
332
332
<a name="l00305"></a>00305 <span class="comment"></span>
333
<a name="l00306"></a>00306 <span class="comment"> /// Sets bit number pos to 1</span>
334
<a name="l00307"></a><a class="code" href="a00044.html#ae28f350dc925a9c8bef673fcffce9c8e">00307</a> <span class="comment"></span> <span class="keywordtype">void</span> <a class="code" href="a00044.html#ae28f350dc925a9c8bef673fcffce9c8e" title="Sets bit number pos to 1.">set_bit</a>(<span class="keywordtype">unsigned</span> pos)
335
<a name="l00308"></a>00308 {
336
<a name="l00309"></a>00309 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> mask = (<span class="keywordtype">unsigned</span> char)(0x1 << (pos & 7));
337
<a name="l00310"></a>00310 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>* offs = (<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>*)m_buf + (pos >> 3);
338
<a name="l00311"></a>00311 *offs |= mask;
339
<a name="l00312"></a>00312 }
333
<a name="l00306"></a>00306 <span class="comment"> dst += 8;</span>
334
<a name="l00307"></a>00307 <span class="comment">*/</span>
335
<a name="l00308"></a>00308 } <span class="keywordflow">while</span> (++dst < dst_end);
336
<a name="l00309"></a>00309
337
<a name="l00310"></a>00310 _mm_sfence();
338
<a name="l00311"></a>00311 }
339
<a name="l00312"></a>00312
340
340
<a name="l00313"></a>00313
341
341
<a name="l00314"></a>00314 <span class="comment"></span>
342
<a name="l00315"></a>00315 <span class="comment"> /// Sets bit number pos to 0</span>
343
<a name="l00316"></a><a class="code" href="a00044.html#a852767299516dacb6b27296d71ea60c0">00316</a> <span class="comment"></span> <span class="keywordtype">void</span> <a class="code" href="a00044.html#a852767299516dacb6b27296d71ea60c0" title="Sets bit number pos to 0.">clear_bit</a>(<span class="keywordtype">unsigned</span> pos)
344
<a name="l00317"></a>00317 {
345
<a name="l00318"></a>00318 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> mask = (<span class="keywordtype">unsigned</span> char)(0x1 << (pos & 7));
346
<a name="l00319"></a>00319 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>* offs = (<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>*)m_buf + (pos >> 3);
347
<a name="l00320"></a>00320
348
<a name="l00321"></a>00321 *offs &= ~mask;
349
<a name="l00322"></a>00322 }
350
<a name="l00323"></a>00323 <span class="comment"></span>
351
<a name="l00324"></a>00324 <span class="comment"> /// Counts number of bits ON </span>
352
<a name="l00325"></a><a class="code" href="a00044.html#a1becbe1fa1d767f6ea17801bd64b4bd5">00325</a> <span class="comment"></span> <span class="keywordtype">unsigned</span> <a class="code" href="a00044.html#a1becbe1fa1d767f6ea17801bd64b4bd5" title="Counts number of bits ON.">bit_count</a>()<span class="keyword"> const</span>
353
<a name="l00326"></a>00326 <span class="keyword"> </span>{
354
<a name="l00327"></a>00327 <span class="keyword">register</span> <span class="keywordtype">unsigned</span> count = 0;
355
<a name="l00328"></a>00328 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* end = m_buf + (m_size / 32)+1;
356
<a name="l00329"></a>00329
357
<a name="l00330"></a>00330 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span>* start = m_buf; start < end; ++start)
358
<a name="l00331"></a>00331 {
359
<a name="l00332"></a>00332 <span class="keyword">register</span> <span class="keywordtype">unsigned</span> value = *start;
360
<a name="l00333"></a>00333 <span class="keywordflow">for</span> (count += (value!=0); value &= value - 1; ++count);
361
<a name="l00334"></a>00334 }
362
<a name="l00335"></a>00335 <span class="keywordflow">return</span> count;
363
<a name="l00336"></a>00336 }
364
<a name="l00337"></a>00337 <span class="comment"></span>
365
<a name="l00338"></a>00338 <span class="comment"> /// Comparison.</span>
366
<a name="l00339"></a><a class="code" href="a00044.html#ac3b022f81e3eaafab851a23232053a93">00339</a> <span class="comment"></span> <span class="keywordtype">int</span> <a class="code" href="a00044.html#ac3b022f81e3eaafab851a23232053a93" title="Comparison.">compare</a>(<span class="keyword">const</span> <a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& <a class="code" href="a00043.html" title="bitvector with runtime compression of bits.">bvect</a>)
367
<a name="l00340"></a>00340 {
368
<a name="l00341"></a>00341 <span class="keywordtype">unsigned</span> cnt1 = <a class="code" href="a00044.html#a1becbe1fa1d767f6ea17801bd64b4bd5" title="Counts number of bits ON.">bit_count</a>();
369
<a name="l00342"></a>00342 <span class="keywordtype">unsigned</span> cnt2 = bvect.<a class="code" href="a00044.html#a1becbe1fa1d767f6ea17801bd64b4bd5" title="Counts number of bits ON.">bit_count</a>();
370
<a name="l00343"></a>00343
371
<a name="l00344"></a>00344 <span class="keywordflow">if</span> (!cnt1 && !cnt2) <span class="keywordflow">return</span> 0;
372
<a name="l00345"></a>00345
373
<a name="l00346"></a>00346 <span class="keywordtype">unsigned</span> cnt_min = cnt1 < cnt2 ? cnt1 : cnt2;
374
<a name="l00347"></a>00347
375
<a name="l00348"></a>00348 <span class="keywordflow">if</span> (!cnt_min) <span class="keywordflow">return</span> cnt1 ? 1 : -1;
376
<a name="l00349"></a>00349
377
<a name="l00350"></a>00350 <span class="keywordtype">unsigned</span> idx1 = <a class="code" href="a00044.html#a69391ac769601c2386ca87cfce255c6b" title="Returns index of the first ON bit.">get_first</a>();
378
<a name="l00351"></a>00351 <span class="keywordtype">unsigned</span> idx2 = bvect.<a class="code" href="a00044.html#a69391ac769601c2386ca87cfce255c6b" title="Returns index of the first ON bit.">get_first</a>();
379
<a name="l00352"></a>00352
380
<a name="l00353"></a>00353 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> i = 0; i < cnt_min; ++i)
381
<a name="l00354"></a>00354 {
382
<a name="l00355"></a>00355 <span class="keywordflow">if</span> (idx1 != idx2)
383
<a name="l00356"></a>00356 {
384
<a name="l00357"></a>00357 <span class="keywordflow">return</span> idx1 < idx2 ? 1 : -1;
385
<a name="l00358"></a>00358 }
386
<a name="l00359"></a>00359 idx1 = <a class="code" href="a00044.html#ab43a59f4e322f5a270a7c9bb920b17a3" title="Returns index of next bit, which is ON.">get_next</a>(idx1);
387
<a name="l00360"></a>00360 idx2 = bvect.<a class="code" href="a00044.html#ab43a59f4e322f5a270a7c9bb920b17a3" title="Returns index of next bit, which is ON.">get_next</a>(idx2);
388
<a name="l00361"></a>00361 }
389
<a name="l00362"></a>00362
390
<a name="l00363"></a>00363 <a class="code" href="a00089.html#aa44515fab0ace8928d1cb82009a95bf8">BM_ASSERT</a>(idx1==0 || idx2==0);
391
<a name="l00364"></a>00364
392
<a name="l00365"></a>00365 <span class="keywordflow">if</span> (idx1 != idx2)
393
<a name="l00366"></a>00366 {
394
<a name="l00367"></a>00367 <span class="keywordflow">if</span> (!idx1) <span class="keywordflow">return</span> -1;
395
<a name="l00368"></a>00368 <span class="keywordflow">if</span> (!idx2) <span class="keywordflow">return</span> 1;
396
<a name="l00369"></a>00369 <span class="keywordflow">return</span> idx1 < idx2 ? 1 : -1;
397
<a name="l00370"></a>00370 }
342
<a name="l00315"></a>00315 <span class="comment">/*! </span>
343
<a name="l00316"></a>00316 <span class="comment"> @brief SSE2 block copy</span>
344
<a name="l00317"></a>00317 <span class="comment"> *dst = *src</span>
345
<a name="l00318"></a>00318 <span class="comment"></span>
346
<a name="l00319"></a>00319 <span class="comment"> @ingroup SSE2</span>
347
<a name="l00320"></a>00320 <span class="comment">*/</span>
348
<a name="l00321"></a>00321 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
349
<a name="l00322"></a><a class="code" href="a00122.html#ga571dd54af5c555cad9dfa6bef4561777">00322</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#ga571dd54af5c555cad9dfa6bef4561777" title="SSE2 block copy dst = *src.">sse2_copy_block</a>(__m128i* BMRESTRICT dst,
350
<a name="l00323"></a>00323 <span class="keyword">const</span> __m128i* BMRESTRICT src,
351
<a name="l00324"></a>00324 <span class="keyword">const</span> __m128i* BMRESTRICT src_end)
352
<a name="l00325"></a>00325 {
353
<a name="l00326"></a>00326 __m128i xmm0, xmm1, xmm2, xmm3;
354
<a name="l00327"></a>00327 <span class="keywordflow">do</span>
355
<a name="l00328"></a>00328 {
356
<a name="l00329"></a>00329 _mm_prefetch((<span class="keyword">const</span> <span class="keywordtype">char</span>*)(src)+512, _MM_HINT_NTA);
357
<a name="l00330"></a>00330
358
<a name="l00331"></a>00331 xmm0 = _mm_load_si128(src+0);
359
<a name="l00332"></a>00332 xmm1 = _mm_load_si128(src+1);
360
<a name="l00333"></a>00333 xmm2 = _mm_load_si128(src+2);
361
<a name="l00334"></a>00334 xmm3 = _mm_load_si128(src+3);
362
<a name="l00335"></a>00335
363
<a name="l00336"></a>00336 _mm_store_si128(dst+0, xmm0);
364
<a name="l00337"></a>00337 _mm_store_si128(dst+1, xmm1);
365
<a name="l00338"></a>00338 _mm_store_si128(dst+2, xmm2);
366
<a name="l00339"></a>00339 _mm_store_si128(dst+3, xmm3);
367
<a name="l00340"></a>00340
368
<a name="l00341"></a>00341 xmm0 = _mm_load_si128(src+4);
369
<a name="l00342"></a>00342 xmm1 = _mm_load_si128(src+5);
370
<a name="l00343"></a>00343 xmm2 = _mm_load_si128(src+6);
371
<a name="l00344"></a>00344 xmm3 = _mm_load_si128(src+7);
372
<a name="l00345"></a>00345
373
<a name="l00346"></a>00346 _mm_store_si128(dst+4, xmm0);
374
<a name="l00347"></a>00347 _mm_store_si128(dst+5, xmm1);
375
<a name="l00348"></a>00348 _mm_store_si128(dst+6, xmm2);
376
<a name="l00349"></a>00349 _mm_store_si128(dst+7, xmm3);
377
<a name="l00350"></a>00350
378
<a name="l00351"></a>00351 src += 8;
379
<a name="l00352"></a>00352 dst += 8;
380
<a name="l00353"></a>00353
381
<a name="l00354"></a>00354 } <span class="keywordflow">while</span> (src < src_end);
382
<a name="l00355"></a>00355 }
383
<a name="l00356"></a>00356 <span class="comment"></span>
384
<a name="l00357"></a>00357 <span class="comment">/*! </span>
385
<a name="l00358"></a>00358 <span class="comment"> @brief Invert array elements</span>
386
<a name="l00359"></a>00359 <span class="comment"> *dst = ~*dst</span>
387
<a name="l00360"></a>00360 <span class="comment"> or</span>
388
<a name="l00361"></a>00361 <span class="comment"> *dst ^= 0xFFFFFFFF </span>
389
<a name="l00362"></a>00362 <span class="comment"></span>
390
<a name="l00363"></a>00363 <span class="comment"> @ingroup SSE2</span>
391
<a name="l00364"></a>00364 <span class="comment">*/</span>
392
<a name="l00365"></a>00365 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
393
<a name="l00366"></a><a class="code" href="a00122.html#ga8d506147673d88005f92caee7f5dd23a">00366</a> <span class="keywordtype">void</span> <a class="code" href="a00122.html#ga8d506147673d88005f92caee7f5dd23a" title="Invert array elements dst = ~*dst or dst ^= 0xFFFFFFFF.">sse2_invert_arr</a>(<a class="code" href="a00115.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* first, <a class="code" href="a00115.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* last)
394
<a name="l00367"></a>00367 {
395
<a name="l00368"></a>00368 __m128i xmm1 = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF,
396
<a name="l00369"></a>00369 0xFFFFFFFF, 0xFFFFFFFF);
397
<a name="l00370"></a>00370 __m128i* wrd_ptr = (__m128i*)first;
398
398
<a name="l00371"></a>00371
399
<a name="l00372"></a>00372 <span class="keywordflow">return</span> 0;
400
<a name="l00373"></a>00373 }
401
<a name="l00374"></a>00374
402
<a name="l00375"></a>00375 <span class="comment"></span>
403
<a name="l00376"></a>00376 <span class="comment"> /// Returns index of the first ON bit</span>
404
<a name="l00377"></a><a class="code" href="a00044.html#a69391ac769601c2386ca87cfce255c6b">00377</a> <span class="comment"></span> <span class="keywordtype">unsigned</span> <a class="code" href="a00044.html#a69391ac769601c2386ca87cfce255c6b" title="Returns index of the first ON bit.">get_first</a>()<span class="keyword"> const</span>
405
<a name="l00378"></a>00378 <span class="keyword"> </span>{
406
<a name="l00379"></a>00379 <span class="keywordtype">unsigned</span> pos = 0;
407
<a name="l00380"></a>00380 <span class="keyword">const</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>* ptr = (<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>*) m_buf;
408
<a name="l00381"></a>00381
409
<a name="l00382"></a>00382 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span> i = 0; i < (m_size/8)+1; ++i)
410
<a name="l00383"></a>00383 {
411
<a name="l00384"></a>00384 <span class="keyword">register</span> <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> w = ptr[i];
412
<a name="l00385"></a>00385
413
<a name="l00386"></a>00386
414
<a name="l00387"></a>00387 <span class="keywordflow">if</span> (w != 0)
415
<a name="l00388"></a>00388 {
416
<a name="l00389"></a>00389 <span class="keywordflow">while</span> ((w & 1) == 0)
417
<a name="l00390"></a>00390 {
418
<a name="l00391"></a>00391 w >>= 1;
419
<a name="l00392"></a>00392 ++pos;
420
<a name="l00393"></a>00393 }
421
<a name="l00394"></a>00394 <span class="keywordflow">return</span> pos;
422
<a name="l00395"></a>00395 }
423
<a name="l00396"></a>00396 pos += <span class="keyword">sizeof</span>(<span class="keywordtype">unsigned</span> char) * 8;
424
<a name="l00397"></a>00397 }
425
<a name="l00398"></a>00398 <span class="keywordflow">return</span> 0;
426
<a name="l00399"></a>00399 }
427
<a name="l00400"></a>00400
428
<a name="l00401"></a>00401 <span class="comment"></span>
429
<a name="l00402"></a>00402 <span class="comment"> /// Returns index of next bit, which is ON</span>
430
<a name="l00403"></a><a class="code" href="a00044.html#ab43a59f4e322f5a270a7c9bb920b17a3">00403</a> <span class="comment"></span> <span class="keywordtype">unsigned</span> <a class="code" href="a00044.html#ab43a59f4e322f5a270a7c9bb920b17a3" title="Returns index of next bit, which is ON.">get_next</a>(<span class="keywordtype">unsigned</span> idx)<span class="keyword"> const</span>
431
<a name="l00404"></a>00404 <span class="keyword"> </span>{
432
<a name="l00405"></a>00405 <span class="keyword">register</span> <span class="keywordtype">unsigned</span> i;
433
<a name="l00406"></a>00406
434
<a name="l00407"></a>00407 <span class="keywordflow">for</span> (i = idx+1; i < m_size; ++i)
435
<a name="l00408"></a>00408 {
436
<a name="l00409"></a>00409 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>* offs = (<span class="keywordtype">unsigned</span> <span class="keywordtype">char</span>*)m_buf + (i >> 3);
437
<a name="l00410"></a>00410 <span class="keywordflow">if</span> (*offs)
438
<a name="l00411"></a>00411 {
439
<a name="l00412"></a>00412 <span class="keywordtype">unsigned</span> <span class="keywordtype">char</span> mask = (<span class="keywordtype">unsigned</span> char)((<span class="keywordtype">char</span>)0x1 << (i & 7));
399
<a name="l00372"></a>00372 <span class="keywordflow">do</span>
400
<a name="l00373"></a>00373 {
401
<a name="l00374"></a>00374 _mm_prefetch((<span class="keyword">const</span> <span class="keywordtype">char</span>*)(wrd_ptr)+512, _MM_HINT_NTA);
402
<a name="l00375"></a>00375
403
<a name="l00376"></a>00376 __m128i xmm0 = _mm_load_si128(wrd_ptr);
404
<a name="l00377"></a>00377 xmm0 = _mm_xor_si128(xmm0, xmm1);
405
<a name="l00378"></a>00378 _mm_store_si128(wrd_ptr, xmm0);
406
<a name="l00379"></a>00379 ++wrd_ptr;
407
<a name="l00380"></a>00380 } <span class="keywordflow">while</span> (wrd_ptr < (__m128i*)last);
408
<a name="l00381"></a>00381 }
409
<a name="l00382"></a>00382
410
<a name="l00383"></a>00383 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
411
<a name="l00384"></a><a class="code" href="a00115.html#ac0c75fb7b3dc61602843ac4e1b9b7ef5">00384</a> __m128i <a class="code" href="a00115.html#ac0c75fb7b3dc61602843ac4e1b9b7ef5">sse2_and</a>(__m128i a, __m128i b)
412
<a name="l00385"></a>00385 {
413
<a name="l00386"></a>00386 <span class="keywordflow">return</span> _mm_and_si128(a, b);
414
<a name="l00387"></a>00387 }
415
<a name="l00388"></a>00388
416
<a name="l00389"></a>00389 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
417
<a name="l00390"></a><a class="code" href="a00115.html#adea798a9a95a04845c33876087a2f46b">00390</a> __m128i <a class="code" href="a00115.html#adea798a9a95a04845c33876087a2f46b">sse2_or</a>(__m128i a, __m128i b)
418
<a name="l00391"></a>00391 {
419
<a name="l00392"></a>00392 <span class="keywordflow">return</span> _mm_or_si128(a, b);
420
<a name="l00393"></a>00393 }
421
<a name="l00394"></a>00394
422
<a name="l00395"></a>00395
423
<a name="l00396"></a>00396 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
424
<a name="l00397"></a><a class="code" href="a00115.html#a6f5de19ee3e1be05037908b4777c4da8">00397</a> __m128i <a class="code" href="a00115.html#a6f5de19ee3e1be05037908b4777c4da8">sse2_xor</a>(__m128i a, __m128i b)
425
<a name="l00398"></a>00398 {
426
<a name="l00399"></a>00399 <span class="keywordflow">return</span> _mm_xor_si128(a, b);
427
<a name="l00400"></a>00400 }
428
<a name="l00401"></a>00401
429
<a name="l00402"></a>00402 <a class="code" href="a00092.html#a938734d014fb68dd8b2251fe8ec2b025">BMFORCEINLINE</a>
430
<a name="l00403"></a><a class="code" href="a00115.html#ab3e6d46fcba1bc2a1a5390c10f571382">00403</a> __m128i <a class="code" href="a00115.html#ab3e6d46fcba1bc2a1a5390c10f571382">sse2_sub</a>(__m128i a, __m128i b)
431
<a name="l00404"></a>00404 {
432
<a name="l00405"></a>00405 <span class="keywordflow">return</span> _mm_andnot_si128(b, a);
433
<a name="l00406"></a>00406 }
434
<a name="l00407"></a>00407
435
<a name="l00408"></a>00408
436
<a name="l00409"></a>00409
437
<a name="l00410"></a>00410 } <span class="comment">// namespace</span>
438
<a name="l00411"></a>00411
439
<a name="l00412"></a>00412
440
440
<a name="l00413"></a>00413
441
<a name="l00414"></a>00414 <span class="keywordflow">if</span> (*offs & mask)
442
<a name="l00415"></a>00415 {
443
<a name="l00416"></a>00416 <span class="keywordflow">return</span> i;
444
<a name="l00417"></a>00417 }
445
<a name="l00418"></a>00418 }
446
<a name="l00419"></a>00419 <span class="keywordflow">else</span>
447
<a name="l00420"></a>00420 {
448
<a name="l00421"></a>00421 i += 7;
449
<a name="l00422"></a>00422 }
450
<a name="l00423"></a>00423 }
451
<a name="l00424"></a>00424 <span class="keywordflow">return</span> 0;
452
<a name="l00425"></a>00425 }
453
<a name="l00426"></a>00426
454
<a name="l00427"></a>00427
455
<a name="l00428"></a><a class="code" href="a00044.html#a8a95d88b371e57e7f9f2636f535fde97">00428</a> <span class="keywordtype">void</span> <a class="code" href="a00044.html#a8a95d88b371e57e7f9f2636f535fde97">combine_and</a>(<span class="keyword">const</span> <a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& <a class="code" href="a00043.html" title="bitvector with runtime compression of bits.">bvect</a>)
456
<a name="l00429"></a>00429 {
457
<a name="l00430"></a>00430 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* end = m_buf + (m_size / 32)+1;
458
<a name="l00431"></a>00431
459
<a name="l00432"></a>00432 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* src = bvect.<a class="code" href="a00044.html#a44873b590c47bf1b52ea62a3384aea1a">get_buf</a>();
460
<a name="l00433"></a>00433
461
<a name="l00434"></a>00434 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span>* start = m_buf; start < end; ++start)
462
<a name="l00435"></a>00435 {
463
<a name="l00436"></a>00436 *start &= *src++;
464
<a name="l00437"></a>00437 }
465
<a name="l00438"></a>00438 }
466
<a name="l00439"></a>00439
467
<a name="l00440"></a><a class="code" href="a00044.html#ab53f530e6be242a26491141a05d09982">00440</a> <span class="keywordtype">void</span> <a class="code" href="a00044.html#ab53f530e6be242a26491141a05d09982">combine_xor</a>(<span class="keyword">const</span> <a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& <a class="code" href="a00043.html" title="bitvector with runtime compression of bits.">bvect</a>)
468
<a name="l00441"></a>00441 {
469
<a name="l00442"></a>00442 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* end = m_buf + (m_size / 32)+1;
470
<a name="l00443"></a>00443
471
<a name="l00444"></a>00444 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* src = bvect.<a class="code" href="a00044.html#a44873b590c47bf1b52ea62a3384aea1a">get_buf</a>();
472
<a name="l00445"></a>00445
473
<a name="l00446"></a>00446 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span>* start = m_buf; start < end; ++start)
474
<a name="l00447"></a>00447 {
475
<a name="l00448"></a>00448 *start ^= *src++;
476
<a name="l00449"></a>00449 }
477
<a name="l00450"></a>00450 }
478
<a name="l00451"></a>00451
479
<a name="l00452"></a>00452
480
<a name="l00453"></a><a class="code" href="a00044.html#a9b977e253866aafed72df9ae6be18965">00453</a> <span class="keywordtype">void</span> <a class="code" href="a00044.html#a9b977e253866aafed72df9ae6be18965">combine_or</a>(<span class="keyword">const</span> <a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& <a class="code" href="a00043.html" title="bitvector with runtime compression of bits.">bvect</a>)
481
<a name="l00454"></a>00454 {
482
<a name="l00455"></a>00455 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* end = m_buf + (m_size / 32)+1;
483
<a name="l00456"></a>00456
484
<a name="l00457"></a>00457 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* src = bvect.<a class="code" href="a00044.html#a44873b590c47bf1b52ea62a3384aea1a">get_buf</a>();
485
<a name="l00458"></a>00458
486
<a name="l00459"></a>00459 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span>* start = m_buf; start < end; ++start)
487
<a name="l00460"></a>00460 {
488
<a name="l00461"></a>00461 *start |= *src++;
489
<a name="l00462"></a>00462 }
490
<a name="l00463"></a>00463 }
491
<a name="l00464"></a>00464
492
<a name="l00465"></a><a class="code" href="a00044.html#a606b3a5570dd2d1016c60d00118bf8d5">00465</a> <span class="keywordtype">void</span> <a class="code" href="a00044.html#a606b3a5570dd2d1016c60d00118bf8d5">combine_sub</a>(<span class="keyword">const</span> <a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& <a class="code" href="a00043.html" title="bitvector with runtime compression of bits.">bvect</a>)
493
<a name="l00466"></a>00466 {
494
<a name="l00467"></a>00467 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* end = m_buf + (m_size / 32)+1;
495
<a name="l00468"></a>00468
496
<a name="l00469"></a>00469 <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* src = bvect.<a class="code" href="a00044.html#a44873b590c47bf1b52ea62a3384aea1a">get_buf</a>();
497
<a name="l00470"></a>00470
498
<a name="l00471"></a>00471 <span class="keywordflow">for</span> (<span class="keywordtype">unsigned</span>* start = m_buf; start < end; ++start)
499
<a name="l00472"></a>00472 {
500
<a name="l00473"></a>00473 *start &= ~(*src++);
501
<a name="l00474"></a>00474 }
502
<a name="l00475"></a>00475 }
503
<a name="l00476"></a>00476
504
<a name="l00477"></a><a class="code" href="a00044.html#a44873b590c47bf1b52ea62a3384aea1a">00477</a> <span class="keyword">const</span> <span class="keywordtype">unsigned</span>* <a class="code" href="a00044.html#a44873b590c47bf1b52ea62a3384aea1a">get_buf</a>()<span class="keyword"> const </span>{ <span class="keywordflow">return</span> m_buf; }
505
<a name="l00478"></a><a class="code" href="a00044.html#ab94ed32db48e192c6bd6da15bfa9c486">00478</a> <span class="keywordtype">unsigned</span> <a class="code" href="a00044.html#ab94ed32db48e192c6bd6da15bfa9c486">mem_used</a>()<span class="keyword"> const</span>
506
<a name="l00479"></a>00479 <span class="keyword"> </span>{
507
<a name="l00480"></a>00480 <span class="keywordflow">return</span> <span class="keyword">sizeof</span>(<a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>) + (m_size / 32) + 1;
508
<a name="l00481"></a>00481 }
509
<a name="l00482"></a>00482
510
<a name="l00483"></a><a class="code" href="a00044.html#a6e51947affb26ce925b3a71c81105802">00483</a> <span class="keywordtype">void</span> <a class="code" href="a00044.html#a6e51947affb26ce925b3a71c81105802">swap</a>(<a class="code" href="a00044.html" title="Bitvector class with very limited functionality.">bvector_mini</a>& bvm)
511
<a name="l00484"></a>00484 {
512
<a name="l00485"></a>00485 <a class="code" href="a00089.html#aa44515fab0ace8928d1cb82009a95bf8">BM_ASSERT</a>(m_size == bvm.m_size);
513
<a name="l00486"></a>00486 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* buftmp = m_buf;
514
<a name="l00487"></a>00487 m_buf = bvm.m_buf;
515
<a name="l00488"></a>00488 bvm.m_buf = buftmp;
516
<a name="l00489"></a>00489 }
517
<a name="l00490"></a>00490
518
<a name="l00491"></a>00491 <span class="keyword">private</span>:
519
<a name="l00492"></a>00492 <a class="code" href="a00110.html#a17fd5ba52db3ddda05e6f8dd5000a1a4">bm::word_t</a>* m_buf;
520
<a name="l00493"></a>00493 <span class="keywordtype">unsigned</span> m_size;
521
<a name="l00494"></a>00494 };
522
<a name="l00495"></a>00495
523
<a name="l00496"></a>00496
524
<a name="l00497"></a>00497
525
<a name="l00498"></a>00498 } <span class="comment">// namespace bm</span>
526
<a name="l00499"></a>00499
527
<a name="l00500"></a>00500 <span class="preprocessor">#endif</span>
441
<a name="l00414"></a>00414 <span class="preprocessor">#endif</span>
528
442
</pre></div></div>
529
<hr size="1"/><address style="text-align: right;"><small>Generated on Sun Nov 22 10:49:35 2009 for BitMagic by
443
<hr size="1"/><address style="text-align: right;"><small>Generated on Fri Jan 8 20:09:19 2010 for BitMagic by
530
444
<a href="http://www.doxygen.org/index.html">
531
445
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.6.1 </small></address>