~ubuntu-branches/ubuntu/vivid/freerdp/vivid

« back to all changes in this revision

Viewing changes to libfreerdp/primitives/prim_sign_opt.c

  • Committer: Package Import Robot
  • Author(s): Iain Lane
  • Date: 2014-11-11 12:20:50 UTC
  • mfrom: (1.1.9) (9.1.17 sid)
  • Revision ID: package-import@ubuntu.com-20141111122050-wyr8hrnwco9fcmum
Tags: 1.1.0~git20140921.1.440916e+dfsg1-2ubuntu1
* Merge with Debian unstable; remaining changes:
  - Disable ffmpeg support
* Disable gstreamer support; this relies on gstreamer 0.10 and we don't want
  to add any more dependencies on that.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* FreeRDP: A Remote Desktop Protocol Client
 
2
 * Optimized sign operations.
 
3
 * vi:ts=4 sw=4:
 
4
 *
 
5
 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
 
6
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 
7
 * not use this file except in compliance with the License. You may obtain
 
8
 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
 
9
 * Unless required by applicable law or agreed to in writing, software
 
10
 * distributed under the License is distributed on an "AS IS" BASIS,
 
11
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 
12
 * or implied. See the License for the specific language governing
 
13
 * permissions and limitations under the License.
 
14
 */
 
15
 
 
16
#ifdef HAVE_CONFIG_H
 
17
#include "config.h"
 
18
#endif
 
19
 
 
20
#include <freerdp/types.h>
 
21
#include <freerdp/primitives.h>
 
22
#include <winpr/sysinfo.h>
 
23
 
 
24
#ifdef WITH_SSE2
 
25
#include <emmintrin.h>
 
26
#include <tmmintrin.h>
 
27
#endif /* WITH_SSE2 */
 
28
 
 
29
#include "prim_internal.h"
 
30
#include "prim_sign.h"
 
31
 
 
32
 
 
33
#ifdef WITH_SSE2
 
34
/* ------------------------------------------------------------------------- */
 
35
PRIMITIVES_HIDDEN pstatus_t ssse3_sign_16s(
	const INT16 *pSrc,
	INT16 *pDst,
	INT32 len)
{
	/* Write the sign of each 16-bit element of pSrc (-1, 0, or +1) to pDst.
	 * Uses SSSE3 _mm_sign_epi16 against a constant vector of ones, so every
	 * output lane becomes sign(input) * 1.
	 */
	const INT16 *src = pSrc;
	INT16 *dst = pDst;
	size_t chunks;

	/* Too short for the vector setup to pay off. */
	if (len < 16)
	{
		return general_sign_16s(pSrc, pDst, len);
	}

	/* An odd destination address can never reach 16-byte alignment by
	 * advancing in whole INT16 steps, so defer to the generic version. */
	if ((ULONG_PTR) pDst & 0x01)
	{
		return general_sign_16s(pSrc, pDst, len);
	}

	/* Scalar prologue: step until the destination is 16-byte aligned. */
	while ((ULONG_PTR) dst & 0x0f)
	{
		INT16 v = *src++;
		/* Branchless sign: (v > 0) - (v < 0) yields -1, 0, or +1. */
		*dst++ = (INT16) ((v > 0) - (v < 0));
		if (--len == 0)
			return PRIMITIVES_SUCCESS;
	}

	/* Main loop: 32 shorts per iteration through eight XMM registers.
	 * Stores are always aligned at this point; loads depend on src. */
	chunks = len >> 5;	/* / 32 */
	len -= chunks << 5;	/* * 32 */
	if ((ULONG_PTR) src & 0x0f)
	{
		/* Source unaligned: lddqu loads. */
		while (chunks--)
		{
			__m128i one0, one1, one2, one3, s0, s1, s2, s3;
			one0 = _mm_set1_epi16(0x0001U);
			one1 = _mm_set1_epi16(0x0001U);
			one2 = _mm_set1_epi16(0x0001U);
			one3 = _mm_set1_epi16(0x0001U);
			s0 = _mm_lddqu_si128((__m128i *) src); src += 8;
			s1 = _mm_lddqu_si128((__m128i *) src); src += 8;
			s2 = _mm_lddqu_si128((__m128i *) src); src += 8;
			s3 = _mm_lddqu_si128((__m128i *) src); src += 8;
			one0 = _mm_sign_epi16(one0, s0);
			one1 = _mm_sign_epi16(one1, s1);
			one2 = _mm_sign_epi16(one2, s2);
			one3 = _mm_sign_epi16(one3, s3);
			_mm_store_si128((__m128i *) dst, one0); dst += 8;
			_mm_store_si128((__m128i *) dst, one1); dst += 8;
			_mm_store_si128((__m128i *) dst, one2); dst += 8;
			_mm_store_si128((__m128i *) dst, one3); dst += 8;
		}
	}
	else
	{
		/* Source aligned: plain loads. */
		while (chunks--)
		{
			__m128i one0, one1, one2, one3, s0, s1, s2, s3;
			one0 = _mm_set1_epi16(0x0001U);
			one1 = _mm_set1_epi16(0x0001U);
			one2 = _mm_set1_epi16(0x0001U);
			one3 = _mm_set1_epi16(0x0001U);
			s0 = _mm_load_si128((__m128i *) src); src += 8;
			s1 = _mm_load_si128((__m128i *) src); src += 8;
			s2 = _mm_load_si128((__m128i *) src); src += 8;
			s3 = _mm_load_si128((__m128i *) src); src += 8;
			one0 = _mm_sign_epi16(one0, s0);
			one1 = _mm_sign_epi16(one1, s1);
			one2 = _mm_sign_epi16(one2, s2);
			one3 = _mm_sign_epi16(one3, s3);
			_mm_store_si128((__m128i *) dst, one0); dst += 8;
			_mm_store_si128((__m128i *) dst, one1); dst += 8;
			_mm_store_si128((__m128i *) dst, one2); dst += 8;
			_mm_store_si128((__m128i *) dst, one3); dst += 8;
		}
	}

	/* Tail: one XMM register's worth (8 shorts) at a time. */
	chunks = len >> 3;
	len -= chunks << 3;
	while (chunks--)
	{
		__m128i ones = _mm_set1_epi16(0x0001U);
		__m128i vals = LOAD_SI128(src);          src += 8;
		ones = _mm_sign_epi16(ones, vals);
		_mm_store_si128((__m128i *) dst, ones);  dst += 8;
	}

	/* Scalar leftovers (fewer than 8 elements). */
	while (len--)
	{
		INT16 v = *src++;
		*dst++ = (INT16) ((v > 0) - (v < 0));
	}

	return PRIMITIVES_SUCCESS;
}
 
135
#endif /* WITH_SSE2 */
 
136
 
 
137
/* ------------------------------------------------------------------------- */
 
138
void primitives_init_sign_opt(primitives_t *prims)
{
	/* Wire up tuned sign implementations where the hardware allows.
	 * Note: no IPP counterpart was identified for this operation. */
#if defined(WITH_SSE2)
	int have_ssse3 = IsProcessorFeaturePresentEx(PF_EX_SSSE3)
		&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE);

	if (have_ssse3)
	{
		prims->sign_16s = ssse3_sign_16s;
	}
#endif
}
 
150