Revision 8c872ffb host/lib/transport/convert_types_impl.hpp

b/host/lib/transport/convert_types_impl.hpp
28 28
    #define USE_EMMINTRIN_H //use sse2 intrinsics
29 29
#endif
30 30

  
31
#if defined(USE_EMMINTRIN_H)
32
    #include <emmintrin.h>
33
#endif
34

  
35
//! shortcut for a byteswap16 with casting
36
//! \param num expression to swap; it is converted to boost::uint16_t first,
//!            so only its low 16 bits reach uhd::byteswap.
#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num))
37

  
31 38
/***********************************************************************
32 39
 * Typedefs
33 40
 **********************************************************************/
......
47 54
/*!
 * Convert complex 16-bit samples to item32 words, byte-swapping the real and
 * imaginary components independently (two 16-bit swaps per sample).
 *
 * For each sample the swapped real part is placed in the low 16 bits of the
 * output word and the swapped imaginary part is shifted left by 16.
 *
 * \param input  samples to convert; nsamps elements are read
 * \param output destination words; nsamps elements are written
 * \param nsamps number of samples to convert
 */
static UHD_INLINE void sc16_to_item32_bswap(
    const sc16_t *input, item32_t *output, size_t nsamps
){
    for (size_t i = 0; i < nsamps; i++){
        //swap each component on its own, then assemble the word
        const boost::uint16_t real = BSWAP16_C(input[i].real());
        const boost::uint16_t imag = BSWAP16_C(input[i].imag());
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
    }
}
55 63

  
......
65 73
/*!
 * Convert item32 words to complex 16-bit samples, byte-swapping the two
 * 16-bit halves of every word independently.
 *
 * The low 16 bits of a word become the real part and the bits shifted down
 * from position 16 become the imaginary part, each after a 16-bit byte swap.
 *
 * \param input  words to convert; nsamps elements are read
 * \param output destination samples; nsamps elements are written
 * \param nsamps number of samples to convert
 */
static UHD_INLINE void item32_to_sc16_bswap(
    const item32_t *input, sc16_t *output, size_t nsamps
){
    for (size_t i = 0; i < nsamps; i++){
        //BSWAP16_C narrows to 16 bits before swapping, so the shift selects the half
        const boost::int16_t real = BSWAP16_C(input[i] >> 0);
        const boost::int16_t imag = BSWAP16_C(input[i] >> 16);
        output[i] = sc16_t(real, imag);
    }
}
73 82

  
74 83
/***********************************************************************
75
 * Convert complex float buffer to items32
84
 * Convert complex float buffer to items32 (no swap)
76 85
 **********************************************************************/
77 86
static const float shorts_per_float = float(32767);
78 87

  
79
static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
80
    boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float);
81
    boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float);
82
    return (item32_t(real) << 16) | (item32_t(imag) << 0);
88
//! Scale a float component by shorts_per_float and convert it to boost::int16_t.
//! The conversion is a plain cast: the fractional part is dropped and no range
//! check is made. The argument is parenthesized so that a compound expression
//! (e.g. a + b) is scaled as a whole.
#define FC32_TO_SC16_C(num) boost::int16_t((num)*shorts_per_float)
89

  
90
////////////////////////////////////
91
// non-swap
92
////////////////////////////////////
93
#if defined(USE_EMMINTRIN_H)
94
/*!
 * Convert complex float samples to item32 words without byte swapping
 * (SSE2 implementation).
 *
 * Samples are handled four at a time: eight floats are loaded unaligned,
 * multiplied by shorts_per_float, converted to 32-bit integers and narrowed
 * to 16 bits with _mm_packs_epi32. Whatever is left over (nsamps % 4 samples)
 * goes through the scalar FC32_TO_SC16_C conversion, with the real part in
 * the low 16 bits of the word and the imaginary part shifted left by 16.
 *
 * The float pointer casts assume fc32_t is two consecutive floats (re, im)
 * -- TODO confirm against the typedef.
 *
 * NOTE(review): the vector path converts with _mm_cvtps_epi32 (current SSE
 * rounding mode) and _mm_packs_epi32 (signed saturation), while the scalar
 * tail is a plain int16 cast. Results may differ by one LSB between the two
 * paths, and out-of-range input is only saturated in the vector path.
 *
 * \param input  samples to convert; nsamps elements are read
 * \param output destination words; nsamps elements are written
 * \param nsamps number of samples to convert
 */
static UHD_INLINE void fc32_to_item32_nswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const __m128 scale = _mm_set_ps1(shorts_per_float);

    //number of samples covered by whole groups of four
    const size_t num_vectorized = nsamps & ~size_t(0x3);

    size_t n = 0;
    while (n < num_vectorized){
        //two samples (four floats) per register
        const __m128 pair01 = _mm_loadu_ps(reinterpret_cast<const float *>(input+n+0));
        const __m128 pair23 = _mm_loadu_ps(reinterpret_cast<const float *>(input+n+2));

        //scale, then float -> int32
        const __m128i ints01 = _mm_cvtps_epi32(_mm_mul_ps(pair01, scale));
        const __m128i ints23 = _mm_cvtps_epi32(_mm_mul_ps(pair23, scale));

        //int32 -> int16 and write four words at once
        const __m128i packed = _mm_packs_epi32(ints01, ints23);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+n), packed);

        n += 4;
    }

    //scalar tail for the last nsamps % 4 samples
    while (n < nsamps){
        const boost::uint16_t real = FC32_TO_SC16_C(input[n].real());
        const boost::uint16_t imag = FC32_TO_SC16_C(input[n].imag());
        output[n] = (item32_t(real) << 0) | (item32_t(imag) << 16);
        n++;
    }
}
84 123

  
124
#else
85 125
/*!
 * Convert complex float samples to item32 words without byte swapping
 * (portable scalar implementation).
 *
 * Each component is scaled and cast to 16 bits by FC32_TO_SC16_C; the real
 * part lands in the low 16 bits of the word and the imaginary part is
 * shifted left by 16.
 *
 * \param input  samples to convert; nsamps elements are read
 * \param output destination words; nsamps elements are written
 * \param nsamps number of samples to convert
 */
static UHD_INLINE void fc32_to_item32_nswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    for (size_t i = 0; i < nsamps; i++){
        //go through uint16 so the sign bit is not extended into the upper half
        const boost::uint16_t real = FC32_TO_SC16_C(input[i].real());
        const boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag());
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
    }
}
92 134

  
93
#if defined(USE_EMMINTRIN_H)
94
#include <emmintrin.h>
135
#endif
95 136

  
137
////////////////////////////////////
138
// byte-swap
139
////////////////////////////////////
140
#if defined(USE_EMMINTRIN_H)
96 141
static UHD_INLINE void fc32_to_item32_bswap(
97 142
    const fc32_t *input, item32_t *output, size_t nsamps
98 143
){
......
108 153
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
109 154
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
110 155

  
111
        //pack + byteswap -> byteswap 32 bit words
156
        //pack + byteswap -> byteswap 16 bit words
112 157
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
113 158
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
114 159

  
......
118 163

  
119 164
    //convert remainder
120 165
    for (; i < nsamps; i++){
121
        output[i] = uhd::byteswap(fc32_to_item32(input[i]));
166
        boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real()));
167
        boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag()));
168
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
122 169
    }
123 170
}
124 171

  
......
127 174
    const fc32_t *input, item32_t *output, size_t nsamps
128 175
){
129 176
    for (size_t i = 0; i < nsamps; i++){
130
        output[i] = uhd::byteswap(fc32_to_item32(input[i]));
177
        boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real()));
178
        boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag()));
179
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
131 180
    }
132 181
}
133 182

  
......
138 187
 **********************************************************************/
139 188
static const float floats_per_short = float(1.0/shorts_per_float);
140 189

  
141
static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
142
    return fc32_t(
143
        float(boost::int16_t(item >> 16)*floats_per_short),
144
        float(boost::int16_t(item >> 0)*floats_per_short)
145
    );
190
//! Convert \p num to boost::int16_t and scale the result by floats_per_short.
//! Callers in this file pass a (shifted) item32_t, so the cast is what selects
//! the 16-bit sample from the word.
#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short)
191

  
192
////////////////////////////////////
193
// non-swap
194
////////////////////////////////////
195
#if defined(USE_EMMINTRIN_H)
196
/*!
 * Convert item32 words to complex float samples without byte swapping
 * (SSE2 implementation).
 *
 * Four words are processed per iteration. Interleaving with zeros places
 * each 16-bit component in the upper half of a 32-bit lane, i.e. the signed
 * value multiplied by 2^16; the scale factor therefore carries an extra
 * 1/2^16. Whatever is left over (nsamps % 4 samples) is converted with
 * I16_TO_FC32_C: low 16 bits -> real, bits shifted down from 16 -> imaginary.
 *
 * The float pointer casts assume fc32_t is two consecutive floats (re, im)
 * -- TODO confirm against the typedef.
 *
 * \param input  words to convert; nsamps elements are read
 * \param output destination samples; nsamps elements are written
 * \param nsamps number of samples to convert
 */
static UHD_INLINE void item32_to_fc32_nswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    //compensates for the 2^16 factor introduced by the unpack below
    const __m128 scale = _mm_set_ps1(floats_per_short/(1 << 16));
    const __m128i zeros = _mm_setzero_si128();

    //number of samples covered by whole groups of four
    const size_t num_vectorized = nsamps & ~size_t(0x3);

    size_t n = 0;
    while (n < num_vectorized){
        //four words, eight 16-bit components
        const __m128i packed = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+n));

        //widen: each component ends up in the upper 16 bits of a 32-bit lane
        const __m128i wide01 = _mm_unpacklo_epi16(zeros, packed);
        const __m128i wide23 = _mm_unpackhi_epi16(zeros, packed);

        //int32 -> float, then scale
        const __m128 pair01 = _mm_mul_ps(_mm_cvtepi32_ps(wide01), scale);
        const __m128 pair23 = _mm_mul_ps(_mm_cvtepi32_ps(wide23), scale);

        //two samples (four floats) per store
        _mm_storeu_ps(reinterpret_cast<float *>(output+n+0), pair01);
        _mm_storeu_ps(reinterpret_cast<float *>(output+n+2), pair23);

        n += 4;
    }

    //scalar tail for the last nsamps % 4 samples
    while (n < nsamps){
        const float real = I16_TO_FC32_C(input[n] >> 0);
        const float imag = I16_TO_FC32_C(input[n] >> 16);
        output[n] = fc32_t(real, imag);
        n++;
    }
}
147 227

  
228
#else
148 229
/*!
 * Convert item32 words to complex float samples without byte swapping
 * (portable scalar implementation).
 *
 * The low 16 bits of each word give the real part and the bits shifted down
 * from position 16 give the imaginary part; both are converted and scaled by
 * I16_TO_FC32_C.
 *
 * \param input  words to convert; nsamps elements are read
 * \param output destination samples; nsamps elements are written
 * \param nsamps number of samples to convert
 */
static UHD_INLINE void item32_to_fc32_nswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    for (size_t i = 0; i < nsamps; i++){
        const float real = I16_TO_FC32_C(input[i] >> 0);
        const float imag = I16_TO_FC32_C(input[i] >> 16);
        output[i] = fc32_t(real, imag);
    }
}
238
#endif
155 239

  
240
////////////////////////////////////
241
// byte-swap
242
////////////////////////////////////
156 243
#if defined(USE_EMMINTRIN_H)
157
#include <emmintrin.h>
158

  
159 244
static UHD_INLINE void item32_to_fc32_bswap(
160 245
    const item32_t *input, fc32_t *output, size_t nsamps
161 246
){
......
167 252
        //load from input
168 253
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
169 254

  
170
        //byteswap + unpack -> byteswap 32 bit words
255
        //byteswap + unpack -> byteswap 16 bit words
171 256
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
172 257
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
173 258
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
......
183 268

  
184 269
    //convert remainder
185 270
    for (; i < nsamps; i++){
186
        output[i] = item32_to_fc32(uhd::byteswap(input[i]));
271
        float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0));
272
        float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16));
273
        output[i] = fc32_t(real, imag);
187 274
    }
188 275
}
189 276

  
......
192 279
    const item32_t *input, fc32_t *output, size_t nsamps
193 280
){
194 281
    for (size_t i = 0; i < nsamps; i++){
195
        output[i] = item32_to_fc32(uhd::byteswap(input[i]));
282
        float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0));
283
        float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16));
284
        output[i] = fc32_t(real, imag);
196 285
    }
197 286
}
198 287

  

Also available in: Unified diff