Statistics
| Branch: | Tag: | Revision:

root / host / lib / transport / convert_types_impl.hpp @ 8c872ffb

History | View | Annotate | Download (9.8 kB)

1
//
2
// Copyright 2010 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

    
18
#ifndef INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP
19
#define INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP
20

    
21
#include <uhd/config.hpp>
22
#include <uhd/utils/byteswap.hpp>
23
#include <boost/cstdint.hpp>
24
#include <cstring>
25
#include <complex>
26

    
27
#ifdef HAVE_EMMINTRIN_H
28
    #define USE_EMMINTRIN_H //use sse2 intrinsics
29
#endif
30

    
31
#if defined(USE_EMMINTRIN_H)
32
    #include <emmintrin.h>
33
#endif
34

    
35
//! shortcut for a byteswap16 with casting
36
#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num))
37

    
38
/***********************************************************************
39
 * Typedefs
40
 **********************************************************************/
41
//! complex sample whose real and imaginary parts are floats
typedef std::complex<float>          fc32_t;
42
//! complex sample whose real and imaginary parts are 16-bit signed integers
typedef std::complex<boost::int16_t> sc16_t;
43
//! 32-bit unsigned word; the converters in this file use one item per complex sample
typedef boost::uint32_t              item32_t;
44

    
45
/***********************************************************************
46
 * Convert complex short buffer to items32
47
 **********************************************************************/
48
/*!
 * Copy complex short samples into an item32 buffer without byte swapping.
 * The samples are copied verbatim with memcpy; the byte count is computed
 * from sizeof(item32_t), so this relies on one sc16_t occupying one item32_t.
 * \param input the complex short samples to copy from
 * \param output the item32 buffer to copy into
 * \param nsamps the number of samples (items) to copy
 */
static UHD_INLINE void sc16_to_item32_nswap(
    const sc16_t *input, item32_t *output, size_t nsamps
){
    const size_t num_bytes = nsamps*sizeof(item32_t);
    std::memcpy(output, input, num_bytes);
}
53

    
54
/*!
 * Convert complex short samples to items32, byte swapping each 16-bit component.
 * The swapped real part is placed in bits 0-15 of the item and the swapped
 * imaginary part in bits 16-31.
 * \param input the complex short samples to convert
 * \param output the item32 buffer to fill, one item per sample
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void sc16_to_item32_bswap(
    const sc16_t *input, item32_t *output, size_t nsamps
){
    size_t n = 0;
    while (n < nsamps){
        const sc16_t &samp = input[n];
        const boost::uint16_t re_swapped = BSWAP16_C(samp.real());
        const boost::uint16_t im_swapped = BSWAP16_C(samp.imag());
        output[n] = (item32_t(im_swapped) << 16) | (item32_t(re_swapped) << 0);
        n++;
    }
}
63

    
64
/***********************************************************************
65
 * Convert items32 buffer to complex short
66
 **********************************************************************/
67
/*!
 * Copy an item32 buffer into complex short samples without byte swapping.
 * The items are copied verbatim with memcpy; the byte count is computed
 * from sizeof(item32_t), so this relies on one sc16_t occupying one item32_t.
 * \param input the item32 buffer to copy from
 * \param output the complex short samples to copy into
 * \param nsamps the number of samples (items) to copy
 */
static UHD_INLINE void item32_to_sc16_nswap(
    const item32_t *input, sc16_t *output, size_t nsamps
){
    const size_t num_bytes = nsamps*sizeof(item32_t);
    std::memcpy(output, input, num_bytes);
}
72

    
73
/*!
 * Convert items32 to complex short samples, byte swapping each 16-bit component.
 * The real part is taken from bits 0-15 of the item and the imaginary part
 * from bits 16-31; each half is byte swapped before being stored.
 * \param input the item32 buffer to convert
 * \param output the complex short samples to fill
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void item32_to_sc16_bswap(
    const item32_t *input, sc16_t *output, size_t nsamps
){
    size_t n = 0;
    while (n < nsamps){
        const item32_t word = input[n];
        const boost::int16_t re = BSWAP16_C(word >> 0);
        const boost::int16_t im = BSWAP16_C(word >> 16);
        output[n] = sc16_t(re, im);
        n++;
    }
}
82

    
83
/***********************************************************************
84
 * Convert complex float buffer to items32
85
 **********************************************************************/
86
//! scale factor applied to a float component before conversion to a 16-bit integer
static const float shorts_per_float = float(32767);

/*!
 * Scale a float by shorts_per_float and convert it to a 16-bit signed integer.
 * The argument is parenthesized so that a compound expression (e.g. a+b)
 * is scaled as a whole instead of only its last operand.
 * NOTE: this is a plain C++ float-to-integer conversion: it truncates toward
 * zero and performs no saturation, so the scaled value must fit in 16 bits.
 */
#define FC32_TO_SC16_C(num) boost::int16_t((num)*shorts_per_float)
89

    
90
////////////////////////////////////
91
// non-swap
92
////////////////////////////////////
93
#if defined(USE_EMMINTRIN_H)
94
/*!
 * Convert complex float samples to items32 without byte swapping (SSE2 variant).
 * Groups of 4 samples are scaled by shorts_per_float, converted to 32-bit
 * integers, and packed to 16-bit values with intrinsics; the remaining
 * 0 to 3 samples are converted one at a time with FC32_TO_SC16_C.
 * In the scalar tail the real part lands in bits 0-15 and the imaginary
 * part in bits 16-31 of the item.
 * \param input the complex float samples to convert
 * \param output the item32 buffer to fill, one item per sample
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void fc32_to_item32_nswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const __m128 scale = _mm_set_ps1(shorts_per_float);

    //number of samples handled 4 at a time (nsamps rounded down to a multiple of 4)
    const size_t num_vectorized = nsamps & ~0x3;

    size_t n = 0;
    while (n < num_vectorized){
        //unaligned loads, two complex samples (4 floats) each
        const __m128 pair0 = _mm_loadu_ps(reinterpret_cast<const float *>(input+n+0));
        const __m128 pair1 = _mm_loadu_ps(reinterpret_cast<const float *>(input+n+2));

        //scale, then convert to 32-bit integers
        const __m128i ints0 = _mm_cvtps_epi32(_mm_mul_ps(pair0, scale));
        const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(pair1, scale));

        //narrow the eight 32-bit values to 16 bits (packs saturates)
        const __m128i packed = _mm_packs_epi32(ints0, ints1);

        //unaligned store of 4 items
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+n), packed);

        n += 4;
    }

    //scalar tail for the samples that did not fill a group of 4
    for (; n < nsamps; n++){
        const fc32_t &samp = input[n];
        const boost::uint16_t re = FC32_TO_SC16_C(samp.real());
        const boost::uint16_t im = FC32_TO_SC16_C(samp.imag());
        output[n] = (item32_t(im) << 16) | (item32_t(re) << 0);
    }
}
123

    
124
#else
125
/*!
 * Convert complex float samples to items32 without byte swapping (portable variant).
 * Each component is converted with FC32_TO_SC16_C; the real part lands in
 * bits 0-15 and the imaginary part in bits 16-31 of the item.
 * \param input the complex float samples to convert
 * \param output the item32 buffer to fill, one item per sample
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void fc32_to_item32_nswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    size_t n = 0;
    while (n < nsamps){
        const fc32_t &samp = input[n];
        const boost::uint16_t re = FC32_TO_SC16_C(samp.real());
        const boost::uint16_t im = FC32_TO_SC16_C(samp.imag());
        output[n] = (item32_t(im) << 16) | (item32_t(re) << 0);
        n++;
    }
}
134

    
135
#endif
136

    
137
////////////////////////////////////
138
// byte-swap
139
////////////////////////////////////
140
#if defined(USE_EMMINTRIN_H)
141
/*!
 * Convert complex float samples to items32 with byte swapping (SSE2 variant).
 * Groups of 4 samples are scaled by shorts_per_float, converted to 32-bit
 * integers, packed to 16-bit values, and each 16-bit word is byte swapped
 * with intrinsics; the remaining 0 to 3 samples are converted one at a time
 * with FC32_TO_SC16_C and BSWAP16_C.
 * In the scalar tail the swapped real part lands in bits 0-15 and the
 * swapped imaginary part in bits 16-31 of the item.
 * \param input the complex float samples to convert
 * \param output the item32 buffer to fill, one item per sample
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void fc32_to_item32_bswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const __m128 scale = _mm_set_ps1(shorts_per_float);

    //number of samples handled 4 at a time (nsamps rounded down to a multiple of 4)
    const size_t num_vectorized = nsamps & ~0x3;

    size_t n = 0;
    while (n < num_vectorized){
        //unaligned loads, two complex samples (4 floats) each
        const __m128 pair0 = _mm_loadu_ps(reinterpret_cast<const float *>(input+n+0));
        const __m128 pair1 = _mm_loadu_ps(reinterpret_cast<const float *>(input+n+2));

        //scale, then convert to 32-bit integers
        const __m128i ints0 = _mm_cvtps_epi32(_mm_mul_ps(pair0, scale));
        const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(pair1, scale));

        //narrow the eight 32-bit values to 16 bits (packs saturates)
        const __m128i packed = _mm_packs_epi32(ints0, ints1);

        //exchange the two bytes of every 16-bit word: (w >> 8) | (w << 8)
        const __m128i high_to_low = _mm_srli_epi16(packed, 8);
        const __m128i low_to_high = _mm_slli_epi16(packed, 8);
        const __m128i swapped = _mm_or_si128(high_to_low, low_to_high);

        //unaligned store of 4 items
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+n), swapped);

        n += 4;
    }

    //scalar tail for the samples that did not fill a group of 4
    for (; n < nsamps; n++){
        const fc32_t &samp = input[n];
        const boost::uint16_t re = BSWAP16_C(FC32_TO_SC16_C(samp.real()));
        const boost::uint16_t im = BSWAP16_C(FC32_TO_SC16_C(samp.imag()));
        output[n] = (item32_t(im) << 16) | (item32_t(re) << 0);
    }
}
171

    
172
#else
173
/*!
 * Convert complex float samples to items32 with byte swapping (portable variant).
 * Each component is converted with FC32_TO_SC16_C and then byte swapped;
 * the swapped real part lands in bits 0-15 and the swapped imaginary part
 * in bits 16-31 of the item.
 * \param input the complex float samples to convert
 * \param output the item32 buffer to fill, one item per sample
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void fc32_to_item32_bswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    size_t n = 0;
    while (n < nsamps){
        const fc32_t &samp = input[n];
        const boost::uint16_t re = BSWAP16_C(FC32_TO_SC16_C(samp.real()));
        const boost::uint16_t im = BSWAP16_C(FC32_TO_SC16_C(samp.imag()));
        output[n] = (item32_t(im) << 16) | (item32_t(re) << 0);
        n++;
    }
}
182

    
183
#endif
184

    
185
/***********************************************************************
186
 * Convert items32 buffer to complex float
187
 **********************************************************************/
188
//! reciprocal of shorts_per_float: scale factor applied when converting 16-bit integers to float
static const float floats_per_short = float(1.0/shorts_per_float);
189

    
190
//! convert num to a 16-bit signed integer, then scale it by floats_per_short to get a float
#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short)
191

    
192
////////////////////////////////////
193
// non-swap
194
////////////////////////////////////
195
#if defined(USE_EMMINTRIN_H)
196
/*!
 * Convert items32 to complex float samples without byte swapping (SSE2 variant).
 * Groups of 4 items are widened and scaled with intrinsics; the remaining
 * 0 to 3 items are converted one at a time with I16_TO_FC32_C, taking the
 * real part from bits 0-15 and the imaginary part from bits 16-31.
 * \param input the item32 buffer to convert
 * \param output the complex float samples to fill
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void item32_to_fc32_nswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    //the unpack below leaves each 16-bit value in the upper half of a 32-bit lane,
    //so the scale factor also divides by 2^16
    const __m128 scale = _mm_set_ps1(floats_per_short/(1 << 16));
    const __m128i zeros = _mm_setzero_si128();

    //number of samples handled 4 at a time (nsamps rounded down to a multiple of 4)
    const size_t num_vectorized = nsamps & ~0x3;

    size_t n = 0;
    while (n < num_vectorized){
        //unaligned load of 4 items (eight 16-bit values)
        const __m128i packed = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+n));

        //interleave with zeros: value in upper 16 bits of each 32-bit lane
        const __m128i wide_lo = _mm_unpacklo_epi16(zeros, packed);
        const __m128i wide_hi = _mm_unpackhi_epi16(zeros, packed);

        //convert to float and scale
        const __m128 floats_lo = _mm_mul_ps(_mm_cvtepi32_ps(wide_lo), scale);
        const __m128 floats_hi = _mm_mul_ps(_mm_cvtepi32_ps(wide_hi), scale);

        //unaligned stores, two complex samples (4 floats) each
        _mm_storeu_ps(reinterpret_cast<float *>(output+n+0), floats_lo);
        _mm_storeu_ps(reinterpret_cast<float *>(output+n+2), floats_hi);

        n += 4;
    }

    //scalar tail for the items that did not fill a group of 4
    for (; n < nsamps; n++){
        const item32_t word = input[n];
        const float re = I16_TO_FC32_C(word >> 0);
        const float im = I16_TO_FC32_C(word >> 16);
        output[n] = fc32_t(re, im);
    }
}
227

    
228
#else
229
/*!
 * Convert items32 to complex float samples without byte swapping (portable variant).
 * The real part is taken from bits 0-15 of the item and the imaginary part
 * from bits 16-31; each is converted with I16_TO_FC32_C.
 * \param input the item32 buffer to convert
 * \param output the complex float samples to fill
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void item32_to_fc32_nswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    size_t n = 0;
    while (n < nsamps){
        const item32_t word = input[n];
        const float re = I16_TO_FC32_C(word >> 0);
        const float im = I16_TO_FC32_C(word >> 16);
        output[n] = fc32_t(re, im);
        n++;
    }
}
238
#endif
239

    
240
////////////////////////////////////
241
// byte-swap
242
////////////////////////////////////
243
#if defined(USE_EMMINTRIN_H)
244
/*!
 * Convert items32 to complex float samples with byte swapping (SSE2 variant).
 * Groups of 4 items have each 16-bit word byte swapped, then are widened
 * and scaled with intrinsics; the remaining 0 to 3 items are converted one
 * at a time with BSWAP16_C and I16_TO_FC32_C, taking the real part from
 * bits 0-15 and the imaginary part from bits 16-31.
 * \param input the item32 buffer to convert
 * \param output the complex float samples to fill
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void item32_to_fc32_bswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    //the unpack below leaves each 16-bit value in the upper half of a 32-bit lane,
    //so the scale factor also divides by 2^16
    const __m128 scale = _mm_set_ps1(floats_per_short/(1 << 16));
    const __m128i zeros = _mm_setzero_si128();

    //number of samples handled 4 at a time (nsamps rounded down to a multiple of 4)
    const size_t num_vectorized = nsamps & ~0x3;

    size_t n = 0;
    while (n < num_vectorized){
        //unaligned load of 4 items (eight 16-bit values)
        const __m128i raw = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+n));

        //exchange the two bytes of every 16-bit word: (w >> 8) | (w << 8)
        const __m128i high_to_low = _mm_srli_epi16(raw, 8);
        const __m128i low_to_high = _mm_slli_epi16(raw, 8);
        const __m128i swapped = _mm_or_si128(high_to_low, low_to_high);

        //interleave with zeros: value in upper 16 bits of each 32-bit lane
        const __m128i wide_lo = _mm_unpacklo_epi16(zeros, swapped);
        const __m128i wide_hi = _mm_unpackhi_epi16(zeros, swapped);

        //convert to float and scale
        const __m128 floats_lo = _mm_mul_ps(_mm_cvtepi32_ps(wide_lo), scale);
        const __m128 floats_hi = _mm_mul_ps(_mm_cvtepi32_ps(wide_hi), scale);

        //unaligned stores, two complex samples (4 floats) each
        _mm_storeu_ps(reinterpret_cast<float *>(output+n+0), floats_lo);
        _mm_storeu_ps(reinterpret_cast<float *>(output+n+2), floats_hi);

        n += 4;
    }

    //scalar tail for the items that did not fill a group of 4
    for (; n < nsamps; n++){
        const item32_t word = input[n];
        const float re = I16_TO_FC32_C(BSWAP16_C(word >> 0));
        const float im = I16_TO_FC32_C(BSWAP16_C(word >> 16));
        output[n] = fc32_t(re, im);
    }
}
276

    
277
#else
278
/*!
 * Convert items32 to complex float samples with byte swapping (portable variant).
 * The real part is taken from bits 0-15 of the item and the imaginary part
 * from bits 16-31; each half is byte swapped and then converted with
 * I16_TO_FC32_C.
 * \param input the item32 buffer to convert
 * \param output the complex float samples to fill
 * \param nsamps the number of samples to convert
 */
static UHD_INLINE void item32_to_fc32_bswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    size_t n = 0;
    while (n < nsamps){
        const item32_t word = input[n];
        const float re = I16_TO_FC32_C(BSWAP16_C(word >> 0));
        const float im = I16_TO_FC32_C(BSWAP16_C(word >> 16));
        output[n] = fc32_t(re, im);
        n++;
    }
}
287

    
288
#endif
289

    
290
#endif /* INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP */