Statistics
| Branch: | Tag: | Revision:

root / host / lib / transport / convert_types_impl.hpp @ 738f4a86

History | View | Annotate | Download (9.2 kB)

1
//
2
// Copyright 2010 Ettus Research LLC
3
//
4
// This program is free software: you can redistribute it and/or modify
5
// it under the terms of the GNU General Public License as published by
6
// the Free Software Foundation, either version 3 of the License, or
7
// (at your option) any later version.
8
//
9
// This program is distributed in the hope that it will be useful,
10
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
// GNU General Public License for more details.
13
//
14
// You should have received a copy of the GNU General Public License
15
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
//
17

    
18
#ifndef INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP
19
#define INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP
20

    
21
#include <uhd/config.hpp>
22
#include <uhd/utils/byteswap.hpp>
23
#include <boost/cstdint.hpp>
24
#include <cstring>
25
#include <complex>
26

    
27
#ifdef HAVE_EMMINTRIN_H
28
    #define USE_EMMINTRIN_H //use sse2 intrinsics
29
#endif
30

    
31
#if defined(USE_EMMINTRIN_H)
32
    #include <emmintrin.h>
33
#endif
34

    
35
/***********************************************************************
36
 * Typedefs
37
 **********************************************************************/
38
typedef std::complex<float>          fc32_t;
39
typedef std::complex<boost::int16_t> sc16_t;
40
typedef boost::uint32_t              item32_t;
41

    
42
/***********************************************************************
43
 * Convert complex short buffer to items32
44
 **********************************************************************/
45
// Copy nsamps complex-short samples into the item32 buffer as raw bytes,
// without any byte swapping (one 32-bit item per sample).
//
// NOTE(review): the raw copy keeps the host's in-memory layout of sc16_t,
// whereas fc32_to_item32() explicitly places the real part in the upper
// 16 bits. On a little-endian host these orderings presumably differ --
// confirm that this is intended.
static UHD_INLINE void sc16_to_item32_nswap(
    const sc16_t *input, item32_t *output, size_t nsamps
){
    const size_t num_bytes = sizeof(item32_t)*nsamps;
    std::memcpy(output, input, num_bytes);
}
50

    
51
// Convert nsamps complex-short samples to items32, byte-swapping every
// 32-bit item with uhd::byteswap().
//
// Each sample is fetched with std::memcpy instead of dereferencing the
// input through a casted item32_t pointer: sc16_t is not alias-compatible
// with item32_t and may be less strictly aligned, so reading it through
// an item32_t lvalue is undefined behaviour. Compilers turn the 4-byte
// memcpy into a plain load, so the generated code is equivalent.
static UHD_INLINE void sc16_to_item32_bswap(
    const sc16_t *input, item32_t *output, size_t nsamps
){
    //address the input in item32-sized steps, exactly like the old cast did
    const char *raw_input = reinterpret_cast<const char *>(input);
    for (size_t i = 0; i < nsamps; i++){
        item32_t item;
        std::memcpy(&item, raw_input + i*sizeof(item32_t), sizeof(item32_t));
        output[i] = uhd::byteswap(item);
    }
}
59

    
60
/***********************************************************************
61
 * Convert items32 buffer to complex short
62
 **********************************************************************/
63
// Copy nsamps 32-bit items into the complex-short buffer as raw bytes,
// without any byte swapping (one sample per item).
static UHD_INLINE void item32_to_sc16_nswap(
    const item32_t *input, sc16_t *output, size_t nsamps
){
    const size_t num_bytes = sizeof(item32_t)*nsamps;
    std::memcpy(output, input, num_bytes);
}
68

    
69
// Convert nsamps items32 to complex-short samples, byte-swapping every
// 32-bit item with uhd::byteswap().
//
// Each swapped item is written with std::memcpy instead of through a
// casted item32_t pointer: sc16_t is not alias-compatible with item32_t
// and may be less strictly aligned, so storing through an item32_t lvalue
// is undefined behaviour. Compilers turn the 4-byte memcpy into a plain
// store, so the generated code is equivalent.
static UHD_INLINE void item32_to_sc16_bswap(
    const item32_t *input, sc16_t *output, size_t nsamps
){
    //address the output in item32-sized steps, exactly like the old cast did
    char *raw_output = reinterpret_cast<char *>(output);
    for (size_t i = 0; i < nsamps; i++){
        const item32_t item = uhd::byteswap(input[i]);
        std::memcpy(raw_output + i*sizeof(item32_t), &item, sizeof(item32_t));
    }
}
77

    
78
/***********************************************************************
79
 * Convert complex float buffer to items32
80
 **********************************************************************/
81
// Factor a float component is multiplied by before it is narrowed to a
// signed 16-bit integer.
static const float shorts_per_float = 32767.0f;
82

    
83
// Scale one float component by shorts_per_float and narrow it to a signed
// 16-bit integer.
//
// Converting a float that does not fit the destination integer type is
// undefined behaviour, so out-of-range values are saturated to the int16
// limits (the SSE2 converters saturate as well, via _mm_packs_epi32) and
// NaN is mapped to zero. In-range values truncate toward zero exactly as
// the plain cast did.
static UHD_INLINE boost::int16_t fc32_component_to_sc16(float component){
    const float scaled = component*shorts_per_float;
    if (scaled >= float(32767)) return boost::int16_t(32767);
    if (scaled <= float(-32768)) return boost::int16_t(-32768);
    if (scaled != scaled) return boost::int16_t(0); //NaN compares unequal to itself
    return boost::int16_t(scaled);
}

// Pack one complex float sample into a 32-bit item: the scaled real part
// occupies the upper 16 bits and the scaled imaginary part the lower 16.
static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
    //go through uint16 so a negative value does not sign-extend into the
    //neighbouring half of the item
    boost::uint16_t real = fc32_component_to_sc16(num.real());
    boost::uint16_t imag = fc32_component_to_sc16(num.imag());
    return (item32_t(real) << 16) | (item32_t(imag) << 0);
}
88

    
89
////////////////////////////////////
90
// no byte-swap
91
////////////////////////////////////
92
#if defined(USE_EMMINTRIN_H)
93
// SSE2 implementation: convert nsamps complex floats to items32 without a
// byte swap. Samples are processed four at a time with intrinsics; the
// samples left over when nsamps is not a multiple of four are handled by
// the scalar fc32_to_item32().
static UHD_INLINE void fc32_to_item32_nswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const __m128 scale = _mm_set_ps1(shorts_per_float);
    const size_t num_simd_samps = nsamps & ~0x3; //nsamps rounded down to a multiple of 4
    size_t n = 0;

    //vector path: four complex samples per pass
    while (n < num_simd_samps){
        //unaligned loads of two complex floats each
        const __m128 floats_lo = _mm_loadu_ps(reinterpret_cast<const float *>(input + n + 0));
        const __m128 floats_hi = _mm_loadu_ps(reinterpret_cast<const float *>(input + n + 2));

        //scale, then convert to 32-bit integers
        const __m128i ints_lo = _mm_cvtps_epi32(_mm_mul_ps(floats_lo, scale));
        const __m128i ints_hi = _mm_cvtps_epi32(_mm_mul_ps(floats_hi, scale));

        //narrow to eight 16-bit values, then exchange the two 16-bit
        //halves of every 32-bit item
        __m128i packed = _mm_packs_epi32(ints_lo, ints_hi);
        packed = _mm_shufflelo_epi16(packed, _MM_SHUFFLE(2, 3, 0, 1));
        packed = _mm_shufflehi_epi16(packed, _MM_SHUFFLE(2, 3, 0, 1));

        //unaligned store of four items
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output + n), packed);
        n += 4;
    }

    //scalar path for whatever is left
    while (n < nsamps){
        output[n] = fc32_to_item32(input[n]);
        n++;
    }
}
122

    
123
#else
124
// Portable implementation: convert nsamps complex floats to items32
// without a byte swap, one sample at a time via fc32_to_item32().
static UHD_INLINE void fc32_to_item32_nswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const fc32_t *const input_end = input + nsamps;
    for (; input != input_end; ++input, ++output){
        *output = fc32_to_item32(*input);
    }
}
131

    
132
#endif
133

    
134
////////////////////////////////////
135
// byte-swap
136
////////////////////////////////////
137
#if defined(USE_EMMINTRIN_H)
138
// SSE2 implementation: convert nsamps complex floats to byte-swapped
// items32. Samples are processed four at a time with intrinsics; the
// samples left over when nsamps is not a multiple of four are handled by
// the scalar fc32_to_item32() followed by uhd::byteswap().
static UHD_INLINE void fc32_to_item32_bswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const __m128 scale = _mm_set_ps1(shorts_per_float);
    const size_t num_simd_samps = nsamps & ~0x3; //nsamps rounded down to a multiple of 4
    size_t n = 0;

    //vector path: four complex samples per pass
    while (n < num_simd_samps){
        //unaligned loads of two complex floats each
        const __m128 floats_lo = _mm_loadu_ps(reinterpret_cast<const float *>(input + n + 0));
        const __m128 floats_hi = _mm_loadu_ps(reinterpret_cast<const float *>(input + n + 2));

        //scale, then convert to 32-bit integers
        const __m128i ints_lo = _mm_cvtps_epi32(_mm_mul_ps(floats_lo, scale));
        const __m128i ints_hi = _mm_cvtps_epi32(_mm_mul_ps(floats_hi, scale));

        //narrow to eight 16-bit values, then swap the two bytes inside
        //every 16-bit word (shift right by 8 OR shift left by 8)
        const __m128i packed = _mm_packs_epi32(ints_lo, ints_hi);
        const __m128i high_bytes_down = _mm_srli_epi16(packed, 8);
        const __m128i low_bytes_up = _mm_slli_epi16(packed, 8);
        const __m128i swapped = _mm_or_si128(high_bytes_down, low_bytes_up);

        //unaligned store of four items
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output + n), swapped);
        n += 4;
    }

    //scalar path for whatever is left
    while (n < nsamps){
        output[n] = uhd::byteswap(fc32_to_item32(input[n]));
        n++;
    }
}
166

    
167
#else
168
// Portable implementation: convert nsamps complex floats to byte-swapped
// items32, one sample at a time via fc32_to_item32() and uhd::byteswap().
static UHD_INLINE void fc32_to_item32_bswap(
    const fc32_t *input, item32_t *output, size_t nsamps
){
    const fc32_t *const input_end = input + nsamps;
    for (; input != input_end; ++input, ++output){
        const item32_t item = fc32_to_item32(*input);
        *output = uhd::byteswap(item);
    }
}
175

    
176
#endif
177

    
178
/***********************************************************************
179
 * Convert items32 buffer to complex float
180
 **********************************************************************/
181
// Reciprocal of shorts_per_float: the factor a 16-bit integer component is
// multiplied by to get back to a float. The division is done in double
// precision and then narrowed to float.
static const float floats_per_short = static_cast<float>(1.0/shorts_per_float);
182

    
183
// Unpack one 32-bit item into a complex float sample: the upper 16 bits
// are taken as the signed real part and the lower 16 bits as the signed
// imaginary part, each scaled by floats_per_short.
static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
    const boost::int16_t real = boost::int16_t(item >> 16);
    const boost::int16_t imag = boost::int16_t(item >> 0);
    return fc32_t(float(real*floats_per_short), float(imag*floats_per_short));
}
189

    
190
////////////////////////////////////
191
// no byte-swap
192
////////////////////////////////////
193
#if defined(USE_EMMINTRIN_H)
194
// SSE2 implementation: convert nsamps items32 to complex floats without a
// byte swap. Items are processed four at a time with intrinsics; the
// items left over when nsamps is not a multiple of four are handled by
// the scalar item32_to_fc32().
static UHD_INLINE void item32_to_fc32_nswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    //each 16-bit value is widened into the upper half of a 32-bit lane
    //below, so the scale factor carries an extra division by 2^16
    const __m128 scale = _mm_set_ps1(floats_per_short/(1 << 16));
    const __m128i zeros = _mm_setzero_si128();
    const size_t num_simd_samps = nsamps & ~0x3; //nsamps rounded down to a multiple of 4
    size_t n = 0;

    //vector path: four items per pass
    while (n < num_simd_samps){
        //unaligned load of four items
        __m128i words = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + n));

        //exchange the two 16-bit halves of every 32-bit item
        words = _mm_shufflelo_epi16(words, _MM_SHUFFLE(2, 3, 0, 1));
        words = _mm_shufflehi_epi16(words, _MM_SHUFFLE(2, 3, 0, 1));

        //interleave with zeros: every 16-bit value lands in the upper
        //16 bits of its own 32-bit lane
        const __m128i ints_lo = _mm_unpacklo_epi16(zeros, words);
        const __m128i ints_hi = _mm_unpackhi_epi16(zeros, words);

        //convert to float and scale
        const __m128 floats_lo = _mm_mul_ps(_mm_cvtepi32_ps(ints_lo), scale);
        const __m128 floats_hi = _mm_mul_ps(_mm_cvtepi32_ps(ints_hi), scale);

        //unaligned stores of two complex floats each
        _mm_storeu_ps(reinterpret_cast<float *>(output + n + 0), floats_lo);
        _mm_storeu_ps(reinterpret_cast<float *>(output + n + 2), floats_hi);
        n += 4;
    }

    //scalar path for whatever is left
    while (n < nsamps){
        output[n] = item32_to_fc32(input[n]);
        n++;
    }
}
225

    
226
#else
227
// Portable implementation: convert nsamps items32 to complex floats
// without a byte swap, one item at a time via item32_to_fc32().
static UHD_INLINE void item32_to_fc32_nswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    const item32_t *const input_end = input + nsamps;
    for (; input != input_end; ++input, ++output){
        *output = item32_to_fc32(*input);
    }
}
234
#endif
235

    
236
////////////////////////////////////
237
// byte-swap
238
////////////////////////////////////
239
#if defined(USE_EMMINTRIN_H)
240
// SSE2 implementation: convert nsamps byte-swapped items32 to complex
// floats. Items are processed four at a time with intrinsics; the items
// left over when nsamps is not a multiple of four are handled by
// uhd::byteswap() followed by the scalar item32_to_fc32().
static UHD_INLINE void item32_to_fc32_bswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    //each 16-bit value is widened into the upper half of a 32-bit lane
    //below, so the scale factor carries an extra division by 2^16
    const __m128 scale = _mm_set_ps1(floats_per_short/(1 << 16));
    const __m128i zeros = _mm_setzero_si128();
    const size_t num_simd_samps = nsamps & ~0x3; //nsamps rounded down to a multiple of 4
    size_t n = 0;

    //vector path: four items per pass
    while (n < num_simd_samps){
        //unaligned load of four items
        const __m128i words = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + n));

        //swap the two bytes inside every 16-bit word
        //(shift right by 8 OR shift left by 8)
        const __m128i high_bytes_down = _mm_srli_epi16(words, 8);
        const __m128i low_bytes_up = _mm_slli_epi16(words, 8);
        const __m128i swapped = _mm_or_si128(high_bytes_down, low_bytes_up);

        //interleave with zeros: every 16-bit value lands in the upper
        //16 bits of its own 32-bit lane
        const __m128i ints_lo = _mm_unpacklo_epi16(zeros, swapped);
        const __m128i ints_hi = _mm_unpackhi_epi16(zeros, swapped);

        //convert to float and scale
        const __m128 floats_lo = _mm_mul_ps(_mm_cvtepi32_ps(ints_lo), scale);
        const __m128 floats_hi = _mm_mul_ps(_mm_cvtepi32_ps(ints_hi), scale);

        //unaligned stores of two complex floats each
        _mm_storeu_ps(reinterpret_cast<float *>(output + n + 0), floats_lo);
        _mm_storeu_ps(reinterpret_cast<float *>(output + n + 2), floats_hi);
        n += 4;
    }

    //scalar path for whatever is left
    while (n < nsamps){
        output[n] = item32_to_fc32(uhd::byteswap(input[n]));
        n++;
    }
}
270

    
271
#else
272
// Portable implementation: convert nsamps byte-swapped items32 to complex
// floats, one item at a time via uhd::byteswap() and item32_to_fc32().
static UHD_INLINE void item32_to_fc32_bswap(
    const item32_t *input, fc32_t *output, size_t nsamps
){
    const item32_t *const input_end = input + nsamps;
    for (; input != input_end; ++input, ++output){
        const item32_t item = uhd::byteswap(*input);
        *output = item32_to_fc32(item);
    }
}
279

    
280
#endif
281

    
282
#endif /* INCLUDED_LIBUHD_TRANSPORT_CONVERT_TYPES_IMPL_HPP */