Revision 8c872ffb

b/host/lib/transport/convert_types_impl.hpp
28 28
    #define USE_EMMINTRIN_H //use sse2 intrinsics
29 29
#endif
30 30

  
31
#if defined(USE_EMMINTRIN_H)
32
    #include <emmintrin.h>
33
#endif
34

  
35
//! shortcut for a 16-bit byteswap that first casts its argument to boost::uint16_t
36
#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num))
37

  
31 38
/***********************************************************************
32 39
 * Typedefs
33 40
 **********************************************************************/
......
47 54
static UHD_INLINE void sc16_to_item32_bswap(
48 55
    const sc16_t *input, item32_t *output, size_t nsamps
49 56
){
50
    const item32_t *item32_input = (const item32_t *)input;
51 57
    for (size_t i = 0; i < nsamps; i++){
52
        output[i] = uhd::byteswap(item32_input[i]);
58
        boost::uint16_t real = BSWAP16_C(input[i].real());
59
        boost::uint16_t imag = BSWAP16_C(input[i].imag());
60
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
53 61
    }
54 62
}
55 63

  
......
65 73
static UHD_INLINE void item32_to_sc16_bswap(
66 74
    const item32_t *input, sc16_t *output, size_t nsamps
67 75
){
68
    item32_t *item32_output = (item32_t *)output;
69 76
    for (size_t i = 0; i < nsamps; i++){
70
        item32_output[i] = uhd::byteswap(input[i]);
77
        boost::int16_t real = BSWAP16_C(input[i] >> 0);
78
        boost::int16_t imag = BSWAP16_C(input[i] >> 16);
79
        output[i] = sc16_t(real, imag);
71 80
    }
72 81
}
73 82

  
74 83
/***********************************************************************
75
 * Convert complex float buffer to items32
84
 * Convert complex float buffer to items32 (no swap)
76 85
 **********************************************************************/
77 86
static const float shorts_per_float = float(32767);
78 87

  
79
static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
80
    boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float);
81
    boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float);
82
    return (item32_t(real) << 16) | (item32_t(imag) << 0);
88
#define FC32_TO_SC16_C(num) boost::int16_t(num*shorts_per_float)
89

  
90
////////////////////////////////////
91
// non-swap
92
////////////////////////////////////
93
#if defined(USE_EMMINTRIN_H)
94
static UHD_INLINE void fc32_to_item32_nswap(
95
    const fc32_t *input, item32_t *output, size_t nsamps
96
){
97
    __m128 scalar = _mm_set_ps1(shorts_per_float);
98

  
99
    //convert blocks of samples with intrinsics
100
    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
101
        //load from input
102
        __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
103
        __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
104

  
105
        //convert and scale
106
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
107
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
108

  
109
        //pack
110
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
111

  
112
        //store to output
113
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
114
    }
115

  
116
    //convert remainder
117
    for (; i < nsamps; i++){
118
        boost::uint16_t real = FC32_TO_SC16_C(input[i].real());
119
        boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag());
120
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
121
    }
83 122
}
84 123

  
124
#else
85 125
static UHD_INLINE void fc32_to_item32_nswap(
86 126
    const fc32_t *input, item32_t *output, size_t nsamps
87 127
){
88 128
    for (size_t i = 0; i < nsamps; i++){
89
        output[i] = fc32_to_item32(input[i]);
129
        boost::uint16_t real = FC32_TO_SC16_C(input[i].real());
130
        boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag());
131
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
90 132
    }
91 133
}
92 134

  
93
#if defined(USE_EMMINTRIN_H)
94
#include <emmintrin.h>
135
#endif
95 136

  
137
////////////////////////////////////
138
// byte-swap
139
////////////////////////////////////
140
#if defined(USE_EMMINTRIN_H)
96 141
static UHD_INLINE void fc32_to_item32_bswap(
97 142
    const fc32_t *input, item32_t *output, size_t nsamps
98 143
){
......
108 153
        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
109 154
        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
110 155

  
111
        //pack + byteswap -> byteswap 32 bit words
156
        //pack + byteswap -> byteswap 16 bit words
112 157
        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
113 158
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
114 159

  
......
118 163

  
119 164
    //convert remainder
120 165
    for (; i < nsamps; i++){
121
        output[i] = uhd::byteswap(fc32_to_item32(input[i]));
166
        boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real()));
167
        boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag()));
168
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
122 169
    }
123 170
}
124 171

  
......
127 174
    const fc32_t *input, item32_t *output, size_t nsamps
128 175
){
129 176
    for (size_t i = 0; i < nsamps; i++){
130
        output[i] = uhd::byteswap(fc32_to_item32(input[i]));
177
        boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real()));
178
        boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag()));
179
        output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16);
131 180
    }
132 181
}
133 182

  
......
138 187
 **********************************************************************/
139 188
static const float floats_per_short = float(1.0/shorts_per_float);
140 189

  
141
static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
142
    return fc32_t(
143
        float(boost::int16_t(item >> 16)*floats_per_short),
144
        float(boost::int16_t(item >> 0)*floats_per_short)
145
    );
190
#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short)
191

  
192
////////////////////////////////////
193
// non-swap
194
////////////////////////////////////
195
#if defined(USE_EMMINTRIN_H)
196
static UHD_INLINE void item32_to_fc32_nswap(
197
    const item32_t *input, fc32_t *output, size_t nsamps
198
){
199
    __m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16));
200
    __m128i zeroi = _mm_setzero_si128();
201

  
202
    //convert blocks of samples with intrinsics
203
    size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
204
        //load from input
205
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
206

  
207
        //unpack
208
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
209
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
210

  
211
        //convert and scale
212
        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
213
        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
214

  
215
        //store to output
216
        _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
217
        _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
218
    }
219

  
220
    //convert remainder
221
    for (; i < nsamps; i++){
222
        float real = I16_TO_FC32_C(input[i] >> 0);
223
        float imag = I16_TO_FC32_C(input[i] >> 16);
224
        output[i] = fc32_t(real, imag);
225
    }
146 226
}
147 227

  
228
#else
148 229
static UHD_INLINE void item32_to_fc32_nswap(
149 230
    const item32_t *input, fc32_t *output, size_t nsamps
150 231
){
151 232
    for (size_t i = 0; i < nsamps; i++){
152
        output[i] = item32_to_fc32(input[i]);
233
        float real = I16_TO_FC32_C(input[i] >> 0);
234
        float imag = I16_TO_FC32_C(input[i] >> 16);
235
        output[i] = fc32_t(real, imag);
153 236
    }
154 237
}
238
#endif
155 239

  
240
////////////////////////////////////
241
// byte-swap
242
////////////////////////////////////
156 243
#if defined(USE_EMMINTRIN_H)
157
#include <emmintrin.h>
158

  
159 244
static UHD_INLINE void item32_to_fc32_bswap(
160 245
    const item32_t *input, fc32_t *output, size_t nsamps
161 246
){
......
167 252
        //load from input
168 253
        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
169 254

  
170
        //byteswap + unpack -> byteswap 32 bit words
255
        //byteswap + unpack -> byteswap 16 bit words
171 256
        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
172 257
        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
173 258
        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
......
183 268

  
184 269
    //convert remainder
185 270
    for (; i < nsamps; i++){
186
        output[i] = item32_to_fc32(uhd::byteswap(input[i]));
271
        float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0));
272
        float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16));
273
        output[i] = fc32_t(real, imag);
187 274
    }
188 275
}
189 276

  
......
192 279
    const item32_t *input, fc32_t *output, size_t nsamps
193 280
){
194 281
    for (size_t i = 0; i < nsamps; i++){
195
        output[i] = item32_to_fc32(uhd::byteswap(input[i]));
282
        float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0));
283
        float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16));
284
        output[i] = fc32_t(real, imag);
196 285
    }
197 286
}
198 287

  
b/host/test/convert_types_test.cpp
17 17

  
18 18
#include <uhd/transport/convert_types.hpp>
19 19
#include <boost/test/unit_test.hpp>
20
#include <boost/foreach.hpp>
20 21
#include <boost/cstdint.hpp>
22
#include <boost/asio/buffer.hpp>
21 23
#include <complex>
24
#include <vector>
25
#include <cstdlib>
22 26

  
23 27
using namespace uhd;
24 28

  
25
template <typename host_type, typename dev_type, size_t nsamps>
26
void loopback(
29
//typedefs for complex types
30
typedef std::complex<boost::uint16_t> sc16_t;
31
typedef std::complex<float> fc32_t;
32

  
33
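//extract a raw pointer to a buffer's storage via boost::asio::buffer, since using &vector.front() throws in MSVC (presumably for the empty vectors of the nsamps == 0 case)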
34
template <typename T> void * pod2ptr(T &pod){
35
    return boost::asio::buffer_cast<void *>(boost::asio::buffer(pod));
36
}
37
template <typename T> const void * pod2ptr(const T &pod){
38
    return boost::asio::buffer_cast<const void *>(boost::asio::buffer(pod));
39
}
40

  
41
/***********************************************************************
42
 * Loopback runner:
43
 *    convert input buffer into intermediate buffer
44
 *    convert intermediate buffer into output buffer
45
 **********************************************************************/
46
template <typename Range> static void loopback(
47
    size_t nsamps,
27 48
    const io_type_t &io_type,
28 49
    const otw_type_t &otw_type,
29
    const host_type *input,
30
    host_type *output
50
    const Range &input,
51
    Range &output
31 52
){
32
    dev_type dev[nsamps];
53
    //item32 is largest device type
54
    std::vector<boost::uint32_t> dev(nsamps);
33 55

  
34 56
    //convert to dev type
35 57
    transport::convert_io_type_to_otw_type(
36
        input, io_type,
37
        dev, otw_type,
58
        pod2ptr(input), io_type,
59
        pod2ptr(dev), otw_type,
38 60
        nsamps
39 61
    );
40 62

  
41 63
    //convert back to host type
42 64
    transport::convert_otw_type_to_io_type(
43
        dev, otw_type,
44
        output, io_type,
65
        pod2ptr(dev), otw_type,
66
        pod2ptr(output), io_type,
45 67
        nsamps
46 68
    );
47 69
}
48 70

  
49
typedef std::complex<boost::uint16_t> sc16_t;
71
/***********************************************************************
72
 * Test short conversion
73
 **********************************************************************/
74
static void test_convert_types_sc16(
75
    size_t nsamps,
76
    const io_type_t &io_type,
77
    const otw_type_t &otw_type
78
){
79
    //fill the input samples
80
    std::vector<sc16_t> input(nsamps), output(nsamps);
81
    BOOST_FOREACH(sc16_t &in, input) in = sc16_t(
82
        std::rand()-(RAND_MAX/2),
83
        std::rand()-(RAND_MAX/2)
84
    );
50 85

  
51
BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){
52
    sc16_t in_sc16[] = {
53
        sc16_t(0, -1234), sc16_t(4321, 1234),
54
        sc16_t(9876, -4567), sc16_t(8912, 0)
55
    }, out_sc16[4];
86
    //run the loopback and test
87
    loopback(nsamps, io_type, otw_type, input, output);
88
    BOOST_CHECK_EQUAL_COLLECTIONS(input.begin(), input.end(), output.begin(), output.end());
89
}
56 90

  
91
BOOST_AUTO_TEST_CASE(test_convert_types_be_sc16){
57 92
    io_type_t io_type(io_type_t::COMPLEX_INT16);
58 93
    otw_type_t otw_type;
59 94
    otw_type.byteorder = otw_type_t::BO_BIG_ENDIAN;
60 95
    otw_type.width = 16;
61 96

  
62
    loopback<sc16_t, boost::uint32_t, 4>(io_type, otw_type, in_sc16, out_sc16);
63
    BOOST_CHECK_EQUAL_COLLECTIONS(in_sc16, in_sc16+4, out_sc16, out_sc16+4);
97
    //try various lengths to test edge cases
98
    for (size_t nsamps = 0; nsamps < 16; nsamps++){
99
        test_convert_types_sc16(nsamps, io_type, otw_type);
100
    }
64 101
}
65 102

  
66 103
BOOST_AUTO_TEST_CASE(test_convert_types_le_sc16){
67
    sc16_t in_sc16[] = {
68
        sc16_t(0, -1234), sc16_t(4321, 1234),
69
        sc16_t(9876, -4567), sc16_t(8912, 0)
70
    }, out_sc16[4];
71

  
72 104
    io_type_t io_type(io_type_t::COMPLEX_INT16);
73 105
    otw_type_t otw_type;
74 106
    otw_type.byteorder = otw_type_t::BO_LITTLE_ENDIAN;
75 107
    otw_type.width = 16;
76 108

  
77
    loopback<sc16_t, boost::uint32_t, 4>(io_type, otw_type, in_sc16, out_sc16);
78
    BOOST_CHECK_EQUAL_COLLECTIONS(in_sc16, in_sc16+4, out_sc16, out_sc16+4);
109
    //try various lengths to test edge cases
110
    for (size_t nsamps = 0; nsamps < 16; nsamps++){
111
        test_convert_types_sc16(nsamps, io_type, otw_type);
112
    }
79 113
}
80 114

  
81
typedef std::complex<float> fc32_t;
82

  
83
#define BOOST_CHECK_CLOSE_COMPLEX(a1, a2, p) \
84
    BOOST_CHECK_CLOSE(a1.real(), a2.real(), p); \
85
    BOOST_CHECK_CLOSE(a1.imag(), a2.imag(), p);
115
/***********************************************************************
116
 * Test float conversion
117
 **********************************************************************/
118
static void test_convert_types_fc32(
119
    size_t nsamps,
120
    const io_type_t &io_type,
121
    const otw_type_t &otw_type
122
){
123
    //fill the input samples
124
    std::vector<fc32_t> input(nsamps), output(nsamps);
125
    BOOST_FOREACH(fc32_t &in, input) in = fc32_t(
126
        (std::rand()/float(RAND_MAX/2)) - 1,
127
        (std::rand()/float(RAND_MAX/2)) - 1
128
    );
86 129

  
87
static const float tolerance = float(0.1);
130
    //run the loopback and test
131
    loopback(nsamps, io_type, otw_type, input, output);
132
    for (size_t i = 0; i < nsamps; i++){
133
        BOOST_CHECK_CLOSE_FRACTION(input[i].real(), output[i].real(), float(0.01));
134
        BOOST_CHECK_CLOSE_FRACTION(input[i].imag(), output[i].imag(), float(0.01));
135
    }
136
}
88 137

  
89 138
BOOST_AUTO_TEST_CASE(test_convert_types_be_fc32){
90
    fc32_t in_fc32[] = {
91
        fc32_t(float(0), float(-0.2)), fc32_t(float(0.03), float(-0.16)),
92
        fc32_t(float(1.0), float(.45)), fc32_t(float(0.09), float(0))
93
    }, out_fc32[4];
94

  
95 139
    io_type_t io_type(io_type_t::COMPLEX_FLOAT32);
96 140
    otw_type_t otw_type;
97 141
    otw_type.byteorder = otw_type_t::BO_BIG_ENDIAN;
98 142
    otw_type.width = 16;
99 143

  
100
    loopback<fc32_t, boost::uint32_t, 4>(io_type, otw_type, in_fc32, out_fc32);
101

  
102
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[0], out_fc32[0], tolerance);
103
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[1], out_fc32[1], tolerance);
104
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[2], out_fc32[2], tolerance);
105
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[3], out_fc32[3], tolerance);
144
    //try various lengths to test edge cases
145
    for (size_t nsamps = 0; nsamps < 16; nsamps++){
146
        test_convert_types_fc32(nsamps, io_type, otw_type);
147
    }
106 148
}
107 149

  
108 150
BOOST_AUTO_TEST_CASE(test_convert_types_le_fc32){
109
    fc32_t in_fc32[] = {
110
        fc32_t(float(0), float(-0.2)), fc32_t(float(0.03), float(-0.16)),
111
        fc32_t(float(1.0), float(.45)), fc32_t(float(0.09), float(0))
112
    }, out_fc32[4];
113

  
114 151
    io_type_t io_type(io_type_t::COMPLEX_FLOAT32);
115 152
    otw_type_t otw_type;
116 153
    otw_type.byteorder = otw_type_t::BO_LITTLE_ENDIAN;
117 154
    otw_type.width = 16;
118 155

  
119
    loopback<fc32_t, boost::uint32_t, 4>(io_type, otw_type, in_fc32, out_fc32);
120

  
121
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[0], out_fc32[0], tolerance);
122
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[1], out_fc32[1], tolerance);
123
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[2], out_fc32[2], tolerance);
124
    BOOST_CHECK_CLOSE_COMPLEX(in_fc32[3], out_fc32[3], tolerance);
156
    //try various lengths to test edge cases
157
    for (size_t nsamps = 0; nsamps < 16; nsamps++){
158
        test_convert_types_fc32(nsamps, io_type, otw_type);
159
    }
125 160
}

Also available in: Unified diff