Revision 738f4a86
| b/host/lib/transport/convert_types_impl.hpp | ||
|---|---|---|
| 32 | 32 |
#include <emmintrin.h> |
| 33 | 33 |
#endif |
| 34 | 34 |
|
| 35 |
//! shortcut for a byteswap16 with casting |
|
| 36 |
#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num)) |
|
| 37 |
|
|
| 38 | 35 |
/*********************************************************************** |
| 39 | 36 |
* Typedefs |
| 40 | 37 |
**********************************************************************/ |
| ... | ... | |
| 54 | 51 |
// Converts nsamps complex 16-bit samples from input into byte-swapped item32
// words in output.
// This text is a side-by-side diff rendering: rows tagged with two line
// numbers are shared by both revisions, rows tagged with a single number
// belong to one side only. Two alternative loop bodies are therefore visible:
//  - a per-component form that applies BSWAP16_C to real() and imag() and
//    places real in bits 15..0 and imag in bits 31..16 of the output word;
//  - a whole-word form that reads each sample through an item32_t pointer and
//    applies uhd::byteswap to the full 32-bit value.
// NOTE(review): the whole-word form presumably relies on sc16_t occupying
// exactly one item32_t (the typedefs are not visible here) -- confirm.
static UHD_INLINE void sc16_to_item32_bswap( |
| 55 | 52 |
const sc16_t *input, item32_t *output, size_t nsamps |
| 56 | 53 |
){
|
| 54 |
// view the complex sample array as raw 32-bit words
const item32_t *item32_input = (const item32_t *)input; |
|
| 57 | 55 |
for (size_t i = 0; i < nsamps; i++){
|
| 58 |
boost::uint16_t real = BSWAP16_C(input[i].real()); |
|
| 59 |
boost::uint16_t imag = BSWAP16_C(input[i].imag()); |
|
| 60 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 56 |
output[i] = uhd::byteswap(item32_input[i]); |
|
| 61 | 57 |
} |
| 62 | 58 |
} |
| 63 | 59 |
|
| ... | ... | |
| 73 | 69 |
// Converts nsamps byte-swapped item32 words from input into complex 16-bit
// samples in output.
// This text is a side-by-side diff rendering: rows tagged with two line
// numbers are shared by both revisions, rows tagged with a single number
// belong to one side only. Two alternative loop bodies are therefore visible:
//  - a per-component form that applies BSWAP16_C to bits 15..0 (real) and
//    bits 31..16 (imag) of each word and builds an sc16_t from the results;
//  - a whole-word form that writes uhd::byteswap(input[i]) straight into the
//    output array through an item32_t pointer.
// NOTE(review): the whole-word form presumably relies on sc16_t occupying
// exactly one item32_t (the typedefs are not visible here) -- confirm.
static UHD_INLINE void item32_to_sc16_bswap( |
| 74 | 70 |
const item32_t *input, sc16_t *output, size_t nsamps |
| 75 | 71 |
){
|
| 72 |
// view the complex sample array as raw 32-bit words
item32_t *item32_output = (item32_t *)output; |
|
| 76 | 73 |
for (size_t i = 0; i < nsamps; i++){
|
| 77 |
boost::int16_t real = BSWAP16_C(input[i] >> 0); |
|
| 78 |
boost::int16_t imag = BSWAP16_C(input[i] >> 16); |
|
| 79 |
output[i] = sc16_t(real, imag); |
|
| 74 |
item32_output[i] = uhd::byteswap(input[i]); |
|
| 80 | 75 |
} |
| 81 | 76 |
} |
| 82 | 77 |
|
| ... | ... | |
| 85 | 80 |
**********************************************************************/ |
| 86 | 81 |
static const float shorts_per_float = float(32767); |
| 87 | 82 |
|
| 88 |
#define FC32_TO_SC16_C(num) boost::int16_t(num*shorts_per_float) |
|
| 83 |
// Packs one complex float sample into a single item32 word.
// Each component is multiplied by shorts_per_float and cast to a signed
// 16-bit integer; real ends up in bits 31..16 and imag in bits 15..0.
// NOTE(review): the plain cast truncates toward zero and is not defined for
// products outside the int16 range, whereas the SSE code in this file goes
// through _mm_cvtps_epi32 and _mm_packs_epi32 -- confirm that callers keep
// inputs in range or that the difference is acceptable.
static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
|
|
| 84 |
// held as uint16 so that widening to item32_t cannot sign-extend
boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); |
|
| 85 |
boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); |
|
| 86 |
return (item32_t(real) << 16) | (item32_t(imag) << 0); |
|
| 87 |
} |
|
| 89 | 88 |
|
| 90 | 89 |
//////////////////////////////////// |
| 91 | 90 |
// none-swap |
| ... | ... | |
| 106 | 105 |
__m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); |
| 107 | 106 |
__m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); |
| 108 | 107 |
|
| 109 |
//pack |
|
| 108 |
//pack + swap 16-bit pairs
|
|
| 110 | 109 |
__m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); |
| 110 |
tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); |
|
| 111 |
tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); |
|
| 111 | 112 |
|
| 112 | 113 |
//store to output |
| 113 | 114 |
_mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); |
| ... | ... | |
| 115 | 116 |
|
| 116 | 117 |
//convert remainder |
| 117 | 118 |
for (; i < nsamps; i++){
|
| 118 |
boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); |
|
| 119 |
boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag()); |
|
| 120 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 119 |
output[i] = fc32_to_item32(input[i]); |
|
| 121 | 120 |
} |
| 122 | 121 |
} |
| 123 | 122 |
|
| ... | ... | |
| 126 | 125 |
const fc32_t *input, item32_t *output, size_t nsamps |
| 127 | 126 |
){
|
| 128 | 127 |
for (size_t i = 0; i < nsamps; i++){
|
| 129 |
boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); |
|
| 130 |
boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag()); |
|
| 131 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 128 |
output[i] = fc32_to_item32(input[i]); |
|
| 132 | 129 |
} |
| 133 | 130 |
} |
| 134 | 131 |
|
| ... | ... | |
| 163 | 160 |
|
| 164 | 161 |
//convert remainder |
| 165 | 162 |
for (; i < nsamps; i++){
|
| 166 |
boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real())); |
|
| 167 |
boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag())); |
|
| 168 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 163 |
output[i] = uhd::byteswap(fc32_to_item32(input[i])); |
|
| 169 | 164 |
} |
| 170 | 165 |
} |
| 171 | 166 |
|
| ... | ... | |
| 174 | 169 |
const fc32_t *input, item32_t *output, size_t nsamps |
| 175 | 170 |
){
|
| 176 | 171 |
for (size_t i = 0; i < nsamps; i++){
|
| 177 |
boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real())); |
|
| 178 |
boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag())); |
|
| 179 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 172 |
output[i] = uhd::byteswap(fc32_to_item32(input[i])); |
|
| 180 | 173 |
} |
| 181 | 174 |
} |
| 182 | 175 |
|
| ... | ... | |
| 187 | 180 |
**********************************************************************/ |
| 188 | 181 |
static const float floats_per_short = float(1.0/shorts_per_float); |
| 189 | 182 |
|
| 190 |
#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short) |
|
| 183 |
// Unpacks one item32 word into a complex float sample.
// Bits 31..16 supply the real part and bits 15..0 the imaginary part; each
// half is cast to a signed 16-bit integer and multiplied by floats_per_short.
static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
|
|
| 184 |
return fc32_t( |
|
| 185 |
// the int16_t cast keeps only the low 16 bits of the shifted value
float(boost::int16_t(item >> 16)*floats_per_short), |
|
| 186 |
float(boost::int16_t(item >> 0)*floats_per_short) |
|
| 187 |
); |
|
| 188 |
} |
|
| 191 | 189 |
|
| 192 | 190 |
//////////////////////////////////// |
| 193 | 191 |
// none-swap |
| ... | ... | |
| 204 | 202 |
//load from input |
| 205 | 203 |
__m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); |
| 206 | 204 |
|
| 207 |
//unpack |
|
| 205 |
//unpack + swap 16-bit pairs |
|
| 206 |
tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); |
|
| 207 |
tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); |
|
| 208 | 208 |
__m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits |
| 209 | 209 |
__m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); |
| 210 | 210 |
|
| ... | ... | |
| 219 | 219 |
|
| 220 | 220 |
//convert remainder |
| 221 | 221 |
for (; i < nsamps; i++){
|
| 222 |
float real = I16_TO_FC32_C(input[i] >> 0); |
|
| 223 |
float imag = I16_TO_FC32_C(input[i] >> 16); |
|
| 224 |
output[i] = fc32_t(real, imag); |
|
| 222 |
output[i] = item32_to_fc32(input[i]); |
|
| 225 | 223 |
} |
| 226 | 224 |
} |
| 227 | 225 |
|
| ... | ... | |
| 230 | 228 |
const item32_t *input, fc32_t *output, size_t nsamps |
| 231 | 229 |
){
|
| 232 | 230 |
for (size_t i = 0; i < nsamps; i++){
|
| 233 |
float real = I16_TO_FC32_C(input[i] >> 0); |
|
| 234 |
float imag = I16_TO_FC32_C(input[i] >> 16); |
|
| 235 |
output[i] = fc32_t(real, imag); |
|
| 231 |
output[i] = item32_to_fc32(input[i]); |
|
| 236 | 232 |
} |
| 237 | 233 |
} |
| 238 | 234 |
#endif |
| ... | ... | |
| 268 | 264 |
|
| 269 | 265 |
//convert remainder |
| 270 | 266 |
for (; i < nsamps; i++){
|
| 271 |
float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0)); |
|
| 272 |
float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16)); |
|
| 273 |
output[i] = fc32_t(real, imag); |
|
| 267 |
output[i] = item32_to_fc32(uhd::byteswap(input[i])); |
|
| 274 | 268 |
} |
| 275 | 269 |
} |
| 276 | 270 |
|
| ... | ... | |
| 279 | 273 |
const item32_t *input, fc32_t *output, size_t nsamps |
| 280 | 274 |
){
|
| 281 | 275 |
for (size_t i = 0; i < nsamps; i++){
|
| 282 |
float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0)); |
|
| 283 |
float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16)); |
|
| 284 |
output[i] = fc32_t(real, imag); |
|
| 276 |
output[i] = item32_to_fc32(uhd::byteswap(input[i])); |
|
| 285 | 277 |
} |
| 286 | 278 |
} |
| 287 | 279 |
|
Also available in: Unified diff