Revision 8c872ffb host/lib/transport/convert_types_impl.hpp
| b/host/lib/transport/convert_types_impl.hpp | ||
|---|---|---|
| 28 | 28 |
#define USE_EMMINTRIN_H //use sse2 intrinsics |
| 29 | 29 |
#endif |
| 30 | 30 |
|
| 31 |
#if defined(USE_EMMINTRIN_H) |
|
| 32 |
#include <emmintrin.h> |
|
| 33 |
#endif |
|
| 34 |
|
|
| 35 |
//! shortcut for a byteswap16 with casting |
|
| 36 |
#define BSWAP16_C(num) uhd::byteswap(boost::uint16_t(num)) |
|
| 37 |
|
|
| 31 | 38 |
/*********************************************************************** |
| 32 | 39 |
* Typedefs |
| 33 | 40 |
**********************************************************************/ |
| ... | ... | |
| 47 | 54 |
static UHD_INLINE void sc16_to_item32_bswap( |
| 48 | 55 |
const sc16_t *input, item32_t *output, size_t nsamps |
| 49 | 56 |
){
|
| 50 |
const item32_t *item32_input = (const item32_t *)input; |
|
| 51 | 57 |
for (size_t i = 0; i < nsamps; i++){
|
| 52 |
output[i] = uhd::byteswap(item32_input[i]); |
|
| 58 |
boost::uint16_t real = BSWAP16_C(input[i].real()); |
|
| 59 |
boost::uint16_t imag = BSWAP16_C(input[i].imag()); |
|
| 60 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 53 | 61 |
} |
| 54 | 62 |
} |
| 55 | 63 |
|
| ... | ... | |
| 65 | 73 |
static UHD_INLINE void item32_to_sc16_bswap( |
| 66 | 74 |
const item32_t *input, sc16_t *output, size_t nsamps |
| 67 | 75 |
){
|
| 68 |
item32_t *item32_output = (item32_t *)output; |
|
| 69 | 76 |
for (size_t i = 0; i < nsamps; i++){
|
| 70 |
item32_output[i] = uhd::byteswap(input[i]); |
|
| 77 |
boost::int16_t real = BSWAP16_C(input[i] >> 0); |
|
| 78 |
boost::int16_t imag = BSWAP16_C(input[i] >> 16); |
|
| 79 |
output[i] = sc16_t(real, imag); |
|
| 71 | 80 |
} |
| 72 | 81 |
} |
| 73 | 82 |
|
| 74 | 83 |
/*********************************************************************** |
| 75 |
* Convert complex float buffer to items32 |
|
| 84 |
* Convert complex float buffer to items32 (no swap)
|
|
| 76 | 85 |
**********************************************************************/ |
| 77 | 86 |
static const float shorts_per_float = float(32767); |
| 78 | 87 |
|
| 79 |
static UHD_INLINE item32_t fc32_to_item32(fc32_t num){
|
|
| 80 |
boost::uint16_t real = boost::int16_t(num.real()*shorts_per_float); |
|
| 81 |
boost::uint16_t imag = boost::int16_t(num.imag()*shorts_per_float); |
|
| 82 |
return (item32_t(real) << 16) | (item32_t(imag) << 0); |
|
| 88 |
#define FC32_TO_SC16_C(num) boost::int16_t(num*shorts_per_float) |
|
| 89 |
|
|
| 90 |
//////////////////////////////////// |
|
| 91 |
// no-swap |
|
| 92 |
//////////////////////////////////// |
|
| 93 |
#if defined(USE_EMMINTRIN_H) |
|
| 94 |
static UHD_INLINE void fc32_to_item32_nswap( |
|
| 95 |
const fc32_t *input, item32_t *output, size_t nsamps |
|
| 96 |
){
|
|
| 97 |
__m128 scalar = _mm_set_ps1(shorts_per_float); |
|
| 98 |
|
|
| 99 |
//convert blocks of samples with intrinsics |
|
| 100 |
size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
|
|
| 101 |
//load from input |
|
| 102 |
__m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); |
|
| 103 |
__m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); |
|
| 104 |
|
|
| 105 |
//convert and scale |
|
| 106 |
__m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); |
|
| 107 |
__m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); |
|
| 108 |
|
|
| 109 |
//pack |
|
| 110 |
__m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); |
|
| 111 |
|
|
| 112 |
//store to output |
|
| 113 |
_mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); |
|
| 114 |
} |
|
| 115 |
|
|
| 116 |
//convert remainder |
|
| 117 |
for (; i < nsamps; i++){
|
|
| 118 |
boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); |
|
| 119 |
boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag()); |
|
| 120 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 121 |
} |
|
| 83 | 122 |
} |
| 84 | 123 |
|
| 124 |
#else |
|
| 85 | 125 |
static UHD_INLINE void fc32_to_item32_nswap( |
| 86 | 126 |
const fc32_t *input, item32_t *output, size_t nsamps |
| 87 | 127 |
){
|
| 88 | 128 |
for (size_t i = 0; i < nsamps; i++){
|
| 89 |
output[i] = fc32_to_item32(input[i]); |
|
| 129 |
boost::uint16_t real = FC32_TO_SC16_C(input[i].real()); |
|
| 130 |
boost::uint16_t imag = FC32_TO_SC16_C(input[i].imag()); |
|
| 131 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 90 | 132 |
} |
| 91 | 133 |
} |
| 92 | 134 |
|
| 93 |
#if defined(USE_EMMINTRIN_H) |
|
| 94 |
#include <emmintrin.h> |
|
| 135 |
#endif |
|
| 95 | 136 |
|
| 137 |
//////////////////////////////////// |
|
| 138 |
// byte-swap |
|
| 139 |
//////////////////////////////////// |
|
| 140 |
#if defined(USE_EMMINTRIN_H) |
|
| 96 | 141 |
static UHD_INLINE void fc32_to_item32_bswap( |
| 97 | 142 |
const fc32_t *input, item32_t *output, size_t nsamps |
| 98 | 143 |
){
|
| ... | ... | |
| 108 | 153 |
__m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); |
| 109 | 154 |
__m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); |
| 110 | 155 |
|
| 111 |
//pack + byteswap -> byteswap 32 bit words
|
|
| 156 |
//pack + byteswap -> byteswap 16 bit words
|
|
| 112 | 157 |
__m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); |
| 113 | 158 |
tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); |
| 114 | 159 |
|
| ... | ... | |
| 118 | 163 |
|
| 119 | 164 |
//convert remainder |
| 120 | 165 |
for (; i < nsamps; i++){
|
| 121 |
output[i] = uhd::byteswap(fc32_to_item32(input[i])); |
|
| 166 |
boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real())); |
|
| 167 |
boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag())); |
|
| 168 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 122 | 169 |
} |
| 123 | 170 |
} |
| 124 | 171 |
|
| ... | ... | |
| 127 | 174 |
const fc32_t *input, item32_t *output, size_t nsamps |
| 128 | 175 |
){
|
| 129 | 176 |
for (size_t i = 0; i < nsamps; i++){
|
| 130 |
output[i] = uhd::byteswap(fc32_to_item32(input[i])); |
|
| 177 |
boost::uint16_t real = BSWAP16_C(FC32_TO_SC16_C(input[i].real())); |
|
| 178 |
boost::uint16_t imag = BSWAP16_C(FC32_TO_SC16_C(input[i].imag())); |
|
| 179 |
output[i] = (item32_t(real) << 0) | (item32_t(imag) << 16); |
|
| 131 | 180 |
} |
| 132 | 181 |
} |
| 133 | 182 |
|
| ... | ... | |
| 138 | 187 |
**********************************************************************/ |
| 139 | 188 |
static const float floats_per_short = float(1.0/shorts_per_float); |
| 140 | 189 |
|
| 141 |
static UHD_INLINE fc32_t item32_to_fc32(item32_t item){
|
|
| 142 |
return fc32_t( |
|
| 143 |
float(boost::int16_t(item >> 16)*floats_per_short), |
|
| 144 |
float(boost::int16_t(item >> 0)*floats_per_short) |
|
| 145 |
); |
|
| 190 |
#define I16_TO_FC32_C(num) (boost::int16_t(num)*floats_per_short) |
|
| 191 |
|
|
| 192 |
//////////////////////////////////// |
|
| 193 |
// no-swap |
|
| 194 |
//////////////////////////////////// |
|
| 195 |
#if defined(USE_EMMINTRIN_H) |
|
| 196 |
static UHD_INLINE void item32_to_fc32_nswap( |
|
| 197 |
const item32_t *input, fc32_t *output, size_t nsamps |
|
| 198 |
){
|
|
| 199 |
__m128 scalar = _mm_set_ps1(floats_per_short/(1 << 16)); |
|
| 200 |
__m128i zeroi = _mm_setzero_si128(); |
|
| 201 |
|
|
| 202 |
//convert blocks of samples with intrinsics |
|
| 203 |
size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
|
|
| 204 |
//load from input |
|
| 205 |
__m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); |
|
| 206 |
|
|
| 207 |
//unpack |
|
| 208 |
__m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits |
|
| 209 |
__m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); |
|
| 210 |
|
|
| 211 |
//convert and scale |
|
| 212 |
__m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); |
|
| 213 |
__m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); |
|
| 214 |
|
|
| 215 |
//store to output |
|
| 216 |
_mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); |
|
| 217 |
_mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); |
|
| 218 |
} |
|
| 219 |
|
|
| 220 |
//convert remainder |
|
| 221 |
for (; i < nsamps; i++){
|
|
| 222 |
float real = I16_TO_FC32_C(input[i] >> 0); |
|
| 223 |
float imag = I16_TO_FC32_C(input[i] >> 16); |
|
| 224 |
output[i] = fc32_t(real, imag); |
|
| 225 |
} |
|
| 146 | 226 |
} |
| 147 | 227 |
|
| 228 |
#else |
|
| 148 | 229 |
static UHD_INLINE void item32_to_fc32_nswap( |
| 149 | 230 |
const item32_t *input, fc32_t *output, size_t nsamps |
| 150 | 231 |
){
|
| 151 | 232 |
for (size_t i = 0; i < nsamps; i++){
|
| 152 |
output[i] = item32_to_fc32(input[i]); |
|
| 233 |
float real = I16_TO_FC32_C(input[i] >> 0); |
|
| 234 |
float imag = I16_TO_FC32_C(input[i] >> 16); |
|
| 235 |
output[i] = fc32_t(real, imag); |
|
| 153 | 236 |
} |
| 154 | 237 |
} |
| 238 |
#endif |
|
| 155 | 239 |
|
| 240 |
//////////////////////////////////// |
|
| 241 |
// byte-swap |
|
| 242 |
//////////////////////////////////// |
|
| 156 | 243 |
#if defined(USE_EMMINTRIN_H) |
| 157 |
#include <emmintrin.h> |
|
| 158 |
|
|
| 159 | 244 |
static UHD_INLINE void item32_to_fc32_bswap( |
| 160 | 245 |
const item32_t *input, fc32_t *output, size_t nsamps |
| 161 | 246 |
){
|
| ... | ... | |
| 167 | 252 |
//load from input |
| 168 | 253 |
__m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); |
| 169 | 254 |
|
| 170 |
//byteswap + unpack -> byteswap 32 bit words
|
|
| 255 |
//byteswap + unpack -> byteswap 16 bit words
|
|
| 171 | 256 |
tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); |
| 172 | 257 |
__m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits |
| 173 | 258 |
__m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); |
| ... | ... | |
| 183 | 268 |
|
| 184 | 269 |
//convert remainder |
| 185 | 270 |
for (; i < nsamps; i++){
|
| 186 |
output[i] = item32_to_fc32(uhd::byteswap(input[i])); |
|
| 271 |
float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0)); |
|
| 272 |
float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16)); |
|
| 273 |
output[i] = fc32_t(real, imag); |
|
| 187 | 274 |
} |
| 188 | 275 |
} |
| 189 | 276 |
|
| ... | ... | |
| 192 | 279 |
const item32_t *input, fc32_t *output, size_t nsamps |
| 193 | 280 |
){
|
| 194 | 281 |
for (size_t i = 0; i < nsamps; i++){
|
| 195 |
output[i] = item32_to_fc32(uhd::byteswap(input[i])); |
|
| 282 |
float real = I16_TO_FC32_C(BSWAP16_C(input[i] >> 0)); |
|
| 283 |
float imag = I16_TO_FC32_C(BSWAP16_C(input[i] >> 16)); |
|
| 284 |
output[i] = fc32_t(real, imag); |
|
| 196 | 285 |
} |
| 197 | 286 |
} |
| 198 | 287 |
|
Also available in: Unified diff