diff options
author | Colin Guthrie <cguthrie@mandriva.org> | 2011-03-18 00:10:27 +0000 |
---|---|---|
committer | Colin Guthrie <cguthrie@mandriva.org> | 2011-03-18 00:10:27 +0000 |
commit | b8db02755addb7c00adde9b1bd05d486863f46a6 (patch) | |
tree | 21ac6ec11ce6300dfbe487f84ed58c9df4e1a3d0 /src/modules | |
parent | bc43fe5c60a64a58a4b6999aa088a9df3c03aaad (diff) | |
parent | ee93eff6b7a04193e7afa3c5aa4fe71558634b21 (diff) |
Merge remote-tracking branch 'vudentz/master'
Diffstat (limited to 'src/modules')
18 files changed, 2017 insertions, 449 deletions
diff --git a/src/modules/bluetooth/bluetooth-util.c b/src/modules/bluetooth/bluetooth-util.c index e6f6e17e..9c679687 100644 --- a/src/modules/bluetooth/bluetooth-util.c +++ b/src/modules/bluetooth/bluetooth-util.c @@ -714,6 +714,47 @@ static void list_adapters(pa_bluetooth_discovery *y) { send_and_add_to_pending(y, NULL, m, list_adapters_reply); } +int pa_bluetooth_transport_parse_property(pa_bluetooth_transport *t, DBusMessageIter *i) +{ + const char *key; + DBusMessageIter variant_i; + + if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING) { + pa_log("Property name not a string."); + return -1; + } + + dbus_message_iter_get_basic(i, &key); + + if (!dbus_message_iter_next(i)) { + pa_log("Property value missing"); + return -1; + } + + if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_VARIANT) { + pa_log("Property value not a variant."); + return -1; + } + + dbus_message_iter_recurse(i, &variant_i); + + switch (dbus_message_iter_get_arg_type(&variant_i)) { + + case DBUS_TYPE_BOOLEAN: { + + pa_bool_t *value; + dbus_message_iter_get_basic(&variant_i, &value); + + if (pa_streq(key, "NREC")) + t->nrec = value; + + break; + } + } + + return 0; +} + static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *userdata) { DBusError err; pa_bluetooth_discovery *y; @@ -862,6 +903,28 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us } return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; + } else if (dbus_message_is_signal(m, "org.bluez.MediaTransport", "PropertyChanged")) { + pa_bluetooth_device *d; + pa_bluetooth_transport *t; + void *state = NULL; + DBusMessageIter arg_i; + + while ((d = pa_hashmap_iterate(y->devices, &state, NULL))) + if ((t = pa_hashmap_get(d->transports, dbus_message_get_path(m)))) + break; + + if (!t) + goto fail; + + if (!dbus_message_iter_init(m, &arg_i)) { + pa_log("Failed to parse PropertyChanged: %s", err.message); + goto fail; + } + + if (pa_bluetooth_transport_parse_property(t, &arg_i) < 0) + goto fail; + + return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; } fail: @@ -934,10 +997,11 @@ const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetoo return NULL; } -int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype) { +int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu) { DBusMessage *m, *r; DBusError err; int ret; + uint16_t i, o; pa_assert(t); pa_assert(t->y); @@ -955,7 +1019,7 @@ int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char * } #ifdef DBUS_TYPE_UNIX_FD - if (!dbus_message_get_args(r, &err, DBUS_TYPE_UNIX_FD, &ret, DBUS_TYPE_INVALID)) { + if (!dbus_message_get_args(r, &err, DBUS_TYPE_UNIX_FD, &ret, DBUS_TYPE_UINT16, &i, DBUS_TYPE_UINT16, &o, DBUS_TYPE_INVALID)) { pa_log("Failed to parse org.bluez.MediaTransport.Acquire(): %s", err.message); ret = -1; dbus_error_free(&err); @@ -963,6 +1027,12 @@ int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char * } #endif + if (imtu) + *imtu = i; + + if (omtu) + *omtu = o; + fail: dbus_message_unref(r); return ret; @@ -1028,6 +1098,7 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage const char *path, *dev_path = NULL, *uuid = NULL; uint8_t *config = NULL; int size = 0; + pa_bool_t nrec; enum profile p; DBusMessageIter args, props; DBusMessage *r; @@ -1063,6 +1134,10 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage if (var != DBUS_TYPE_OBJECT_PATH) goto fail; dbus_message_iter_get_basic(&value, &dev_path); + } else if (strcasecmp(key, "NREC") == 0) { + if (var != DBUS_TYPE_BOOLEAN) + goto fail; + dbus_message_iter_get_basic(&value, &nrec); } else if (strcasecmp(key, "Configuration") == 0) { DBusMessageIter array; if (var != DBUS_TYPE_ARRAY) @@ -1086,6 +1161,8 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage p = PROFILE_A2DP_SOURCE; t = transport_new(y, path, p, config, size); + if (nrec) + t->nrec = nrec; pa_hashmap_put(d->transports, t->path, t); pa_log_debug("Transport %s profile %d available", t->path, t->profile); @@ -1395,6 +1472,7 @@ pa_bluetooth_discovery* pa_bluetooth_discovery_get(pa_core *c) { "type='signal',sender='org.bluez',interface='org.bluez.AudioSink',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.AudioSource',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.HandsfreeGateway',member='PropertyChanged'", + "type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'", NULL) < 0) { pa_log("Failed to add D-Bus matches: %s", err.message); goto fail; @@ -1462,6 +1540,7 @@ void pa_bluetooth_discovery_unref(pa_bluetooth_discovery *y) { "type='signal',sender='org.bluez',interface='org.bluez.AudioSink',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.AudioSource',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.HandsfreeGateway',member='PropertyChanged'", + "type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'", NULL); if (y->filter_added) diff --git a/src/modules/bluetooth/bluetooth-util.h b/src/modules/bluetooth/bluetooth-util.h index f141209d..bb0cb24a 100644 --- a/src/modules/bluetooth/bluetooth-util.h +++ b/src/modules/bluetooth/bluetooth-util.h @@ -70,6 +70,7 @@ struct pa_bluetooth_transport { uint8_t codec; uint8_t *config; int config_size; + pa_bool_t nrec; }; /* This enum is shared among Audio, Headset, AudioSink, and AudioSource, although not all values are acceptable in all profiles */ @@ -126,8 +127,9 @@ const pa_bluetooth_device* pa_bluetooth_discovery_get_by_address(pa_bluetooth_di const pa_bluetooth_transport* pa_bluetooth_discovery_get_transport(pa_bluetooth_discovery *y, const char *path); const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetooth_device *d, enum profile profile); -int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype); +int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu); void pa_bluetooth_transport_release(const pa_bluetooth_transport *t, const char *accesstype); +int pa_bluetooth_transport_parse_property(pa_bluetooth_transport *t, DBusMessageIter *i); pa_hook* pa_bluetooth_discovery_hook(pa_bluetooth_discovery *d); diff --git a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 936d3c77..d29e29b8 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -51,13 +51,16 @@ #include "module-bluetooth-device-symdef.h" #include "ipc.h" -#include "sbc.h" +#include "sbc/sbc.h" #include "rtp.h" #include "bluetooth-util.h" #define MAX_BITPOOL 64 #define MIN_BITPOOL 2U +#define BITPOOL_DEC_LIMIT 32 +#define BITPOOL_DEC_STEP 5 + PA_MODULE_AUTHOR("Joao Paulo Rechi Vita"); PA_MODULE_DESCRIPTION("Bluetooth audio sink and source"); PA_MODULE_VERSION(PACKAGE_VERSION); @@ -117,6 +120,8 @@ struct a2dp_info { size_t buffer_size; /* Size of the buffer */ uint16_t seq_num; /* Cumulative packet sequence */ + uint8_t min_bitpool; + uint8_t max_bitpool; }; struct hsp_info { @@ -574,7 +579,7 @@ static int setup_a2dp(struct userdata *u) { } /* Run from main thread */ -static void setup_sbc(struct a2dp_info *a2dp) { +static void setup_sbc(struct a2dp_info *a2dp, enum profile p) { sbc_capabilities_t *active_capabilities; pa_assert(a2dp); @@ -660,7 +665,11 @@ static void setup_sbc(struct a2dp_info *a2dp) { pa_assert_not_reached(); } - a2dp->sbc.bitpool = active_capabilities->max_bitpool; + a2dp->min_bitpool = active_capabilities->min_bitpool; + a2dp->max_bitpool = active_capabilities->max_bitpool; + + /* Set minimum bitpool for source to get the maximum possible block_size */ + a2dp->sbc.bitpool = p == PROFILE_A2DP ? a2dp->max_bitpool : a2dp->min_bitpool; a2dp->codesize = sbc_get_codesize(&a2dp->sbc); a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); } @@ -728,7 +737,7 @@ static int set_conf(struct userdata *u) { /* setup SBC encoder now we agree on parameters */ if (u->profile == PROFILE_A2DP || u->profile == PROFILE_A2DP_SOURCE) { - setup_sbc(&u->a2dp); + setup_sbc(&u->a2dp, u->profile); u->block_size = ((u->link_mtu - sizeof(struct rtp_header) - sizeof(struct rtp_payload)) @@ -743,6 +752,39 @@ static int set_conf(struct userdata *u) { return 0; } +/* from IO thread */ +static void a2dp_set_bitpool(struct userdata *u, uint8_t bitpool) +{ + struct a2dp_info *a2dp; + + pa_assert(u); + + a2dp = &u->a2dp; + + if (a2dp->sbc.bitpool == bitpool) + return; + + if (bitpool > a2dp->max_bitpool) + bitpool = a2dp->max_bitpool; + else if (bitpool < a2dp->min_bitpool) + bitpool = a2dp->min_bitpool; + + a2dp->sbc.bitpool = bitpool; + + a2dp->codesize = sbc_get_codesize(&a2dp->sbc); + a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); + + pa_log_debug("Bitpool has changed to %u", a2dp->sbc.bitpool); + + u->block_size = + (u->link_mtu - sizeof(struct rtp_header) - sizeof(struct rtp_payload)) + / a2dp->frame_length * a2dp->codesize; + + pa_sink_set_max_request_within_thread(u->sink, u->block_size); + pa_sink_set_fixed_latency_within_thread(u->sink, + FIXED_LATENCY_PLAYBACK_A2DP + pa_bytes_to_usec(u->block_size, &u->sample_spec)); +} + /* from IO thread, except in SCO over PCM */ static int setup_stream(struct userdata *u) { @@ -758,6 +800,9 @@ static int setup_stream(struct userdata *u) { pa_log_debug("Stream properly set up, we're ready to roll!"); + if (u->profile == PROFILE_A2DP) + a2dp_set_bitpool(u, u->a2dp.max_bitpool); + u->rtpoll_item = pa_rtpoll_item_new(u->rtpoll, PA_RTPOLL_NEVER, 1); pollfd = pa_rtpoll_item_get_pollfd(u->rtpoll_item, NULL); pollfd->fd = u->stream_fd; @@ -910,7 +955,8 @@ static int bt_transport_acquire(struct userdata *u, pa_bool_t start) { return -1; } - u->stream_fd = pa_bluetooth_transport_acquire(t, accesstype); + /* FIXME: Handle in/out MTU properly when unix socket is not longer supported */ + u->stream_fd = pa_bluetooth_transport_acquire(t, accesstype, NULL, &u->link_mtu); if (u->stream_fd < 0) return -1; @@ -1441,7 +1487,7 @@ static int a2dp_process_push(struct userdata *u) { d = pa_memblock_acquire(memchunk.memblock); to_write = memchunk.length = pa_memblock_get_length(memchunk.memblock); - while (PA_LIKELY(to_decode > 0 && to_write > 0)) { + while (PA_LIKELY(to_decode > 0)) { size_t written; ssize_t decoded; @@ -1460,10 +1506,12 @@ static int a2dp_process_push(struct userdata *u) { /* pa_log_debug("SBC: decoded: %lu; written: %lu", (unsigned long) decoded, (unsigned long) written); */ /* pa_log_debug("SBC: frame_length: %lu; codesize: %lu", (unsigned long) a2dp->frame_length, (unsigned long) a2dp->codesize); */ + /* Reset frame length, it can be changed due to bitpool change */ + a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); + pa_assert_fp((size_t) decoded <= to_decode); pa_assert_fp((size_t) decoded == a2dp->frame_length); - pa_assert_fp((size_t) written <= to_write); pa_assert_fp((size_t) written == a2dp->codesize); p = (const uint8_t*) p + decoded; @@ -1475,6 +1523,8 @@ static int a2dp_process_push(struct userdata *u) { frame_count++; } + memchunk.length -= to_write; + pa_memblock_release(memchunk.memblock); pa_source_post(u->source, &memchunk); @@ -1488,6 +1538,27 @@ static int a2dp_process_push(struct userdata *u) { return ret; } +static void a2dp_reduce_bitpool(struct userdata *u) +{ + struct a2dp_info *a2dp; + uint8_t bitpool; + + pa_assert(u); + + a2dp = &u->a2dp; + + /* Check if bitpool is already at its limit */ + if (a2dp->sbc.bitpool <= BITPOOL_DEC_LIMIT) + return; + + bitpool = a2dp->sbc.bitpool - BITPOOL_DEC_STEP; + + if (bitpool < BITPOOL_DEC_LIMIT) + bitpool = BITPOOL_DEC_LIMIT; + + a2dp_set_bitpool(u, bitpool); +} + static void thread_func(void *userdata) { struct userdata *u = userdata; unsigned do_write = 0; @@ -1579,6 +1650,9 @@ static void thread_func(void *userdata) { pa_sink_render_full(u->sink, skip_bytes, &tmp); pa_memblock_unref(tmp.memblock); u->write_index += skip_bytes; + + if (u->profile == PROFILE_A2DP) + a2dp_reduce_bitpool(u); } } @@ -1677,7 +1751,7 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us dbus_message_get_path(m), dbus_message_get_member(m)); - if (!dbus_message_has_path(m, u->path)) + if (!dbus_message_has_path(m, u->path) && !dbus_message_has_path(m, u->transport)) goto fail; if (dbus_message_is_signal(m, "org.bluez.Headset", "SpeakerGainChanged") || @@ -1703,6 +1777,28 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us pa_source_volume_changed(u->source, &v); } } + } else if (dbus_message_is_signal(m, "org.bluez.MediaTransport", "PropertyChanged")) { + DBusMessageIter arg_i; + pa_bluetooth_transport *t; + pa_bool_t nrec; + + t = (pa_bluetooth_transport *) pa_bluetooth_discovery_get_transport(u->discovery, u->transport); + pa_assert(t); + + if (!dbus_message_iter_init(m, &arg_i)) { + pa_log("Failed to parse PropertyChanged: %s", err.message); + goto fail; + } + + nrec = t->nrec; + + if (pa_bluetooth_transport_parse_property(t, &arg_i) < 0) + goto fail; + + if (nrec != t->nrec) { + pa_log_debug("dbus: property 'NREC' changed to value '%s'", t->nrec ? "True" : "False"); + pa_proplist_sets(u->source->proplist, "bluetooth.nrec", t->nrec ? "1" : "0"); + } } fail: @@ -1944,6 +2040,7 @@ static int add_source(struct userdata *u) { pa_proplist_sets(data.proplist, "bluetooth.protocol", u->profile == PROFILE_A2DP_SOURCE ? "a2dp_source" : "hsp"); if ((u->profile == PROFILE_HSP) || (u->profile == PROFILE_HFGW)) pa_proplist_sets(data.proplist, PA_PROP_DEVICE_INTENDED_ROLES, "phone"); + data.card = u->card; data.name = get_name("source", u->modargs, u->address, &b); data.namereg_fail = b; @@ -1970,8 +2067,15 @@ static int add_source(struct userdata *u) { pa_bytes_to_usec(u->block_size, &u->sample_spec)); } - if (u->profile == PROFILE_HSP || u->profile == PROFILE_HFGW) - pa_proplist_sets(u->source->proplist, "bluetooth.nrec", (u->hsp.pcm_capabilities.flags & BT_PCM_FLAG_NREC) ? "1" : "0"); + if ((u->profile == PROFILE_HSP) || (u->profile == PROFILE_HFGW)) { + if (u->transport) { + const pa_bluetooth_transport *t; + t = pa_bluetooth_discovery_get_transport(u->discovery, u->transport); + pa_assert(t); + pa_proplist_sets(u->source->proplist, "bluetooth.nrec", t->nrec ? "1" : "0"); + } else + pa_proplist_sets(u->source->proplist, "bluetooth.nrec", (u->hsp.pcm_capabilities.flags & BT_PCM_FLAG_NREC) ? "1" : "0"); + } if (u->profile == PROFILE_HSP) { u->source->set_volume = source_set_volume_cb; @@ -2094,7 +2198,11 @@ static int bt_transport_config_a2dp(struct userdata *u) { pa_assert_not_reached(); } - a2dp->sbc.bitpool = config->max_bitpool; + a2dp->min_bitpool = config->min_bitpool; + a2dp->max_bitpool = config->max_bitpool; + + /* Set minimum bitpool for source to get the maximum possible block_size */ + a2dp->sbc.bitpool = u->profile == PROFILE_A2DP ? a2dp->max_bitpool : a2dp->min_bitpool; a2dp->codesize = sbc_get_codesize(&a2dp->sbc); a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); @@ -2118,99 +2226,12 @@ static int bt_transport_config(struct userdata *u) { return bt_transport_config_a2dp(u); } -static int parse_transport_property(struct userdata *u, DBusMessageIter *i) { - const char *key; - DBusMessageIter variant_i; - - pa_assert(u); - pa_assert(i); - - if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING) { - pa_log("Property name not a string."); - return -1; - } - - dbus_message_iter_get_basic(i, &key); - - if (!dbus_message_iter_next(i)) { - pa_log("Property value missing"); - return -1; - } - - if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_VARIANT) { - pa_log("Property value not a variant."); - return -1; - } - - dbus_message_iter_recurse(i, &variant_i); - - switch (dbus_message_iter_get_arg_type(&variant_i)) { - - case DBUS_TYPE_UINT16: { - - uint16_t value; - dbus_message_iter_get_basic(&variant_i, &value); - - if (pa_streq(key, "OMTU")) - u->link_mtu = value; - - break; - } - - } - - return 0; -} - /* Run from main thread */ static int bt_transport_open(struct userdata *u) { - DBusMessage *m, *r; - DBusMessageIter arg_i, element_i; - DBusError err; - if (bt_transport_acquire(u, FALSE) < 0) return -1; - dbus_error_init(&err); - - pa_assert_se(m = dbus_message_new_method_call("org.bluez", u->transport, "org.bluez.MediaTransport", "GetProperties")); - r = dbus_connection_send_with_reply_and_block(pa_dbus_connection_get(u->connection), m, -1, &err); - - if (dbus_error_is_set(&err) || !r) { - pa_log("Failed to get transport properties: %s", err.message); - goto fail; - } - - if (!dbus_message_iter_init(r, &arg_i)) { - pa_log("GetProperties reply has no arguments."); - goto fail; - } - - if (dbus_message_iter_get_arg_type(&arg_i) != DBUS_TYPE_ARRAY) { - pa_log("GetProperties argument is not an array."); - goto fail; - } - - dbus_message_iter_recurse(&arg_i, &element_i); - while (dbus_message_iter_get_arg_type(&element_i) != DBUS_TYPE_INVALID) { - - if (dbus_message_iter_get_arg_type(&element_i) == DBUS_TYPE_DICT_ENTRY) { - DBusMessageIter dict_i; - - dbus_message_iter_recurse(&element_i, &dict_i); - - parse_transport_property(u, &dict_i); - } - - if (!dbus_message_iter_next(&element_i)) - break; - } - return bt_transport_config(u); - -fail: - dbus_message_unref(r); - return -1; } /* Run from main thread */ @@ -2690,7 +2711,7 @@ int pa__init(pa_module* m) { struct userdata *u; const char *address, *path; DBusError err; - char *mike, *speaker; + char *mike, *speaker, *transport; const pa_bluetooth_device *device; pa_assert(m); @@ -2769,15 +2790,18 @@ int pa__init(pa_module* m) { speaker = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.Headset',member='SpeakerGainChanged',path='%s'", u->path); mike = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.Headset',member='MicrophoneGainChanged',path='%s'", u->path); + transport = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'"); if (pa_dbus_add_matches( pa_dbus_connection_get(u->connection), &err, speaker, mike, + transport, NULL) < 0) { pa_xfree(speaker); pa_xfree(mike); + pa_xfree(transport); pa_log("Failed to add D-Bus matches: %s", err.message); goto fail; @@ -2785,6 +2809,7 @@ int pa__init(pa_module* m) { pa_xfree(speaker); pa_xfree(mike); + pa_xfree(transport); /* Connect to the BT service */ init_bt(u); diff --git a/src/modules/bluetooth/sbc.c b/src/modules/bluetooth/sbc/sbc.c index 5157c70f..98b236bd 100644 --- a/src/modules/bluetooth/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -77,7 +77,7 @@ struct sbc_frame { uint8_t joint; /* only the lower 4 bits of every element are to be used */ - uint32_t scale_factor[2][8]; + uint32_t SBC_ALIGNED scale_factor[2][8]; /* raw integer subband samples in the frame */ int32_t SBC_ALIGNED sb_sample_f[16][2][8]; @@ -159,7 +159,8 @@ static uint8_t sbc_crc8(const uint8_t *data, size_t len) * Takes a pointer to the frame in question, a pointer to the bits array and * the sampling frequency (as 2 bit integer) */ -static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal( + const struct sbc_frame *frame, int (*bits)[8], int subbands) { uint8_t sf = frame->frequency; @@ -170,17 +171,17 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) for (ch = 0; ch < frame->channels; ch++) { max_bitneed = 0; if (frame->allocation == SNR) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { bitneed[ch][sb] = frame->scale_factor[ch][sb]; if (bitneed[ch][sb] > max_bitneed) max_bitneed = bitneed[ch][sb]; } } else { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (frame->scale_factor[ch][sb] == 0) bitneed[ch][sb] = -5; else { - if (frame->subbands == 4) + if (subbands == 4) loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; else loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; @@ -201,7 +202,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) bitslice--; bitcount += slicecount; slicecount = 0; - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) slicecount++; else if (bitneed[ch][sb] == bitslice + 1) @@ -214,7 +215,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) bitslice--; } - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (bitneed[ch][sb] < bitslice + 2) bits[ch][sb] = 0; else { @@ -224,7 +225,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } } - for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + for (sb = 0; bitcount < frame->bitpool && + sb < subbands; sb++) { if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { bits[ch][sb]++; bitcount++; @@ -234,7 +236,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } } - for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + for (sb = 0; bitcount < frame->bitpool && + sb < subbands; sb++) { if (bits[ch][sb] < 16) { bits[ch][sb]++; bitcount++; @@ -250,7 +253,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) max_bitneed = 0; if (frame->allocation == SNR) { for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { bitneed[ch][sb] = frame->scale_factor[ch][sb]; if (bitneed[ch][sb] > max_bitneed) max_bitneed = bitneed[ch][sb]; @@ -258,11 +261,11 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } } else { for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (frame->scale_factor[ch][sb] == 0) bitneed[ch][sb] = -5; else { - if (frame->subbands == 4) + if (subbands == 4) loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; else loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; @@ -285,7 +288,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) bitcount += slicecount; slicecount = 0; for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) slicecount++; else if (bitneed[ch][sb] == bitslice + 1) @@ -300,7 +303,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (bitneed[ch][sb] < bitslice + 2) { bits[ch][sb] = 0; } else { @@ -324,7 +327,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) if (ch == 1) { ch = 0; sb++; - if (sb >= frame->subbands) break; + if (sb >= subbands) + break; } else ch = 1; } @@ -339,7 +343,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) if (ch == 1) { ch = 0; sb++; - if (sb >= frame->subbands) break; + if (sb >= subbands) + break; } else ch = 1; } @@ -348,6 +353,14 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } +static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +{ + if (frame->subbands == 4) + sbc_calculate_bits_internal(frame, bits, 4); + else + sbc_calculate_bits_internal(frame, bits, 8); +} + /* * Unpacks a SBC frame at the beginning of the stream in data, * which has at most len bytes into frame. @@ -534,6 +547,16 @@ static void sbc_decoder_init(struct sbc_decoder_state *state, state->offset[ch][i] = (10 * i + 10); } +static SBC_ALWAYS_INLINE int16_t sbc_clip16(int32_t s) +{ + if (s > 0x7FFF) + return 0x7FFF; + else if (s < -0x8000) + return -0x8000; + else + return s; +} + static inline void sbc_synthesize_four(struct sbc_decoder_state *state, struct sbc_frame *frame, int ch, int blk) { @@ -562,7 +585,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state, k = (i + 4) & 0xf; /* Store in output, Q0 */ - frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED1( + frame->pcm_sample[ch][blk * 4 + i] = sbc_clip16(SCALE4_STAGED1( MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], @@ -572,7 +595,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state, MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], - MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))); + MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4])))))))))))); } } @@ -607,8 +630,8 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, for (idx = 0, i = 0; i < 8; i++, idx += 5) { k = (i + 8) & 0xf; - /* Store in output */ - frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED1( // Q0 + /* Store in output, Q0 */ + frame->pcm_sample[ch][blk * 8 + i] = sbc_clip16(SCALE8_STAGED1( MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], @@ -618,7 +641,7 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], - MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))); + MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4])))))))))))); } } @@ -732,9 +755,9 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state, * -99 not implemented */ -static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( +static SBC_ALWAYS_INLINE ssize_t sbc_pack_frame_internal( uint8_t *data, struct sbc_frame *frame, size_t len, - int frame_subbands, int frame_channels) + int frame_subbands, int frame_channels, int joint) { /* Bitstream writer starts from the fourth byte */ uint8_t *data_ptr = data + 4; @@ -791,63 +814,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( crc_pos = 16; if (frame->mode == JOINT_STEREO) { - /* like frame->sb_sample but joint stereo */ - int32_t sb_sample_j[16][2]; - /* scalefactor and scale_factor in joint case */ - uint32_t scalefactor_j[2]; - uint8_t scale_factor_j[2]; - - uint8_t joint = 0; - frame->joint = 0; - - for (sb = 0; sb < frame_subbands - 1; sb++) { - scale_factor_j[0] = 0; - scalefactor_j[0] = 2 << SCALE_OUT_BITS; - scale_factor_j[1] = 0; - scalefactor_j[1] = 2 << SCALE_OUT_BITS; - - for (blk = 0; blk < frame->blocks; blk++) { - uint32_t tmp; - /* Calculate joint stereo signal */ - sb_sample_j[blk][0] = - ASR(frame->sb_sample_f[blk][0][sb], 1) + - ASR(frame->sb_sample_f[blk][1][sb], 1); - sb_sample_j[blk][1] = - ASR(frame->sb_sample_f[blk][0][sb], 1) - - ASR(frame->sb_sample_f[blk][1][sb], 1); - - /* calculate scale_factor_j and scalefactor_j for joint case */ - tmp = fabs(sb_sample_j[blk][0]); - while (scalefactor_j[0] < tmp) { - scale_factor_j[0]++; - scalefactor_j[0] *= 2; - } - tmp = fabs(sb_sample_j[blk][1]); - while (scalefactor_j[1] < tmp) { - scale_factor_j[1]++; - scalefactor_j[1] *= 2; - } - } - - /* decide whether to join this subband */ - if ((frame->scale_factor[0][sb] + - frame->scale_factor[1][sb]) > - (scale_factor_j[0] + - scale_factor_j[1])) { - /* use joint stereo for this subband */ - joint |= 1 << (frame_subbands - 1 - sb); - frame->joint |= 1 << sb; - frame->scale_factor[0][sb] = scale_factor_j[0]; - frame->scale_factor[1][sb] = scale_factor_j[1]; - for (blk = 0; blk < frame->blocks; blk++) { - frame->sb_sample_f[blk][0][sb] = - sb_sample_j[blk][0]; - frame->sb_sample_f[blk][1][sb] = - sb_sample_j[blk][1]; - } - } - } - PUT_BITS(data_ptr, bits_cache, bits_count, joint, frame_subbands); crc_header[crc_pos >> 3] = joint; @@ -905,18 +871,23 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( return data_ptr - data; } -static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) +static ssize_t sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len, + int joint) { if (frame->subbands == 4) { if (frame->channels == 1) - return sbc_pack_frame_internal(data, frame, len, 4, 1); + return sbc_pack_frame_internal( + data, frame, len, 4, 1, joint); else - return sbc_pack_frame_internal(data, frame, len, 4, 2); + return sbc_pack_frame_internal( + data, frame, len, 4, 2, joint); } else { if (frame->channels == 1) - return sbc_pack_frame_internal(data, frame, len, 8, 1); + return sbc_pack_frame_internal( + data, frame, len, 8, 1, joint); else - return sbc_pack_frame_internal(data, frame, len, 8, 2); + return sbc_pack_frame_internal( + data, frame, len, 8, 2, joint); } } @@ -924,7 +895,7 @@ static void sbc_encoder_init(struct sbc_encoder_state *state, const struct sbc_frame *frame) { memset(&state->X, 0, sizeof(state->X)); - state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9; + state->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7; sbc_init_primitives(state); } @@ -1046,10 +1017,11 @@ ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, } ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written) + void *output, size_t output_len, ssize_t *written) { struct sbc_priv *priv; - int framelen, samples; + int samples; + ssize_t framelen; int (*sbc_enc_process_input)(int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], int nsamples, int nchannels); @@ -1114,11 +1086,18 @@ ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); - priv->enc_state.sbc_calc_scalefactors( - priv->frame.sb_sample_f, priv->frame.scale_factor, - priv->frame.blocks, priv->frame.channels, priv->frame.subbands); - - framelen = sbc_pack_frame(output, &priv->frame, output_len); + if (priv->frame.mode == JOINT_STEREO) { + int j = priv->enc_state.sbc_calc_scalefactors_j( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len, j); + } else { + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, + priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len, 0); + } if (written) *written = framelen; @@ -1131,8 +1110,7 @@ void sbc_finish(sbc_t *sbc) if (!sbc) return; - if (sbc->priv_alloc_base) - free(sbc->priv_alloc_base); + free(sbc->priv_alloc_base); memset(sbc, 0, sizeof(sbc_t)); } diff --git a/src/modules/bluetooth/sbc.h b/src/modules/bluetooth/sbc/sbc.h index 65435884..c9c56d38 100644 --- a/src/modules/bluetooth/sbc.h +++ b/src/modules/bluetooth/sbc/sbc.h @@ -90,7 +90,7 @@ ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, /* Encodes ONE input block into ONE output block */ ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written); + void *output, size_t output_len, ssize_t *written); /* Returns the output block size in bytes */ size_t sbc_get_frame_length(sbc_t *sbc); diff --git a/src/modules/bluetooth/sbc_math.h b/src/modules/bluetooth/sbc/sbc_math.h index b87bc81c..b87bc81c 100644 --- a/src/modules/bluetooth/sbc_math.h +++ b/src/modules/bluetooth/sbc/sbc_math.h diff --git a/src/modules/bluetooth/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c index 6b0be3f5..3a76a7a0 100644 --- a/src/modules/bluetooth/sbc_primitives.c +++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -32,7 +32,9 @@ #include "sbc_primitives.h" #include "sbc_primitives_mmx.h" +#include "sbc_primitives_iwmmxt.h" #include "sbc_primitives_neon.h" +#include "sbc_primitives_armv6.h" /* * A reference C code of analysis filter with SIMD-friendly tables @@ -231,12 +233,12 @@ static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( /* handle X buffer wraparound */ if (position < nsamples) { if (nchannels > 0) - memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position], + memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position], 36 * sizeof(int16_t)); if (nchannels > 1) - memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position], + memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position], 36 * sizeof(int16_t)); - position = SBC_X_BUFFER_SIZE - 36; + position = SBC_X_BUFFER_SIZE - 40; } #define PCM(i) (big_endian ? \ @@ -439,6 +441,80 @@ static void sbc_calc_scalefactors( } } +static int sbc_calc_scalefactors_j( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands) +{ + int blk, joint = 0; + int32_t tmp0, tmp1; + uint32_t x, y; + + /* last subband does not use joint stereo */ + int sb = subbands - 1; + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = fabs(sb_sample_f[blk][0][sb]); + tmp1 = fabs(sb_sample_f[blk][1][sb]); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x); + scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y); + + /* the rest of subbands can use joint stereo */ + while (--sb >= 0) { + int32_t sb_sample_j[16][2]; + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = sb_sample_f[blk][0][sb]; + tmp1 = sb_sample_f[blk][1][sb]; + sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1); + sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1); + tmp0 = fabs(tmp0); + tmp1 = fabs(tmp1); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(y); + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = fabs(sb_sample_j[blk][0]); + tmp1 = fabs(sb_sample_j[blk][1]); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + x = (31 - SCALE_OUT_BITS) - sbc_clz(x); + y = (31 - SCALE_OUT_BITS) - sbc_clz(y); + + /* decide whether to use joint stereo for this subband */ + if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { + joint |= 1 << (subbands - 1 - sb); + scale_factor[0][sb] = x; + scale_factor[1][sb] = y; + for (blk = 0; blk < blocks; blk++) { + sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; + sb_sample_f[blk][1][sb] = sb_sample_j[blk][1]; + } + } + } + + /* bitmask with the information about subbands using joint stereo */ + return joint; +} + /* * Detect CPU features and setup function pointers */ @@ -456,6 +532,7 @@ void sbc_init_primitives(struct sbc_encoder_state *state) /* Default implementation for scale factors calculation */ state->sbc_calc_scalefactors = sbc_calc_scalefactors; + state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; state->implementation_info = "Generic C"; /* X86/AMD64 optimizations */ @@ -464,6 +541,12 @@ void sbc_init_primitives(struct sbc_encoder_state *state) #endif /* ARM optimizations */ +#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT + sbc_init_primitives_armv6(state); +#endif +#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT + sbc_init_primitives_iwmmxt(state); +#endif #ifdef SBC_BUILD_WITH_NEON_SUPPORT sbc_init_primitives_neon(state); #endif diff --git a/src/modules/bluetooth/sbc_primitives.h b/src/modules/bluetooth/sbc/sbc_primitives.h index 3d01c115..b4b9df2f 100644 --- a/src/modules/bluetooth/sbc_primitives.h +++ b/src/modules/bluetooth/sbc/sbc_primitives.h @@ -62,6 +62,10 @@ struct sbc_encoder_state { void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], uint32_t scale_factor[2][8], int blocks, int channels, int subbands); + /* Scale factors calculation with joint stereo support */ + int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands); const char *implementation_info; }; diff --git a/src/modules/bluetooth/sbc/sbc_primitives_armv6.c b/src/modules/bluetooth/sbc/sbc_primitives_armv6.c new file mode 100644 index 00000000..95860980 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_armv6.c @@ -0,0 +1,299 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_armv6.h" + +/* + * ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. + */ + +#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT + +static void __attribute__((naked)) sbc_analyze_four_armv6() +{ + /* r0 = in, r1 = out, r2 = consts */ + asm volatile ( + "push {r1, r4-r7, lr}\n" + "push {r8-r11}\n" + "ldrd r4, r5, [r0, #0]\n" + "ldrd r6, r7, [r2, #0]\n" + "ldrd r8, r9, [r0, #16]\n" + "ldrd r10, r11, [r2, #16]\n" + "mov r14, #0x8000\n" + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #32]\n" + "ldrd r6, r7, [r2, #32]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #48]\n" + "ldrd r10, r11, [r2, #48]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #64]\n" + "ldrd r6, r7, [r2, #64]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #8]\n" + "ldrd r10, r11, [r2, #8]\n" + "smlad r3, r4, r6, r3\n" /* t1[0] is done */ + "smlad r12, r5, r7, r12\n" /* t1[1] is done */ + "ldrd r4, r5, [r0, #24]\n" + "ldrd r6, r7, [r2, #24]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ + "smlad r12, r8, r10, r14\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #40]\n" + "ldrd r10, r11, [r2, #40]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r0, #56]\n" + "ldrd r6, r7, [r2, #56]\n" + "smlad r12, r8, r10, r12\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #72]\n" + "ldrd r10, r11, [r2, #72]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r2, #80]\n" /* start loading cos table */ + "smlad r12, r8, r10, r12\n" /* t1[2] is done */ + "smlad r14, r9, r11, r14\n" /* t1[3] is done */ + "ldrd r6, r7, [r2, #88]\n" + "ldrd r8, r9, [r2, #96]\n" + "ldrd r10, r11, [r2, #104]\n" /* cos table fully loaded */ + "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ + "smuad r4, r3, r4\n" + "smuad r5, r3, r5\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "smuad r6, r3, r6\n" + "smuad r7, r3, r7\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "pop {r8-r11}\n" + "stmia r1, {r4, r5, r6, r7}\n" + "pop {r1, r4-r7, pc}\n" + ); +} + +#define sbc_analyze_four(in, out, consts) \ + ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ + sbc_analyze_four_armv6)((in), (out), (consts)) + +static void __attribute__((naked)) sbc_analyze_eight_armv6() +{ + /* r0 = in, r1 = out, r2 = consts */ + asm volatile ( + "push {r1, r4-r7, lr}\n" + "push {r8-r11}\n" + "ldrd r4, r5, [r0, #24]\n" + "ldrd r6, r7, [r2, #24]\n" + "ldrd r8, r9, [r0, #56]\n" + "ldrd r10, r11, [r2, #56]\n" + "mov r14, #0x8000\n" + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #88]\n" + "ldrd r6, r7, [r2, #88]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #120]\n" + "ldrd r10, r11, [r2, #120]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #152]\n" + "ldrd r6, r7, [r2, #152]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #16]\n" + "ldrd r10, r11, [r2, #16]\n" + "smlad r3, r4, r6, r3\n" /* t1[6] is done */ + "smlad r12, r5, r7, r12\n" /* t1[7] is done */ + "ldrd r4, r5, [r0, #48]\n" + "ldrd r6, r7, [r2, #48]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[6] and t1[7] */ + "str r3, [sp, #-4]!\n" /* save to stack */ + "smlad r3, r8, r10, r14\n" + "smlad r12, r9, r11, r14\n" + "ldrd r8, r9, [r0, #80]\n" + "ldrd r10, r11, [r2, #80]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #112]\n" + "ldrd r6, r7, [r2, #112]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #144]\n" + "ldrd r10, r11, [r2, #144]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #0]\n" + "ldrd r6, r7, [r2, #0]\n" + "smlad r3, r8, r10, r3\n" /* t1[4] is done */ + "smlad r12, r9, r11, r12\n" /* t1[5] is done */ + "ldrd r8, r9, [r0, #32]\n" + "ldrd r10, r11, [r2, #32]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[4] and t1[5] */ + "str r3, [sp, #-4]!\n" /* save to stack */ + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #64]\n" + "ldrd r6, r7, [r2, #64]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #96]\n" + "ldrd r10, r11, [r2, #96]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #128]\n" + "ldrd r6, r7, [r2, #128]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #8]\n" + "ldrd r10, r11, [r2, #8]\n" + "smlad r3, r4, r6, r3\n" /* t1[0] is done */ + "smlad r12, r5, r7, r12\n" /* t1[1] is done */ + "ldrd r4, r5, [r0, #40]\n" + "ldrd r6, r7, [r2, #40]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ + "smlad r12, r8, r10, r14\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #72]\n" + "ldrd r10, r11, [r2, #72]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r0, #104]\n" + "ldrd r6, r7, [r2, #104]\n" + "smlad r12, r8, r10, r12\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #136]\n" + "ldrd r10, r11, [r2, #136]!\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r2, #(160 - 136 + 0)]\n" + "smlad r12, r8, r10, r12\n" /* t1[2] is done */ + "smlad r14, r9, r11, r14\n" /* t1[3] is done */ + "ldrd r6, r7, [r2, #(160 - 136 + 8)]\n" + "smuad r4, r3, r4\n" + "smuad r5, r3, r5\n" + "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ + /* r3 = t2[0:1] */ + /* r12 = t2[2:3] */ + "pop {r0, r14}\n" /* t2[4:5], t2[6:7] */ + "ldrd r8, r9, [r2, #(160 - 136 + 32)]\n" + "smuad r6, r3, r6\n" + "smuad r7, r3, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 40)]\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 64)]\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 72)]\n" + "smlad r4, r0, r8, r4\n" + "smlad r5, r0, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 96)]\n" + "smlad r6, r0, r10, r6\n" + "smlad r7, r0, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 104)]\n" + "smlad r4, r14, r8, r4\n" + "smlad r5, r14, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]\n" + "smlad r6, r14, r10, r6\n" + "smlad r7, r14, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]\n" + "stmia r1!, {r4, r5}\n" + "smuad r4, r3, r8\n" + "smuad r5, r3, r9\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]\n" + "stmia r1!, {r6, r7}\n" + "smuad r6, r3, r10\n" + "smuad r7, r3, r11\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]\n" + "smlad r4, r0, r8, r4\n" + "smlad r5, r0, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]\n" + "smlad r6, r0, r10, r6\n" + "smlad r7, r0, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]\n" + "smlad r4, r14, r8, r4\n" + "smlad r5, r14, r9, r5\n" + "smlad r6, r14, r10, r6\n" + "smlad r7, r14, r11, r7\n" + "pop {r8-r11}\n" + "stmia r1!, {r4, r5, r6, r7}\n" + "pop {r1, r4-r7, pc}\n" + ); +} + +#define sbc_analyze_eight(in, out, consts) \ + ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ + sbc_analyze_eight_armv6)((in), (out), (consts)) + +static void sbc_analyze_4b_4s_armv6(int16_t *x, int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static void sbc_analyze_4b_8s_armv6(int16_t *x, int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_armv6(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_armv6; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_armv6; + state->implementation_info = "ARMv6 SIMD"; +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_armv6.h b/src/modules/bluetooth/sbc/sbc_primitives_armv6.h new file mode 100644 index 00000000..1862aede --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_armv6.h @@ -0,0 +1,52 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_ARMV6_H +#define __SBC_PRIMITIVES_ARMV6_H + +#include "sbc_primitives.h" + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) +#define SBC_HAVE_ARMV6 1 +#endif + +#if !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) && \ + defined(__GNUC__) && defined(SBC_HAVE_ARMV6) && \ + defined(__ARM_EABI__) && !defined(__thumb__) && \ + !defined(__ARM_NEON__) + +#define SBC_BUILD_WITH_ARMV6_SUPPORT + +void sbc_init_primitives_armv6(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c new file mode 100644 index 00000000..213967ef --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c @@ -0,0 +1,304 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2010 Keith Mok <ek9852@gmail.com> + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_iwmmxt.h" + +/* + * IWMMXT optimizations + */ + +#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT + +static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + asm volatile ( + "wldrd wr0, [%0]\n" + "tbcstw wr4, %2\n" + "wldrd wr2, [%1]\n" + "wldrd wr1, [%0, #8]\n" + "wldrd wr3, [%1, #8]\n" + "wmadds wr0, wr2, wr0\n" + " wldrd wr6, [%0, #16]\n" + "wmadds wr1, wr3, wr1\n" + " wldrd wr7, [%0, #24]\n" + "waddwss wr0, wr0, wr4\n" + " wldrd wr8, [%1, #16]\n" + "waddwss wr1, wr1, wr4\n" + " wldrd wr9, [%1, #24]\n" + " wmadds wr6, wr8, wr6\n" + " wldrd wr2, [%0, #32]\n" + " wmadds wr7, wr9, wr7\n" + " wldrd wr3, [%0, #40]\n" + " waddwss wr0, wr6, wr0\n" + " wldrd wr4, [%1, #32]\n" + " waddwss wr1, wr7, wr1\n" + " wldrd wr5, [%1, #40]\n" + " wmadds wr2, wr4, wr2\n" + "wldrd wr6, [%0, #48]\n" + " wmadds wr3, wr5, wr3\n" + "wldrd wr7, [%0, #56]\n" + " waddwss wr0, wr2, wr0\n" + "wldrd wr8, [%1, #48]\n" + " waddwss wr1, wr3, wr1\n" + "wldrd wr9, [%1, #56]\n" + "wmadds wr6, wr8, wr6\n" + " wldrd wr2, [%0, #64]\n" + "wmadds wr7, wr9, wr7\n" + " wldrd wr3, [%0, #72]\n" + "waddwss wr0, wr6, wr0\n" + " wldrd wr4, [%1, #64]\n" + "waddwss wr1, wr7, wr1\n" + " wldrd wr5, [%1, #72]\n" + " wmadds wr2, wr4, wr2\n" + "tmcr wcgr0, %4\n" + " wmadds wr3, wr5, wr3\n" + " waddwss wr0, wr2, wr0\n" + " waddwss wr1, wr3, wr1\n" + "\n" + "wsrawg wr0, wr0, wcgr0\n" + " wldrd wr4, [%1, #80]\n" + "wsrawg wr1, wr1, wcgr0\n" + " wldrd wr5, [%1, #88]\n" + "wpackwss wr0, wr0, wr0\n" + " wldrd wr6, [%1, #96]\n" + "wpackwss wr1, wr1, wr1\n" + "wmadds wr2, wr5, wr0\n" + " wldrd wr7, [%1, #104]\n" + "wmadds wr0, wr4, wr0\n" + "\n" + " wmadds wr3, wr7, wr1\n" + " wmadds wr1, wr6, wr1\n" + " waddwss wr2, wr3, wr2\n" + " waddwss wr0, wr1, wr0\n" + "\n" + "wstrd wr0, [%3]\n" + "wstrd wr2, [%3, #8]\n" + : + : "r" (in), "r" (consts), + "r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out), + "r" (SBC_PROTO_FIXED4_SCALE) + : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", + "wr8", "wr9", "wcgr0", "memory"); +} + +static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + asm volatile ( + "wldrd wr0, [%0]\n" + "tbcstw wr15, %2\n" + "wldrd wr1, [%0, #8]\n" + "wldrd wr2, [%0, #16]\n" + "wldrd wr3, [%0, #24]\n" + "wldrd wr4, [%1]\n" + "wldrd wr5, [%1, #8]\n" + "wldrd wr6, [%1, #16]\n" + "wldrd wr7, [%1, #24]\n" + "wmadds wr0, wr0, wr4\n" + " wldrd wr8, [%1, #32]\n" + "wmadds wr1, wr1, wr5\n" + " wldrd wr9, [%1, #40]\n" + "wmadds wr2, wr2, wr6\n" + " wldrd wr10, [%1, #48]\n" + "wmadds wr3, wr3, wr7\n" + " wldrd wr11, [%1, #56]\n" + "waddwss wr0, wr0, wr15\n" + " wldrd wr4, [%0, #32]\n" + "waddwss wr1, wr1, wr15\n" + " wldrd wr5, [%0, #40]\n" + "waddwss wr2, wr2, wr15\n" + " wldrd wr6, [%0, #48]\n" + "waddwss wr3, wr3, wr15\n" + " wldrd wr7, [%0, #56]\n" + " wmadds wr4, wr4, wr8\n" + " wldrd wr12, [%0, #64]\n" + " wmadds wr5, wr5, wr9\n" + " wldrd wr13, [%0, #72]\n" + " wmadds wr6, wr6, wr10\n" + " wldrd wr14, [%0, #80]\n" + " wmadds wr7, wr7, wr11\n" + " wldrd wr15, [%0, #88]\n" + " waddwss wr0, wr4, wr0\n" + " wldrd wr8, [%1, #64]\n" + " waddwss wr1, wr5, wr1\n" + " wldrd wr9, [%1, #72]\n" + " waddwss wr2, wr6, wr2\n" + " wldrd wr10, [%1, #80]\n" + " waddwss wr3, wr7, wr3\n" + " wldrd wr11, [%1, #88]\n" + " wmadds wr12, wr12, wr8\n" + "wldrd wr4, [%0, #96]\n" + " wmadds wr13, wr13, wr9\n" + "wldrd wr5, [%0, #104]\n" + " wmadds wr14, wr14, wr10\n" + "wldrd wr6, [%0, #112]\n" + " wmadds wr15, wr15, wr11\n" + "wldrd wr7, [%0, #120]\n" + " waddwss wr0, wr12, wr0\n" + "wldrd wr8, [%1, #96]\n" + " waddwss wr1, wr13, wr1\n" + "wldrd wr9, [%1, #104]\n" + " waddwss wr2, wr14, wr2\n" + "wldrd wr10, [%1, #112]\n" + " waddwss wr3, wr15, wr3\n" + "wldrd wr11, [%1, #120]\n" + "wmadds wr4, wr4, wr8\n" + " wldrd wr12, [%0, #128]\n" + "wmadds wr5, wr5, wr9\n" + " wldrd wr13, [%0, #136]\n" + "wmadds wr6, wr6, wr10\n" + " wldrd wr14, [%0, #144]\n" + "wmadds wr7, wr7, wr11\n" + " wldrd wr15, [%0, #152]\n" + "waddwss wr0, wr4, wr0\n" + " wldrd wr8, [%1, #128]\n" + "waddwss wr1, wr5, wr1\n" + " wldrd wr9, [%1, #136]\n" + "waddwss wr2, wr6, wr2\n" + " wldrd wr10, [%1, #144]\n" + " waddwss wr3, wr7, wr3\n" + " wldrd wr11, [%1, #152]\n" + " wmadds wr12, wr12, wr8\n" + "tmcr wcgr0, %4\n" + " wmadds wr13, wr13, wr9\n" + " wmadds wr14, wr14, wr10\n" + " wmadds wr15, wr15, wr11\n" + " waddwss wr0, wr12, wr0\n" + " waddwss wr1, wr13, wr1\n" + " waddwss wr2, wr14, wr2\n" + " waddwss wr3, wr15, wr3\n" + "\n" + "wsrawg wr0, wr0, wcgr0\n" + "wsrawg wr1, wr1, wcgr0\n" + "wsrawg wr2, wr2, wcgr0\n" + "wsrawg wr3, wr3, wcgr0\n" + "\n" + "wpackwss wr0, wr0, wr0\n" + "wpackwss wr1, wr1, wr1\n" + " wldrd wr4, [%1, #160]\n" + "wpackwss wr2, wr2, wr2\n" + " wldrd wr5, [%1, #168]\n" + "wpackwss wr3, wr3, wr3\n" + " wldrd wr6, [%1, #192]\n" + " wmadds wr4, wr4, wr0\n" + " wldrd wr7, [%1, #200]\n" + " wmadds wr5, wr5, wr0\n" + " wldrd wr8, [%1, #224]\n" + " wmadds wr6, wr6, wr1\n" + " wldrd wr9, [%1, #232]\n" + " wmadds wr7, wr7, wr1\n" + " waddwss wr4, wr6, wr4\n" + " waddwss wr5, wr7, wr5\n" + " wmadds wr8, wr8, wr2\n" + "wldrd wr6, [%1, #256]\n" + " wmadds wr9, wr9, wr2\n" + "wldrd wr7, [%1, #264]\n" + "waddwss wr4, wr8, wr4\n" + " waddwss wr5, wr9, wr5\n" + "wmadds wr6, wr6, wr3\n" + "wmadds wr7, wr7, wr3\n" + "waddwss wr4, wr6, wr4\n" + "waddwss wr5, wr7, wr5\n" + "\n" + "wstrd wr4, [%3]\n" + "wstrd wr5, [%3, #8]\n" + "\n" + "wldrd wr6, [%1, #176]\n" + "wldrd wr5, [%1, #184]\n" + "wmadds wr5, wr5, wr0\n" + "wldrd wr8, [%1, #208]\n" + "wmadds wr0, wr6, wr0\n" + "wldrd wr9, [%1, #216]\n" + "wmadds wr9, wr9, wr1\n" + "wldrd wr6, [%1, #240]\n" + "wmadds wr1, wr8, wr1\n" + "wldrd wr7, [%1, #248]\n" + "waddwss wr0, wr1, wr0\n" + "waddwss wr5, wr9, wr5\n" + "wmadds wr7, wr7, wr2\n" + "wldrd wr8, [%1, #272]\n" + "wmadds wr2, wr6, wr2\n" + "wldrd wr9, [%1, #280]\n" + "waddwss wr0, wr2, wr0\n" + "waddwss wr5, wr7, wr5\n" + "wmadds wr9, wr9, wr3\n" + "wmadds wr3, wr8, wr3\n" + "waddwss wr0, wr3, wr0\n" + "waddwss wr5, wr9, wr5\n" + "\n" + "wstrd wr0, [%3, #16]\n" + "wstrd wr5, [%3, #24]\n" + : + : "r" (in), "r" (consts), + "r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out), + "r" (SBC_PROTO_FIXED8_SCALE) + : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", + "wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15", + "wcgr0", "memory"); +} + +static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_iwmmxt(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_iwmmxt(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt; + state->implementation_info = "IWMMXT"; +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h new file mode 100644 index 00000000..b535e686 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h @@ -0,0 +1,42 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2010 Keith Mok <ek9852@gmail.com> + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_IWMMXT_H +#define __SBC_PRIMITIVES_IWMMXT_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__IWMMXT__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_IWMMXT_SUPPORT + +void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/src/modules/bluetooth/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c index 08e9ca28..ab89d074 100644 --- a/src/modules/bluetooth/sbc_primitives_mmx.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c @@ -100,7 +100,7 @@ static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, : : "r" (in), "r" (consts), "r" (&round_c), "r" (out), "i" (SBC_PROTO_FIXED4_SCALE) - : "memory"); + : "cc", "memory"); } static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, @@ -242,7 +242,7 @@ static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, : : "r" (in), "r" (consts), "r" (&round_c), "r" (out), "i" (SBC_PROTO_FIXED8_SCALE) - : "memory"); + : "cc", "memory"); } static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, @@ -275,6 +275,59 @@ static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, asm volatile ("emms\n"); } +static void sbc_calc_scalefactors_mmx( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + static const SBC_ALIGNED int32_t consts[2] = { + 1 << SCALE_OUT_BITS, + 1 << SCALE_OUT_BITS, + }; + int ch, sb; + intptr_t blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb += 2) { + blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0])); + asm volatile ( + "movq (%4), %%mm0\n" + "1:\n" + "movq (%1, %0), %%mm1\n" + "pxor %%mm2, %%mm2\n" + "pcmpgtd %%mm2, %%mm1\n" + "paddd (%1, %0), %%mm1\n" + "pcmpgtd %%mm1, %%mm2\n" + "pxor %%mm2, %%mm1\n" + + "por %%mm1, %%mm0\n" + + "sub %2, %0\n" + "jns 1b\n" + + "movd %%mm0, %k0\n" + "psrlq $32, %%mm0\n" + "bsrl %k0, %k0\n" + "subl %5, %k0\n" + "movl %k0, (%3)\n" + + "movd %%mm0, %k0\n" + "bsrl %k0, %k0\n" + "subl %5, %k0\n" + "movl %k0, 4(%3)\n" + : "+r" (blk) + : "r" (&sb_sample_f[0][ch][sb]), + "i" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + "r" (&scale_factor[ch][sb]), + "r" (&consts), + "i" (SCALE_OUT_BITS) + : "cc", "memory"); + } + } + asm volatile ("emms\n"); +} + static int check_mmx_support(void) { #ifdef __amd64__ @@ -313,6 +366,7 @@ void sbc_init_primitives_mmx(struct sbc_encoder_state *state) if (check_mmx_support()) { state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; + state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx; state->implementation_info = "MMX"; } } diff --git a/src/modules/bluetooth/sbc_primitives_mmx.h b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h index c1e44a5d..c1e44a5d 100644 --- a/src/modules/bluetooth/sbc_primitives_mmx.h +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c new file mode 100644 index 00000000..c233d3c6 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -0,0 +1,892 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> + * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_neon.h" + +/* + * ARM NEON optimizations + */ + +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + +static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vmlal.s16 q1, d5, d9\n" + + "vpadd.s32 d0, d0, d1\n" + "vpadd.s32 d1, d2, d3\n" + + "vrshrn.s32 d0, q0, %3\n" + + "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" + + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vmull.s16 q3, d2, d0\n" + "vmull.s16 q4, d3, d0\n" + "vmlal.s16 q3, d4, d1\n" + "vmlal.s16 q4, d5, d1\n" + + "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11"); +} + +static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmull.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmull.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q8, d6, d10\n" + "vmlal.s16 q9, d7, d11\n" + + "vpadd.s32 d0, d12, d13\n" + "vpadd.s32 d1, d14, d15\n" + "vpadd.s32 d2, d16, d17\n" + "vpadd.s32 d3, d18, d19\n" + + "vrshr.s32 q0, q0, %3\n" + "vrshr.s32 q1, q1, %3\n" + "vmovn.s32 d0, q0\n" + "vmovn.s32 d1, q1\n" + + "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmull.s16 q6, d4, d0\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmull.s16 q7, d5, d0\n" + "vmull.s16 q8, d6, d0\n" + "vmull.s16 q9, d7, d0\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d1\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d1\n" + "vmlal.s16 q8, d6, d1\n" + "vmlal.s16 q9, d7, d1\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d2\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d2\n" + "vmlal.s16 q8, d6, d2\n" + "vmlal.s16 q9, d7, d2\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d3\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d3\n" + "vmlal.s16 q8, d6, d3\n" + "vmlal.s16 q9, d7, d3\n" + + "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ + "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ + "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", "d16", "d17", + "d18", "d19"); +} + +static inline void sbc_analyze_4b_4s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); +} + +static void sbc_calc_scalefactors_neon( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb += 4) { + int blk = blocks; + int32_t *in = &sb_sample_f[0][ch][sb]; + asm volatile ( + "vmov.s32 q0, #0\n" + "vmov.s32 q1, %[c1]\n" + "vmov.s32 q14, #1\n" + "vmov.s32 q15, %[c2]\n" + "vadd.s32 q1, q1, q14\n" + "1:\n" + "vld1.32 {d16, d17}, [%[in], :128], %[inc]\n" + "vabs.s32 q8, q8\n" + "vld1.32 {d18, d19}, [%[in], :128], %[inc]\n" + "vabs.s32 q9, q9\n" + "vld1.32 {d20, d21}, [%[in], :128], %[inc]\n" + "vabs.s32 q10, q10\n" + "vld1.32 {d22, d23}, [%[in], :128], %[inc]\n" + "vabs.s32 q11, q11\n" + "vmax.s32 q0, q0, q8\n" + "vmax.s32 q1, q1, q9\n" + "vmax.s32 q0, q0, q10\n" + "vmax.s32 q1, q1, q11\n" + "subs %[blk], %[blk], #4\n" + "bgt 1b\n" + "vmax.s32 q0, q0, q1\n" + "vsub.s32 q0, q0, q14\n" + "vclz.s32 q0, q0\n" + "vsub.s32 q0, q15, q0\n" + "vst1.32 {d0, d1}, [%[out], :128]\n" + : + [blk] "+r" (blk), + [in] "+r" (in) + : + [inc] "r" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + [out] "r" (&scale_factor[ch][sb]), + [c1] "i" (1 << SCALE_OUT_BITS), + [c2] "i" (31 - SCALE_OUT_BITS) + : "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "d26", + "d27", "d28", "d29", "d30", "d31", "cc", "memory"); + } + } +} + +int sbc_calc_scalefactors_j_neon( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands) +{ + static SBC_ALIGNED int32_t joint_bits_mask[8] = { + 8, 4, 2, 1, 128, 64, 32, 16 + }; + int joint, i; + int32_t *in0, *in1; + int32_t *in = &sb_sample_f[0][0][0]; + uint32_t *out0, *out1; + uint32_t *out = &scale_factor[0][0]; + int32_t *consts = joint_bits_mask; + + i = subbands; + + asm volatile ( + /* + * constants: q13 = (31 - SCALE_OUT_BITS), q14 = 1 + * input: q0 = ((1 << SCALE_OUT_BITS) + 1) + * %[in0] - samples for channel 0 + * %[in1] - samples for shannel 1 + * output: q0, q1 - scale factors without joint stereo + * q2, q3 - scale factors with joint stereo + * q15 - joint stereo selection mask + */ + ".macro calc_scalefactors\n" + "vmov.s32 q1, q0\n" + "vmov.s32 q2, q0\n" + "vmov.s32 q3, q0\n" + "mov %[i], %[blocks]\n" + "1:\n" + "vld1.32 {d18, d19}, [%[in1], :128], %[inc]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d16, d17}, [%[in0], :128], %[inc]\n" + "vhadd.s32 q10, q8, q11\n" + "vhsub.s32 q11, q8, q11\n" + "vabs.s32 q8, q8\n" + "vabs.s32 q9, q9\n" + "vabs.s32 q10, q10\n" + "vabs.s32 q11, q11\n" + "vmax.s32 q0, q0, q8\n" + "vmax.s32 q1, q1, q9\n" + "vmax.s32 q2, q2, q10\n" + "vmax.s32 q3, q3, q11\n" + "subs %[i], %[i], #1\n" + "bgt 1b\n" + "vsub.s32 q0, q0, q14\n" + "vsub.s32 q1, q1, q14\n" + "vsub.s32 q2, q2, q14\n" + "vsub.s32 q3, q3, q14\n" + "vclz.s32 q0, q0\n" + "vclz.s32 q1, q1\n" + "vclz.s32 q2, q2\n" + "vclz.s32 q3, q3\n" + "vsub.s32 q0, q13, q0\n" + "vsub.s32 q1, q13, q1\n" + "vsub.s32 q2, q13, q2\n" + "vsub.s32 q3, q13, q3\n" + ".endm\n" + /* + * constants: q14 = 1 + * input: q15 - joint stereo selection mask + * %[in0] - value set by calc_scalefactors macro + * %[in1] - value set by calc_scalefactors macro + */ + ".macro update_joint_stereo_samples\n" + "sub %[out1], %[in1], %[inc]\n" + "sub %[out0], %[in0], %[inc]\n" + "sub %[in1], %[in1], %[inc], asl #1\n" + "sub %[in0], %[in0], %[inc], asl #1\n" + "vld1.32 {d18, d19}, [%[in1], :128]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d16, d17}, [%[in0], :128]\n" + "vld1.32 {d2, d3}, [%[out1], :128]\n" + "vbic.s32 q3, q1, q14\n" + "vld1.32 {d0, d1}, [%[out0], :128]\n" + "vhsub.s32 q10, q8, q11\n" + "vhadd.s32 q11, q8, q11\n" + "vhsub.s32 q2, q0, q3\n" + "vhadd.s32 q3, q0, q3\n" + "vbif.s32 q10, q9, q15\n" + "vbif.s32 d22, d16, d30\n" + "sub %[inc], %[zero], %[inc], asl #1\n" + "sub %[i], %[blocks], #2\n" + "2:\n" + "vbif.s32 d23, d17, d31\n" + "vst1.32 {d20, d21}, [%[in1], :128], %[inc]\n" + "vbif.s32 d4, d2, d30\n" + "vld1.32 {d18, d19}, [%[in1], :128]\n" + "vbif.s32 d5, d3, d31\n" + "vst1.32 {d22, d23}, [%[in0], :128], %[inc]\n" + "vbif.s32 d6, d0, d30\n" + "vld1.32 {d16, d17}, [%[in0], :128]\n" + "vbif.s32 d7, d1, d31\n" + "vst1.32 {d4, d5}, [%[out1], :128], %[inc]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d2, d3}, [%[out1], :128]\n" + "vst1.32 {d6, d7}, [%[out0], :128], %[inc]\n" + "vbic.s32 q3, q1, q14\n" + "vld1.32 {d0, d1}, [%[out0], :128]\n" + "vhsub.s32 q10, q8, q11\n" + "vhadd.s32 q11, q8, q11\n" + "vhsub.s32 q2, q0, q3\n" + "vhadd.s32 q3, q0, q3\n" + "vbif.s32 q10, q9, q15\n" + "vbif.s32 d22, d16, d30\n" + "subs %[i], %[i], #2\n" + "bgt 2b\n" + "sub %[inc], %[zero], %[inc], asr #1\n" + "vbif.s32 d23, d17, d31\n" + "vst1.32 {d20, d21}, [%[in1], :128]\n" + "vbif.s32 q2, q1, q15\n" + "vst1.32 {d22, d23}, [%[in0], :128]\n" + "vbif.s32 q3, q0, q15\n" + "vst1.32 {d4, d5}, [%[out1], :128]\n" + "vst1.32 {d6, d7}, [%[out0], :128]\n" + ".endm\n" + + "vmov.s32 q14, #1\n" + "vmov.s32 q13, %[c2]\n" + + "cmp %[i], #4\n" + "bne 8f\n" + + "4:\n" /* 4 subbands */ + "add %[in0], %[in], #0\n" + "add %[in1], %[in], #32\n" + "add %[out0], %[out], #0\n" + "add %[out1], %[out], #32\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 0, 1, 2 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* calculate and save to memory 'joint' variable */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + " vpadd.s32 d16, d16, d16\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vst1.32 {d16[0]}, [%[joint]]\n" + + "update_joint_stereo_samples\n" + "b 9f\n" + + "8:\n" /* 8 subbands */ + "add %[in0], %[in], #16\n\n" + "add %[in1], %[in], #48\n" + "add %[out0], %[out], #16\n\n" + "add %[out1], %[out], #48\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 4, 5, 6 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* calculate part of 'joint' variable and save it to d24 */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vpadd.s32 d24, d16, d16\n" + + "update_joint_stereo_samples\n" + + "add %[in0], %[in], #0\n" + "add %[in1], %[in], #32\n" + "add %[out0], %[out], #0\n\n" + "add %[out1], %[out], #32\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 0, 1, 2, 3 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* combine last part of 'joint' with d24 and save to memory */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + " vpadd.s32 d16, d16, d16\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + " vadd.s32 d16, d16, d24\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vst1.32 {d16[0]}, [%[joint]]\n" + + "update_joint_stereo_samples\n" + "9:\n" + ".purgem calc_scalefactors\n" + ".purgem update_joint_stereo_samples\n" + : + [i] "+&r" (i), + [in] "+&r" (in), + [in0] "=&r" (in0), + [in1] "=&r" (in1), + [out] "+&r" (out), + [out0] "=&r" (out0), + [out1] "=&r" (out1), + [consts] "+&r" (consts) + : + [inc] "r" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + [blocks] "r" (blocks), + [joint] "r" (&joint), + [c1] "i" (1 << SCALE_OUT_BITS), + [c2] "i" (31 - SCALE_OUT_BITS), + [zero] "r" (0) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", + "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31", "cc", "memory"); + + return joint; +} + +#define PERM_BE(a, b, c, d) { \ + (a * 2) + 1, (a * 2) + 0, \ + (b * 2) + 1, (b * 2) + 0, \ + (c * 2) + 1, (c * 2) + 0, \ + (d * 2) + 1, (d * 2) + 0 \ + } +#define PERM_LE(a, b, c, d) { \ + (a * 2) + 0, (a * 2) + 1, \ + (b * 2) + 0, (b * 2) + 1, \ + (c * 2) + 0, (c * 2) + 1, \ + (d * 2) + 0, (d * 2) + 1 \ + } + +static SBC_ALWAYS_INLINE int sbc_enc_process_input_4s_neon_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + static SBC_ALIGNED uint8_t perm_be[2][8] = { + PERM_BE(7, 3, 6, 4), + PERM_BE(0, 2, 1, 5) + }; + static SBC_ALIGNED uint8_t perm_le[2][8] = { + PERM_LE(7, 3, 6, 4), + PERM_LE(0, 2, 1, 5) + }; + /* handle X buffer wraparound */ + if (position < nsamples) { + int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 40]; + int16_t *src = &X[0][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0}, [%[src], :64]!\n" + "vst1.16 {d0}, [%[dst], :64]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + if (nchannels > 1) { + dst = &X[1][SBC_X_BUFFER_SIZE - 40]; + src = &X[1][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0}, [%[src], :64]!\n" + "vst1.16 {d0}, [%[dst], :64]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + } + position = SBC_X_BUFFER_SIZE - 40; + } + + if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { + /* poor 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[y], %[y], #16\n" + "sub %[position], %[position], #8\n" + "vld1.8 {d4, d5}, [%[pcm]]!\n" + "vuzp.16 d4, d5\n" + "vld1.8 {d20, d21}, [%[pcm]]!\n" + "vuzp.16 d20, d21\n" + "vswp d5, d20\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vtbl.8 d18, {d20, d21}, d0\n" + "vtbl.8 d19, {d20, d21}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "vst1.16 {d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else if (nchannels > 1) { + /* proper 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[y], %[y], #16\n" + "sub %[position], %[position], #8\n" + "vld2.16 {d4, d5}, [%[pcm]]!\n" + "vld2.16 {d20, d21}, [%[pcm]]!\n" + "vswp d5, d20\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vtbl.8 d18, {d20, d21}, d0\n" + "vtbl.8 d19, {d20, d21}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "vst1.16 {d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else { + int16_t *x = &X[0][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[position], %[position], #8\n" + "vld1.8 {d4, d5}, [%[pcm]]!\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19"); + } + return position; +} + +static SBC_ALWAYS_INLINE int sbc_enc_process_input_8s_neon_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + static SBC_ALIGNED uint8_t perm_be[4][8] = { + PERM_BE(15, 7, 14, 8), + PERM_BE(13, 9, 12, 10), + PERM_BE(11, 3, 6, 0), + PERM_BE(5, 1, 4, 2) + }; + static SBC_ALIGNED uint8_t perm_le[4][8] = { + PERM_LE(15, 7, 14, 8), + PERM_LE(13, 9, 12, 10), + PERM_LE(11, 3, 6, 0), + PERM_LE(5, 1, 4, 2) + }; + /* handle X buffer wraparound */ + if (position < nsamples) { + int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 72]; + int16_t *src = &X[0][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1}, [%[src], :128]!\n" + "vst1.16 {d0, d1}, [%[dst], :128]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + if (nchannels > 1) { + dst = &X[1][SBC_X_BUFFER_SIZE - 72]; + src = &X[1][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1}, [%[src], :128]!\n" + "vst1.16 {d0, d1}, [%[dst], :128]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + } + position = SBC_X_BUFFER_SIZE - 72; + } + + if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { + /* poor 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[y], %[y], #32\n" + "sub %[position], %[position], #16\n" + "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vuzp.16 q2, q3\n" + "vld1.8 {d20, d21, d22, d23}, [%[pcm]]!\n" + "vuzp.16 q10, q11\n" + "vswp q3, q10\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" + "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" + "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" + "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else if (nchannels > 1) { + /* proper 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[y], %[y], #32\n" + "sub %[position], %[position], #16\n" + "vld2.16 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vld2.16 {d20, d21, d22, d23}, [%[pcm]]!\n" + "vswp q3, q10\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" + "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" + "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" + "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else { + int16_t *x = &X[0][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[position], %[position], #16\n" + "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19"); + } + return position; +} + +#undef PERM_BE +#undef PERM_LE + +static int sbc_enc_process_input_4s_be_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_4s_neon_internal( + position, pcm, X, nsamples, nchannels, 1); +} + +static int sbc_enc_process_input_4s_le_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_4s_neon_internal( + position, pcm, X, nsamples, nchannels, 0); +} + +static int sbc_enc_process_input_8s_be_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_8s_neon_internal( + position, pcm, X, nsamples, nchannels, 1); +} + +static int sbc_enc_process_input_8s_le_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_8s_neon_internal( + position, pcm, X, nsamples, nchannels, 0); +} + +void sbc_init_primitives_neon(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; + state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon; + state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j_neon; + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le_neon; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be_neon; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le_neon; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be_neon; + state->implementation_info = "NEON"; +} + +#endif diff --git a/src/modules/bluetooth/sbc_primitives_neon.h b/src/modules/bluetooth/sbc/sbc_primitives_neon.h index 30766ed8..30766ed8 100644 --- a/src/modules/bluetooth/sbc_primitives_neon.h +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.h diff --git a/src/modules/bluetooth/sbc_tables.h b/src/modules/bluetooth/sbc/sbc_tables.h index 0057c73f..0057c73f 100644 --- a/src/modules/bluetooth/sbc_tables.h +++ b/src/modules/bluetooth/sbc/sbc_tables.h diff --git a/src/modules/bluetooth/sbc_primitives_neon.c b/src/modules/bluetooth/sbc_primitives_neon.c deleted file mode 100644 index f1bc7b48..00000000 --- a/src/modules/bluetooth/sbc_primitives_neon.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org> - * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch> - * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com> - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include <stdint.h> -#include <limits.h> -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_neon.h" - -/* - * ARM NEON optimizations - */ - -#ifdef SBC_BUILD_WITH_NEON_SUPPORT - -static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - /* TODO: merge even and odd cases (or even merge all four calls to this - * function) in order to have only aligned reads from 'in' array - * and reduce number of load instructions */ - asm volatile ( - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmull.s16 q0, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmull.s16 q1, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q0, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q1, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q0, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q1, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q0, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q1, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q0, d4, d8\n" - "vmlal.s16 q1, d5, d9\n" - - "vpadd.s32 d0, d0, d1\n" - "vpadd.s32 d1, d2, d3\n" - - "vrshrn.s32 d0, q0, %3\n" - - "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" - - "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ - - "vmull.s16 q3, d2, d0\n" - "vmull.s16 q4, d3, d0\n" - "vmlal.s16 q3, d4, d1\n" - "vmlal.s16 q4, d5, d1\n" - - "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ - "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ - - "vst1.32 {d0, d1}, [%2, :128]\n" - : "+r" (in), "+r" (consts) - : "r" (out), - "i" (SBC_PROTO_FIXED4_SCALE) - : "memory", - "d0", "d1", "d2", "d3", "d4", "d5", - "d6", "d7", "d8", "d9", "d10", "d11"); -} - -static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - /* TODO: merge even and odd cases (or even merge all four calls to this - * function) in order to have only aligned reads from 'in' array - * and reduce number of load instructions */ - asm volatile ( - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmull.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmull.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmull.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmull.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q8, d6, d10\n" - "vmlal.s16 q9, d7, d11\n" - - "vpadd.s32 d0, d12, d13\n" - "vpadd.s32 d1, d14, d15\n" - "vpadd.s32 d2, d16, d17\n" - "vpadd.s32 d3, d18, d19\n" - - "vrshr.s32 q0, q0, %3\n" - "vrshr.s32 q1, q1, %3\n" - "vmovn.s32 d0, q0\n" - "vmovn.s32 d1, q1\n" - - "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ - "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmull.s16 q6, d4, d0\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmull.s16 q7, d5, d0\n" - "vmull.s16 q8, d6, d0\n" - "vmull.s16 q9, d7, d0\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d1\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d1\n" - "vmlal.s16 q8, d6, d1\n" - "vmlal.s16 q9, d7, d1\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d2\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d2\n" - "vmlal.s16 q8, d6, d2\n" - "vmlal.s16 q9, d7, d2\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d3\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d3\n" - "vmlal.s16 q8, d6, d3\n" - "vmlal.s16 q9, d7, d3\n" - - "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ - "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ - "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ - "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ - - "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" - : "+r" (in), "+r" (consts) - : "r" (out), - "i" (SBC_PROTO_FIXED8_SCALE) - : "memory", - "d0", "d1", "d2", "d3", "d4", "d5", - "d6", "d7", "d8", "d9", "d10", "d11", - "d12", "d13", "d14", "d15", "d16", "d17", - "d18", "d19"); -} - -static inline void sbc_analyze_4b_4s_neon(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static inline void sbc_analyze_4b_8s_neon(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); -} - -void sbc_init_primitives_neon(struct sbc_encoder_state *state) -{ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; - state->implementation_info = "NEON"; -} - -#endif |