From 8c982a4afece524fec5d928425477f3e7a56b817 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 2 Dec 2010 14:11:13 +0200 Subject: bluetooth: handle Acquire API change Acquire now returns the input and output MTU of the file descriptor, so it is no longer necessary to get those after acquiring the fd, which means fewer round trips and a faster response time when switching profiles. --- src/modules/bluetooth/bluetooth-util.c | 11 ++++-- src/modules/bluetooth/bluetooth-util.h | 2 +- src/modules/bluetooth/module-bluetooth-device.c | 46 ++----------------------- 3 files changed, 12 insertions(+), 47 deletions(-) diff --git a/src/modules/bluetooth/bluetooth-util.c b/src/modules/bluetooth/bluetooth-util.c index e6f6e17e..17ba1302 100644 --- a/src/modules/bluetooth/bluetooth-util.c +++ b/src/modules/bluetooth/bluetooth-util.c @@ -934,10 +934,11 @@ const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetoo return NULL; } -int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype) { +int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu) { DBusMessage *m, *r; DBusError err; int ret; + uint16_t i, o; pa_assert(t); pa_assert(t->y); @@ -955,7 +956,7 @@ int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char * } #ifdef DBUS_TYPE_UNIX_FD - if (!dbus_message_get_args(r, &err, DBUS_TYPE_UNIX_FD, &ret, DBUS_TYPE_INVALID)) { + if (!dbus_message_get_args(r, &err, DBUS_TYPE_UNIX_FD, &ret, DBUS_TYPE_UINT16, &i, DBUS_TYPE_UINT16, &o, DBUS_TYPE_INVALID)) { pa_log("Failed to parse org.bluez.MediaTransport.Acquire(): %s", err.message); ret = -1; dbus_error_free(&err); @@ -963,6 +964,12 @@ int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char * } #endif + if (imtu) + *imtu = i; + + if (omtu) + *omtu = o; + fail: dbus_message_unref(r); return ret; diff --git a/src/modules/bluetooth/bluetooth-util.h 
b/src/modules/bluetooth/bluetooth-util.h index f141209d..b471c34d 100644 --- a/src/modules/bluetooth/bluetooth-util.h +++ b/src/modules/bluetooth/bluetooth-util.h @@ -126,7 +126,7 @@ const pa_bluetooth_device* pa_bluetooth_discovery_get_by_address(pa_bluetooth_di const pa_bluetooth_transport* pa_bluetooth_discovery_get_transport(pa_bluetooth_discovery *y, const char *path); const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetooth_device *d, enum profile profile); -int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype); +int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu); void pa_bluetooth_transport_release(const pa_bluetooth_transport *t, const char *accesstype); pa_hook* pa_bluetooth_discovery_hook(pa_bluetooth_discovery *d); diff --git a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 936d3c77..34ff8f8c 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -910,7 +910,8 @@ static int bt_transport_acquire(struct userdata *u, pa_bool_t start) { return -1; } - u->stream_fd = pa_bluetooth_transport_acquire(t, accesstype); + /* FIXME: Handle in/out MTU properly when unix socket is not longer supported */ + u->stream_fd = pa_bluetooth_transport_acquire(t, accesstype, NULL, &u->link_mtu); if (u->stream_fd < 0) return -1; @@ -2164,53 +2165,10 @@ static int parse_transport_property(struct userdata *u, DBusMessageIter *i) { /* Run from main thread */ static int bt_transport_open(struct userdata *u) { - DBusMessage *m, *r; - DBusMessageIter arg_i, element_i; - DBusError err; - if (bt_transport_acquire(u, FALSE) < 0) return -1; - dbus_error_init(&err); - - pa_assert_se(m = dbus_message_new_method_call("org.bluez", u->transport, "org.bluez.MediaTransport", "GetProperties")); - r = 
dbus_connection_send_with_reply_and_block(pa_dbus_connection_get(u->connection), m, -1, &err); - - if (dbus_error_is_set(&err) || !r) { - pa_log("Failed to get transport properties: %s", err.message); - goto fail; - } - - if (!dbus_message_iter_init(r, &arg_i)) { - pa_log("GetProperties reply has no arguments."); - goto fail; - } - - if (dbus_message_iter_get_arg_type(&arg_i) != DBUS_TYPE_ARRAY) { - pa_log("GetProperties argument is not an array."); - goto fail; - } - - dbus_message_iter_recurse(&arg_i, &element_i); - while (dbus_message_iter_get_arg_type(&element_i) != DBUS_TYPE_INVALID) { - - if (dbus_message_iter_get_arg_type(&element_i) == DBUS_TYPE_DICT_ENTRY) { - DBusMessageIter dict_i; - - dbus_message_iter_recurse(&element_i, &dict_i); - - parse_transport_property(u, &dict_i); - } - - if (!dbus_message_iter_next(&element_i)) - break; - } - return bt_transport_config(u); - -fail: - dbus_message_unref(r); - return -1; } /* Run from main thread */ -- cgit From 1c388f977a4801202c9d0f07d43b705ab3f83f16 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 23 Dec 2010 13:13:44 +0200 Subject: bluetooth: reduce bitpool if audio start skipping When audio skips it could be that there is some bandwidth limitation in the link e.g. headset doesn't support EDR (< 2.0), and by reducing the bitpool it may find a better rate that either prevents the skips completely or at least reduces them. 
--- src/modules/bluetooth/module-bluetooth-device.c | 71 +++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 34ff8f8c..99c6e194 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -58,6 +58,9 @@ #define MAX_BITPOOL 64 #define MIN_BITPOOL 2U +#define BITPOOL_DEC_LIMIT 32 +#define BITPOOL_DEC_STEP 5 + PA_MODULE_AUTHOR("Joao Paulo Rechi Vita"); PA_MODULE_DESCRIPTION("Bluetooth audio sink and source"); PA_MODULE_VERSION(PACKAGE_VERSION); @@ -117,6 +120,8 @@ struct a2dp_info { size_t buffer_size; /* Size of the buffer */ uint16_t seq_num; /* Cumulative packet sequence */ + uint8_t min_bitpool; + uint8_t max_bitpool; }; struct hsp_info { @@ -660,6 +665,9 @@ static void setup_sbc(struct a2dp_info *a2dp) { pa_assert_not_reached(); } + a2dp->min_bitpool = active_capabilities->min_bitpool; + a2dp->max_bitpool = active_capabilities->max_bitpool; + a2dp->sbc.bitpool = active_capabilities->max_bitpool; a2dp->codesize = sbc_get_codesize(&a2dp->sbc); a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); @@ -743,6 +751,39 @@ static int set_conf(struct userdata *u) { return 0; } +/* from IO thread */ +static void a2dp_set_bitpool(struct userdata *u, uint8_t bitpool) +{ + struct a2dp_info *a2dp; + + pa_assert(u); + + a2dp = &u->a2dp; + + if (a2dp->sbc.bitpool == bitpool) + return; + + if (bitpool > a2dp->max_bitpool) + bitpool = a2dp->max_bitpool; + else if (bitpool < a2dp->min_bitpool) + bitpool = a2dp->min_bitpool; + + a2dp->sbc.bitpool = bitpool; + + a2dp->codesize = sbc_get_codesize(&a2dp->sbc); + a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); + + pa_log_debug("Bitpool has changed to %u", a2dp->sbc.bitpool); + + u->block_size = + (u->link_mtu - sizeof(struct rtp_header) - sizeof(struct rtp_payload)) + / a2dp->frame_length * a2dp->codesize; + + 
pa_sink_set_max_request_within_thread(u->sink, u->block_size); + pa_sink_set_fixed_latency_within_thread(u->sink, + FIXED_LATENCY_PLAYBACK_A2DP + pa_bytes_to_usec(u->block_size, &u->sample_spec)); +} + /* from IO thread, except in SCO over PCM */ static int setup_stream(struct userdata *u) { @@ -758,6 +799,9 @@ static int setup_stream(struct userdata *u) { pa_log_debug("Stream properly set up, we're ready to roll!"); + if (u->profile == PROFILE_A2DP) + a2dp_set_bitpool(u, u->a2dp.max_bitpool); + u->rtpoll_item = pa_rtpoll_item_new(u->rtpoll, PA_RTPOLL_NEVER, 1); pollfd = pa_rtpoll_item_get_pollfd(u->rtpoll_item, NULL); pollfd->fd = u->stream_fd; @@ -1489,6 +1533,27 @@ static int a2dp_process_push(struct userdata *u) { return ret; } +static void a2dp_reduce_bitpool(struct userdata *u) +{ + struct a2dp_info *a2dp; + uint8_t bitpool; + + pa_assert(u); + + a2dp = &u->a2dp; + + /* Check if bitpool is already at its limit */ + if (a2dp->sbc.bitpool <= BITPOOL_DEC_LIMIT) + return; + + bitpool = a2dp->sbc.bitpool - BITPOOL_DEC_STEP; + + if (bitpool < BITPOOL_DEC_LIMIT) + bitpool = BITPOOL_DEC_LIMIT; + + a2dp_set_bitpool(u, bitpool); +} + static void thread_func(void *userdata) { struct userdata *u = userdata; unsigned do_write = 0; @@ -1580,6 +1645,9 @@ static void thread_func(void *userdata) { pa_sink_render_full(u->sink, skip_bytes, &tmp); pa_memblock_unref(tmp.memblock); u->write_index += skip_bytes; + + if (u->profile == PROFILE_A2DP) + a2dp_reduce_bitpool(u); } } @@ -2095,6 +2163,9 @@ static int bt_transport_config_a2dp(struct userdata *u) { pa_assert_not_reached(); } + a2dp->min_bitpool = config->min_bitpool; + a2dp->max_bitpool = config->max_bitpool; + a2dp->sbc.bitpool = config->max_bitpool; a2dp->codesize = sbc_get_codesize(&a2dp->sbc); a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); -- cgit From 97f7c5759e65a700a934790ee0d846a33c4a7f66 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 23 Dec 2010 15:24:39 +0200 Subject: bluetooth: fix 
a2dp_process_push Use minimum bitpool configured to get the maximum block_size possible, also remove checks for how much has been written when decoding sbc frames since the block size may change due to bitpool changes. --- src/modules/bluetooth/module-bluetooth-device.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 99c6e194..75cf498f 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -579,7 +579,7 @@ static int setup_a2dp(struct userdata *u) { } /* Run from main thread */ -static void setup_sbc(struct a2dp_info *a2dp) { +static void setup_sbc(struct a2dp_info *a2dp, enum profile p) { sbc_capabilities_t *active_capabilities; pa_assert(a2dp); @@ -668,7 +668,8 @@ static void setup_sbc(struct a2dp_info *a2dp) { a2dp->min_bitpool = active_capabilities->min_bitpool; a2dp->max_bitpool = active_capabilities->max_bitpool; - a2dp->sbc.bitpool = active_capabilities->max_bitpool; + /* Set minimum bitpool for source to get the maximum possible block_size */ + a2dp->sbc.bitpool = p == PROFILE_A2DP ? 
a2dp->max_bitpool : a2dp->min_bitpool; a2dp->codesize = sbc_get_codesize(&a2dp->sbc); a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); } @@ -736,7 +737,7 @@ static int set_conf(struct userdata *u) { /* setup SBC encoder now we agree on parameters */ if (u->profile == PROFILE_A2DP || u->profile == PROFILE_A2DP_SOURCE) { - setup_sbc(&u->a2dp); + setup_sbc(&u->a2dp, u->profile); u->block_size = ((u->link_mtu - sizeof(struct rtp_header) - sizeof(struct rtp_payload)) @@ -1486,7 +1487,7 @@ static int a2dp_process_push(struct userdata *u) { d = pa_memblock_acquire(memchunk.memblock); to_write = memchunk.length = pa_memblock_get_length(memchunk.memblock); - while (PA_LIKELY(to_decode > 0 && to_write > 0)) { + while (PA_LIKELY(to_decode > 0)) { size_t written; ssize_t decoded; @@ -1505,10 +1506,12 @@ static int a2dp_process_push(struct userdata *u) { /* pa_log_debug("SBC: decoded: %lu; written: %lu", (unsigned long) decoded, (unsigned long) written); */ /* pa_log_debug("SBC: frame_length: %lu; codesize: %lu", (unsigned long) a2dp->frame_length, (unsigned long) a2dp->codesize); */ + /* Reset frame length, it can be changed due to bitpool change */ + a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); + pa_assert_fp((size_t) decoded <= to_decode); pa_assert_fp((size_t) decoded == a2dp->frame_length); - pa_assert_fp((size_t) written <= to_write); pa_assert_fp((size_t) written == a2dp->codesize); p = (const uint8_t*) p + decoded; @@ -1520,6 +1523,8 @@ static int a2dp_process_push(struct userdata *u) { frame_count++; } + memchunk.length -= to_write; + pa_memblock_release(memchunk.memblock); pa_source_post(u->source, &memchunk); @@ -2166,7 +2171,8 @@ static int bt_transport_config_a2dp(struct userdata *u) { a2dp->min_bitpool = config->min_bitpool; a2dp->max_bitpool = config->max_bitpool; - a2dp->sbc.bitpool = config->max_bitpool; + /* Set minimum bitpool for source to get the maximum possible block_size */ + a2dp->sbc.bitpool = u->profile == PROFILE_A2DP ? 
a2dp->max_bitpool : a2dp->min_bitpool; a2dp->codesize = sbc_get_codesize(&a2dp->sbc); a2dp->frame_length = sbc_get_frame_length(&a2dp->sbc); -- cgit From ad8562452768520dd70659cf4be686608b557961 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 14 Jan 2011 14:18:08 +0200 Subject: bluetooth: add proper handling for bluetooth.nrec property NREC stands for Noise Reduction and Echo Cancelation; it can be changed at any point by the headset. When set to "1" it indicates that those algorithms shall be enabled by default, and "0" means the headset probably has them active, so they should be disabled on the PA side. --- src/modules/bluetooth/bluetooth-util.c | 72 +++++++++++++++++++++ src/modules/bluetooth/bluetooth-util.h | 2 + src/modules/bluetooth/module-bluetooth-device.c | 86 +++++++++++-------------- 3 files changed, 112 insertions(+), 48 deletions(-) diff --git a/src/modules/bluetooth/bluetooth-util.c b/src/modules/bluetooth/bluetooth-util.c index 17ba1302..9c679687 100644 --- a/src/modules/bluetooth/bluetooth-util.c +++ b/src/modules/bluetooth/bluetooth-util.c @@ -714,6 +714,47 @@ static void list_adapters(pa_bluetooth_discovery *y) { send_and_add_to_pending(y, NULL, m, list_adapters_reply); } +int pa_bluetooth_transport_parse_property(pa_bluetooth_transport *t, DBusMessageIter *i) +{ + const char *key; + DBusMessageIter variant_i; + + if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING) { + pa_log("Property name not a string."); + return -1; + } + + dbus_message_iter_get_basic(i, &key); + + if (!dbus_message_iter_next(i)) { + pa_log("Property value missing"); + return -1; + } + + if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_VARIANT) { + pa_log("Property value not a variant."); + return -1; + } + + dbus_message_iter_recurse(i, &variant_i); + + switch (dbus_message_iter_get_arg_type(&variant_i)) { + + case DBUS_TYPE_BOOLEAN: { + + pa_bool_t *value; + dbus_message_iter_get_basic(&variant_i, &value); + + if (pa_streq(key, "NREC")) + t->nrec = value; 
+ + break; + } + } + + return 0; +} + static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *userdata) { DBusError err; pa_bluetooth_discovery *y; @@ -861,6 +902,28 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us } } + return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; + } else if (dbus_message_is_signal(m, "org.bluez.MediaTransport", "PropertyChanged")) { + pa_bluetooth_device *d; + pa_bluetooth_transport *t; + void *state = NULL; + DBusMessageIter arg_i; + + while ((d = pa_hashmap_iterate(y->devices, &state, NULL))) + if ((t = pa_hashmap_get(d->transports, dbus_message_get_path(m)))) + break; + + if (!t) + goto fail; + + if (!dbus_message_iter_init(m, &arg_i)) { + pa_log("Failed to parse PropertyChanged: %s", err.message); + goto fail; + } + + if (pa_bluetooth_transport_parse_property(t, &arg_i) < 0) + goto fail; + return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; } @@ -1035,6 +1098,7 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage const char *path, *dev_path = NULL, *uuid = NULL; uint8_t *config = NULL; int size = 0; + pa_bool_t nrec; enum profile p; DBusMessageIter args, props; DBusMessage *r; @@ -1070,6 +1134,10 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage if (var != DBUS_TYPE_OBJECT_PATH) goto fail; dbus_message_iter_get_basic(&value, &dev_path); + } else if (strcasecmp(key, "NREC") == 0) { + if (var != DBUS_TYPE_BOOLEAN) + goto fail; + dbus_message_iter_get_basic(&value, &nrec); } else if (strcasecmp(key, "Configuration") == 0) { DBusMessageIter array; if (var != DBUS_TYPE_ARRAY) @@ -1093,6 +1161,8 @@ static DBusMessage *endpoint_set_configuration(DBusConnection *conn, DBusMessage p = PROFILE_A2DP_SOURCE; t = transport_new(y, path, p, config, size); + if (nrec) + t->nrec = nrec; pa_hashmap_put(d->transports, t->path, t); pa_log_debug("Transport %s profile %d available", t->path, t->profile); @@ -1402,6 +1472,7 @@ pa_bluetooth_discovery* 
pa_bluetooth_discovery_get(pa_core *c) { "type='signal',sender='org.bluez',interface='org.bluez.AudioSink',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.AudioSource',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.HandsfreeGateway',member='PropertyChanged'", + "type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'", NULL) < 0) { pa_log("Failed to add D-Bus matches: %s", err.message); goto fail; @@ -1469,6 +1540,7 @@ void pa_bluetooth_discovery_unref(pa_bluetooth_discovery *y) { "type='signal',sender='org.bluez',interface='org.bluez.AudioSink',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.AudioSource',member='PropertyChanged'", "type='signal',sender='org.bluez',interface='org.bluez.HandsfreeGateway',member='PropertyChanged'", + "type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'", NULL); if (y->filter_added) diff --git a/src/modules/bluetooth/bluetooth-util.h b/src/modules/bluetooth/bluetooth-util.h index b471c34d..bb0cb24a 100644 --- a/src/modules/bluetooth/bluetooth-util.h +++ b/src/modules/bluetooth/bluetooth-util.h @@ -70,6 +70,7 @@ struct pa_bluetooth_transport { uint8_t codec; uint8_t *config; int config_size; + pa_bool_t nrec; }; /* This enum is shared among Audio, Headset, AudioSink, and AudioSource, although not all values are acceptable in all profiles */ @@ -128,6 +129,7 @@ const pa_bluetooth_transport* pa_bluetooth_device_get_transport(const pa_bluetoo int pa_bluetooth_transport_acquire(const pa_bluetooth_transport *t, const char *accesstype, size_t *imtu, size_t *omtu); void pa_bluetooth_transport_release(const pa_bluetooth_transport *t, const char *accesstype); +int pa_bluetooth_transport_parse_property(pa_bluetooth_transport *t, DBusMessageIter *i); pa_hook* pa_bluetooth_discovery_hook(pa_bluetooth_discovery *d); diff --git 
a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 75cf498f..86aaa461 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -1751,7 +1751,7 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us dbus_message_get_path(m), dbus_message_get_member(m)); - if (!dbus_message_has_path(m, u->path)) + if (!dbus_message_has_path(m, u->path) && !dbus_message_has_path(m, u->transport)) goto fail; if (dbus_message_is_signal(m, "org.bluez.Headset", "SpeakerGainChanged") || @@ -1777,6 +1777,28 @@ static DBusHandlerResult filter_cb(DBusConnection *bus, DBusMessage *m, void *us pa_source_volume_changed(u->source, &v); } } + } else if (dbus_message_is_signal(m, "org.bluez.MediaTransport", "PropertyChanged")) { + DBusMessageIter arg_i; + pa_bluetooth_transport *t; + pa_bool_t nrec; + + t = (pa_bluetooth_transport *) pa_bluetooth_discovery_get_transport(u->discovery, u->transport); + pa_assert(t); + + if (!dbus_message_iter_init(m, &arg_i)) { + pa_log("Failed to parse PropertyChanged: %s", err.message); + goto fail; + } + + nrec = t->nrec; + + if (pa_bluetooth_transport_parse_property(t, &arg_i) < 0) + goto fail; + + if (nrec != t->nrec) { + pa_log_debug("dbus: property 'NREC' changed to value '%s'", t->nrec ? "True" : "False"); + pa_proplist_sets(u->source->proplist, "bluetooth.nrec", t->nrec ? "1" : "0"); + } } fail: @@ -2018,6 +2040,7 @@ static int add_source(struct userdata *u) { pa_proplist_sets(data.proplist, "bluetooth.protocol", u->profile == PROFILE_A2DP_SOURCE ? 
"a2dp_source" : "hsp"); if ((u->profile == PROFILE_HSP) || (u->profile == PROFILE_HFGW)) pa_proplist_sets(data.proplist, PA_PROP_DEVICE_INTENDED_ROLES, "phone"); + data.card = u->card; data.name = get_name("source", u->modargs, u->address, &b); data.namereg_fail = b; @@ -2044,8 +2067,15 @@ static int add_source(struct userdata *u) { pa_bytes_to_usec(u->block_size, &u->sample_spec)); } - if (u->profile == PROFILE_HSP || u->profile == PROFILE_HFGW) - pa_proplist_sets(u->source->proplist, "bluetooth.nrec", (u->hsp.pcm_capabilities.flags & BT_PCM_FLAG_NREC) ? "1" : "0"); + if ((u->profile == PROFILE_HSP) || (u->profile == PROFILE_HFGW)) { + if (u->transport) { + const pa_bluetooth_transport *t; + t = pa_bluetooth_discovery_get_transport(u->discovery, u->transport); + pa_assert(t); + pa_proplist_sets(u->source->proplist, "bluetooth.nrec", t->nrec ? "1" : "0"); + } else + pa_proplist_sets(u->source->proplist, "bluetooth.nrec", (u->hsp.pcm_capabilities.flags & BT_PCM_FLAG_NREC) ? "1" : "0"); + } if (u->profile == PROFILE_HSP) { u->source->set_volume = source_set_volume_cb; @@ -2196,50 +2226,6 @@ static int bt_transport_config(struct userdata *u) { return bt_transport_config_a2dp(u); } -static int parse_transport_property(struct userdata *u, DBusMessageIter *i) { - const char *key; - DBusMessageIter variant_i; - - pa_assert(u); - pa_assert(i); - - if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_STRING) { - pa_log("Property name not a string."); - return -1; - } - - dbus_message_iter_get_basic(i, &key); - - if (!dbus_message_iter_next(i)) { - pa_log("Property value missing"); - return -1; - } - - if (dbus_message_iter_get_arg_type(i) != DBUS_TYPE_VARIANT) { - pa_log("Property value not a variant."); - return -1; - } - - dbus_message_iter_recurse(i, &variant_i); - - switch (dbus_message_iter_get_arg_type(&variant_i)) { - - case DBUS_TYPE_UINT16: { - - uint16_t value; - dbus_message_iter_get_basic(&variant_i, &value); - - if (pa_streq(key, "OMTU")) - u->link_mtu = value; - 
- - break; - } - - } - - return 0; -} - /* Run from main thread */ static int bt_transport_open(struct userdata *u) { if (bt_transport_acquire(u, FALSE) < 0) @@ -2725,7 +2711,7 @@ int pa__init(pa_module* m) { struct userdata *u; const char *address, *path; DBusError err; - char *mike, *speaker; + char *mike, *speaker, *transport; const pa_bluetooth_device *device; pa_assert(m); @@ -2804,15 +2790,18 @@ int pa__init(pa_module* m) { speaker = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.Headset',member='SpeakerGainChanged',path='%s'", u->path); mike = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.Headset',member='MicrophoneGainChanged',path='%s'", u->path); + transport = pa_sprintf_malloc("type='signal',sender='org.bluez',interface='org.bluez.MediaTransport',member='PropertyChanged'"); if (pa_dbus_add_matches( pa_dbus_connection_get(u->connection), &err, speaker, mike, + transport, NULL) < 0) { pa_xfree(speaker); pa_xfree(mike); + pa_xfree(transport); pa_log("Failed to add D-Bus matches: %s", err.message); goto fail; @@ -2820,6 +2809,7 @@ int pa__init(pa_module* m) { pa_xfree(speaker); pa_xfree(mike); + pa_xfree(transport); /* Connect to the BT service */ init_bt(u); -- cgit From e4eb4670108ad2b4a0d9c3044e12ed0d933f834e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 14 Mar 2011 14:46:10 -0300 Subject: build: move sbc related files to its own directory This should make it easier to apply patches from BlueZ, which also uses an sbc subdir for these files. 
--- src/Makefile.am | 2 +- src/modules/bluetooth/module-bluetooth-device.c | 2 +- src/modules/bluetooth/sbc.c | 1252 ----------------------- src/modules/bluetooth/sbc.h | 111 -- src/modules/bluetooth/sbc/sbc.c | 1252 +++++++++++++++++++++++ src/modules/bluetooth/sbc/sbc.h | 111 ++ src/modules/bluetooth/sbc/sbc_math.h | 60 ++ src/modules/bluetooth/sbc/sbc_primitives.c | 470 +++++++++ src/modules/bluetooth/sbc/sbc_primitives.h | 75 ++ src/modules/bluetooth/sbc/sbc_primitives_mmx.c | 320 ++++++ src/modules/bluetooth/sbc/sbc_primitives_mmx.h | 40 + src/modules/bluetooth/sbc/sbc_primitives_neon.c | 246 +++++ src/modules/bluetooth/sbc/sbc_primitives_neon.h | 40 + src/modules/bluetooth/sbc/sbc_tables.h | 659 ++++++++++++ src/modules/bluetooth/sbc_math.h | 60 -- src/modules/bluetooth/sbc_primitives.c | 470 --------- src/modules/bluetooth/sbc_primitives.h | 75 -- src/modules/bluetooth/sbc_primitives_mmx.c | 320 ------ src/modules/bluetooth/sbc_primitives_mmx.h | 40 - src/modules/bluetooth/sbc_primitives_neon.c | 246 ----- src/modules/bluetooth/sbc_primitives_neon.h | 40 - src/modules/bluetooth/sbc_tables.h | 659 ------------ 22 files changed, 3275 insertions(+), 3275 deletions(-) delete mode 100644 src/modules/bluetooth/sbc.c delete mode 100644 src/modules/bluetooth/sbc.h create mode 100644 src/modules/bluetooth/sbc/sbc.c create mode 100644 src/modules/bluetooth/sbc/sbc.h create mode 100644 src/modules/bluetooth/sbc/sbc_math.h create mode 100644 src/modules/bluetooth/sbc/sbc_primitives.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives.h create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_mmx.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_mmx.h create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_neon.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_neon.h create mode 100644 src/modules/bluetooth/sbc/sbc_tables.h delete mode 100644 src/modules/bluetooth/sbc_math.h delete mode 100644 src/modules/bluetooth/sbc_primitives.c 
delete mode 100644 src/modules/bluetooth/sbc_primitives.h delete mode 100644 src/modules/bluetooth/sbc_primitives_mmx.c delete mode 100644 src/modules/bluetooth/sbc_primitives_mmx.h delete mode 100644 src/modules/bluetooth/sbc_primitives_neon.c delete mode 100644 src/modules/bluetooth/sbc_primitives_neon.h delete mode 100644 src/modules/bluetooth/sbc_tables.h diff --git a/src/Makefile.am b/src/Makefile.am index 2ab63556..d4a72832 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1820,7 +1820,7 @@ module_bluetooth_discover_la_LDFLAGS = $(MODULE_LDFLAGS) module_bluetooth_discover_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) libbluetooth-util.la module_bluetooth_discover_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) -libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc.c modules/bluetooth/sbc.h modules/bluetooth/sbc_tables.h modules/bluetooth/sbc_math.h modules/bluetooth/sbc_primitives.h modules/bluetooth/sbc_primitives.c modules/bluetooth/sbc_primitives_mmx.h modules/bluetooth/sbc_primitives_neon.h modules/bluetooth/sbc_primitives_mmx.c modules/bluetooth/sbc_primitives_neon.c +libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h modules/bluetooth/sbc/sbc_tables.h modules/bluetooth/sbc/sbc_math.h modules/bluetooth/sbc/sbc_primitives.h modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives_mmx.h modules/bluetooth/sbc/sbc_primitives_neon.h modules/bluetooth/sbc/sbc_primitives_mmx.c modules/bluetooth/sbc/sbc_primitives_neon.c libbluetooth_sbc_la_LDFLAGS = -avoid-version libbluetooth_sbc_la_LIBADD = $(MODULE_LIBADD) libbluetooth_sbc_la_CFLAGS = $(AM_CFLAGS) diff --git a/src/modules/bluetooth/module-bluetooth-device.c b/src/modules/bluetooth/module-bluetooth-device.c index 86aaa461..d29e29b8 100644 --- a/src/modules/bluetooth/module-bluetooth-device.c +++ b/src/modules/bluetooth/module-bluetooth-device.c @@ -51,7 +51,7 @@ #include "module-bluetooth-device-symdef.h" #include "ipc.h" -#include "sbc.h" +#include "sbc/sbc.h" 
#include "rtp.h" #include "bluetooth-util.h" diff --git a/src/modules/bluetooth/sbc.c b/src/modules/bluetooth/sbc.c deleted file mode 100644 index 5157c70f..00000000 --- a/src/modules/bluetooth/sbc.c +++ /dev/null @@ -1,1252 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2008 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -/* todo items: - - use a log2 table for byte integer scale factors calculation (sum log2 results - for high and low bytes) fill bitpool by 16 bits instead of one at a time in - bits allocation/bitpool generation port to the dsp - -*/ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#include -#include -#include - -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc.h" -#include "sbc_primitives.h" - -#define SBC_SYNCWORD 0x9C - -/* This structure contains an unpacked SBC frame. 
- Yes, there is probably quite some unused space herein */ -struct sbc_frame { - uint8_t frequency; - uint8_t block_mode; - uint8_t blocks; - enum { - MONO = SBC_MODE_MONO, - DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL, - STEREO = SBC_MODE_STEREO, - JOINT_STEREO = SBC_MODE_JOINT_STEREO - } mode; - uint8_t channels; - enum { - LOUDNESS = SBC_AM_LOUDNESS, - SNR = SBC_AM_SNR - } allocation; - uint8_t subband_mode; - uint8_t subbands; - uint8_t bitpool; - uint16_t codesize; - uint8_t length; - - /* bit number x set means joint stereo has been used in subband x */ - uint8_t joint; - - /* only the lower 4 bits of every element are to be used */ - uint32_t scale_factor[2][8]; - - /* raw integer subband samples in the frame */ - int32_t SBC_ALIGNED sb_sample_f[16][2][8]; - - /* modified subband samples */ - int32_t SBC_ALIGNED sb_sample[16][2][8]; - - /* original pcm audio samples */ - int16_t SBC_ALIGNED pcm_sample[2][16*8]; -}; - -struct sbc_decoder_state { - int subbands; - int32_t V[2][170]; - int offset[2][16]; -}; - -/* - * Calculates the CRC-8 of the first len bits in data - */ -static const uint8_t crc_table[256] = { - 0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53, - 0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB, - 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E, - 0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76, - 0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4, - 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C, - 0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19, - 0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1, - 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40, - 0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8, - 0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D, - 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65, - 0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7, - 0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F, - 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A, - 0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2, - 0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75, - 0xCE, 0xD3, 0xF4, 
0xE9, 0xBA, 0xA7, 0x80, 0x9D, - 0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8, - 0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50, - 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2, - 0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A, - 0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F, - 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7, - 0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66, - 0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E, - 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB, - 0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43, - 0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1, - 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09, - 0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C, - 0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4 -}; - -static uint8_t sbc_crc8(const uint8_t *data, size_t len) -{ - uint8_t crc = 0x0f; - size_t i; - uint8_t octet; - - for (i = 0; i < len / 8; i++) - crc = crc_table[crc ^ data[i]]; - - octet = data[i]; - for (i = 0; i < len % 8; i++) { - char bit = ((octet ^ crc) & 0x80) >> 7; - - crc = ((crc & 0x7f) << 1) ^ (bit ? 
0x1d : 0); - - octet = octet << 1; - } - - return crc; -} - -/* - * Code straight from the spec to calculate the bits array - * Takes a pointer to the frame in question, a pointer to the bits array and - * the sampling frequency (as 2 bit integer) - */ -static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) -{ - uint8_t sf = frame->frequency; - - if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) { - int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; - int ch, sb; - - for (ch = 0; ch < frame->channels; ch++) { - max_bitneed = 0; - if (frame->allocation == SNR) { - for (sb = 0; sb < frame->subbands; sb++) { - bitneed[ch][sb] = frame->scale_factor[ch][sb]; - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } else { - for (sb = 0; sb < frame->subbands; sb++) { - if (frame->scale_factor[ch][sb] == 0) - bitneed[ch][sb] = -5; - else { - if (frame->subbands == 4) - loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; - else - loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; - if (loudness > 0) - bitneed[ch][sb] = loudness / 2; - else - bitneed[ch][sb] = loudness; - } - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } - - bitcount = 0; - slicecount = 0; - bitslice = max_bitneed + 1; - do { - bitslice--; - bitcount += slicecount; - slicecount = 0; - for (sb = 0; sb < frame->subbands; sb++) { - if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) - slicecount++; - else if (bitneed[ch][sb] == bitslice + 1) - slicecount += 2; - } - } while (bitcount + slicecount < frame->bitpool); - - if (bitcount + slicecount == frame->bitpool) { - bitcount += slicecount; - bitslice--; - } - - for (sb = 0; sb < frame->subbands; sb++) { - if (bitneed[ch][sb] < bitslice + 2) - bits[ch][sb] = 0; - else { - bits[ch][sb] = bitneed[ch][sb] - bitslice; - if (bits[ch][sb] > 16) - bits[ch][sb] = 16; - } - } - - for (sb = 0; bitcount < frame->bitpool && sb < 
frame->subbands; sb++) { - if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { - bits[ch][sb]++; - bitcount++; - } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { - bits[ch][sb] = 2; - bitcount += 2; - } - } - - for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { - if (bits[ch][sb] < 16) { - bits[ch][sb]++; - bitcount++; - } - } - - } - - } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) { - int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; - int ch, sb; - - max_bitneed = 0; - if (frame->allocation == SNR) { - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - bitneed[ch][sb] = frame->scale_factor[ch][sb]; - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } - } else { - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - if (frame->scale_factor[ch][sb] == 0) - bitneed[ch][sb] = -5; - else { - if (frame->subbands == 4) - loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; - else - loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; - if (loudness > 0) - bitneed[ch][sb] = loudness / 2; - else - bitneed[ch][sb] = loudness; - } - if (bitneed[ch][sb] > max_bitneed) - max_bitneed = bitneed[ch][sb]; - } - } - } - - bitcount = 0; - slicecount = 0; - bitslice = max_bitneed + 1; - do { - bitslice--; - bitcount += slicecount; - slicecount = 0; - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) - slicecount++; - else if (bitneed[ch][sb] == bitslice + 1) - slicecount += 2; - } - } - } while (bitcount + slicecount < frame->bitpool); - - if (bitcount + slicecount == frame->bitpool) { - bitcount += slicecount; - bitslice--; - } - - for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - if (bitneed[ch][sb] < bitslice + 2) { - bits[ch][sb] = 0; - } else { - bits[ch][sb] = 
bitneed[ch][sb] - bitslice; - if (bits[ch][sb] > 16) - bits[ch][sb] = 16; - } - } - } - - ch = 0; - sb = 0; - while (bitcount < frame->bitpool) { - if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { - bits[ch][sb]++; - bitcount++; - } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { - bits[ch][sb] = 2; - bitcount += 2; - } - if (ch == 1) { - ch = 0; - sb++; - if (sb >= frame->subbands) break; - } else - ch = 1; - } - - ch = 0; - sb = 0; - while (bitcount < frame->bitpool) { - if (bits[ch][sb] < 16) { - bits[ch][sb]++; - bitcount++; - } - if (ch == 1) { - ch = 0; - sb++; - if (sb >= frame->subbands) break; - } else - ch = 1; - } - - } - -} - -/* - * Unpacks a SBC frame at the beginning of the stream in data, - * which has at most len bytes into frame. - * Returns the length in bytes of the packed frame, or a negative - * value on error. The error codes are: - * - * -1 Data stream too short - * -2 Sync byte incorrect - * -3 CRC8 incorrect - * -4 Bitpool value out of bounds - */ -static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame, - size_t len) -{ - unsigned int consumed; - /* Will copy the parts of the header that are relevant to crc - * calculation here */ - uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int crc_pos = 0; - int32_t temp; - - int audio_sample; - int ch, sb, blk, bit; /* channel, subband, block and bit standard - counters */ - int bits[2][8]; /* bits distribution */ - uint32_t levels[2][8]; /* levels derived from that */ - - if (len < 4) - return -1; - - if (data[0] != SBC_SYNCWORD) - return -2; - - frame->frequency = (data[1] >> 6) & 0x03; - - frame->block_mode = (data[1] >> 4) & 0x03; - switch (frame->block_mode) { - case SBC_BLK_4: - frame->blocks = 4; - break; - case SBC_BLK_8: - frame->blocks = 8; - break; - case SBC_BLK_12: - frame->blocks = 12; - break; - case SBC_BLK_16: - frame->blocks = 16; - break; - } - - frame->mode = (data[1] >> 2) & 0x03; - switch (frame->mode) { - 
case MONO: - frame->channels = 1; - break; - case DUAL_CHANNEL: /* fall-through */ - case STEREO: - case JOINT_STEREO: - frame->channels = 2; - break; - } - - frame->allocation = (data[1] >> 1) & 0x01; - - frame->subband_mode = (data[1] & 0x01); - frame->subbands = frame->subband_mode ? 8 : 4; - - frame->bitpool = data[2]; - - if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && - frame->bitpool > 16 * frame->subbands) - return -4; - - if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && - frame->bitpool > 32 * frame->subbands) - return -4; - - /* data[3] is crc, we're checking it later */ - - consumed = 32; - - crc_header[0] = data[1]; - crc_header[1] = data[2]; - crc_pos = 16; - - if (frame->mode == JOINT_STEREO) { - if (len * 8 < consumed + frame->subbands) - return -1; - - frame->joint = 0x00; - for (sb = 0; sb < frame->subbands - 1; sb++) - frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb; - if (frame->subbands == 4) - crc_header[crc_pos / 8] = data[4] & 0xf0; - else - crc_header[crc_pos / 8] = data[4]; - - consumed += frame->subbands; - crc_pos += frame->subbands; - } - - if (len * 8 < consumed + (4 * frame->subbands * frame->channels)) - return -1; - - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - /* FIXME assert(consumed % 4 == 0); */ - frame->scale_factor[ch][sb] = - (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F; - crc_header[crc_pos >> 3] |= - frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7)); - - consumed += 4; - crc_pos += 4; - } - } - - if (data[3] != sbc_crc8(crc_header, crc_pos)) - return -3; - - sbc_calculate_bits(frame, bits); - - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) - levels[ch][sb] = (1 << bits[ch][sb]) - 1; - } - - for (blk = 0; blk < frame->blocks; blk++) { - for (ch = 0; ch < frame->channels; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { - if (levels[ch][sb] > 0) { - audio_sample = 0; - for (bit = 0; bit < 
bits[ch][sb]; bit++) { - if (consumed > len * 8) - return -1; - - if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01) - audio_sample |= 1 << (bits[ch][sb] - bit - 1); - - consumed++; - } - - frame->sb_sample[blk][ch][sb] = - (((audio_sample << 1) | 1) << frame->scale_factor[ch][sb]) / - levels[ch][sb] - (1 << frame->scale_factor[ch][sb]); - } else - frame->sb_sample[blk][ch][sb] = 0; - } - } - } - - if (frame->mode == JOINT_STEREO) { - for (blk = 0; blk < frame->blocks; blk++) { - for (sb = 0; sb < frame->subbands; sb++) { - if (frame->joint & (0x01 << sb)) { - temp = frame->sb_sample[blk][0][sb] + - frame->sb_sample[blk][1][sb]; - frame->sb_sample[blk][1][sb] = - frame->sb_sample[blk][0][sb] - - frame->sb_sample[blk][1][sb]; - frame->sb_sample[blk][0][sb] = temp; - } - } - } - } - - if ((consumed & 0x7) != 0) - consumed += 8 - (consumed & 0x7); - - return consumed >> 3; -} - -static void sbc_decoder_init(struct sbc_decoder_state *state, - const struct sbc_frame *frame) -{ - int i, ch; - - memset(state->V, 0, sizeof(state->V)); - state->subbands = frame->subbands; - - for (ch = 0; ch < 2; ch++) - for (i = 0; i < frame->subbands * 2; i++) - state->offset[ch][i] = (10 * i + 10); -} - -static inline void sbc_synthesize_four(struct sbc_decoder_state *state, - struct sbc_frame *frame, int ch, int blk) -{ - int i, k, idx; - int32_t *v = state->V[ch]; - int *offset = state->offset[ch]; - - for (i = 0; i < 8; i++) { - /* Shifting */ - offset[i]--; - if (offset[i] < 0) { - offset[i] = 79; - memcpy(v + 80, v, 9 * sizeof(*v)); - } - - /* Distribute the new matrix value to the shifted position */ - v[offset[i]] = SCALE4_STAGED1( - MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0], - MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1], - MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2], - MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3]))))); - } - - /* Compute the samples */ - for (idx = 0, i = 0; i < 4; i++, idx += 5) { - k = (i + 4) & 0xf; - - /* Store in 
output, Q0 */ - frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED1( - MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], - MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], - MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], - MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1], - MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2], - MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2], - MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], - MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], - MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], - MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))); - } -} - -static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, - struct sbc_frame *frame, int ch, int blk) -{ - int i, j, k, idx; - int *offset = state->offset[ch]; - - for (i = 0; i < 16; i++) { - /* Shifting */ - offset[i]--; - if (offset[i] < 0) { - offset[i] = 159; - for (j = 0; j < 9; j++) - state->V[ch][j + 160] = state->V[ch][j]; - } - - /* Distribute the new matrix value to the shifted position */ - state->V[ch][offset[i]] = SCALE8_STAGED1( - MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0], - MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1], - MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2], - MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3], - MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4], - MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5], - MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6], - MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7]))))))))); - } - - /* Compute the samples */ - for (idx = 0, i = 0; i < 8; i++, idx += 5) { - k = (i + 8) & 0xf; - - /* Store in output */ - frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED1( // Q0 - MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], - MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], - MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], - MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1], - MULA(state->V[ch][offset[i] + 4], 
sbc_proto_8_80m0[idx + 2], - MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2], - MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], - MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], - MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], - MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))); - } -} - -static int sbc_synthesize_audio(struct sbc_decoder_state *state, - struct sbc_frame *frame) -{ - int ch, blk; - - switch (frame->subbands) { - case 4: - for (ch = 0; ch < frame->channels; ch++) { - for (blk = 0; blk < frame->blocks; blk++) - sbc_synthesize_four(state, frame, ch, blk); - } - return frame->blocks * 4; - - case 8: - for (ch = 0; ch < frame->channels; ch++) { - for (blk = 0; blk < frame->blocks; blk++) - sbc_synthesize_eight(state, frame, ch, blk); - } - return frame->blocks * 8; - - default: - return -EIO; - } -} - -static int sbc_analyze_audio(struct sbc_encoder_state *state, - struct sbc_frame *frame) -{ - int ch, blk; - int16_t *x; - - switch (frame->subbands) { - case 4: - for (ch = 0; ch < frame->channels; ch++) { - x = &state->X[ch][state->position - 16 + - frame->blocks * 4]; - for (blk = 0; blk < frame->blocks; blk += 4) { - state->sbc_analyze_4b_4s( - x, - frame->sb_sample_f[blk][ch], - frame->sb_sample_f[blk + 1][ch] - - frame->sb_sample_f[blk][ch]); - x -= 16; - } - } - return frame->blocks * 4; - - case 8: - for (ch = 0; ch < frame->channels; ch++) { - x = &state->X[ch][state->position - 32 + - frame->blocks * 8]; - for (blk = 0; blk < frame->blocks; blk += 4) { - state->sbc_analyze_4b_8s( - x, - frame->sb_sample_f[blk][ch], - frame->sb_sample_f[blk + 1][ch] - - frame->sb_sample_f[blk][ch]); - x -= 32; - } - } - return frame->blocks * 8; - - default: - return -EIO; - } -} - -/* Supplementary bitstream writing macros for 'sbc_pack_frame' */ - -#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n) \ - do { \ - bits_cache = (v) | (bits_cache << (n)); \ - bits_count += (n); \ - if 
(bits_count >= 16) { \ - bits_count -= 8; \ - *data_ptr++ = (uint8_t) \ - (bits_cache >> bits_count); \ - bits_count -= 8; \ - *data_ptr++ = (uint8_t) \ - (bits_cache >> bits_count); \ - } \ - } while (0) - -#define FLUSH_BITS(data_ptr, bits_cache, bits_count) \ - do { \ - while (bits_count >= 8) { \ - bits_count -= 8; \ - *data_ptr++ = (uint8_t) \ - (bits_cache >> bits_count); \ - } \ - if (bits_count > 0) \ - *data_ptr++ = (uint8_t) \ - (bits_cache << (8 - bits_count)); \ - } while (0) - -/* - * Packs the SBC frame from frame into the memory at data. At most len - * bytes will be used, should more memory be needed an appropriate - * error code will be returned. Returns the length of the packed frame - * on success or a negative value on error. - * - * The error codes are: - * -1 Not enough memory reserved - * -2 Unsupported sampling rate - * -3 Unsupported number of blocks - * -4 Unsupported number of subbands - * -5 Bitpool value out of bounds - * -99 not implemented - */ - -static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( - uint8_t *data, struct sbc_frame *frame, size_t len, - int frame_subbands, int frame_channels) -{ - /* Bitstream writer starts from the fourth byte */ - uint8_t *data_ptr = data + 4; - uint32_t bits_cache = 0; - uint32_t bits_count = 0; - - /* Will copy the header parts for CRC-8 calculation here */ - uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int crc_pos = 0; - - uint32_t audio_sample; - - int ch, sb, blk; /* channel, subband, block and bit counters */ - int bits[2][8]; /* bits distribution */ - uint32_t levels[2][8]; /* levels are derived from that */ - uint32_t sb_sample_delta[2][8]; - - data[0] = SBC_SYNCWORD; - - data[1] = (frame->frequency & 0x03) << 6; - - data[1] |= (frame->block_mode & 0x03) << 4; - - data[1] |= (frame->mode & 0x03) << 2; - - data[1] |= (frame->allocation & 0x01) << 1; - - switch (frame_subbands) { - case 4: - /* Nothing to do */ - break; - case 8: - data[1] |= 0x01; - break; - default: - 
return -4; - break; - } - - data[2] = frame->bitpool; - - if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && - frame->bitpool > frame_subbands << 4) - return -5; - - if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && - frame->bitpool > frame_subbands << 5) - return -5; - - /* Can't fill in crc yet */ - - crc_header[0] = data[1]; - crc_header[1] = data[2]; - crc_pos = 16; - - if (frame->mode == JOINT_STEREO) { - /* like frame->sb_sample but joint stereo */ - int32_t sb_sample_j[16][2]; - /* scalefactor and scale_factor in joint case */ - uint32_t scalefactor_j[2]; - uint8_t scale_factor_j[2]; - - uint8_t joint = 0; - frame->joint = 0; - - for (sb = 0; sb < frame_subbands - 1; sb++) { - scale_factor_j[0] = 0; - scalefactor_j[0] = 2 << SCALE_OUT_BITS; - scale_factor_j[1] = 0; - scalefactor_j[1] = 2 << SCALE_OUT_BITS; - - for (blk = 0; blk < frame->blocks; blk++) { - uint32_t tmp; - /* Calculate joint stereo signal */ - sb_sample_j[blk][0] = - ASR(frame->sb_sample_f[blk][0][sb], 1) + - ASR(frame->sb_sample_f[blk][1][sb], 1); - sb_sample_j[blk][1] = - ASR(frame->sb_sample_f[blk][0][sb], 1) - - ASR(frame->sb_sample_f[blk][1][sb], 1); - - /* calculate scale_factor_j and scalefactor_j for joint case */ - tmp = fabs(sb_sample_j[blk][0]); - while (scalefactor_j[0] < tmp) { - scale_factor_j[0]++; - scalefactor_j[0] *= 2; - } - tmp = fabs(sb_sample_j[blk][1]); - while (scalefactor_j[1] < tmp) { - scale_factor_j[1]++; - scalefactor_j[1] *= 2; - } - } - - /* decide whether to join this subband */ - if ((frame->scale_factor[0][sb] + - frame->scale_factor[1][sb]) > - (scale_factor_j[0] + - scale_factor_j[1])) { - /* use joint stereo for this subband */ - joint |= 1 << (frame_subbands - 1 - sb); - frame->joint |= 1 << sb; - frame->scale_factor[0][sb] = scale_factor_j[0]; - frame->scale_factor[1][sb] = scale_factor_j[1]; - for (blk = 0; blk < frame->blocks; blk++) { - frame->sb_sample_f[blk][0][sb] = - sb_sample_j[blk][0]; - frame->sb_sample_f[blk][1][sb] = - 
sb_sample_j[blk][1]; - } - } - } - - PUT_BITS(data_ptr, bits_cache, bits_count, - joint, frame_subbands); - crc_header[crc_pos >> 3] = joint; - crc_pos += frame_subbands; - } - - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - PUT_BITS(data_ptr, bits_cache, bits_count, - frame->scale_factor[ch][sb] & 0x0F, 4); - crc_header[crc_pos >> 3] <<= 4; - crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F; - crc_pos += 4; - } - } - - /* align the last crc byte */ - if (crc_pos % 8) - crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8); - - data[3] = sbc_crc8(crc_header, crc_pos); - - sbc_calculate_bits(frame, bits); - - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - levels[ch][sb] = ((1 << bits[ch][sb]) - 1) << - (32 - (frame->scale_factor[ch][sb] + - SCALE_OUT_BITS + 2)); - sb_sample_delta[ch][sb] = (uint32_t) 1 << - (frame->scale_factor[ch][sb] + - SCALE_OUT_BITS + 1); - } - } - - for (blk = 0; blk < frame->blocks; blk++) { - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - - if (bits[ch][sb] == 0) - continue; - - audio_sample = ((uint64_t) levels[ch][sb] * - (sb_sample_delta[ch][sb] + - frame->sb_sample_f[blk][ch][sb])) >> 32; - - PUT_BITS(data_ptr, bits_cache, bits_count, - audio_sample, bits[ch][sb]); - } - } - } - - FLUSH_BITS(data_ptr, bits_cache, bits_count); - - return data_ptr - data; -} - -static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) -{ - if (frame->subbands == 4) { - if (frame->channels == 1) - return sbc_pack_frame_internal(data, frame, len, 4, 1); - else - return sbc_pack_frame_internal(data, frame, len, 4, 2); - } else { - if (frame->channels == 1) - return sbc_pack_frame_internal(data, frame, len, 8, 1); - else - return sbc_pack_frame_internal(data, frame, len, 8, 2); - } -} - -static void sbc_encoder_init(struct sbc_encoder_state *state, - const struct sbc_frame *frame) -{ - memset(&state->X, 0, 
sizeof(state->X)); - state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9; - - sbc_init_primitives(state); -} - -struct sbc_priv { - int init; - struct SBC_ALIGNED sbc_frame frame; - struct SBC_ALIGNED sbc_decoder_state dec_state; - struct SBC_ALIGNED sbc_encoder_state enc_state; -}; - -static void sbc_set_defaults(sbc_t *sbc, unsigned long flags) -{ - sbc->frequency = SBC_FREQ_44100; - sbc->mode = SBC_MODE_STEREO; - sbc->subbands = SBC_SB_8; - sbc->blocks = SBC_BLK_16; - sbc->bitpool = 32; -#if __BYTE_ORDER == __LITTLE_ENDIAN - sbc->endian = SBC_LE; -#elif __BYTE_ORDER == __BIG_ENDIAN - sbc->endian = SBC_BE; -#else -#error "Unknown byte order" -#endif -} - -int sbc_init(sbc_t *sbc, unsigned long flags) -{ - if (!sbc) - return -EIO; - - memset(sbc, 0, sizeof(sbc_t)); - - sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK); - if (!sbc->priv_alloc_base) - return -ENOMEM; - - sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base + - SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK)); - - memset(sbc->priv, 0, sizeof(struct sbc_priv)); - - sbc_set_defaults(sbc, flags); - - return 0; -} - -ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len) -{ - return sbc_decode(sbc, input, input_len, NULL, 0, NULL); -} - -ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written) -{ - struct sbc_priv *priv; - char *ptr; - int i, ch, framelen, samples; - - if (!sbc || !input) - return -EIO; - - priv = sbc->priv; - - framelen = sbc_unpack_frame(input, &priv->frame, input_len); - - if (!priv->init) { - sbc_decoder_init(&priv->dec_state, &priv->frame); - priv->init = 1; - - sbc->frequency = priv->frame.frequency; - sbc->mode = priv->frame.mode; - sbc->subbands = priv->frame.subband_mode; - sbc->blocks = priv->frame.block_mode; - sbc->allocation = priv->frame.allocation; - sbc->bitpool = priv->frame.bitpool; - - priv->frame.codesize = sbc_get_codesize(sbc); - priv->frame.length = framelen; - 
} else if (priv->frame.bitpool != sbc->bitpool) - sbc->bitpool = priv->frame.bitpool; - - if (!output) - return framelen; - - if (written) - *written = 0; - - if (framelen <= 0) - return framelen; - - samples = sbc_synthesize_audio(&priv->dec_state, &priv->frame); - - ptr = output; - - if (output_len < (size_t) (samples * priv->frame.channels * 2)) - samples = output_len / (priv->frame.channels * 2); - - for (i = 0; i < samples; i++) { - for (ch = 0; ch < priv->frame.channels; ch++) { - int16_t s; - s = priv->frame.pcm_sample[ch][i]; - - if (sbc->endian == SBC_BE) { - *ptr++ = (s & 0xff00) >> 8; - *ptr++ = (s & 0x00ff); - } else { - *ptr++ = (s & 0x00ff); - *ptr++ = (s & 0xff00) >> 8; - } - } - } - - if (written) - *written = samples * priv->frame.channels * 2; - - return framelen; -} - -ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written) -{ - struct sbc_priv *priv; - int framelen, samples; - int (*sbc_enc_process_input)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - - if (!sbc || !input) - return -EIO; - - priv = sbc->priv; - - if (written) - *written = 0; - - if (!priv->init) { - priv->frame.frequency = sbc->frequency; - priv->frame.mode = sbc->mode; - priv->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; - priv->frame.allocation = sbc->allocation; - priv->frame.subband_mode = sbc->subbands; - priv->frame.subbands = sbc->subbands ? 
8 : 4; - priv->frame.block_mode = sbc->blocks; - priv->frame.blocks = 4 + (sbc->blocks * 4); - priv->frame.bitpool = sbc->bitpool; - priv->frame.codesize = sbc_get_codesize(sbc); - priv->frame.length = sbc_get_frame_length(sbc); - - sbc_encoder_init(&priv->enc_state, &priv->frame); - priv->init = 1; - } else if (priv->frame.bitpool != sbc->bitpool) { - priv->frame.length = sbc_get_frame_length(sbc); - priv->frame.bitpool = sbc->bitpool; - } - - /* input must be large enough to encode a complete frame */ - if (input_len < priv->frame.codesize) - return 0; - - /* output must be large enough to receive the encoded frame */ - if (!output || output_len < priv->frame.length) - return -ENOSPC; - - /* Select the needed input data processing function and call it */ - if (priv->frame.subbands == 8) { - if (sbc->endian == SBC_BE) - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_8s_be; - else - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_8s_le; - } else { - if (sbc->endian == SBC_BE) - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_4s_be; - else - sbc_enc_process_input = - priv->enc_state.sbc_enc_process_input_4s_le; - } - - priv->enc_state.position = sbc_enc_process_input( - priv->enc_state.position, (const uint8_t *) input, - priv->enc_state.X, priv->frame.subbands * priv->frame.blocks, - priv->frame.channels); - - samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); - - priv->enc_state.sbc_calc_scalefactors( - priv->frame.sb_sample_f, priv->frame.scale_factor, - priv->frame.blocks, priv->frame.channels, priv->frame.subbands); - - framelen = sbc_pack_frame(output, &priv->frame, output_len); - - if (written) - *written = framelen; - - return samples * priv->frame.channels * 2; -} - -void sbc_finish(sbc_t *sbc) -{ - if (!sbc) - return; - - if (sbc->priv_alloc_base) - free(sbc->priv_alloc_base); - - memset(sbc, 0, sizeof(sbc_t)); -} - -size_t sbc_get_frame_length(sbc_t *sbc) -{ - size_t ret; - uint8_t subbands, 
channels, blocks, joint, bitpool; - struct sbc_priv *priv; - - priv = sbc->priv; - if (priv->init && priv->frame.bitpool == sbc->bitpool) - return priv->frame.length; - - subbands = sbc->subbands ? 8 : 4; - blocks = 4 + (sbc->blocks * 4); - channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; - joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0; - bitpool = sbc->bitpool; - - ret = 4 + (4 * subbands * channels) / 8; - /* This term is not always evenly divide so we round it up */ - if (channels == 1) - ret += ((blocks * channels * bitpool) + 7) / 8; - else - ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8; - - return ret; -} - -unsigned sbc_get_frame_duration(sbc_t *sbc) -{ - uint8_t subbands, blocks; - uint16_t frequency; - struct sbc_priv *priv; - - priv = sbc->priv; - if (!priv->init) { - subbands = sbc->subbands ? 8 : 4; - blocks = 4 + (sbc->blocks * 4); - } else { - subbands = priv->frame.subbands; - blocks = priv->frame.blocks; - } - - switch (sbc->frequency) { - case SBC_FREQ_16000: - frequency = 16000; - break; - - case SBC_FREQ_32000: - frequency = 32000; - break; - - case SBC_FREQ_44100: - frequency = 44100; - break; - - case SBC_FREQ_48000: - frequency = 48000; - break; - default: - return 0; - } - - return (1000000 * blocks * subbands) / frequency; -} - -size_t sbc_get_codesize(sbc_t *sbc) -{ - uint16_t subbands, channels, blocks; - struct sbc_priv *priv; - - priv = sbc->priv; - if (!priv->init) { - subbands = sbc->subbands ? 8 : 4; - blocks = 4 + (sbc->blocks * 4); - channels = sbc->mode == SBC_MODE_MONO ? 
1 : 2; - } else { - subbands = priv->frame.subbands; - blocks = priv->frame.blocks; - channels = priv->frame.channels; - } - - return subbands * blocks * channels * 2; -} - -const char *sbc_get_implementation_info(sbc_t *sbc) -{ - struct sbc_priv *priv; - - if (!sbc) - return NULL; - - priv = sbc->priv; - if (!priv) - return NULL; - - return priv->enc_state.implementation_info; -} - -int sbc_reinit(sbc_t *sbc, unsigned long flags) -{ - struct sbc_priv *priv; - - if (!sbc || !sbc->priv) - return -EIO; - - priv = sbc->priv; - - if (priv->init == 1) - memset(sbc->priv, 0, sizeof(struct sbc_priv)); - - sbc_set_defaults(sbc, flags); - - return 0; -} diff --git a/src/modules/bluetooth/sbc.h b/src/modules/bluetooth/sbc.h deleted file mode 100644 index 65435884..00000000 --- a/src/modules/bluetooth/sbc.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_H -#define __SBC_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -/* sampling frequency */ -#define SBC_FREQ_16000 0x00 -#define SBC_FREQ_32000 0x01 -#define SBC_FREQ_44100 0x02 -#define SBC_FREQ_48000 0x03 - -/* blocks */ -#define SBC_BLK_4 0x00 -#define SBC_BLK_8 0x01 -#define SBC_BLK_12 0x02 -#define SBC_BLK_16 0x03 - -/* channel mode */ -#define SBC_MODE_MONO 0x00 -#define SBC_MODE_DUAL_CHANNEL 0x01 -#define SBC_MODE_STEREO 0x02 -#define SBC_MODE_JOINT_STEREO 0x03 - -/* allocation method */ -#define SBC_AM_LOUDNESS 0x00 -#define SBC_AM_SNR 0x01 - -/* subbands */ -#define SBC_SB_4 0x00 -#define SBC_SB_8 0x01 - -/* Data endianess */ -#define SBC_LE 0x00 -#define SBC_BE 0x01 - -struct sbc_struct { - unsigned long flags; - - uint8_t frequency; - uint8_t blocks; - uint8_t subbands; - uint8_t mode; - uint8_t allocation; - uint8_t bitpool; - uint8_t endian; - - void *priv; - void *priv_alloc_base; -}; - -typedef struct sbc_struct sbc_t; - -int sbc_init(sbc_t *sbc, unsigned long flags); -int sbc_reinit(sbc_t *sbc, unsigned long flags); - -ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len); - -ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written); - -/* Encodes ONE input block into ONE output block */ -ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written); - -/* Returns the output block size in bytes */ -size_t sbc_get_frame_length(sbc_t *sbc); - -/* Returns the time one input/output block takes to play in msec*/ -unsigned sbc_get_frame_duration(sbc_t *sbc); - -/* Returns the input block size in bytes */ -size_t sbc_get_codesize(sbc_t *sbc); - -const char 
*sbc_get_implementation_info(sbc_t *sbc); -void sbc_finish(sbc_t *sbc); - -#ifdef __cplusplus -} -#endif - -#endif /* __SBC_H */ diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c new file mode 100644 index 00000000..5157c70f --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc.c @@ -0,0 +1,1252 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2008 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* todo items: + + use a log2 table for byte integer scale factors calculation (sum log2 results + for high and low bytes) fill bitpool by 16 bits instead of one at a time in + bits allocation/bitpool generation port to the dsp + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc.h" +#include "sbc_primitives.h" + +#define SBC_SYNCWORD 0x9C + +/* This structure contains an unpacked SBC frame. 
+ Yes, there is probably quite some unused space herein */ +struct sbc_frame { + uint8_t frequency; + uint8_t block_mode; + uint8_t blocks; + enum { + MONO = SBC_MODE_MONO, + DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL, + STEREO = SBC_MODE_STEREO, + JOINT_STEREO = SBC_MODE_JOINT_STEREO + } mode; + uint8_t channels; + enum { + LOUDNESS = SBC_AM_LOUDNESS, + SNR = SBC_AM_SNR + } allocation; + uint8_t subband_mode; + uint8_t subbands; + uint8_t bitpool; + uint16_t codesize; + uint8_t length; + + /* bit number x set means joint stereo has been used in subband x */ + uint8_t joint; + + /* only the lower 4 bits of every element are to be used */ + uint32_t scale_factor[2][8]; + + /* raw integer subband samples in the frame */ + int32_t SBC_ALIGNED sb_sample_f[16][2][8]; + + /* modified subband samples */ + int32_t SBC_ALIGNED sb_sample[16][2][8]; + + /* original pcm audio samples */ + int16_t SBC_ALIGNED pcm_sample[2][16*8]; +}; + +struct sbc_decoder_state { + int subbands; + int32_t V[2][170]; + int offset[2][16]; +}; + +/* + * Calculates the CRC-8 of the first len bits in data + */ +static const uint8_t crc_table[256] = { + 0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53, + 0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB, + 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E, + 0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76, + 0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4, + 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C, + 0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19, + 0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1, + 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40, + 0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8, + 0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D, + 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65, + 0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7, + 0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F, + 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A, + 0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2, + 0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75, + 0xCE, 0xD3, 0xF4, 
0xE9, 0xBA, 0xA7, 0x80, 0x9D, + 0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8, + 0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50, + 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2, + 0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A, + 0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F, + 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7, + 0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66, + 0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E, + 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB, + 0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43, + 0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1, + 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09, + 0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C, + 0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4 +}; + +static uint8_t sbc_crc8(const uint8_t *data, size_t len) +{ + uint8_t crc = 0x0f; + size_t i; + uint8_t octet; + + for (i = 0; i < len / 8; i++) + crc = crc_table[crc ^ data[i]]; + + octet = data[i]; + for (i = 0; i < len % 8; i++) { + char bit = ((octet ^ crc) & 0x80) >> 7; + + crc = ((crc & 0x7f) << 1) ^ (bit ? 
0x1d : 0); + + octet = octet << 1; + } + + return crc; +} + +/* + * Code straight from the spec to calculate the bits array + * Takes a pointer to the frame in question, a pointer to the bits array and + * the sampling frequency (as 2 bit integer) + */ +static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +{ + uint8_t sf = frame->frequency; + + if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) { + int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; + int ch, sb; + + for (ch = 0; ch < frame->channels; ch++) { + max_bitneed = 0; + if (frame->allocation == SNR) { + for (sb = 0; sb < frame->subbands; sb++) { + bitneed[ch][sb] = frame->scale_factor[ch][sb]; + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } else { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->scale_factor[ch][sb] == 0) + bitneed[ch][sb] = -5; + else { + if (frame->subbands == 4) + loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; + else + loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; + if (loudness > 0) + bitneed[ch][sb] = loudness / 2; + else + bitneed[ch][sb] = loudness; + } + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + + bitcount = 0; + slicecount = 0; + bitslice = max_bitneed + 1; + do { + bitslice--; + bitcount += slicecount; + slicecount = 0; + for (sb = 0; sb < frame->subbands; sb++) { + if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) + slicecount++; + else if (bitneed[ch][sb] == bitslice + 1) + slicecount += 2; + } + } while (bitcount + slicecount < frame->bitpool); + + if (bitcount + slicecount == frame->bitpool) { + bitcount += slicecount; + bitslice--; + } + + for (sb = 0; sb < frame->subbands; sb++) { + if (bitneed[ch][sb] < bitslice + 2) + bits[ch][sb] = 0; + else { + bits[ch][sb] = bitneed[ch][sb] - bitslice; + if (bits[ch][sb] > 16) + bits[ch][sb] = 16; + } + } + + for (sb = 0; bitcount < frame->bitpool && sb < 
frame->subbands; sb++) { + if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { + bits[ch][sb]++; + bitcount++; + } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { + bits[ch][sb] = 2; + bitcount += 2; + } + } + + for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + if (bits[ch][sb] < 16) { + bits[ch][sb]++; + bitcount++; + } + } + + } + + } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) { + int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; + int ch, sb; + + max_bitneed = 0; + if (frame->allocation == SNR) { + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + bitneed[ch][sb] = frame->scale_factor[ch][sb]; + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + } else { + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->scale_factor[ch][sb] == 0) + bitneed[ch][sb] = -5; + else { + if (frame->subbands == 4) + loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; + else + loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; + if (loudness > 0) + bitneed[ch][sb] = loudness / 2; + else + bitneed[ch][sb] = loudness; + } + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + } + + bitcount = 0; + slicecount = 0; + bitslice = max_bitneed + 1; + do { + bitslice--; + bitcount += slicecount; + slicecount = 0; + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) + slicecount++; + else if (bitneed[ch][sb] == bitslice + 1) + slicecount += 2; + } + } + } while (bitcount + slicecount < frame->bitpool); + + if (bitcount + slicecount == frame->bitpool) { + bitcount += slicecount; + bitslice--; + } + + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (bitneed[ch][sb] < bitslice + 2) { + bits[ch][sb] = 0; + } else { + bits[ch][sb] = 
bitneed[ch][sb] - bitslice; + if (bits[ch][sb] > 16) + bits[ch][sb] = 16; + } + } + } + + ch = 0; + sb = 0; + while (bitcount < frame->bitpool) { + if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { + bits[ch][sb]++; + bitcount++; + } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { + bits[ch][sb] = 2; + bitcount += 2; + } + if (ch == 1) { + ch = 0; + sb++; + if (sb >= frame->subbands) break; + } else + ch = 1; + } + + ch = 0; + sb = 0; + while (bitcount < frame->bitpool) { + if (bits[ch][sb] < 16) { + bits[ch][sb]++; + bitcount++; + } + if (ch == 1) { + ch = 0; + sb++; + if (sb >= frame->subbands) break; + } else + ch = 1; + } + + } + +} + +/* + * Unpacks a SBC frame at the beginning of the stream in data, + * which has at most len bytes into frame. + * Returns the length in bytes of the packed frame, or a negative + * value on error. The error codes are: + * + * -1 Data stream too short + * -2 Sync byte incorrect + * -3 CRC8 incorrect + * -4 Bitpool value out of bounds + */ +static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame, + size_t len) +{ + unsigned int consumed; + /* Will copy the parts of the header that are relevant to crc + * calculation here */ + uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int crc_pos = 0; + int32_t temp; + + int audio_sample; + int ch, sb, blk, bit; /* channel, subband, block and bit standard + counters */ + int bits[2][8]; /* bits distribution */ + uint32_t levels[2][8]; /* levels derived from that */ + + if (len < 4) + return -1; + + if (data[0] != SBC_SYNCWORD) + return -2; + + frame->frequency = (data[1] >> 6) & 0x03; + + frame->block_mode = (data[1] >> 4) & 0x03; + switch (frame->block_mode) { + case SBC_BLK_4: + frame->blocks = 4; + break; + case SBC_BLK_8: + frame->blocks = 8; + break; + case SBC_BLK_12: + frame->blocks = 12; + break; + case SBC_BLK_16: + frame->blocks = 16; + break; + } + + frame->mode = (data[1] >> 2) & 0x03; + switch (frame->mode) { + 
case MONO: + frame->channels = 1; + break; + case DUAL_CHANNEL: /* fall-through */ + case STEREO: + case JOINT_STEREO: + frame->channels = 2; + break; + } + + frame->allocation = (data[1] >> 1) & 0x01; + + frame->subband_mode = (data[1] & 0x01); + frame->subbands = frame->subband_mode ? 8 : 4; + + frame->bitpool = data[2]; + + if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && + frame->bitpool > 16 * frame->subbands) + return -4; + + if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && + frame->bitpool > 32 * frame->subbands) + return -4; + + /* data[3] is crc, we're checking it later */ + + consumed = 32; + + crc_header[0] = data[1]; + crc_header[1] = data[2]; + crc_pos = 16; + + if (frame->mode == JOINT_STEREO) { + if (len * 8 < consumed + frame->subbands) + return -1; + + frame->joint = 0x00; + for (sb = 0; sb < frame->subbands - 1; sb++) + frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb; + if (frame->subbands == 4) + crc_header[crc_pos / 8] = data[4] & 0xf0; + else + crc_header[crc_pos / 8] = data[4]; + + consumed += frame->subbands; + crc_pos += frame->subbands; + } + + if (len * 8 < consumed + (4 * frame->subbands * frame->channels)) + return -1; + + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + /* FIXME assert(consumed % 4 == 0); */ + frame->scale_factor[ch][sb] = + (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F; + crc_header[crc_pos >> 3] |= + frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7)); + + consumed += 4; + crc_pos += 4; + } + } + + if (data[3] != sbc_crc8(crc_header, crc_pos)) + return -3; + + sbc_calculate_bits(frame, bits); + + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) + levels[ch][sb] = (1 << bits[ch][sb]) - 1; + } + + for (blk = 0; blk < frame->blocks; blk++) { + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (levels[ch][sb] > 0) { + audio_sample = 0; + for (bit = 0; bit < 
bits[ch][sb]; bit++) { + if (consumed > len * 8) + return -1; + + if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01) + audio_sample |= 1 << (bits[ch][sb] - bit - 1); + + consumed++; + } + + frame->sb_sample[blk][ch][sb] = + (((audio_sample << 1) | 1) << frame->scale_factor[ch][sb]) / + levels[ch][sb] - (1 << frame->scale_factor[ch][sb]); + } else + frame->sb_sample[blk][ch][sb] = 0; + } + } + } + + if (frame->mode == JOINT_STEREO) { + for (blk = 0; blk < frame->blocks; blk++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->joint & (0x01 << sb)) { + temp = frame->sb_sample[blk][0][sb] + + frame->sb_sample[blk][1][sb]; + frame->sb_sample[blk][1][sb] = + frame->sb_sample[blk][0][sb] - + frame->sb_sample[blk][1][sb]; + frame->sb_sample[blk][0][sb] = temp; + } + } + } + } + + if ((consumed & 0x7) != 0) + consumed += 8 - (consumed & 0x7); + + return consumed >> 3; +} + +static void sbc_decoder_init(struct sbc_decoder_state *state, + const struct sbc_frame *frame) +{ + int i, ch; + + memset(state->V, 0, sizeof(state->V)); + state->subbands = frame->subbands; + + for (ch = 0; ch < 2; ch++) + for (i = 0; i < frame->subbands * 2; i++) + state->offset[ch][i] = (10 * i + 10); +} + +static inline void sbc_synthesize_four(struct sbc_decoder_state *state, + struct sbc_frame *frame, int ch, int blk) +{ + int i, k, idx; + int32_t *v = state->V[ch]; + int *offset = state->offset[ch]; + + for (i = 0; i < 8; i++) { + /* Shifting */ + offset[i]--; + if (offset[i] < 0) { + offset[i] = 79; + memcpy(v + 80, v, 9 * sizeof(*v)); + } + + /* Distribute the new matrix value to the shifted position */ + v[offset[i]] = SCALE4_STAGED1( + MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2], + MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3]))))); + } + + /* Compute the samples */ + for (idx = 0, i = 0; i < 4; i++, idx += 5) { + k = (i + 4) & 0xf; + + /* Store in 
output, Q0 */ + frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED1( + MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], + MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], + MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], + MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1], + MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2], + MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2], + MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], + MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], + MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], + MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))); + } +} + +static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, + struct sbc_frame *frame, int ch, int blk) +{ + int i, j, k, idx; + int *offset = state->offset[ch]; + + for (i = 0; i < 16; i++) { + /* Shifting */ + offset[i]--; + if (offset[i] < 0) { + offset[i] = 159; + for (j = 0; j < 9; j++) + state->V[ch][j + 160] = state->V[ch][j]; + } + + /* Distribute the new matrix value to the shifted position */ + state->V[ch][offset[i]] = SCALE8_STAGED1( + MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2], + MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3], + MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4], + MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5], + MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6], + MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7]))))))))); + } + + /* Compute the samples */ + for (idx = 0, i = 0; i < 8; i++, idx += 5) { + k = (i + 8) & 0xf; + + /* Store in output */ + frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED1( // Q0 + MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], + MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], + MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], + MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1], + MULA(state->V[ch][offset[i] + 4], 
sbc_proto_8_80m0[idx + 2], + MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2], + MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], + MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], + MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], + MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))); + } +} + +static int sbc_synthesize_audio(struct sbc_decoder_state *state, + struct sbc_frame *frame) +{ + int ch, blk; + + switch (frame->subbands) { + case 4: + for (ch = 0; ch < frame->channels; ch++) { + for (blk = 0; blk < frame->blocks; blk++) + sbc_synthesize_four(state, frame, ch, blk); + } + return frame->blocks * 4; + + case 8: + for (ch = 0; ch < frame->channels; ch++) { + for (blk = 0; blk < frame->blocks; blk++) + sbc_synthesize_eight(state, frame, ch, blk); + } + return frame->blocks * 8; + + default: + return -EIO; + } +} + +static int sbc_analyze_audio(struct sbc_encoder_state *state, + struct sbc_frame *frame) +{ + int ch, blk; + int16_t *x; + + switch (frame->subbands) { + case 4: + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 16 + + frame->blocks * 4]; + for (blk = 0; blk < frame->blocks; blk += 4) { + state->sbc_analyze_4b_4s( + x, + frame->sb_sample_f[blk][ch], + frame->sb_sample_f[blk + 1][ch] - + frame->sb_sample_f[blk][ch]); + x -= 16; + } + } + return frame->blocks * 4; + + case 8: + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 32 + + frame->blocks * 8]; + for (blk = 0; blk < frame->blocks; blk += 4) { + state->sbc_analyze_4b_8s( + x, + frame->sb_sample_f[blk][ch], + frame->sb_sample_f[blk + 1][ch] - + frame->sb_sample_f[blk][ch]); + x -= 32; + } + } + return frame->blocks * 8; + + default: + return -EIO; + } +} + +/* Supplementary bitstream writing macros for 'sbc_pack_frame' */ + +#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n) \ + do { \ + bits_cache = (v) | (bits_cache << (n)); \ + bits_count += (n); \ + if 
(bits_count >= 16) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + } while (0) + +#define FLUSH_BITS(data_ptr, bits_cache, bits_count) \ + do { \ + while (bits_count >= 8) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + if (bits_count > 0) \ + *data_ptr++ = (uint8_t) \ + (bits_cache << (8 - bits_count)); \ + } while (0) + +/* + * Packs the SBC frame from frame into the memory at data. At most len + * bytes will be used, should more memory be needed an appropriate + * error code will be returned. Returns the length of the packed frame + * on success or a negative value on error. + * + * The error codes are: + * -1 Not enough memory reserved + * -2 Unsupported sampling rate + * -3 Unsupported number of blocks + * -4 Unsupported number of subbands + * -5 Bitpool value out of bounds + * -99 not implemented + */ + +static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( + uint8_t *data, struct sbc_frame *frame, size_t len, + int frame_subbands, int frame_channels) +{ + /* Bitstream writer starts from the fourth byte */ + uint8_t *data_ptr = data + 4; + uint32_t bits_cache = 0; + uint32_t bits_count = 0; + + /* Will copy the header parts for CRC-8 calculation here */ + uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int crc_pos = 0; + + uint32_t audio_sample; + + int ch, sb, blk; /* channel, subband, block and bit counters */ + int bits[2][8]; /* bits distribution */ + uint32_t levels[2][8]; /* levels are derived from that */ + uint32_t sb_sample_delta[2][8]; + + data[0] = SBC_SYNCWORD; + + data[1] = (frame->frequency & 0x03) << 6; + + data[1] |= (frame->block_mode & 0x03) << 4; + + data[1] |= (frame->mode & 0x03) << 2; + + data[1] |= (frame->allocation & 0x01) << 1; + + switch (frame_subbands) { + case 4: + /* Nothing to do */ + break; + case 8: + data[1] |= 0x01; + break; + default: + 
return -4; + break; + } + + data[2] = frame->bitpool; + + if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && + frame->bitpool > frame_subbands << 4) + return -5; + + if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && + frame->bitpool > frame_subbands << 5) + return -5; + + /* Can't fill in crc yet */ + + crc_header[0] = data[1]; + crc_header[1] = data[2]; + crc_pos = 16; + + if (frame->mode == JOINT_STEREO) { + /* like frame->sb_sample but joint stereo */ + int32_t sb_sample_j[16][2]; + /* scalefactor and scale_factor in joint case */ + uint32_t scalefactor_j[2]; + uint8_t scale_factor_j[2]; + + uint8_t joint = 0; + frame->joint = 0; + + for (sb = 0; sb < frame_subbands - 1; sb++) { + scale_factor_j[0] = 0; + scalefactor_j[0] = 2 << SCALE_OUT_BITS; + scale_factor_j[1] = 0; + scalefactor_j[1] = 2 << SCALE_OUT_BITS; + + for (blk = 0; blk < frame->blocks; blk++) { + uint32_t tmp; + /* Calculate joint stereo signal */ + sb_sample_j[blk][0] = + ASR(frame->sb_sample_f[blk][0][sb], 1) + + ASR(frame->sb_sample_f[blk][1][sb], 1); + sb_sample_j[blk][1] = + ASR(frame->sb_sample_f[blk][0][sb], 1) - + ASR(frame->sb_sample_f[blk][1][sb], 1); + + /* calculate scale_factor_j and scalefactor_j for joint case */ + tmp = fabs(sb_sample_j[blk][0]); + while (scalefactor_j[0] < tmp) { + scale_factor_j[0]++; + scalefactor_j[0] *= 2; + } + tmp = fabs(sb_sample_j[blk][1]); + while (scalefactor_j[1] < tmp) { + scale_factor_j[1]++; + scalefactor_j[1] *= 2; + } + } + + /* decide whether to join this subband */ + if ((frame->scale_factor[0][sb] + + frame->scale_factor[1][sb]) > + (scale_factor_j[0] + + scale_factor_j[1])) { + /* use joint stereo for this subband */ + joint |= 1 << (frame_subbands - 1 - sb); + frame->joint |= 1 << sb; + frame->scale_factor[0][sb] = scale_factor_j[0]; + frame->scale_factor[1][sb] = scale_factor_j[1]; + for (blk = 0; blk < frame->blocks; blk++) { + frame->sb_sample_f[blk][0][sb] = + sb_sample_j[blk][0]; + frame->sb_sample_f[blk][1][sb] = + 
sb_sample_j[blk][1]; + } + } + } + + PUT_BITS(data_ptr, bits_cache, bits_count, + joint, frame_subbands); + crc_header[crc_pos >> 3] = joint; + crc_pos += frame_subbands; + } + + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + PUT_BITS(data_ptr, bits_cache, bits_count, + frame->scale_factor[ch][sb] & 0x0F, 4); + crc_header[crc_pos >> 3] <<= 4; + crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F; + crc_pos += 4; + } + } + + /* align the last crc byte */ + if (crc_pos % 8) + crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8); + + data[3] = sbc_crc8(crc_header, crc_pos); + + sbc_calculate_bits(frame, bits); + + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + levels[ch][sb] = ((1 << bits[ch][sb]) - 1) << + (32 - (frame->scale_factor[ch][sb] + + SCALE_OUT_BITS + 2)); + sb_sample_delta[ch][sb] = (uint32_t) 1 << + (frame->scale_factor[ch][sb] + + SCALE_OUT_BITS + 1); + } + } + + for (blk = 0; blk < frame->blocks; blk++) { + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + + if (bits[ch][sb] == 0) + continue; + + audio_sample = ((uint64_t) levels[ch][sb] * + (sb_sample_delta[ch][sb] + + frame->sb_sample_f[blk][ch][sb])) >> 32; + + PUT_BITS(data_ptr, bits_cache, bits_count, + audio_sample, bits[ch][sb]); + } + } + } + + FLUSH_BITS(data_ptr, bits_cache, bits_count); + + return data_ptr - data; +} + +static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) +{ + if (frame->subbands == 4) { + if (frame->channels == 1) + return sbc_pack_frame_internal(data, frame, len, 4, 1); + else + return sbc_pack_frame_internal(data, frame, len, 4, 2); + } else { + if (frame->channels == 1) + return sbc_pack_frame_internal(data, frame, len, 8, 1); + else + return sbc_pack_frame_internal(data, frame, len, 8, 2); + } +} + +static void sbc_encoder_init(struct sbc_encoder_state *state, + const struct sbc_frame *frame) +{ + memset(&state->X, 0, 
sizeof(state->X)); + state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9; + + sbc_init_primitives(state); +} + +struct sbc_priv { + int init; + struct SBC_ALIGNED sbc_frame frame; + struct SBC_ALIGNED sbc_decoder_state dec_state; + struct SBC_ALIGNED sbc_encoder_state enc_state; +}; + +static void sbc_set_defaults(sbc_t *sbc, unsigned long flags) +{ + sbc->frequency = SBC_FREQ_44100; + sbc->mode = SBC_MODE_STEREO; + sbc->subbands = SBC_SB_8; + sbc->blocks = SBC_BLK_16; + sbc->bitpool = 32; +#if __BYTE_ORDER == __LITTLE_ENDIAN + sbc->endian = SBC_LE; +#elif __BYTE_ORDER == __BIG_ENDIAN + sbc->endian = SBC_BE; +#else +#error "Unknown byte order" +#endif +} + +int sbc_init(sbc_t *sbc, unsigned long flags) +{ + if (!sbc) + return -EIO; + + memset(sbc, 0, sizeof(sbc_t)); + + sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK); + if (!sbc->priv_alloc_base) + return -ENOMEM; + + sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base + + SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK)); + + memset(sbc->priv, 0, sizeof(struct sbc_priv)); + + sbc_set_defaults(sbc, flags); + + return 0; +} + +ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len) +{ + return sbc_decode(sbc, input, input_len, NULL, 0, NULL); +} + +ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written) +{ + struct sbc_priv *priv; + char *ptr; + int i, ch, framelen, samples; + + if (!sbc || !input) + return -EIO; + + priv = sbc->priv; + + framelen = sbc_unpack_frame(input, &priv->frame, input_len); + + if (!priv->init) { + sbc_decoder_init(&priv->dec_state, &priv->frame); + priv->init = 1; + + sbc->frequency = priv->frame.frequency; + sbc->mode = priv->frame.mode; + sbc->subbands = priv->frame.subband_mode; + sbc->blocks = priv->frame.block_mode; + sbc->allocation = priv->frame.allocation; + sbc->bitpool = priv->frame.bitpool; + + priv->frame.codesize = sbc_get_codesize(sbc); + priv->frame.length = framelen; + 
} else if (priv->frame.bitpool != sbc->bitpool) + sbc->bitpool = priv->frame.bitpool; + + if (!output) + return framelen; + + if (written) + *written = 0; + + if (framelen <= 0) + return framelen; + + samples = sbc_synthesize_audio(&priv->dec_state, &priv->frame); + + ptr = output; + + if (output_len < (size_t) (samples * priv->frame.channels * 2)) + samples = output_len / (priv->frame.channels * 2); + + for (i = 0; i < samples; i++) { + for (ch = 0; ch < priv->frame.channels; ch++) { + int16_t s; + s = priv->frame.pcm_sample[ch][i]; + + if (sbc->endian == SBC_BE) { + *ptr++ = (s & 0xff00) >> 8; + *ptr++ = (s & 0x00ff); + } else { + *ptr++ = (s & 0x00ff); + *ptr++ = (s & 0xff00) >> 8; + } + } + } + + if (written) + *written = samples * priv->frame.channels * 2; + + return framelen; +} + +ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written) +{ + struct sbc_priv *priv; + int framelen, samples; + int (*sbc_enc_process_input)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + + if (!sbc || !input) + return -EIO; + + priv = sbc->priv; + + if (written) + *written = 0; + + if (!priv->init) { + priv->frame.frequency = sbc->frequency; + priv->frame.mode = sbc->mode; + priv->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + priv->frame.allocation = sbc->allocation; + priv->frame.subband_mode = sbc->subbands; + priv->frame.subbands = sbc->subbands ? 
8 : 4; + priv->frame.block_mode = sbc->blocks; + priv->frame.blocks = 4 + (sbc->blocks * 4); + priv->frame.bitpool = sbc->bitpool; + priv->frame.codesize = sbc_get_codesize(sbc); + priv->frame.length = sbc_get_frame_length(sbc); + + sbc_encoder_init(&priv->enc_state, &priv->frame); + priv->init = 1; + } else if (priv->frame.bitpool != sbc->bitpool) { + priv->frame.length = sbc_get_frame_length(sbc); + priv->frame.bitpool = sbc->bitpool; + } + + /* input must be large enough to encode a complete frame */ + if (input_len < priv->frame.codesize) + return 0; + + /* output must be large enough to receive the encoded frame */ + if (!output || output_len < priv->frame.length) + return -ENOSPC; + + /* Select the needed input data processing function and call it */ + if (priv->frame.subbands == 8) { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_le; + } else { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_le; + } + + priv->enc_state.position = sbc_enc_process_input( + priv->enc_state.position, (const uint8_t *) input, + priv->enc_state.X, priv->frame.subbands * priv->frame.blocks, + priv->frame.channels); + + samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); + + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, priv->frame.subbands); + + framelen = sbc_pack_frame(output, &priv->frame, output_len); + + if (written) + *written = framelen; + + return samples * priv->frame.channels * 2; +} + +void sbc_finish(sbc_t *sbc) +{ + if (!sbc) + return; + + if (sbc->priv_alloc_base) + free(sbc->priv_alloc_base); + + memset(sbc, 0, sizeof(sbc_t)); +} + +size_t sbc_get_frame_length(sbc_t *sbc) +{ + size_t ret; + uint8_t subbands,
channels, blocks, joint, bitpool; + struct sbc_priv *priv; + + priv = sbc->priv; + if (priv->init && priv->frame.bitpool == sbc->bitpool) + return priv->frame.length; + + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0; + bitpool = sbc->bitpool; + + ret = 4 + (4 * subbands * channels) / 8; + /* Bitpool is per channel in mono/dual mode; round bits up to bytes */ + if (channels == 1 || sbc->mode == SBC_MODE_DUAL_CHANNEL) + ret += ((blocks * channels * bitpool) + 7) / 8; + else + ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8; + + return ret; +} + +unsigned sbc_get_frame_duration(sbc_t *sbc) +{ + uint8_t subbands, blocks; + uint16_t frequency; + struct sbc_priv *priv; + + priv = sbc->priv; + if (!priv->init) { + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + } else { + subbands = priv->frame.subbands; + blocks = priv->frame.blocks; + } + + switch (sbc->frequency) { + case SBC_FREQ_16000: + frequency = 16000; + break; + + case SBC_FREQ_32000: + frequency = 32000; + break; + + case SBC_FREQ_44100: + frequency = 44100; + break; + + case SBC_FREQ_48000: + frequency = 48000; + break; + default: + return 0; + } + + return (1000000 * blocks * subbands) / frequency; +} + +size_t sbc_get_codesize(sbc_t *sbc) +{ + uint16_t subbands, channels, blocks; + struct sbc_priv *priv; + + priv = sbc->priv; + if (!priv->init) { + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + channels = sbc->mode == SBC_MODE_MONO ?
1 : 2; + } else { + subbands = priv->frame.subbands; + blocks = priv->frame.blocks; + channels = priv->frame.channels; + } + + return subbands * blocks * channels * 2; +} + +const char *sbc_get_implementation_info(sbc_t *sbc) +{ + struct sbc_priv *priv; + + if (!sbc) + return NULL; + + priv = sbc->priv; + if (!priv) + return NULL; + + return priv->enc_state.implementation_info; +} + +int sbc_reinit(sbc_t *sbc, unsigned long flags) +{ + struct sbc_priv *priv; + + if (!sbc || !sbc->priv) + return -EIO; + + priv = sbc->priv; + + if (priv->init == 1) + memset(sbc->priv, 0, sizeof(struct sbc_priv)); + + sbc_set_defaults(sbc, flags); + + return 0; +} diff --git a/src/modules/bluetooth/sbc/sbc.h b/src/modules/bluetooth/sbc/sbc.h new file mode 100644 index 00000000..65435884 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc.h @@ -0,0 +1,111 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_H +#define __SBC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* sampling frequency */ +#define SBC_FREQ_16000 0x00 +#define SBC_FREQ_32000 0x01 +#define SBC_FREQ_44100 0x02 +#define SBC_FREQ_48000 0x03 + +/* blocks */ +#define SBC_BLK_4 0x00 +#define SBC_BLK_8 0x01 +#define SBC_BLK_12 0x02 +#define SBC_BLK_16 0x03 + +/* channel mode */ +#define SBC_MODE_MONO 0x00 +#define SBC_MODE_DUAL_CHANNEL 0x01 +#define SBC_MODE_STEREO 0x02 +#define SBC_MODE_JOINT_STEREO 0x03 + +/* allocation method */ +#define SBC_AM_LOUDNESS 0x00 +#define SBC_AM_SNR 0x01 + +/* subbands */ +#define SBC_SB_4 0x00 +#define SBC_SB_8 0x01 + +/* Data endianness */ +#define SBC_LE 0x00 +#define SBC_BE 0x01 + +struct sbc_struct { + unsigned long flags; + + uint8_t frequency; + uint8_t blocks; + uint8_t subbands; + uint8_t mode; + uint8_t allocation; + uint8_t bitpool; + uint8_t endian; + + void *priv; + void *priv_alloc_base; +}; + +typedef struct sbc_struct sbc_t; + +int sbc_init(sbc_t *sbc, unsigned long flags); +int sbc_reinit(sbc_t *sbc, unsigned long flags); + +ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len); + +ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written); + +/* Encodes ONE input block into ONE output block */ +ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written); + +/* Returns the output block size in bytes */ +size_t sbc_get_frame_length(sbc_t *sbc); + +/* Returns the time one input/output block takes to play in usec */ +unsigned sbc_get_frame_duration(sbc_t *sbc); + +/* Returns the input block size in bytes */ +size_t sbc_get_codesize(sbc_t *sbc); + +const char
*sbc_get_implementation_info(sbc_t *sbc); +void sbc_finish(sbc_t *sbc); + +#ifdef __cplusplus +} +#endif + +#endif /* __SBC_H */ diff --git a/src/modules/bluetooth/sbc/sbc_math.h b/src/modules/bluetooth/sbc/sbc_math.h new file mode 100644 index 00000000..b87bc81c --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_math.h @@ -0,0 +1,60 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2008 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#define fabs(x) ((x) < 0 ? -(x) : (x)) +/* C does not provide an explicit arithmetic shift right but this will + always be correct and every compiler *should* generate optimal code */ +#define ASR(val, bits) ((-2 >> 1 == -1) ? 
\ + ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) + +#define SCALE_SPROTO4_TBL 12 +#define SCALE_SPROTO8_TBL 14 +#define SCALE_NPROTO4_TBL 11 +#define SCALE_NPROTO8_TBL 11 +#define SCALE4_STAGED1_BITS 15 +#define SCALE4_STAGED2_BITS 16 +#define SCALE8_STAGED1_BITS 15 +#define SCALE8_STAGED2_BITS 16 + +typedef int32_t sbc_fixed_t; + +#define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS) +#define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS) +#define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS) +#define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS) + +#define SBC_FIXED_0(val) { val = 0; } +#define MUL(a, b) ((a) * (b)) +#ifdef __arm__ +#define MULA(a, b, res) ({ \ + int tmp = res; \ + __asm__( \ + "mla %0, %2, %3, %0" \ + : "=&r" (tmp) \ + : "0" (tmp), "r" (a), "r" (b)); \ + tmp; }) +#else +#define MULA(a, b, res) ((a) * (b) + (res)) +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c new file mode 100644 index 00000000..6b0be3f5 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -0,0 +1,470 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives.h" +#include "sbc_primitives_mmx.h" +#include "sbc_primitives_neon.h" + +/* + * A reference C code of analysis filter with SIMD-friendly tables + * reordering and code layout. This code can be used to develop platform + * specific SIMD optimizations. Also it may be used as some kind of test + * for compiler autovectorization capabilities (who knows, if the compiler + * is very good at this stuff, hand optimized assembly may be not strictly + * needed for some platform). + * + * Note: It is also possible to make a simple variant of analysis filter, + * which needs only a single constants table without taking care about + * even/odd cases. This simple variant of filter can be implemented without + * input data permutation. The only thing that would be lost is the + * possibility to use pairwise SIMD multiplications. But for some simple + * CPU cores without SIMD extensions it can be useful. If anybody is + * interested in implementing such variant of a filter, sourcecode from + * bluez versions 4.26/4.27 can be used as a reference and the history of + * the changes in git repository done around that time may be worth checking. 
+ */ + +static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[4]; + FIXED_T t2[4]; + int hop = 0; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = + (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 40; hop += 8) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; + + /* do the cos transform */ + t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; + t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; + t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; + t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; + t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; + t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; + t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; + t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; + + t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; + t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; + t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; + t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; + t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; + t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; + t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; + t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; + + out[0] = t1[0] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[1] = t1[1] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[2] = t1[2] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[3] = t1[3] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +} + +static inline 
void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[8]; + FIXED_T t2[8]; + int i, hop; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = + (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 80; hop += 16) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; + t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; + t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; + t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; + t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; + t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; + t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; + t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; + t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; + t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; + t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; + t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; + + + /* do the cos transform */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; + + for (i = 0; i < 4; i++) { + t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; + t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; + t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; + t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; + t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; + 
t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; + t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; + t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; + t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; + t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; + t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; + t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; + t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; + t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; + t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; + t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; + } + + for (i = 0; i < 8; i++) + out[i] = t1[i] >> + (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_4b_4s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); +} + +static inline int16_t unaligned16_be(const uint8_t *ptr) +{ + return (int16_t) ((ptr[0] << 8) | ptr[1]); +} + +static inline int16_t unaligned16_le(const uint8_t *ptr) +{ + return (int16_t) (ptr[0] | (ptr[1] << 8)); +} + +/* + * Internal helper functions for input data 
processing. In order to get + * optimal performance, it is important to have "nsamples", "nchannels" + * and "big_endian" arguments used with this inline function as compile + * time constants. + */ + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position], + 36 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position], + 36 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 36; + } + + #define PCM(i) (big_endian ? \ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 8) >= 0) { + position -= 8; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 7 * nchannels); + x[1] = PCM(0 + 3 * nchannels); + x[2] = PCM(0 + 6 * nchannels); + x[3] = PCM(0 + 4 * nchannels); + x[4] = PCM(0 + 0 * nchannels); + x[5] = PCM(0 + 2 * nchannels); + x[6] = PCM(0 + 1 * nchannels); + x[7] = PCM(0 + 5 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 7 * nchannels); + x[1] = PCM(1 + 3 * nchannels); + x[2] = PCM(1 + 6 * nchannels); + x[3] = PCM(1 + 4 * nchannels); + x[4] = PCM(1 + 0 * nchannels); + x[5] = PCM(1 + 2 * nchannels); + x[6] = PCM(1 + 1 * nchannels); + x[7] = PCM(1 + 5 * nchannels); + } + pcm += 16 * nchannels; + } + #undef PCM + + return position; +} + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], + 72 * sizeof(int16_t)); + if (nchannels > 1) + 
memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], + 72 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 72; + } + + #define PCM(i) (big_endian ? \ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 16) >= 0) { + position -= 16; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 15 * nchannels); + x[1] = PCM(0 + 7 * nchannels); + x[2] = PCM(0 + 14 * nchannels); + x[3] = PCM(0 + 8 * nchannels); + x[4] = PCM(0 + 13 * nchannels); + x[5] = PCM(0 + 9 * nchannels); + x[6] = PCM(0 + 12 * nchannels); + x[7] = PCM(0 + 10 * nchannels); + x[8] = PCM(0 + 11 * nchannels); + x[9] = PCM(0 + 3 * nchannels); + x[10] = PCM(0 + 6 * nchannels); + x[11] = PCM(0 + 0 * nchannels); + x[12] = PCM(0 + 5 * nchannels); + x[13] = PCM(0 + 1 * nchannels); + x[14] = PCM(0 + 4 * nchannels); + x[15] = PCM(0 + 2 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 15 * nchannels); + x[1] = PCM(1 + 7 * nchannels); + x[2] = PCM(1 + 14 * nchannels); + x[3] = PCM(1 + 8 * nchannels); + x[4] = PCM(1 + 13 * nchannels); + x[5] = PCM(1 + 9 * nchannels); + x[6] = PCM(1 + 12 * nchannels); + x[7] = PCM(1 + 10 * nchannels); + x[8] = PCM(1 + 11 * nchannels); + x[9] = PCM(1 + 3 * nchannels); + x[10] = PCM(1 + 6 * nchannels); + x[11] = PCM(1 + 0 * nchannels); + x[12] = PCM(1 + 5 * nchannels); + x[13] = PCM(1 + 1 * nchannels); + x[14] = PCM(1 + 4 * nchannels); + x[15] = PCM(1 + 2 * nchannels); + } + pcm += 32 * nchannels; + } + #undef PCM + + return position; +} + +/* + * Input data processing functions. The data is endian converted if needed, + * channels are deinterleaved and audio samples are reordered for use in + * SIMD-friendly analysis filter function. The results are put into "X" + * array, getting appended to the previous data (or it is better to say + * prepended, as the buffer is filled from top to bottom).
Old data is + * discarded when needed, but availability of (10 * nrof_subbands) + * contiguous samples is always guaranteed for the input to the analysis + * filter. This is achieved by copying a sufficient part of old data + * to the top of the buffer on buffer wraparound. + */ + +static int sbc_enc_process_input_4s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_4s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 1); +} + +static int sbc_enc_process_input_8s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_8s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 1); +} + +/* Supplementary function to count the number of leading zeros */ + +static inline int sbc_clz(uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz(x); +#else + /* TODO: this should be replaced with something better if good + * performance is wanted when using compilers other than gcc */ + int cnt = 0; + while (x) { + cnt++; +
x >>= 1; + } + return 32 - cnt; +#endif +} + +static void sbc_calc_scalefactors( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb, blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb++) { + uint32_t x = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); + if (tmp != 0) + x |= tmp - 1; + } + scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + } + } +} + +/* + * Detect CPU features and setup function pointers + */ +void sbc_init_primitives(struct sbc_encoder_state *state) +{ + /* Default implementation for analyze functions */ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; + + /* Default implementation for input reordering / deinterleaving */ + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; + + /* Default implementation for scale factors calculation */ + state->sbc_calc_scalefactors = sbc_calc_scalefactors; + state->implementation_info = "Generic C"; + + /* X86/AMD64 optimizations */ +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + sbc_init_primitives_mmx(state); +#endif + + /* ARM optimizations */ +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + sbc_init_primitives_neon(state); +#endif +} diff --git a/src/modules/bluetooth/sbc/sbc_primitives.h b/src/modules/bluetooth/sbc/sbc_primitives.h new file mode 100644 index 00000000..3d01c115 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives.h @@ -0,0 +1,75 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you 
can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_H +#define __SBC_PRIMITIVES_H + +#define SCALE_OUT_BITS 15 +#define SBC_X_BUFFER_SIZE 328 + +#ifdef __GNUC__ +#define SBC_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define SBC_ALWAYS_INLINE inline +#endif + +struct sbc_encoder_state { + int position; + int16_t SBC_ALIGNED X[2][SBC_X_BUFFER_SIZE]; + /* Polyphase analysis filter for 4 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_4s)(int16_t *x, int32_t *out, int out_stride); + /* Polyphase analysis filter for 8 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_8s)(int16_t *x, int32_t *out, int out_stride); + /* Process input data (deinterleave, endian conversion, reordering), + * depending on the number of subbands and input data byte order */ + int (*sbc_enc_process_input_4s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_4s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_be)(int position, + 
const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + /* Scale factors calculation */ + void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands); + const char *implementation_info; +}; + +/* + * Initialize pointers to the functions which are the basic "building bricks" + * of SBC codec. Best implementation is selected based on target CPU + * capabilities. + */ +void sbc_init_primitives(struct sbc_encoder_state *encoder_state); + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c new file mode 100644 index 00000000..08e9ca28 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c @@ -0,0 +1,320 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_mmx.h" + +/* + * MMX optimizations + */ + +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + +static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 32(%0), %%mm2\n" + "movq 40(%0), %%mm3\n" + "pmaddwd 32(%1), %%mm2\n" + "pmaddwd 40(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 48(%0), %%mm2\n" + "movq 56(%0), %%mm3\n" + "pmaddwd 48(%1), %%mm2\n" + "pmaddwd 56(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 64(%0), %%mm2\n" + "movq 72(%0), %%mm3\n" + "pmaddwd 64(%1), %%mm2\n" + "pmaddwd 72(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "\n" + "movq %%mm0, %%mm2\n" + "pmaddwd 80(%1), %%mm0\n" + "pmaddwd 88(%1), %%mm2\n" + "\n" + "movq %%mm1, %%mm3\n" + "pmaddwd 96(%1), %%mm1\n" + "pmaddwd 104(%1), %%mm3\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm3, %%mm2\n" + "\n" + "movq %%mm0, (%3)\n" + "movq %%mm2, 8(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory"); +} + +static inline void 
sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "paddd (%2), %%mm2\n" + "paddd (%2), %%mm3\n" + "\n" + "movq 32(%0), %%mm4\n" + "movq 40(%0), %%mm5\n" + "movq 48(%0), %%mm6\n" + "movq 56(%0), %%mm7\n" + "pmaddwd 32(%1), %%mm4\n" + "pmaddwd 40(%1), %%mm5\n" + "pmaddwd 48(%1), %%mm6\n" + "pmaddwd 56(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 64(%0), %%mm4\n" + "movq 72(%0), %%mm5\n" + "movq 80(%0), %%mm6\n" + "movq 88(%0), %%mm7\n" + "pmaddwd 64(%1), %%mm4\n" + "pmaddwd 72(%1), %%mm5\n" + "pmaddwd 80(%1), %%mm6\n" + "pmaddwd 88(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 96(%0), %%mm4\n" + "movq 104(%0), %%mm5\n" + "movq 112(%0), %%mm6\n" + "movq 120(%0), %%mm7\n" + "pmaddwd 96(%1), %%mm4\n" + "pmaddwd 104(%1), %%mm5\n" + "pmaddwd 112(%1), %%mm6\n" + "pmaddwd 120(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 128(%0), %%mm4\n" + "movq 136(%0), %%mm5\n" + "movq 144(%0), %%mm6\n" + "movq 152(%0), %%mm7\n" + "pmaddwd 128(%1), %%mm4\n" + "pmaddwd 136(%1), %%mm5\n" + "pmaddwd 144(%1), %%mm6\n" + "pmaddwd 152(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "psrad %4, %%mm2\n" + "psrad %4, %%mm3\n" + "\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "packssdw %%mm2, 
%%mm2\n" + "packssdw %%mm3, %%mm3\n" + "\n" + "movq %%mm0, %%mm4\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 160(%1), %%mm4\n" + "pmaddwd 168(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm6\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 192(%1), %%mm6\n" + "pmaddwd 200(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm6\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 224(%1), %%mm6\n" + "pmaddwd 232(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm6\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 256(%1), %%mm6\n" + "pmaddwd 264(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm4, (%3)\n" + "movq %%mm5, 8(%3)\n" + "\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 176(%1), %%mm0\n" + "pmaddwd 184(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 208(%1), %%mm1\n" + "pmaddwd 216(%1), %%mm7\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 240(%1), %%mm2\n" + "pmaddwd 248(%1), %%mm7\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 272(%1), %%mm3\n" + "pmaddwd 280(%1), %%mm7\n" + "paddd %%mm3, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm0, 16(%3)\n" + "movq %%mm5, 24(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory"); +} + +static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even); + + asm volatile ("emms\n"); +} + +static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_mmx(x + 24, 
out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even); + + asm volatile ("emms\n"); /* empty the MMX state once all four blocks are analyzed */ +} +/* Reports MMX availability: always 1 on __amd64__; on i386 returns 0 when the EFLAGS ID bit cannot be toggled, otherwise bit 23 of the EDX value produced by CPUID with EAX=1. A non-zero result is the raw mask (1 << 23), not a normalized 1. */ +static int check_mmx_support(void) +{ +#ifdef __amd64__ + return 1; /* We assume that all 64-bit processors have MMX support */ +#else + int cpuid_feature_information; + asm volatile ( + /* According to Intel manual, CPUID instruction is supported + * if the value of ID bit (bit 21) in EFLAGS can be modified */ + "pushf\n" + "movl (%%esp), %0\n" + "xorl $0x200000, (%%esp)\n" /* try to modify ID bit */ + "popf\n" + "pushf\n" + "xorl (%%esp), %0\n" /* check if ID bit changed */ + "jz 1f\n" /* unchanged: skip CPUID, the output register keeps the zero xor result */ + "push %%eax\n" /* eax/ebx/ecx are saved around CPUID because only edx is declared as an output */ + "push %%ebx\n" + "push %%ecx\n" + "mov $1, %%eax\n" + "cpuid\n" + "pop %%ecx\n" + "pop %%ebx\n" + "pop %%eax\n" + "1:\n" + "popf\n" + : "=d" (cpuid_feature_information) + : + : "cc"); + return cpuid_feature_information & (1 << 23); /* bit 23 of EDX is the MMX feature flag */ +#endif +} +/* Installs the MMX analysis routines and the "MMX" implementation tag into *state when check_mmx_support() returns non-zero; otherwise *state is left untouched. */ +void sbc_init_primitives_mmx(struct sbc_encoder_state *state) +{ + if (check_mmx_support()) { + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; + state->implementation_info = "MMX"; + } +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.h b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h new file mode 100644 index 00000000..c1e44a5d --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h @@ -0,0 +1,40 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either 
+ * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_MMX_H +#define __SBC_PRIMITIVES_MMX_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_MMX_SUPPORT + +void sbc_init_primitives_mmx(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c new file mode 100644 index 00000000..f1bc7b48 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -0,0 +1,246 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_neon.h" + +/* + * ARM NEON optimizations + */ + +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + +static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vmlal.s16 q1, d5, d9\n" + + "vpadd.s32 d0, d0, d1\n" + "vpadd.s32 d1, d2, d3\n" + + "vrshrn.s32 d0, q0, %3\n" + + "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" + + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vmull.s16 q3, d2, d0\n" + "vmull.s16 q4, d3, d0\n" + "vmlal.s16 q3, d4, d1\n" + "vmlal.s16 q4, d5, d1\n" + + "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory", + "d0", "d1", "d2", 
"d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11"); +} + +static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmull.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmull.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q8, d6, d10\n" + "vmlal.s16 q9, d7, d11\n" + + "vpadd.s32 d0, d12, d13\n" + "vpadd.s32 d1, d14, d15\n" + "vpadd.s32 d2, d16, d17\n" + "vpadd.s32 d3, d18, d19\n" + + "vrshr.s32 q0, q0, %3\n" + "vrshr.s32 q1, q1, %3\n" + "vmovn.s32 d0, q0\n" + "vmovn.s32 d1, q1\n" + + "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ + "vdup.i32 
d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmull.s16 q6, d4, d0\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmull.s16 q7, d5, d0\n" + "vmull.s16 q8, d6, d0\n" + "vmull.s16 q9, d7, d0\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d1\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d1\n" + "vmlal.s16 q8, d6, d1\n" + "vmlal.s16 q9, d7, d1\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d2\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d2\n" + "vmlal.s16 q8, d6, d2\n" + "vmlal.s16 q9, d7, d2\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d3\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d3\n" + "vmlal.s16 q8, d6, d3\n" + "vmlal.s16 q9, d7, d3\n" + + "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ + "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ + "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", "d16", "d17", + "d18", "d19"); +} + +static inline void sbc_analyze_4b_4s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); + 
out += out_stride; + _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); +} +/* Unconditionally installs the NEON analysis routines and the "NEON" implementation tag into *state; no runtime CPU check is performed in this function. */ +void sbc_init_primitives_neon(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; + state->implementation_info = "NEON"; +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.h b/src/modules/bluetooth/sbc/sbc_primitives_neon.h new file mode 100644 index 00000000..30766ed8 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.h @@ -0,0 +1,40 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_NEON_H +#define __SBC_PRIMITIVES_NEON_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__ARM_NEON__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_NEON_SUPPORT + +void sbc_init_primitives_neon(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_tables.h b/src/modules/bluetooth/sbc/sbc_tables.h new file mode 100644 index 00000000..0057c73f --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_tables.h @@ -0,0 +1,659 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* A2DP specification: Appendix B, page 69 */ +static const int sbc_offset4[4][4] = { + { -1, 0, 0, 0 }, + { -2, 0, 0, 1 }, + { -2, 0, 0, 1 }, + { -2, 0, 0, 1 } +}; + +/* A2DP specification: Appendix B, page 69 */ +static const int sbc_offset8[4][8] = { + { -2, 0, 0, 0, 0, 0, 0, 1 }, + { -3, 0, 0, 0, 0, 0, 1, 2 }, + { -4, 0, 0, 0, 0, 0, 1, 2 }, + { -4, 0, 0, 0, 0, 0, 1, 2 } +}; + + +#define SS4(val) ASR(val, SCALE_SPROTO4_TBL) +#define SS8(val) ASR(val, SCALE_SPROTO8_TBL) +#define SN4(val) ASR(val, SCALE_NPROTO4_TBL) +#define SN8(val) ASR(val, SCALE_NPROTO8_TBL) + +static const int32_t sbc_proto_4_40m0[] = { + SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8), + SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8), + SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330), + SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00), + SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7) +}; + +static const int32_t sbc_proto_4_40m1[] = { + SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475), + SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370), + SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b), + SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b), + SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7) +}; + +static const int32_t sbc_proto_8_80m0[] = { + SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100), + SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0), + SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c), + SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705), + SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db), 
+ SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948), + SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200), + SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358), + SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31), + SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170) +}; + +static const int32_t sbc_proto_8_80m1[] = { + SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620), + SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40), + SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01), + SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94), + SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b), + SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68), + SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20), + SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410), + SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da), + SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a) +}; + +static const int32_t synmatrix4[8][4] = { + { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) }, + { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) }, + { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) }, + { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) }, + { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) }, + { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }, + { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) }, + { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) } +}; + +static const int32_t synmatrix8[16][8] = { + { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798), + SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) }, + { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), 
SN8(0x06a6d988), + SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) }, + { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac), + SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) }, + { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10), + SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) }, + { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), + SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) }, + { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0), + SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) }, + { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54), + SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) }, + { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678), + SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) }, + { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868), + SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) }, + { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), + SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }, + { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), + SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, + { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), + SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, + { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), + SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) }, + { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), + SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, + { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), + SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, + { SN8(0xf9592678), SN8(0x018f8b84), 
SN8(0x07d8a5f0), SN8(0x0471ced0), + SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } +}; + +/* Uncomment the following line to enable high precision build of SBC encoder */ + +/* #define SBC_HIGH_PRECISION */ + +#ifdef SBC_HIGH_PRECISION +#define FIXED_A int64_t /* data type for fixed point accumulator */ +#define FIXED_T int32_t /* data type for fixed point constants */ +#define SBC_FIXED_EXTRA_BITS 16 +#else +#define FIXED_A int32_t /* data type for fixed point accumulator */ +#define FIXED_T int16_t /* data type for fixed point constants */ +#define SBC_FIXED_EXTRA_BITS 0 +#endif + +/* A2DP specification: Section 12.8 Tables + * + * Original values are premultiplied by 2 for better precision (that is the + * maximum which is possible without overflows) + * + * Note: in each block of 8 numbers sign was changed for elements 2 and 7 + * in order to compensate the same change applied to cos_table_fixed_4 + */ +#define SBC_PROTO_FIXED4_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO4(x) (FIXED_A) ((x * 2) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO4(x) +static const FIXED_T _sbc_proto_fixed4[40] = { + F(0.00000000E+00), F(5.36548976E-04), + -F(1.49188357E-03), F(2.73370904E-03), + F(3.83720193E-03), F(3.89205149E-03), + F(1.86581691E-03), F(3.06012286E-03), + + F(1.09137620E-02), F(2.04385087E-02), + -F(2.88757392E-02), F(3.21939290E-02), + F(2.58767811E-02), F(6.13245186E-03), + -F(2.88217274E-02), F(7.76463494E-02), + + F(1.35593274E-01), F(1.94987841E-01), + -F(2.46636662E-01), F(2.81828203E-01), + F(2.94315332E-01), F(2.81828203E-01), + F(2.46636662E-01), -F(1.94987841E-01), + + -F(1.35593274E-01), -F(7.76463494E-02), + F(2.88217274E-02), F(6.13245186E-03), + F(2.58767811E-02), F(3.21939290E-02), + F(2.88757392E-02), -F(2.04385087E-02), + + -F(1.09137620E-02), -F(3.06012286E-03), + -F(1.86581691E-03), F(3.89205149E-03), + F(3.83720193E-03), F(2.73370904E-03), + 
F(1.49188357E-03), -F(5.36548976E-04), +}; +#undef F + +/* + * To produce this cosine matrix in Octave: + * + * b = zeros(4, 8); + * for i = 0:3 + * for j = 0:7 b(i+1, j+1) = cos((i + 0.5) * (j - 2) * (pi/4)) + * endfor + * endfor; + * printf("%.10f, ", b'); + * + * Note: in each block of 8 numbers sign was changed for elements 2 and 7 + * + * Change of sign for element 2 allows to replace constant 1.0 (not + * representable in Q15 format) with -1.0 (fine with Q15). + * Changed sign for element 7 allows to have more similar constants + * and simplify subband filter function code. + */ +#define SBC_COS_TABLE_FIXED4_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) +#define F_COS4(x) (FIXED_A) ((x) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS4(x) +static const FIXED_T cos_table_fixed_4[32] = { + F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), + F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), + + -F(0.7071067812), F(0.3826834324), -F(1.0000000000), F(0.3826834324), + -F(0.7071067812), -F(0.9238795325), -F(0.0000000000), -F(0.9238795325), + + -F(0.7071067812), -F(0.3826834324), -F(1.0000000000), -F(0.3826834324), + -F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325), + + F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), + F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), +}; +#undef F + +/* A2DP specification: Section 12.8 Tables + * + * Original values are premultiplied by 4 for better precision (that is the + * maximum which is possible without overflows) + * + * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 + * in order to compensate the same change applied to cos_table_fixed_8 + */ +#define SBC_PROTO_FIXED8_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO8(x) (FIXED_A) ((x * 2) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) 
+#define F(x) F_PROTO8(x) +static const FIXED_T _sbc_proto_fixed8[80] = { + F(0.00000000E+00), F(1.56575398E-04), + F(3.43256425E-04), F(5.54620202E-04), + -F(8.23919506E-04), F(1.13992507E-03), + F(1.47640169E-03), F(1.78371725E-03), + F(2.01182542E-03), F(2.10371989E-03), + F(1.99454554E-03), F(1.61656283E-03), + F(9.02154502E-04), F(1.78805361E-04), + F(1.64973098E-03), F(3.49717454E-03), + + F(5.65949473E-03), F(8.02941163E-03), + F(1.04584443E-02), F(1.27472335E-02), + -F(1.46525263E-02), F(1.59045603E-02), + F(1.62208471E-02), F(1.53184106E-02), + F(1.29371806E-02), F(8.85757540E-03), + F(2.92408442E-03), -F(4.91578024E-03), + -F(1.46404076E-02), F(2.61098752E-02), + F(3.90751381E-02), F(5.31873032E-02), + + F(6.79989431E-02), F(8.29847578E-02), + F(9.75753918E-02), F(1.11196689E-01), + -F(1.23264548E-01), F(1.33264415E-01), + F(1.40753505E-01), F(1.45389847E-01), + F(1.46955068E-01), F(1.45389847E-01), + F(1.40753505E-01), F(1.33264415E-01), + F(1.23264548E-01), -F(1.11196689E-01), + -F(9.75753918E-02), -F(8.29847578E-02), + + -F(6.79989431E-02), -F(5.31873032E-02), + -F(3.90751381E-02), -F(2.61098752E-02), + F(1.46404076E-02), -F(4.91578024E-03), + F(2.92408442E-03), F(8.85757540E-03), + F(1.29371806E-02), F(1.53184106E-02), + F(1.62208471E-02), F(1.59045603E-02), + F(1.46525263E-02), -F(1.27472335E-02), + -F(1.04584443E-02), -F(8.02941163E-03), + + -F(5.65949473E-03), -F(3.49717454E-03), + -F(1.64973098E-03), -F(1.78805361E-04), + -F(9.02154502E-04), F(1.61656283E-03), + F(1.99454554E-03), F(2.10371989E-03), + F(2.01182542E-03), F(1.78371725E-03), + F(1.47640169E-03), F(1.13992507E-03), + F(8.23919506E-04), -F(5.54620202E-04), + -F(3.43256425E-04), -F(1.56575398E-04), +}; +#undef F + +/* + * To produce this cosine matrix in Octave: + * + * b = zeros(8, 16); + * for i = 0:7 + * for j = 0:15 b(i+1, j+1) = cos((i + 0.5) * (j - 4) * (pi/8)) + * endfor endfor; + * printf("%.10f, ", b'); + * + * Note: in each block of 16 numbers sign was changed for elements 4, 
13, 14, 15 + * + * Change of sign for element 4 allows to replace constant 1.0 (not + * representable in Q15 format) with -1.0 (fine with Q15). + * Changed signs for elements 13, 14, 15 allow to have more similar constants + * and simplify subband filter function code. + */ +#define SBC_COS_TABLE_FIXED8_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) +#define F_COS8(x) (FIXED_A) ((x) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS8(x) +static const FIXED_T cos_table_fixed_8[128] = { + F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), + -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), + F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), + F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), + + -F(0.7071067812), -F(0.1950903220), F(0.3826834324), F(0.8314696123), + -F(1.0000000000), F(0.8314696123), F(0.3826834324), -F(0.1950903220), + -F(0.7071067812), -F(0.9807852804), -F(0.9238795325), -F(0.5555702330), + -F(0.0000000000), -F(0.5555702330), -F(0.9238795325), -F(0.9807852804), + + -F(0.7071067812), -F(0.9807852804), -F(0.3826834324), F(0.5555702330), + -F(1.0000000000), F(0.5555702330), -F(0.3826834324), -F(0.9807852804), + -F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123), + F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), + + F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), + -F(1.0000000000), F(0.1950903220), -F(0.9238795325), -F(0.5555702330), + F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), + -F(0.0000000000), -F(0.9807852804), -F(0.3826834324), F(0.8314696123), + + F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), + -F(1.0000000000), -F(0.1950903220), -F(0.9238795325), F(0.5555702330), + F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), + F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), + + 
-F(0.7071067812), F(0.9807852804), -F(0.3826834324), -F(0.5555702330), + -F(1.0000000000), -F(0.5555702330), -F(0.3826834324), F(0.9807852804), + -F(0.7071067812), -F(0.1950903220), F(0.9238795325), -F(0.8314696123), + -F(0.0000000000), -F(0.8314696123), F(0.9238795325), -F(0.1950903220), + + -F(0.7071067812), F(0.1950903220), F(0.3826834324), -F(0.8314696123), + -F(1.0000000000), -F(0.8314696123), F(0.3826834324), F(0.1950903220), + -F(0.7071067812), F(0.9807852804), -F(0.9238795325), F(0.5555702330), + -F(0.0000000000), F(0.5555702330), -F(0.9238795325), F(0.9807852804), + + F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), + -F(1.0000000000), -F(0.9807852804), F(0.9238795325), -F(0.8314696123), + F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), + -F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330), +}; +#undef F + +/* + * Enforce 16 byte alignment for the data, which is supposed to be used + * with SIMD optimized code. + */ + +#define SBC_ALIGN_BITS 4 +#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1) + +#ifdef __GNUC__ +#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS)))) +#else +#define SBC_ALIGNED +#endif + +/* + * Constant tables for the use in SIMD optimized analysis filters + * Each table consists of two parts: + * 1. reordered "proto" table + * 2. 
reordered "cos" table + * + * Due to non-symmetrical reordering, separate tables for "even" + * and "odd" cases are needed + */ + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = { +#define C0 1.0932568993 +#define C1 1.3056875580 +#define C2 1.3056875580 +#define C3 1.6772280856 + +#define F(x) F_PROTO4(x) + F(0.00000000E+00 * C0), F(3.83720193E-03 * C0), + F(5.36548976E-04 * C1), F(2.73370904E-03 * C1), + F(3.06012286E-03 * C2), F(3.89205149E-03 * C2), + F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3), + F(1.09137620E-02 * C0), F(2.58767811E-02 * C0), + F(2.04385087E-02 * C1), F(3.21939290E-02 * C1), + F(7.76463494E-02 * C2), F(6.13245186E-03 * C2), + F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3), + F(1.35593274E-01 * C0), F(2.94315332E-01 * C0), + F(1.94987841E-01 * C1), F(2.81828203E-01 * C1), + -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2), + F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3), + -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0), + -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1), + -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2), + F(0.00000000E+00 * C3), F(2.88217274E-02 * C3), + -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0), + -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1), + -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2), + F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.7071067812 / C0), F(0.9238795325 / C1), + -F(0.7071067812 / C0), F(0.3826834324 / C1), + -F(0.7071067812 / C0), -F(0.3826834324 / C1), + F(0.7071067812 / C0), -F(0.9238795325 / C1), + F(0.3826834324 / C2), -F(1.0000000000 / C3), + -F(0.9238795325 / C2), -F(1.0000000000 / C3), + F(0.9238795325 / C2), -F(1.0000000000 / C3), + -F(0.3826834324 / C2), -F(1.0000000000 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = { +#define C0 1.3056875580 +#define C1 1.6772280856 +#define C2 1.0932568993 +#define C3 
1.3056875580 + +#define F(x) F_PROTO4(x) + F(2.73370904E-03 * C0), F(5.36548976E-04 * C0), + -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(1.09137620E-02 * C2), + F(3.89205149E-03 * C3), F(3.06012286E-03 * C3), + F(3.21939290E-02 * C0), F(2.04385087E-02 * C0), + -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), F(1.35593274E-01 * C2), + F(6.13245186E-03 * C3), F(7.76463494E-02 * C3), + F(2.81828203E-01 * C0), F(1.94987841E-01 * C0), + -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1), + F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2), + F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3), + F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0), + F(2.88217274E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2), + F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3), + F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0), + -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(0.00000000E+00 * C2), + F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.7071067812 / C2), F(0.3826834324 / C3), + -F(0.7071067812 / C2), -F(0.9238795325 / C3), + -F(0.7071067812 / C2), F(0.9238795325 / C3), + F(0.7071067812 / C2), -F(0.3826834324 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = { +#define C0 2.7906148894 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.5377944043 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), F(2.01182542E-03 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + 
F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(5.65949473E-03 * C0), F(1.29371806E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(6.79989431E-02 * C0), F(1.46955068E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.46404076E-02 * C4), F(0.00000000E+00 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + F(0.7071067812 / C0), F(0.8314696123 / C1), + -F(0.7071067812 / C0), -F(0.1950903220 / C1), 
+ -F(0.7071067812 / C0), -F(0.9807852804 / C1), + F(0.7071067812 / C0), -F(0.5555702330 / C1), + F(0.7071067812 / C0), F(0.5555702330 / C1), + -F(0.7071067812 / C0), F(0.9807852804 / C1), + -F(0.7071067812 / C0), F(0.1950903220 / C1), + F(0.7071067812 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + -F(1.0000000000 / C4), F(0.5555702330 / C5), + -F(1.0000000000 / C4), -F(0.9807852804 / C5), + -F(1.0000000000 / C4), F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.9807852804 / C5), + -F(1.0000000000 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = { +#define C0 2.5377944043 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.7906148894 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * 
C3), F(1.13992507E-03 * C3), + F(2.01182542E-03 * C4), F(5.65949473E-03 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + F(1.29371806E-02 * C4), F(6.79989431E-02 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + F(0.00000000E+00 * C0), F(1.46404076E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + F(2.01182542E-03 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + -F(1.0000000000 / C0), F(0.8314696123 / C1), + -F(1.0000000000 / C0), -F(0.1950903220 / C1), + -F(1.0000000000 / 
C0), -F(0.9807852804 / C1), + -F(1.0000000000 / C0), -F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.9807852804 / C1), + -F(1.0000000000 / C0), F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + F(0.7071067812 / C4), F(0.5555702330 / C5), + -F(0.7071067812 / C4), -F(0.9807852804 / C5), + -F(0.7071067812 / C4), F(0.1950903220 / C5), + F(0.7071067812 / C4), F(0.8314696123 / C5), + F(0.7071067812 / C4), -F(0.8314696123 / C5), + -F(0.7071067812 / C4), -F(0.1950903220 / C5), + -F(0.7071067812 / C4), F(0.9807852804 / C5), + F(0.7071067812 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; diff --git a/src/modules/bluetooth/sbc_math.h b/src/modules/bluetooth/sbc_math.h deleted file mode 100644 index b87bc81c..00000000 --- a/src/modules/bluetooth/sbc_math.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2008 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms 
of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#define fabs(x) ((x) < 0 ? -(x) : (x)) -/* C does not provide an explicit arithmetic shift right but this will - always be correct and every compiler *should* generate optimal code */ -#define ASR(val, bits) ((-2 >> 1 == -1) ? \ - ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) - -#define SCALE_SPROTO4_TBL 12 -#define SCALE_SPROTO8_TBL 14 -#define SCALE_NPROTO4_TBL 11 -#define SCALE_NPROTO8_TBL 11 -#define SCALE4_STAGED1_BITS 15 -#define SCALE4_STAGED2_BITS 16 -#define SCALE8_STAGED1_BITS 15 -#define SCALE8_STAGED2_BITS 16 - -typedef int32_t sbc_fixed_t; - -#define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS) -#define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS) -#define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS) -#define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS) - -#define SBC_FIXED_0(val) { val = 0; } -#define MUL(a, b) ((a) * (b)) -#ifdef __arm__ -#define MULA(a, b, res) ({ \ - int tmp = res; \ - __asm__( \ - "mla %0, %2, %3, %0" \ - : "=&r" (tmp) \ - : "0" (tmp), "r" (a), "r" (b)); \ - tmp; }) -#else -#define MULA(a, b, res) ((a) * (b) + (res)) -#endif diff --git a/src/modules/bluetooth/sbc_primitives.c b/src/modules/bluetooth/sbc_primitives.c deleted file mode 100644 index 6b0be3f5..00000000 --- a/src/modules/bluetooth/sbc_primitives.c +++ /dev/null @@ -1,470 +0,0 @@ 
-/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include -#include -#include -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives.h" -#include "sbc_primitives_mmx.h" -#include "sbc_primitives_neon.h" - -/* - * A reference C code of analysis filter with SIMD-friendly tables - * reordering and code layout. This code can be used to develop platform - * specific SIMD optimizations. Also it may be used as some kind of test - * for compiler autovectorization capabilities (who knows, if the compiler - * is very good at this stuff, hand optimized assembly may be not strictly - * needed for some platform). - * - * Note: It is also possible to make a simple variant of analysis filter, - * which needs only a single constants table without taking care about - * even/odd cases. This simple variant of filter can be implemented without - * input data permutation. The only thing that would be lost is the - * possibility to use pairwise SIMD multiplications. But for some simple - * CPU cores without SIMD extensions it can be useful. 
If anybody is - * interested in implementing such variant of a filter, sourcecode from - * bluez versions 4.26/4.27 can be used as a reference and the history of - * the changes in git repository done around that time may be worth checking. - */ - -static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - FIXED_A t1[4]; - FIXED_T t2[4]; - int hop = 0; - - /* rounding coefficient */ - t1[0] = t1[1] = t1[2] = t1[3] = - (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); - - /* low pass polyphase filter */ - for (hop = 0; hop < 40; hop += 8) { - t1[0] += (FIXED_A) in[hop] * consts[hop]; - t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; - t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; - t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; - t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; - t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; - t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; - t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; - } - - /* scaling */ - t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; - t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; - t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; - t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; - - /* do the cos transform */ - t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; - t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; - t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; - t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; - t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; - t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; - t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; - t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; - - t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; - t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; - t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; - t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; - t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; - t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; - t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; - t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; - - out[0] = t1[0] >> - (SBC_COS_TABLE_FIXED4_SCALE - 
SCALE_OUT_BITS); - out[1] = t1[1] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); - out[2] = t1[2] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); - out[3] = t1[3] >> - (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); -} - -static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - FIXED_A t1[8]; - FIXED_T t2[8]; - int i, hop; - - /* rounding coefficient */ - t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = - (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); - - /* low pass polyphase filter */ - for (hop = 0; hop < 80; hop += 16) { - t1[0] += (FIXED_A) in[hop] * consts[hop]; - t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; - t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; - t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; - t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; - t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; - t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; - t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; - t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; - t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; - t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; - t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; - t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; - t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; - t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; - t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; - } - - /* scaling */ - t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; - t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; - t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; - t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; - t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; - t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; - t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; - t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; - - - /* do the cos transform */ - t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; - - for (i = 0; i < 4; i++) { - t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; - 
t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; - t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; - t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; - t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; - t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; - t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; - t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; - t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; - t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; - t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; - t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; - t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; - t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; - t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; - t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; - } - - for (i = 0; i < 8; i++) - out[i] = t1[i] >> - (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); -} - -static inline void sbc_analyze_4b_4s_simd(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static inline void sbc_analyze_4b_8s_simd(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); -} - -static inline int16_t 
unaligned16_be(const uint8_t *ptr) -{ - return (int16_t) ((ptr[0] << 8) | ptr[1]); -} - -static inline int16_t unaligned16_le(const uint8_t *ptr) -{ - return (int16_t) (ptr[0] | (ptr[1] << 8)); -} - -/* - * Internal helper functions for input data processing. In order to get - * optimal performance, it is important to have "nsamples", "nchannels" - * and "big_endian" arguments used with this inline function as compile - * time constants. - */ - -static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( - int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels, int big_endian) -{ - /* handle X buffer wraparound */ - if (position < nsamples) { - if (nchannels > 0) - memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position], - 36 * sizeof(int16_t)); - if (nchannels > 1) - memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position], - 36 * sizeof(int16_t)); - position = SBC_X_BUFFER_SIZE - 36; - } - - #define PCM(i) (big_endian ? \ - unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) - - /* copy/permutate audio samples */ - while ((nsamples -= 8) >= 0) { - position -= 8; - if (nchannels > 0) { - int16_t *x = &X[0][position]; - x[0] = PCM(0 + 7 * nchannels); - x[1] = PCM(0 + 3 * nchannels); - x[2] = PCM(0 + 6 * nchannels); - x[3] = PCM(0 + 4 * nchannels); - x[4] = PCM(0 + 0 * nchannels); - x[5] = PCM(0 + 2 * nchannels); - x[6] = PCM(0 + 1 * nchannels); - x[7] = PCM(0 + 5 * nchannels); - } - if (nchannels > 1) { - int16_t *x = &X[1][position]; - x[0] = PCM(1 + 7 * nchannels); - x[1] = PCM(1 + 3 * nchannels); - x[2] = PCM(1 + 6 * nchannels); - x[3] = PCM(1 + 4 * nchannels); - x[4] = PCM(1 + 0 * nchannels); - x[5] = PCM(1 + 2 * nchannels); - x[6] = PCM(1 + 1 * nchannels); - x[7] = PCM(1 + 5 * nchannels); - } - pcm += 16 * nchannels; - } - #undef PCM - - return position; -} - -static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( - int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int 
nsamples, int nchannels, int big_endian) -{ - /* handle X buffer wraparound */ - if (position < nsamples) { - if (nchannels > 0) - memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], - 72 * sizeof(int16_t)); - if (nchannels > 1) - memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], - 72 * sizeof(int16_t)); - position = SBC_X_BUFFER_SIZE - 72; - } - - #define PCM(i) (big_endian ? \ - unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) - - /* copy/permutate audio samples */ - while ((nsamples -= 16) >= 0) { - position -= 16; - if (nchannels > 0) { - int16_t *x = &X[0][position]; - x[0] = PCM(0 + 15 * nchannels); - x[1] = PCM(0 + 7 * nchannels); - x[2] = PCM(0 + 14 * nchannels); - x[3] = PCM(0 + 8 * nchannels); - x[4] = PCM(0 + 13 * nchannels); - x[5] = PCM(0 + 9 * nchannels); - x[6] = PCM(0 + 12 * nchannels); - x[7] = PCM(0 + 10 * nchannels); - x[8] = PCM(0 + 11 * nchannels); - x[9] = PCM(0 + 3 * nchannels); - x[10] = PCM(0 + 6 * nchannels); - x[11] = PCM(0 + 0 * nchannels); - x[12] = PCM(0 + 5 * nchannels); - x[13] = PCM(0 + 1 * nchannels); - x[14] = PCM(0 + 4 * nchannels); - x[15] = PCM(0 + 2 * nchannels); - } - if (nchannels > 1) { - int16_t *x = &X[1][position]; - x[0] = PCM(1 + 15 * nchannels); - x[1] = PCM(1 + 7 * nchannels); - x[2] = PCM(1 + 14 * nchannels); - x[3] = PCM(1 + 8 * nchannels); - x[4] = PCM(1 + 13 * nchannels); - x[5] = PCM(1 + 9 * nchannels); - x[6] = PCM(1 + 12 * nchannels); - x[7] = PCM(1 + 10 * nchannels); - x[8] = PCM(1 + 11 * nchannels); - x[9] = PCM(1 + 3 * nchannels); - x[10] = PCM(1 + 6 * nchannels); - x[11] = PCM(1 + 0 * nchannels); - x[12] = PCM(1 + 5 * nchannels); - x[13] = PCM(1 + 1 * nchannels); - x[14] = PCM(1 + 4 * nchannels); - x[15] = PCM(1 + 2 * nchannels); - } - pcm += 32 * nchannels; - } - #undef PCM - - return position; -} - -/* - * Input data processing functions. 
The data is endian converted if needed, - * channels are deintrleaved and audio samples are reordered for use in - * SIMD-friendly analysis filter function. The results are put into "X" - * array, getting appended to the previous data (or it is better to say - * prepended, as the buffer is filled from top to bottom). Old data is - * discarded when neededed, but availability of (10 * nrof_subbands) - * contiguous samples is always guaranteed for the input to the analysis - * filter. This is achieved by copying a sufficient part of old data - * to the top of the buffer on buffer wraparound. - */ - -static int sbc_enc_process_input_4s_le(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 2, 0); - else - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 1, 0); -} - -static int sbc_enc_process_input_4s_be(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 2, 1); - else - return sbc_encoder_process_input_s4_internal( - position, pcm, X, nsamples, 1, 1); -} - -static int sbc_enc_process_input_8s_le(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 2, 0); - else - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 1, 0); -} - -static int sbc_enc_process_input_8s_be(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels) -{ - if (nchannels > 1) - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 2, 1); - else - return sbc_encoder_process_input_s8_internal( - position, pcm, X, nsamples, 1, 1); -} - -/* 
Supplementary function to count the number of leading zeros */ - -static inline int sbc_clz(uint32_t x) -{ -#ifdef __GNUC__ - return __builtin_clz(x); -#else - /* TODO: this should be replaced with something better if good - * performance is wanted when using compilers other than gcc */ - int cnt = 0; - while (x) { - cnt++; - x >>= 1; - } - return 32 - cnt; -#endif -} - -static void sbc_calc_scalefactors( - int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int channels, int subbands) -{ - int ch, sb, blk; - for (ch = 0; ch < channels; ch++) { - for (sb = 0; sb < subbands; sb++) { - uint32_t x = 1 << SCALE_OUT_BITS; - for (blk = 0; blk < blocks; blk++) { - int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); - if (tmp != 0) - x |= tmp - 1; - } - scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - - sbc_clz(x); - } - } -} - -/* - * Detect CPU features and setup function pointers - */ -void sbc_init_primitives(struct sbc_encoder_state *state) -{ - /* Default implementation for analyze functions */ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; - - /* Default implementation for input reordering / deinterleaving */ - state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; - state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; - state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; - state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; - - /* Default implementation for scale factors calculation */ - state->sbc_calc_scalefactors = sbc_calc_scalefactors; - state->implementation_info = "Generic C"; - - /* X86/AMD64 optimizations */ -#ifdef SBC_BUILD_WITH_MMX_SUPPORT - sbc_init_primitives_mmx(state); -#endif - - /* ARM optimizations */ -#ifdef SBC_BUILD_WITH_NEON_SUPPORT - sbc_init_primitives_neon(state); -#endif -} diff --git a/src/modules/bluetooth/sbc_primitives.h b/src/modules/bluetooth/sbc_primitives.h deleted file mode 100644 index 3d01c115..00000000 
--- a/src/modules/bluetooth/sbc_primitives.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_H -#define __SBC_PRIMITIVES_H - -#define SCALE_OUT_BITS 15 -#define SBC_X_BUFFER_SIZE 328 - -#ifdef __GNUC__ -#define SBC_ALWAYS_INLINE __attribute__((always_inline)) -#else -#define SBC_ALWAYS_INLINE inline -#endif - -struct sbc_encoder_state { - int position; - int16_t SBC_ALIGNED X[2][SBC_X_BUFFER_SIZE]; - /* Polyphase analysis filter for 4 subbands configuration, - * it handles 4 blocks at once */ - void (*sbc_analyze_4b_4s)(int16_t *x, int32_t *out, int out_stride); - /* Polyphase analysis filter for 8 subbands configuration, - * it handles 4 blocks at once */ - void (*sbc_analyze_4b_8s)(int16_t *x, int32_t *out, int out_stride); - /* Process input data (deinterleave, endian conversion, reordering), - * depending on the number of subbands and input data byte order */ - int (*sbc_enc_process_input_4s_le)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - int 
(*sbc_enc_process_input_4s_be)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - int (*sbc_enc_process_input_8s_le)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - int (*sbc_enc_process_input_8s_be)(int position, - const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], - int nsamples, int nchannels); - /* Scale factors calculation */ - void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], - uint32_t scale_factor[2][8], - int blocks, int channels, int subbands); - const char *implementation_info; -}; - -/* - * Initialize pointers to the functions which are the basic "building bricks" - * of SBC codec. Best implementation is selected based on target CPU - * capabilities. - */ -void sbc_init_primitives(struct sbc_encoder_state *encoder_state); - -#endif diff --git a/src/modules/bluetooth/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc_primitives_mmx.c deleted file mode 100644 index 08e9ca28..00000000 --- a/src/modules/bluetooth/sbc_primitives_mmx.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include -#include -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_mmx.h" - -/* - * MMX optimizations - */ - -#ifdef SBC_BUILD_WITH_MMX_SUPPORT - -static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - static const SBC_ALIGNED int32_t round_c[2] = { - 1 << (SBC_PROTO_FIXED4_SCALE - 1), - 1 << (SBC_PROTO_FIXED4_SCALE - 1), - }; - asm volatile ( - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "pmaddwd (%1), %%mm0\n" - "pmaddwd 8(%1), %%mm1\n" - "paddd (%2), %%mm0\n" - "paddd (%2), %%mm1\n" - "\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "pmaddwd 16(%1), %%mm2\n" - "pmaddwd 24(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "movq 32(%0), %%mm2\n" - "movq 40(%0), %%mm3\n" - "pmaddwd 32(%1), %%mm2\n" - "pmaddwd 40(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "movq 48(%0), %%mm2\n" - "movq 56(%0), %%mm3\n" - "pmaddwd 48(%1), %%mm2\n" - "pmaddwd 56(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "movq 64(%0), %%mm2\n" - "movq 72(%0), %%mm3\n" - "pmaddwd 64(%1), %%mm2\n" - "pmaddwd 72(%1), %%mm3\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm3, %%mm1\n" - "\n" - "psrad %4, %%mm0\n" - "psrad %4, %%mm1\n" - "packssdw %%mm0, %%mm0\n" - "packssdw %%mm1, %%mm1\n" - "\n" - "movq %%mm0, %%mm2\n" - "pmaddwd 80(%1), %%mm0\n" - "pmaddwd 88(%1), %%mm2\n" - "\n" - "movq %%mm1, %%mm3\n" - "pmaddwd 96(%1), %%mm1\n" - "pmaddwd 104(%1), %%mm3\n" - "paddd %%mm1, %%mm0\n" - "paddd %%mm3, %%mm2\n" - "\n" - "movq %%mm0, (%3)\n" - "movq %%mm2, 8(%3)\n" - : - : "r" (in), "r" (consts), "r" (&round_c), "r" (out), - "i" (SBC_PROTO_FIXED4_SCALE) - : "memory"); -} - -static inline void 
sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - static const SBC_ALIGNED int32_t round_c[2] = { - 1 << (SBC_PROTO_FIXED8_SCALE - 1), - 1 << (SBC_PROTO_FIXED8_SCALE - 1), - }; - asm volatile ( - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "pmaddwd (%1), %%mm0\n" - "pmaddwd 8(%1), %%mm1\n" - "pmaddwd 16(%1), %%mm2\n" - "pmaddwd 24(%1), %%mm3\n" - "paddd (%2), %%mm0\n" - "paddd (%2), %%mm1\n" - "paddd (%2), %%mm2\n" - "paddd (%2), %%mm3\n" - "\n" - "movq 32(%0), %%mm4\n" - "movq 40(%0), %%mm5\n" - "movq 48(%0), %%mm6\n" - "movq 56(%0), %%mm7\n" - "pmaddwd 32(%1), %%mm4\n" - "pmaddwd 40(%1), %%mm5\n" - "pmaddwd 48(%1), %%mm6\n" - "pmaddwd 56(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "movq 64(%0), %%mm4\n" - "movq 72(%0), %%mm5\n" - "movq 80(%0), %%mm6\n" - "movq 88(%0), %%mm7\n" - "pmaddwd 64(%1), %%mm4\n" - "pmaddwd 72(%1), %%mm5\n" - "pmaddwd 80(%1), %%mm6\n" - "pmaddwd 88(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "movq 96(%0), %%mm4\n" - "movq 104(%0), %%mm5\n" - "movq 112(%0), %%mm6\n" - "movq 120(%0), %%mm7\n" - "pmaddwd 96(%1), %%mm4\n" - "pmaddwd 104(%1), %%mm5\n" - "pmaddwd 112(%1), %%mm6\n" - "pmaddwd 120(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "movq 128(%0), %%mm4\n" - "movq 136(%0), %%mm5\n" - "movq 144(%0), %%mm6\n" - "movq 152(%0), %%mm7\n" - "pmaddwd 128(%1), %%mm4\n" - "pmaddwd 136(%1), %%mm5\n" - "pmaddwd 144(%1), %%mm6\n" - "pmaddwd 152(%1), %%mm7\n" - "paddd %%mm4, %%mm0\n" - "paddd %%mm5, %%mm1\n" - "paddd %%mm6, %%mm2\n" - "paddd %%mm7, %%mm3\n" - "\n" - "psrad %4, %%mm0\n" - "psrad %4, %%mm1\n" - "psrad %4, %%mm2\n" - "psrad %4, %%mm3\n" - "\n" - "packssdw %%mm0, %%mm0\n" - "packssdw %%mm1, %%mm1\n" - "packssdw %%mm2, 
%%mm2\n" - "packssdw %%mm3, %%mm3\n" - "\n" - "movq %%mm0, %%mm4\n" - "movq %%mm0, %%mm5\n" - "pmaddwd 160(%1), %%mm4\n" - "pmaddwd 168(%1), %%mm5\n" - "\n" - "movq %%mm1, %%mm6\n" - "movq %%mm1, %%mm7\n" - "pmaddwd 192(%1), %%mm6\n" - "pmaddwd 200(%1), %%mm7\n" - "paddd %%mm6, %%mm4\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm2, %%mm6\n" - "movq %%mm2, %%mm7\n" - "pmaddwd 224(%1), %%mm6\n" - "pmaddwd 232(%1), %%mm7\n" - "paddd %%mm6, %%mm4\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm3, %%mm6\n" - "movq %%mm3, %%mm7\n" - "pmaddwd 256(%1), %%mm6\n" - "pmaddwd 264(%1), %%mm7\n" - "paddd %%mm6, %%mm4\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm4, (%3)\n" - "movq %%mm5, 8(%3)\n" - "\n" - "movq %%mm0, %%mm5\n" - "pmaddwd 176(%1), %%mm0\n" - "pmaddwd 184(%1), %%mm5\n" - "\n" - "movq %%mm1, %%mm7\n" - "pmaddwd 208(%1), %%mm1\n" - "pmaddwd 216(%1), %%mm7\n" - "paddd %%mm1, %%mm0\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm2, %%mm7\n" - "pmaddwd 240(%1), %%mm2\n" - "pmaddwd 248(%1), %%mm7\n" - "paddd %%mm2, %%mm0\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm3, %%mm7\n" - "pmaddwd 272(%1), %%mm3\n" - "pmaddwd 280(%1), %%mm7\n" - "paddd %%mm3, %%mm0\n" - "paddd %%mm7, %%mm5\n" - "\n" - "movq %%mm0, 16(%3)\n" - "movq %%mm5, 24(%3)\n" - : - : "r" (in), "r" (consts), "r" (&round_c), "r" (out), - "i" (SBC_PROTO_FIXED8_SCALE) - : "memory"); -} - -static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, - int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even); - - asm volatile ("emms\n"); -} - -static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, - int out_stride) -{ - /* Analyze blocks */ - sbc_analyze_eight_mmx(x + 24, 
out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even); - - asm volatile ("emms\n"); -} - -static int check_mmx_support(void) -{ -#ifdef __amd64__ - return 1; /* We assume that all 64-bit processors have MMX support */ -#else - int cpuid_feature_information; - asm volatile ( - /* According to Intel manual, CPUID instruction is supported - * if the value of ID bit (bit 21) in EFLAGS can be modified */ - "pushf\n" - "movl (%%esp), %0\n" - "xorl $0x200000, (%%esp)\n" /* try to modify ID bit */ - "popf\n" - "pushf\n" - "xorl (%%esp), %0\n" /* check if ID bit changed */ - "jz 1f\n" - "push %%eax\n" - "push %%ebx\n" - "push %%ecx\n" - "mov $1, %%eax\n" - "cpuid\n" - "pop %%ecx\n" - "pop %%ebx\n" - "pop %%eax\n" - "1:\n" - "popf\n" - : "=d" (cpuid_feature_information) - : - : "cc"); - return cpuid_feature_information & (1 << 23); -#endif -} - -void sbc_init_primitives_mmx(struct sbc_encoder_state *state) -{ - if (check_mmx_support()) { - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; - state->implementation_info = "MMX"; - } -} - -#endif diff --git a/src/modules/bluetooth/sbc_primitives_mmx.h b/src/modules/bluetooth/sbc_primitives_mmx.h deleted file mode 100644 index c1e44a5d..00000000 --- a/src/modules/bluetooth/sbc_primitives_mmx.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * 
version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_MMX_H -#define __SBC_PRIMITIVES_MMX_H - -#include "sbc_primitives.h" - -#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \ - !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) - -#define SBC_BUILD_WITH_MMX_SUPPORT - -void sbc_init_primitives_mmx(struct sbc_encoder_state *encoder_state); - -#endif - -#endif diff --git a/src/modules/bluetooth/sbc_primitives_neon.c b/src/modules/bluetooth/sbc_primitives_neon.c deleted file mode 100644 index f1bc7b48..00000000 --- a/src/modules/bluetooth/sbc_primitives_neon.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include -#include -#include "sbc.h" -#include "sbc_math.h" -#include "sbc_tables.h" - -#include "sbc_primitives_neon.h" - -/* - * ARM NEON optimizations - */ - -#ifdef SBC_BUILD_WITH_NEON_SUPPORT - -static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - /* TODO: merge even and odd cases (or even merge all four calls to this - * function) in order to have only aligned reads from 'in' array - * and reduce number of load instructions */ - asm volatile ( - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmull.s16 q0, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmull.s16 q1, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q0, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q1, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q0, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q1, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q0, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q1, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q0, d4, d8\n" - "vmlal.s16 q1, d5, d9\n" - - "vpadd.s32 d0, d0, d1\n" - "vpadd.s32 d1, d2, d3\n" - - "vrshrn.s32 d0, q0, %3\n" - - "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" - - "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ - - "vmull.s16 q3, d2, d0\n" - "vmull.s16 q4, d3, d0\n" - "vmlal.s16 q3, d4, d1\n" - "vmlal.s16 q4, d5, d1\n" - - "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ - "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ - - "vst1.32 {d0, d1}, [%2, :128]\n" - : "+r" (in), "+r" (consts) - : "r" (out), - "i" (SBC_PROTO_FIXED4_SCALE) - : "memory", - "d0", "d1", "d2", 
"d3", "d4", "d5", - "d6", "d7", "d8", "d9", "d10", "d11"); -} - -static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, - const FIXED_T *consts) -{ - /* TODO: merge even and odd cases (or even merge all four calls to this - * function) in order to have only aligned reads from 'in' array - * and reduce number of load instructions */ - asm volatile ( - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmull.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmull.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmull.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmull.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - "vmlal.s16 q8, d6, d10\n" - "vld1.16 {d4, d5}, [%0, :64]!\n" - "vmlal.s16 q9, d7, d11\n" - "vld1.16 {d8, d9}, [%1, :128]!\n" - - "vmlal.s16 q6, d4, d8\n" - "vld1.16 {d6, d7}, [%0, :64]!\n" - "vmlal.s16 q7, d5, d9\n" - "vld1.16 {d10, d11}, [%1, :128]!\n" - - "vmlal.s16 q8, d6, d10\n" - "vmlal.s16 q9, d7, d11\n" - - "vpadd.s32 d0, d12, d13\n" - "vpadd.s32 d1, d14, d15\n" - "vpadd.s32 d2, d16, d17\n" - "vpadd.s32 d3, d18, d19\n" - - "vrshr.s32 q0, q0, %3\n" - "vrshr.s32 q1, q1, %3\n" - "vmovn.s32 d0, q0\n" - "vmovn.s32 d1, q1\n" - - "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ - "vdup.i32 
d1, d0[1]\n" /* TODO: can be eliminated */ - "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmull.s16 q6, d4, d0\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmull.s16 q7, d5, d0\n" - "vmull.s16 q8, d6, d0\n" - "vmull.s16 q9, d7, d0\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d1\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d1\n" - "vmlal.s16 q8, d6, d1\n" - "vmlal.s16 q9, d7, d1\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d2\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d2\n" - "vmlal.s16 q8, d6, d2\n" - "vmlal.s16 q9, d7, d2\n" - - "vld1.16 {d4, d5}, [%1, :128]!\n" - "vmlal.s16 q6, d4, d3\n" - "vld1.16 {d6, d7}, [%1, :128]!\n" - "vmlal.s16 q7, d5, d3\n" - "vmlal.s16 q8, d6, d3\n" - "vmlal.s16 q9, d7, d3\n" - - "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ - "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ - "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ - "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ - - "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" - : "+r" (in), "+r" (consts) - : "r" (out), - "i" (SBC_PROTO_FIXED8_SCALE) - : "memory", - "d0", "d1", "d2", "d3", "d4", "d5", - "d6", "d7", "d8", "d9", "d10", "d11", - "d12", "d13", "d14", "d15", "d16", "d17", - "d18", "d19"); -} - -static inline void sbc_analyze_4b_4s_neon(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); - out += out_stride; - _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); - out += out_stride; - _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); -} - -static inline void sbc_analyze_4b_8s_neon(int16_t *x, - int32_t *out, int out_stride) -{ - /* Analyze blocks */ - _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); - 
out += out_stride; - _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); - out += out_stride; - _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); - out += out_stride; - _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); -} - -void sbc_init_primitives_neon(struct sbc_encoder_state *state) -{ - state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; - state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; - state->implementation_info = "NEON"; -} - -#endif diff --git a/src/modules/bluetooth/sbc_primitives_neon.h b/src/modules/bluetooth/sbc_primitives_neon.h deleted file mode 100644 index 30766ed8..00000000 --- a/src/modules/bluetooth/sbc_primitives_neon.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef __SBC_PRIMITIVES_NEON_H -#define __SBC_PRIMITIVES_NEON_H - -#include "sbc_primitives.h" - -#if defined(__GNUC__) && defined(__ARM_NEON__) && \ - !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) - -#define SBC_BUILD_WITH_NEON_SUPPORT - -void sbc_init_primitives_neon(struct sbc_encoder_state *encoder_state); - -#endif - -#endif diff --git a/src/modules/bluetooth/sbc_tables.h b/src/modules/bluetooth/sbc_tables.h deleted file mode 100644 index 0057c73f..00000000 --- a/src/modules/bluetooth/sbc_tables.h +++ /dev/null @@ -1,659 +0,0 @@ -/* - * - * Bluetooth low-complexity, subband codec (SBC) library - * - * Copyright (C) 2004-2009 Marcel Holtmann - * Copyright (C) 2004-2005 Henryk Ploetz - * Copyright (C) 2005-2006 Brad Midgley - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -/* A2DP specification: Appendix B, page 69 */ -static const int sbc_offset4[4][4] = { - { -1, 0, 0, 0 }, - { -2, 0, 0, 1 }, - { -2, 0, 0, 1 }, - { -2, 0, 0, 1 } -}; - -/* A2DP specification: Appendix B, page 69 */ -static const int sbc_offset8[4][8] = { - { -2, 0, 0, 0, 0, 0, 0, 1 }, - { -3, 0, 0, 0, 0, 0, 1, 2 }, - { -4, 0, 0, 0, 0, 0, 1, 2 }, - { -4, 0, 0, 0, 0, 0, 1, 2 } -}; - - -#define SS4(val) ASR(val, SCALE_SPROTO4_TBL) -#define SS8(val) ASR(val, SCALE_SPROTO8_TBL) -#define SN4(val) ASR(val, SCALE_NPROTO4_TBL) -#define SN8(val) ASR(val, SCALE_NPROTO8_TBL) - -static const int32_t sbc_proto_4_40m0[] = { - SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8), - SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8), - SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330), - SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00), - SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7) -}; - -static const int32_t sbc_proto_4_40m1[] = { - SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475), - SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370), - SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b), - SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b), - SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7) -}; - -static const int32_t sbc_proto_8_80m0[] = { - SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100), - SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0), - SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c), - SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705), - SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db), 
- SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948), - SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200), - SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358), - SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31), - SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170) -}; - -static const int32_t sbc_proto_8_80m1[] = { - SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620), - SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40), - SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01), - SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94), - SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b), - SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68), - SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20), - SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410), - SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da), - SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a) -}; - -static const int32_t synmatrix4[8][4] = { - { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) }, - { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) }, - { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) }, - { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) }, - { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) }, - { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }, - { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) }, - { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) } -}; - -static const int32_t synmatrix8[16][8] = { - { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798), - SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) }, - { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), 
SN8(0x06a6d988), - SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) }, - { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac), - SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) }, - { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10), - SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) }, - { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), - SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) }, - { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0), - SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) }, - { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54), - SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) }, - { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678), - SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) }, - { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868), - SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) }, - { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), - SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }, - { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), - SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, - { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), - SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, - { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), - SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) }, - { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), - SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, - { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), - SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, - { SN8(0xf9592678), SN8(0x018f8b84), 
SN8(0x07d8a5f0), SN8(0x0471ced0), - SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } -}; - -/* Uncomment the following line to enable high precision build of SBC encoder */ - -/* #define SBC_HIGH_PRECISION */ - -#ifdef SBC_HIGH_PRECISION -#define FIXED_A int64_t /* data type for fixed point accumulator */ -#define FIXED_T int32_t /* data type for fixed point constants */ -#define SBC_FIXED_EXTRA_BITS 16 -#else -#define FIXED_A int32_t /* data type for fixed point accumulator */ -#define FIXED_T int16_t /* data type for fixed point constants */ -#define SBC_FIXED_EXTRA_BITS 0 -#endif - -/* A2DP specification: Section 12.8 Tables - * - * Original values are premultiplied by 2 for better precision (that is the - * maximum which is possible without overflows) - * - * Note: in each block of 8 numbers sign was changed for elements 2 and 7 - * in order to compensate the same change applied to cos_table_fixed_4 - */ -#define SBC_PROTO_FIXED4_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) -#define F_PROTO4(x) (FIXED_A) ((x * 2) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_PROTO4(x) -static const FIXED_T _sbc_proto_fixed4[40] = { - F(0.00000000E+00), F(5.36548976E-04), - -F(1.49188357E-03), F(2.73370904E-03), - F(3.83720193E-03), F(3.89205149E-03), - F(1.86581691E-03), F(3.06012286E-03), - - F(1.09137620E-02), F(2.04385087E-02), - -F(2.88757392E-02), F(3.21939290E-02), - F(2.58767811E-02), F(6.13245186E-03), - -F(2.88217274E-02), F(7.76463494E-02), - - F(1.35593274E-01), F(1.94987841E-01), - -F(2.46636662E-01), F(2.81828203E-01), - F(2.94315332E-01), F(2.81828203E-01), - F(2.46636662E-01), -F(1.94987841E-01), - - -F(1.35593274E-01), -F(7.76463494E-02), - F(2.88217274E-02), F(6.13245186E-03), - F(2.58767811E-02), F(3.21939290E-02), - F(2.88757392E-02), -F(2.04385087E-02), - - -F(1.09137620E-02), -F(3.06012286E-03), - -F(1.86581691E-03), F(3.89205149E-03), - F(3.83720193E-03), F(2.73370904E-03), - 
F(1.49188357E-03), -F(5.36548976E-04), -}; -#undef F - -/* - * To produce this cosine matrix in Octave: - * - * b = zeros(4, 8); - * for i = 0:3 - * for j = 0:7 b(i+1, j+1) = cos((i + 0.5) * (j - 2) * (pi/4)) - * endfor - * endfor; - * printf("%.10f, ", b'); - * - * Note: in each block of 8 numbers sign was changed for elements 2 and 7 - * - * Change of sign for element 2 allows to replace constant 1.0 (not - * representable in Q15 format) with -1.0 (fine with Q15). - * Changed sign for element 7 allows to have more similar constants - * and simplify subband filter function code. - */ -#define SBC_COS_TABLE_FIXED4_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F_COS4(x) (FIXED_A) ((x) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_COS4(x) -static const FIXED_T cos_table_fixed_4[32] = { - F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), - F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), - - -F(0.7071067812), F(0.3826834324), -F(1.0000000000), F(0.3826834324), - -F(0.7071067812), -F(0.9238795325), -F(0.0000000000), -F(0.9238795325), - - -F(0.7071067812), -F(0.3826834324), -F(1.0000000000), -F(0.3826834324), - -F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325), - - F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), - F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), -}; -#undef F - -/* A2DP specification: Section 12.8 Tables - * - * Original values are premultiplied by 4 for better precision (that is the - * maximum which is possible without overflows) - * - * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 - * in order to compensate the same change applied to cos_table_fixed_8 - */ -#define SBC_PROTO_FIXED8_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) -#define F_PROTO8(x) (FIXED_A) ((x * 2) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) 
-#define F(x) F_PROTO8(x) -static const FIXED_T _sbc_proto_fixed8[80] = { - F(0.00000000E+00), F(1.56575398E-04), - F(3.43256425E-04), F(5.54620202E-04), - -F(8.23919506E-04), F(1.13992507E-03), - F(1.47640169E-03), F(1.78371725E-03), - F(2.01182542E-03), F(2.10371989E-03), - F(1.99454554E-03), F(1.61656283E-03), - F(9.02154502E-04), F(1.78805361E-04), - F(1.64973098E-03), F(3.49717454E-03), - - F(5.65949473E-03), F(8.02941163E-03), - F(1.04584443E-02), F(1.27472335E-02), - -F(1.46525263E-02), F(1.59045603E-02), - F(1.62208471E-02), F(1.53184106E-02), - F(1.29371806E-02), F(8.85757540E-03), - F(2.92408442E-03), -F(4.91578024E-03), - -F(1.46404076E-02), F(2.61098752E-02), - F(3.90751381E-02), F(5.31873032E-02), - - F(6.79989431E-02), F(8.29847578E-02), - F(9.75753918E-02), F(1.11196689E-01), - -F(1.23264548E-01), F(1.33264415E-01), - F(1.40753505E-01), F(1.45389847E-01), - F(1.46955068E-01), F(1.45389847E-01), - F(1.40753505E-01), F(1.33264415E-01), - F(1.23264548E-01), -F(1.11196689E-01), - -F(9.75753918E-02), -F(8.29847578E-02), - - -F(6.79989431E-02), -F(5.31873032E-02), - -F(3.90751381E-02), -F(2.61098752E-02), - F(1.46404076E-02), -F(4.91578024E-03), - F(2.92408442E-03), F(8.85757540E-03), - F(1.29371806E-02), F(1.53184106E-02), - F(1.62208471E-02), F(1.59045603E-02), - F(1.46525263E-02), -F(1.27472335E-02), - -F(1.04584443E-02), -F(8.02941163E-03), - - -F(5.65949473E-03), -F(3.49717454E-03), - -F(1.64973098E-03), -F(1.78805361E-04), - -F(9.02154502E-04), F(1.61656283E-03), - F(1.99454554E-03), F(2.10371989E-03), - F(2.01182542E-03), F(1.78371725E-03), - F(1.47640169E-03), F(1.13992507E-03), - F(8.23919506E-04), -F(5.54620202E-04), - -F(3.43256425E-04), -F(1.56575398E-04), -}; -#undef F - -/* - * To produce this cosine matrix in Octave: - * - * b = zeros(8, 16); - * for i = 0:7 - * for j = 0:15 b(i+1, j+1) = cos((i + 0.5) * (j - 4) * (pi/8)) - * endfor endfor; - * printf("%.10f, ", b'); - * - * Note: in each block of 16 numbers sign was changed for elements 4, 
13, 14, 15 - * - * Change of sign for element 4 allows to replace constant 1.0 (not - * representable in Q15 format) with -1.0 (fine with Q15). - * Changed signs for elements 13, 14, 15 allow to have more similar constants - * and simplify subband filter function code. - */ -#define SBC_COS_TABLE_FIXED8_SCALE \ - ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F_COS8(x) (FIXED_A) ((x) * \ - ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) -#define F(x) F_COS8(x) -static const FIXED_T cos_table_fixed_8[128] = { - F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), - -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), - F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), - F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), - - -F(0.7071067812), -F(0.1950903220), F(0.3826834324), F(0.8314696123), - -F(1.0000000000), F(0.8314696123), F(0.3826834324), -F(0.1950903220), - -F(0.7071067812), -F(0.9807852804), -F(0.9238795325), -F(0.5555702330), - -F(0.0000000000), -F(0.5555702330), -F(0.9238795325), -F(0.9807852804), - - -F(0.7071067812), -F(0.9807852804), -F(0.3826834324), F(0.5555702330), - -F(1.0000000000), F(0.5555702330), -F(0.3826834324), -F(0.9807852804), - -F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123), - F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), - - F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), - -F(1.0000000000), F(0.1950903220), -F(0.9238795325), -F(0.5555702330), - F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), - -F(0.0000000000), -F(0.9807852804), -F(0.3826834324), F(0.8314696123), - - F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), - -F(1.0000000000), -F(0.1950903220), -F(0.9238795325), F(0.5555702330), - F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), - F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), - - 
-F(0.7071067812), F(0.9807852804), -F(0.3826834324), -F(0.5555702330), - -F(1.0000000000), -F(0.5555702330), -F(0.3826834324), F(0.9807852804), - -F(0.7071067812), -F(0.1950903220), F(0.9238795325), -F(0.8314696123), - -F(0.0000000000), -F(0.8314696123), F(0.9238795325), -F(0.1950903220), - - -F(0.7071067812), F(0.1950903220), F(0.3826834324), -F(0.8314696123), - -F(1.0000000000), -F(0.8314696123), F(0.3826834324), F(0.1950903220), - -F(0.7071067812), F(0.9807852804), -F(0.9238795325), F(0.5555702330), - -F(0.0000000000), F(0.5555702330), -F(0.9238795325), F(0.9807852804), - - F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), - -F(1.0000000000), -F(0.9807852804), F(0.9238795325), -F(0.8314696123), - F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), - -F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330), -}; -#undef F - -/* - * Enforce 16 byte alignment for the data, which is supposed to be used - * with SIMD optimized code. - */ - -#define SBC_ALIGN_BITS 4 -#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1) - -#ifdef __GNUC__ -#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS)))) -#else -#define SBC_ALIGNED -#endif - -/* - * Constant tables for the use in SIMD optimized analysis filters - * Each table consists of two parts: - * 1. reordered "proto" table - * 2. 
reordered "cos" table - * - * Due to non-symmetrical reordering, separate tables for "even" - * and "odd" cases are needed - */ - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = { -#define C0 1.0932568993 -#define C1 1.3056875580 -#define C2 1.3056875580 -#define C3 1.6772280856 - -#define F(x) F_PROTO4(x) - F(0.00000000E+00 * C0), F(3.83720193E-03 * C0), - F(5.36548976E-04 * C1), F(2.73370904E-03 * C1), - F(3.06012286E-03 * C2), F(3.89205149E-03 * C2), - F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3), - F(1.09137620E-02 * C0), F(2.58767811E-02 * C0), - F(2.04385087E-02 * C1), F(3.21939290E-02 * C1), - F(7.76463494E-02 * C2), F(6.13245186E-03 * C2), - F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3), - F(1.35593274E-01 * C0), F(2.94315332E-01 * C0), - F(1.94987841E-01 * C1), F(2.81828203E-01 * C1), - -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2), - F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3), - -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0), - -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1), - -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2), - F(0.00000000E+00 * C3), F(2.88217274E-02 * C3), - -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0), - -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1), - -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2), - F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3), -#undef F -#define F(x) F_COS4(x) - F(0.7071067812 / C0), F(0.9238795325 / C1), - -F(0.7071067812 / C0), F(0.3826834324 / C1), - -F(0.7071067812 / C0), -F(0.3826834324 / C1), - F(0.7071067812 / C0), -F(0.9238795325 / C1), - F(0.3826834324 / C2), -F(1.0000000000 / C3), - -F(0.9238795325 / C2), -F(1.0000000000 / C3), - F(0.9238795325 / C2), -F(1.0000000000 / C3), - -F(0.3826834324 / C2), -F(1.0000000000 / C3), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -}; - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = { -#define C0 1.3056875580 -#define C1 1.6772280856 -#define C2 1.0932568993 -#define C3 
1.3056875580 - -#define F(x) F_PROTO4(x) - F(2.73370904E-03 * C0), F(5.36548976E-04 * C0), - -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1), - F(3.83720193E-03 * C2), F(1.09137620E-02 * C2), - F(3.89205149E-03 * C3), F(3.06012286E-03 * C3), - F(3.21939290E-02 * C0), F(2.04385087E-02 * C0), - -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1), - F(2.58767811E-02 * C2), F(1.35593274E-01 * C2), - F(6.13245186E-03 * C3), F(7.76463494E-02 * C3), - F(2.81828203E-01 * C0), F(1.94987841E-01 * C0), - -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1), - F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2), - F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3), - F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0), - F(2.88217274E-02 * C1), F(0.00000000E+00 * C1), - F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2), - F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3), - F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0), - -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1), - F(3.83720193E-03 * C2), F(0.00000000E+00 * C2), - F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3), -#undef F -#define F(x) F_COS4(x) - F(0.9238795325 / C0), -F(1.0000000000 / C1), - F(0.3826834324 / C0), -F(1.0000000000 / C1), - -F(0.3826834324 / C0), -F(1.0000000000 / C1), - -F(0.9238795325 / C0), -F(1.0000000000 / C1), - F(0.7071067812 / C2), F(0.3826834324 / C3), - -F(0.7071067812 / C2), -F(0.9238795325 / C3), - -F(0.7071067812 / C2), F(0.9238795325 / C3), - F(0.7071067812 / C2), -F(0.3826834324 / C3), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -}; - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = { -#define C0 2.7906148894 -#define C1 2.4270044280 -#define C2 2.8015616024 -#define C3 3.1710363741 -#define C4 2.5377944043 -#define C5 2.4270044280 -#define C6 2.8015616024 -#define C7 3.1710363741 - -#define F(x) F_PROTO8(x) - F(0.00000000E+00 * C0), F(2.01182542E-03 * C0), - F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), - F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), - 
F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), - -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4), - F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), - F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), - F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), - F(5.65949473E-03 * C0), F(1.29371806E-02 * C0), - F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), - F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), - F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), - -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4), - F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), - F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), - -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), - F(6.79989431E-02 * C0), F(1.46955068E-01 * C0), - F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), - F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), - F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), - -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4), - F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), - F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), - F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), - -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0), - -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), - -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), - -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), - F(1.46404076E-02 * C4), F(0.00000000E+00 * C4), - F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), - F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), - F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), - -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0), - -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), - -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), - -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), - -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4), - F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), - F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), - F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), -#undef F -#define F(x) F_COS8(x) - F(0.7071067812 / C0), F(0.8314696123 / C1), - -F(0.7071067812 / C0), -F(0.1950903220 / C1), 
- -F(0.7071067812 / C0), -F(0.9807852804 / C1), - F(0.7071067812 / C0), -F(0.5555702330 / C1), - F(0.7071067812 / C0), F(0.5555702330 / C1), - -F(0.7071067812 / C0), F(0.9807852804 / C1), - -F(0.7071067812 / C0), F(0.1950903220 / C1), - F(0.7071067812 / C0), -F(0.8314696123 / C1), - F(0.9238795325 / C2), F(0.9807852804 / C3), - F(0.3826834324 / C2), F(0.8314696123 / C3), - -F(0.3826834324 / C2), F(0.5555702330 / C3), - -F(0.9238795325 / C2), F(0.1950903220 / C3), - -F(0.9238795325 / C2), -F(0.1950903220 / C3), - -F(0.3826834324 / C2), -F(0.5555702330 / C3), - F(0.3826834324 / C2), -F(0.8314696123 / C3), - F(0.9238795325 / C2), -F(0.9807852804 / C3), - -F(1.0000000000 / C4), F(0.5555702330 / C5), - -F(1.0000000000 / C4), -F(0.9807852804 / C5), - -F(1.0000000000 / C4), F(0.1950903220 / C5), - -F(1.0000000000 / C4), F(0.8314696123 / C5), - -F(1.0000000000 / C4), -F(0.8314696123 / C5), - -F(1.0000000000 / C4), -F(0.1950903220 / C5), - -F(1.0000000000 / C4), F(0.9807852804 / C5), - -F(1.0000000000 / C4), -F(0.5555702330 / C5), - F(0.3826834324 / C6), F(0.1950903220 / C7), - -F(0.9238795325 / C6), -F(0.5555702330 / C7), - F(0.9238795325 / C6), F(0.8314696123 / C7), - -F(0.3826834324 / C6), -F(0.9807852804 / C7), - -F(0.3826834324 / C6), F(0.9807852804 / C7), - F(0.9238795325 / C6), -F(0.8314696123 / C7), - -F(0.9238795325 / C6), F(0.5555702330 / C7), - F(0.3826834324 / C6), -F(0.1950903220 / C7), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -#undef C4 -#undef C5 -#undef C6 -#undef C7 -}; - -static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = { -#define C0 2.5377944043 -#define C1 2.4270044280 -#define C2 2.8015616024 -#define C3 3.1710363741 -#define C4 2.7906148894 -#define C5 2.4270044280 -#define C6 2.8015616024 -#define C7 3.1710363741 - -#define F(x) F_PROTO8(x) - F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0), - F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), - F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), - F(5.54620202E-04 * 
C3), F(1.13992507E-03 * C3), - F(2.01182542E-03 * C4), F(5.65949473E-03 * C4), - F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), - F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), - F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), - F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0), - F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), - F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), - F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), - F(1.29371806E-02 * C4), F(6.79989431E-02 * C4), - F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), - F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), - -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), - F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0), - F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), - F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), - F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), - F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4), - F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), - F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), - F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), - F(0.00000000E+00 * C0), F(1.46404076E-02 * C0), - -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), - -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), - -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), - F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4), - F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), - F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), - F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), - F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0), - -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), - -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), - -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), - F(2.01182542E-03 * C4), F(0.00000000E+00 * C4), - F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), - F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), - F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), -#undef F -#define F(x) F_COS8(x) - -F(1.0000000000 / C0), F(0.8314696123 / C1), - -F(1.0000000000 / C0), -F(0.1950903220 / C1), - -F(1.0000000000 / 
C0), -F(0.9807852804 / C1), - -F(1.0000000000 / C0), -F(0.5555702330 / C1), - -F(1.0000000000 / C0), F(0.5555702330 / C1), - -F(1.0000000000 / C0), F(0.9807852804 / C1), - -F(1.0000000000 / C0), F(0.1950903220 / C1), - -F(1.0000000000 / C0), -F(0.8314696123 / C1), - F(0.9238795325 / C2), F(0.9807852804 / C3), - F(0.3826834324 / C2), F(0.8314696123 / C3), - -F(0.3826834324 / C2), F(0.5555702330 / C3), - -F(0.9238795325 / C2), F(0.1950903220 / C3), - -F(0.9238795325 / C2), -F(0.1950903220 / C3), - -F(0.3826834324 / C2), -F(0.5555702330 / C3), - F(0.3826834324 / C2), -F(0.8314696123 / C3), - F(0.9238795325 / C2), -F(0.9807852804 / C3), - F(0.7071067812 / C4), F(0.5555702330 / C5), - -F(0.7071067812 / C4), -F(0.9807852804 / C5), - -F(0.7071067812 / C4), F(0.1950903220 / C5), - F(0.7071067812 / C4), F(0.8314696123 / C5), - F(0.7071067812 / C4), -F(0.8314696123 / C5), - -F(0.7071067812 / C4), -F(0.1950903220 / C5), - -F(0.7071067812 / C4), F(0.9807852804 / C5), - F(0.7071067812 / C4), -F(0.5555702330 / C5), - F(0.3826834324 / C6), F(0.1950903220 / C7), - -F(0.9238795325 / C6), -F(0.5555702330 / C7), - F(0.9238795325 / C6), F(0.8314696123 / C7), - -F(0.3826834324 / C6), -F(0.9807852804 / C7), - -F(0.3826834324 / C6), F(0.9807852804 / C7), - F(0.9238795325 / C6), -F(0.8314696123 / C7), - -F(0.9238795325 / C6), F(0.5555702330 / C7), - F(0.3826834324 / C6), -F(0.1950903220 / C7), -#undef F - -#undef C0 -#undef C1 -#undef C2 -#undef C3 -#undef C4 -#undef C5 -#undef C6 -#undef C7 -}; -- cgit From 4d2f0daba17617bb60e0bd40f7a0329181dc09e4 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:01:19 -0300 Subject: sbc: ensure 16-byte buffer position alignment for 4 subbands encoding Buffer position in X array was not always 16-byte aligned. 16-byte alignment is strictly required for powerpc altivec simd optimizations because altivec does not have support for unaligned vector loads at all.
--- src/modules/bluetooth/sbc/sbc.c | 2 +- src/modules/bluetooth/sbc/sbc_primitives.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index 5157c70f..5c546ca8 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -924,7 +924,7 @@ static void sbc_encoder_init(struct sbc_encoder_state *state, const struct sbc_frame *frame) { memset(&state->X, 0, sizeof(state->X)); - state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9; + state->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7; sbc_init_primitives(state); } diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c index 6b0be3f5..2105280e 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives.c +++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -231,12 +231,12 @@ static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( /* handle X buffer wraparound */ if (position < nsamples) { if (nchannels > 0) - memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position], + memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position], 36 * sizeof(int16_t)); if (nchannels > 1) - memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position], + memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position], 36 * sizeof(int16_t)); - position = SBC_X_BUFFER_SIZE - 36; + position = SBC_X_BUFFER_SIZE - 40; } #define PCM(i) (big_endian ? \ -- cgit From 84d91fb7084118730229fb83e2fc5479bc8f35f2 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:07:38 -0300 Subject: sbc: added saturated clipping of decoder output to 16-bit This prevents overflows and audible artefacts for the audio files which originally had loudness maximized. 
Music from audio CD disks is an example of such files, see http://en.wikipedia.org/wiki/Loudness_war --- src/modules/bluetooth/sbc/sbc.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index 5c546ca8..3af0a03c 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -534,6 +534,16 @@ static void sbc_decoder_init(struct sbc_decoder_state *state, state->offset[ch][i] = (10 * i + 10); } +static SBC_ALWAYS_INLINE int16_t sbc_clip16(int32_t s) +{ + if (s > 0x7FFF) + return 0x7FFF; + else if (s < -0x8000) + return -0x8000; + else + return s; +} + static inline void sbc_synthesize_four(struct sbc_decoder_state *state, struct sbc_frame *frame, int ch, int blk) { @@ -562,7 +572,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state, k = (i + 4) & 0xf; /* Store in output, Q0 */ - frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED1( + frame->pcm_sample[ch][blk * 4 + i] = sbc_clip16(SCALE4_STAGED1( MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], @@ -572,7 +582,7 @@ static inline void sbc_synthesize_four(struct sbc_decoder_state *state, MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], - MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))); + MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4])))))))))))); } } @@ -607,8 +617,8 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, for (idx = 0, i = 0; i < 8; i++, idx += 5) { k = (i + 8) & 0xf; - /* Store in output */ - frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED1( // Q0 + /* Store in output, Q0 */ + frame->pcm_sample[ch][blk * 8 + i] = sbc_clip16(SCALE8_STAGED1( MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], 
MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], @@ -618,7 +628,7 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], - MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))); + MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4])))))))))))); } } -- cgit From 16a05e52c635e192eb878ec5b3a87e3e00d91aed Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Mon, 14 Mar 2011 15:09:50 -0300 Subject: sbc: Fix redundant null check on calling free() Issues found by smatch static check: http://smatch.sourceforge.net/ --- src/modules/bluetooth/sbc/sbc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index 3af0a03c..5c5c1112 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -1141,8 +1141,7 @@ void sbc_finish(sbc_t *sbc) if (!sbc) return; - if (sbc->priv_alloc_base) - free(sbc->priv_alloc_base); + free(sbc->priv_alloc_base); memset(sbc, 0, sizeof(sbc_t)); } -- cgit From c2b2fc1640b380b5bb17960975ae6b21c3f8de2d Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:16:30 -0300 Subject: sbc: new 'sbc_calc_scalefactors_j' function added to sbc primitives The code for scale factors calculation with joint stereo support has been moved to a separate function. It can get platform-specific SIMD optimizations later for best possible performance. But even this change in C code improves performance because of the use of __builtin_clz() instead of loops similar to what was done to sbc_calc_scalefactors earlier. 
Also technically it does loop unrolling by processing two channels at once, which might be either good or bad for performance (if register pressure is increased and more data is spilled to memory). But the benchmark from a 32-bit x86 system (pentium-m) shows that it got clearly faster: $ time ./sbcenc.old -b53 -s8 -j test.au > /dev/null real 0m1.868s user 0m1.808s sys 0m0.048s $ time ./sbcenc.new -b53 -s8 -j test.au > /dev/null real 0m1.742s user 0m1.668s sys 0m0.064s --- src/modules/bluetooth/sbc/sbc.c | 91 ++++++++---------------------- src/modules/bluetooth/sbc/sbc_primitives.c | 75 ++++++++++++++++++++++++ src/modules/bluetooth/sbc/sbc_primitives.h | 4 ++ 3 files changed, 102 insertions(+), 68 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index 5c5c1112..512341fa 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -744,7 +744,7 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state, static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( uint8_t *data, struct sbc_frame *frame, size_t len, - int frame_subbands, int frame_channels) + int frame_subbands, int frame_channels, int joint) { /* Bitstream writer starts from the fourth byte */ uint8_t *data_ptr = data + 4; @@ -801,63 +801,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( crc_pos = 16; if (frame->mode == JOINT_STEREO) { - /* like frame->sb_sample but joint stereo */ - int32_t sb_sample_j[16][2]; - /* scalefactor and scale_factor in joint case */ - uint32_t scalefactor_j[2]; - uint8_t scale_factor_j[2]; - - uint8_t joint = 0; - frame->joint = 0; - - for (sb = 0; sb < frame_subbands - 1; sb++) { - scale_factor_j[0] = 0; - scalefactor_j[0] = 2 << SCALE_OUT_BITS; - scale_factor_j[1] = 0; - scalefactor_j[1] = 2 << SCALE_OUT_BITS; - - for (blk = 0; blk < frame->blocks; blk++) { - uint32_t tmp; - /* Calculate joint stereo signal */ - sb_sample_j[blk][0] = - ASR(frame->sb_sample_f[blk][0][sb], 1) + -
ASR(frame->sb_sample_f[blk][1][sb], 1); - sb_sample_j[blk][1] = - ASR(frame->sb_sample_f[blk][0][sb], 1) - - ASR(frame->sb_sample_f[blk][1][sb], 1); - - /* calculate scale_factor_j and scalefactor_j for joint case */ - tmp = fabs(sb_sample_j[blk][0]); - while (scalefactor_j[0] < tmp) { - scale_factor_j[0]++; - scalefactor_j[0] *= 2; - } - tmp = fabs(sb_sample_j[blk][1]); - while (scalefactor_j[1] < tmp) { - scale_factor_j[1]++; - scalefactor_j[1] *= 2; - } - } - - /* decide whether to join this subband */ - if ((frame->scale_factor[0][sb] + - frame->scale_factor[1][sb]) > - (scale_factor_j[0] + - scale_factor_j[1])) { - /* use joint stereo for this subband */ - joint |= 1 << (frame_subbands - 1 - sb); - frame->joint |= 1 << sb; - frame->scale_factor[0][sb] = scale_factor_j[0]; - frame->scale_factor[1][sb] = scale_factor_j[1]; - for (blk = 0; blk < frame->blocks; blk++) { - frame->sb_sample_f[blk][0][sb] = - sb_sample_j[blk][0]; - frame->sb_sample_f[blk][1][sb] = - sb_sample_j[blk][1]; - } - } - } - PUT_BITS(data_ptr, bits_cache, bits_count, joint, frame_subbands); crc_header[crc_pos >> 3] = joint; @@ -915,18 +858,23 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( return data_ptr - data; } -static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) +static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len, + int joint) { if (frame->subbands == 4) { if (frame->channels == 1) - return sbc_pack_frame_internal(data, frame, len, 4, 1); + return sbc_pack_frame_internal( + data, frame, len, 4, 1, joint); else - return sbc_pack_frame_internal(data, frame, len, 4, 2); + return sbc_pack_frame_internal( + data, frame, len, 4, 2, joint); } else { if (frame->channels == 1) - return sbc_pack_frame_internal(data, frame, len, 8, 1); + return sbc_pack_frame_internal( + data, frame, len, 8, 1, joint); else - return sbc_pack_frame_internal(data, frame, len, 8, 2); + return sbc_pack_frame_internal( + data, frame, len, 8, 2, joint); } } @@ 
-1124,11 +1072,18 @@ ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); - priv->enc_state.sbc_calc_scalefactors( - priv->frame.sb_sample_f, priv->frame.scale_factor, - priv->frame.blocks, priv->frame.channels, priv->frame.subbands); - - framelen = sbc_pack_frame(output, &priv->frame, output_len); + if (priv->frame.mode == JOINT_STEREO) { + int j = priv->enc_state.sbc_calc_scalefactors_j( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len, j); + } else { + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, + priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len, 0); + } if (written) *written = framelen; diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c index 2105280e..82cd399d 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives.c +++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -439,6 +439,80 @@ static void sbc_calc_scalefactors( } } +static int sbc_calc_scalefactors_j( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands) +{ + int blk, joint = 0; + int32_t tmp0, tmp1; + uint32_t x, y; + + /* last subband does not use joint stereo */ + int sb = subbands - 1; + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = fabs(sb_sample_f[blk][0][sb]); + tmp1 = fabs(sb_sample_f[blk][1][sb]); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x); + scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y); + + /* the rest of subbands can use joint stereo */ + while (--sb >= 0) { + int32_t sb_sample_j[16][2]; + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for 
(blk = 0; blk < blocks; blk++) { + tmp0 = sb_sample_f[blk][0][sb]; + tmp1 = sb_sample_f[blk][1][sb]; + sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1); + sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1); + tmp0 = fabs(tmp0); + tmp1 = fabs(tmp1); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(y); + x = 1 << SCALE_OUT_BITS; + y = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + tmp0 = fabs(sb_sample_j[blk][0]); + tmp1 = fabs(sb_sample_j[blk][1]); + if (tmp0 != 0) + x |= tmp0 - 1; + if (tmp1 != 0) + y |= tmp1 - 1; + } + x = (31 - SCALE_OUT_BITS) - sbc_clz(x); + y = (31 - SCALE_OUT_BITS) - sbc_clz(y); + + /* decide whether to use joint stereo for this subband */ + if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { + joint |= 1 << (subbands - 1 - sb); + scale_factor[0][sb] = x; + scale_factor[1][sb] = y; + for (blk = 0; blk < blocks; blk++) { + sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; + sb_sample_f[blk][1][sb] = sb_sample_j[blk][1]; + } + } + } + + /* bitmask with the information about subbands using joint stereo */ + return joint; +} + /* * Detect CPU features and setup function pointers */ @@ -456,6 +530,7 @@ void sbc_init_primitives(struct sbc_encoder_state *state) /* Default implementation for scale factors calculation */ state->sbc_calc_scalefactors = sbc_calc_scalefactors; + state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; state->implementation_info = "Generic C"; /* X86/AMD64 optimizations */ diff --git a/src/modules/bluetooth/sbc/sbc_primitives.h b/src/modules/bluetooth/sbc/sbc_primitives.h index 3d01c115..b4b9df2f 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives.h +++ b/src/modules/bluetooth/sbc/sbc_primitives.h @@ -62,6 +62,10 @@ struct sbc_encoder_state { void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], uint32_t scale_factor[2][8], int blocks, int channels, int 
subbands); + /* Scale factors calculation with joint stereo support */ + int (*sbc_calc_scalefactors_j)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands); const char *implementation_info; }; -- cgit From 1f617ea9ec74956688a30da3901905c9abe34a65 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:17:31 -0300 Subject: sbc: MMX optimization for scale factors calculation Improves SBC encoding performance when joint stereo is not used. Benchmarked on Pentium-M: == Before: == $ time ./sbcenc -b53 -s8 test.au > /dev/null real 0m1.439s user 0m1.336s sys 0m0.104s samples % image name symbol name 8642 33.7473 sbcenc sbc_pack_frame 5873 22.9342 sbcenc sbc_analyze_4b_8s_mmx 4435 17.3188 sbcenc sbc_calc_scalefactors 4285 16.7331 sbcenc sbc_calculate_bits 1942 7.5836 sbcenc sbc_enc_process_input_8s_be 322 1.2574 sbcenc sbc_encode == After: == $ time ./sbcenc -b53 -s8 test.au > /dev/null real 0m1.319s user 0m1.220s sys 0m0.084s samples % image name symbol name 8706 37.9959 sbcenc sbc_pack_frame 5740 25.0513 sbcenc sbc_analyze_4b_8s_mmx 4307 18.7972 sbcenc sbc_calculate_bits 1937 8.4537 sbcenc sbc_enc_process_input_8s_be 1801 7.8602 sbcenc sbc_calc_scalefactors_mmx 307 1.3399 sbcenc sbc_encode --- src/modules/bluetooth/sbc/sbc_primitives_mmx.c | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c index 08e9ca28..d8373b3a 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c @@ -275,6 +275,59 @@ static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, asm volatile ("emms\n"); } +static void sbc_calc_scalefactors_mmx( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + static const SBC_ALIGNED int32_t consts[2] = { + 1 << SCALE_OUT_BITS, + 1 << SCALE_OUT_BITS, + }; + int ch, 
sb; + intptr_t blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb += 2) { + blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0])); + asm volatile ( + "movq (%4), %%mm0\n" + "1:\n" + "movq (%1, %0), %%mm1\n" + "pxor %%mm2, %%mm2\n" + "pcmpgtd %%mm2, %%mm1\n" + "paddd (%1, %0), %%mm1\n" + "pcmpgtd %%mm1, %%mm2\n" + "pxor %%mm2, %%mm1\n" + + "por %%mm1, %%mm0\n" + + "sub %2, %0\n" + "jns 1b\n" + + "movd %%mm0, %k0\n" + "psrlq $32, %%mm0\n" + "bsrl %k0, %k0\n" + "subl %5, %k0\n" + "movl %k0, (%3)\n" + + "movd %%mm0, %k0\n" + "bsrl %k0, %k0\n" + "subl %5, %k0\n" + "movl %k0, 4(%3)\n" + : "+r" (blk) + : "r" (&sb_sample_f[0][ch][sb]), + "i" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + "r" (&scale_factor[ch][sb]), + "r" (&consts), + "i" (SCALE_OUT_BITS) + : "memory"); + } + } + asm volatile ("emms\n"); +} + static int check_mmx_support(void) { #ifdef __amd64__ @@ -313,6 +366,7 @@ void sbc_init_primitives_mmx(struct sbc_encoder_state *state) if (check_mmx_support()) { state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; + state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx; state->implementation_info = "MMX"; } } -- cgit From fd7dc68ded44bd307802c24c3d02366984d829a3 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:18:46 -0300 Subject: sbc: ARM NEON optimization for scale factors calculation Improves SBC encoding performance when joint stereo is not used. 
Benchmarked on ARM Cortex-A8: == Before: == $ time ./sbcenc -b53 -s8 test.au > /dev/null real 0m4.756s user 0m4.313s sys 0m0.438s samples % image name symbol name 2569 27.6296 sbcenc sbc_pack_frame 1934 20.8002 sbcenc sbc_analyze_4b_8s_neon 1386 14.9064 sbcenc sbc_calculate_bits 1221 13.1319 sbcenc sbc_calc_scalefactors 996 10.7120 sbcenc sbc_enc_process_input_8s_be 878 9.4429 no-vmlinux /no-vmlinux 204 2.1940 sbcenc sbc_encode 56 0.6023 libc-2.10.1.so memcpy == After: == $ time ./sbcenc -b53 -s8 test.au > /dev/null real 0m4.220s user 0m3.797s sys 0m0.422s samples % image name symbol name 2563 31.3249 sbcenc sbc_pack_frame 1892 23.1239 sbcenc sbc_analyze_4b_8s_neon 1368 16.7196 sbcenc sbc_calculate_bits 961 11.7453 sbcenc sbc_enc_process_input_8s_be 836 10.2176 no-vmlinux /no-vmlinux 262 3.2022 sbcenc sbc_calc_scalefactors_neon 199 2.4322 sbcenc sbc_encode 49 0.5989 libc-2.10.1.so memcpy --- src/modules/bluetooth/sbc/sbc.c | 2 +- src/modules/bluetooth/sbc/sbc_primitives_neon.c | 58 +++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index 512341fa..bebca41e 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -77,7 +77,7 @@ struct sbc_frame { uint8_t joint; /* only the lower 4 bits of every element are to be used */ - uint32_t scale_factor[2][8]; + uint32_t SBC_ALIGNED scale_factor[2][8]; /* raw integer subband samples in the frame */ int32_t SBC_ALIGNED sb_sample_f[16][2][8]; diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c index f1bc7b48..aa902b6f 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives_neon.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -236,10 +236,68 @@ static inline void sbc_analyze_4b_8s_neon(int16_t *x, _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); } +static void sbc_calc_scalefactors_neon( + int32_t 
sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb += 4) { + int blk = blocks; + int32_t *in = &sb_sample_f[0][ch][sb]; + asm volatile ( + "vmov.s32 q0, %[c1]\n" + "vmov.s32 q1, %[c1]\n" + "1:\n" + "vld1.32 {d16, d17}, [%[in], :128], %[inc]\n" + "vabs.s32 q8, q8\n" + "vld1.32 {d18, d19}, [%[in], :128], %[inc]\n" + "vabs.s32 q9, q9\n" + "vld1.32 {d20, d21}, [%[in], :128], %[inc]\n" + "vabs.s32 q10, q10\n" + "vld1.32 {d22, d23}, [%[in], :128], %[inc]\n" + "vabs.s32 q11, q11\n" + "vcgt.s32 q12, q8, #0\n" + "vcgt.s32 q13, q9, #0\n" + "vcgt.s32 q14, q10, #0\n" + "vcgt.s32 q15, q11, #0\n" + "vadd.s32 q8, q8, q12\n" + "vadd.s32 q9, q9, q13\n" + "vadd.s32 q10, q10, q14\n" + "vadd.s32 q11, q11, q15\n" + "vorr.s32 q0, q0, q8\n" + "vorr.s32 q1, q1, q9\n" + "vorr.s32 q0, q0, q10\n" + "vorr.s32 q1, q1, q11\n" + "subs %[blk], %[blk], #4\n" + "bgt 1b\n" + "vorr.s32 q0, q0, q1\n" + "vmov.s32 q15, %[c2]\n" + "vclz.s32 q0, q0\n" + "vsub.s32 q0, q15, q0\n" + "vst1.32 {d0, d1}, [%[out], :128]\n" + : + [blk] "+r" (blk), + [in] "+r" (in) + : + [inc] "r" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + [out] "r" (&scale_factor[ch][sb]), + [c1] "i" (1 << SCALE_OUT_BITS), + [c2] "i" (31 - SCALE_OUT_BITS) + : "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "d26", + "d27", "d28", "d29", "d30", "d31", "cc", "memory"); + } + } +} + void sbc_init_primitives_neon(struct sbc_encoder_state *state) { state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; + state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon; state->implementation_info = "NEON"; } -- cgit From 177948a6f23cbc58530ae394f791ba9fd764899a Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:21:53 -0300 Subject: sbc: fix signedness of parameters The written 
parameter of sbc_encode can be negative so it should be ssize_t instead of size_t. --- src/modules/bluetooth/sbc/sbc.c | 9 +++++---- src/modules/bluetooth/sbc/sbc.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index bebca41e..ae744296 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -742,7 +742,7 @@ static int sbc_analyze_audio(struct sbc_encoder_state *state, * -99 not implemented */ -static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( +static SBC_ALWAYS_INLINE ssize_t sbc_pack_frame_internal( uint8_t *data, struct sbc_frame *frame, size_t len, int frame_subbands, int frame_channels, int joint) { @@ -858,7 +858,7 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( return data_ptr - data; } -static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len, +static ssize_t sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len, int joint) { if (frame->subbands == 4) { @@ -1004,10 +1004,11 @@ ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, } ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written) + void *output, size_t output_len, ssize_t *written) { struct sbc_priv *priv; - int framelen, samples; + int samples; + ssize_t framelen; int (*sbc_enc_process_input)(int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], int nsamples, int nchannels); diff --git a/src/modules/bluetooth/sbc/sbc.h b/src/modules/bluetooth/sbc/sbc.h index 65435884..c9c56d38 100644 --- a/src/modules/bluetooth/sbc/sbc.h +++ b/src/modules/bluetooth/sbc/sbc.h @@ -90,7 +90,7 @@ ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, /* Encodes ONE input block into ONE output block */ ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, - void *output, size_t output_len, size_t *written); + void *output, size_t output_len, 
ssize_t *written); /* Returns the output block size in bytes */ size_t sbc_get_frame_length(sbc_t *sbc); -- cgit From 718fe73cab5d2b9d77ffc94b7141e7be44305968 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:27:30 -0300 Subject: sbc: ARM NEON optimized joint stereo processing in SBC encoder Improves SBC encoding performance when joint stereo is used, which is a typical A2DP configuration. Benchmarked on ARM Cortex-A8: == Before: == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m5.239s user 0m4.805s sys 0m0.430s samples % image name symbol name 26083 25.0856 sbcenc sbc_pack_frame 21548 20.7240 sbcenc sbc_calc_scalefactors_j 19910 19.1486 sbcenc sbc_analyze_4b_8s_neon 14377 13.8272 sbcenc sbc_calculate_bits 9990 9.6080 sbcenc sbc_enc_process_input_8s_be 8667 8.3356 no-vmlinux /no-vmlinux 2263 2.1765 sbcenc sbc_encode 696 0.6694 libc-2.10.1.so memcpy == After: == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m4.389s user 0m3.969s sys 0m0.422s samples % image name symbol name 26234 29.9625 sbcenc sbc_pack_frame 20057 22.9076 sbcenc sbc_analyze_4b_8s_neon 14306 16.3393 sbcenc sbc_calculate_bits 9866 11.2682 sbcenc sbc_enc_process_input_8s_be 8506 9.7149 no-vmlinux /no-vmlinux 5219 5.9608 sbcenc sbc_calc_scalefactors_j_neon 2280 2.6040 sbcenc sbc_encode 661 0.7549 libc-2.10.1.so memcpy --- src/modules/bluetooth/sbc/sbc_primitives_neon.c | 243 ++++++++++++++++++++++++ 1 file changed, 243 insertions(+) diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c index aa902b6f..46842008 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives_neon.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -293,11 +293,254 @@ static void sbc_calc_scalefactors_neon( } } +int sbc_calc_scalefactors_j_neon( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int subbands) +{ + static SBC_ALIGNED int32_t joint_bits_mask[8] = { + 8, 4, 2, 1, 128, 64, 32, 16 + }; + 
int joint, i; + int32_t *in0, *in1; + int32_t *in = &sb_sample_f[0][0][0]; + uint32_t *out0, *out1; + uint32_t *out = &scale_factor[0][0]; + int32_t *consts = joint_bits_mask; + + i = subbands; + + asm volatile ( + /* + * constants: q13 = (31 - SCALE_OUT_BITS), q14 = 1 + * input: q0 = ((1 << SCALE_OUT_BITS) + 1) + * %[in0] - samples for channel 0 + * %[in1] - samples for channel 1 + * output: q0, q1 - scale factors without joint stereo + * q2, q3 - scale factors with joint stereo + * q15 - joint stereo selection mask + */ + ".macro calc_scalefactors\n" + "vmov.s32 q1, q0\n" + "vmov.s32 q2, q0\n" + "vmov.s32 q3, q0\n" + "mov %[i], %[blocks]\n" + "1:\n" + "vld1.32 {d18, d19}, [%[in1], :128], %[inc]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d16, d17}, [%[in0], :128], %[inc]\n" + "vhadd.s32 q10, q8, q11\n" + "vhsub.s32 q11, q8, q11\n" + "vabs.s32 q8, q8\n" + "vabs.s32 q9, q9\n" + "vabs.s32 q10, q10\n" + "vabs.s32 q11, q11\n" + "vmax.s32 q0, q0, q8\n" + "vmax.s32 q1, q1, q9\n" + "vmax.s32 q2, q2, q10\n" + "vmax.s32 q3, q3, q11\n" + "subs %[i], %[i], #1\n" + "bgt 1b\n" + "vsub.s32 q0, q0, q14\n" + "vsub.s32 q1, q1, q14\n" + "vsub.s32 q2, q2, q14\n" + "vsub.s32 q3, q3, q14\n" + "vclz.s32 q0, q0\n" + "vclz.s32 q1, q1\n" + "vclz.s32 q2, q2\n" + "vclz.s32 q3, q3\n" + "vsub.s32 q0, q13, q0\n" + "vsub.s32 q1, q13, q1\n" + "vsub.s32 q2, q13, q2\n" + "vsub.s32 q3, q13, q3\n" + ".endm\n" + /* + * constants: q14 = 1 + * input: q15 - joint stereo selection mask + * %[in0] - value set by calc_scalefactors macro + * %[in1] - value set by calc_scalefactors macro + */ + ".macro update_joint_stereo_samples\n" + "sub %[out1], %[in1], %[inc]\n" + "sub %[out0], %[in0], %[inc]\n" + "sub %[in1], %[in1], %[inc], asl #1\n" + "sub %[in0], %[in0], %[inc], asl #1\n" + "vld1.32 {d18, d19}, [%[in1], :128]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d16, d17}, [%[in0], :128]\n" + "vld1.32 {d2, d3}, [%[out1], :128]\n" + "vbic.s32 q3, q1, q14\n" + "vld1.32 {d0, d1}, [%[out0], :128]\n" + "vhsub.s32
q10, q8, q11\n" + "vhadd.s32 q11, q8, q11\n" + "vhsub.s32 q2, q0, q3\n" + "vhadd.s32 q3, q0, q3\n" + "vbif.s32 q10, q9, q15\n" + "vbif.s32 d22, d16, d30\n" + "sub %[inc], %[zero], %[inc], asl #1\n" + "sub %[i], %[blocks], #2\n" + "2:\n" + "vbif.s32 d23, d17, d31\n" + "vst1.32 {d20, d21}, [%[in1], :128], %[inc]\n" + "vbif.s32 d4, d2, d30\n" + "vld1.32 {d18, d19}, [%[in1], :128]\n" + "vbif.s32 d5, d3, d31\n" + "vst1.32 {d22, d23}, [%[in0], :128], %[inc]\n" + "vbif.s32 d6, d0, d30\n" + "vld1.32 {d16, d17}, [%[in0], :128]\n" + "vbif.s32 d7, d1, d31\n" + "vst1.32 {d4, d5}, [%[out1], :128], %[inc]\n" + "vbic.s32 q11, q9, q14\n" + "vld1.32 {d2, d3}, [%[out1], :128]\n" + "vst1.32 {d6, d7}, [%[out0], :128], %[inc]\n" + "vbic.s32 q3, q1, q14\n" + "vld1.32 {d0, d1}, [%[out0], :128]\n" + "vhsub.s32 q10, q8, q11\n" + "vhadd.s32 q11, q8, q11\n" + "vhsub.s32 q2, q0, q3\n" + "vhadd.s32 q3, q0, q3\n" + "vbif.s32 q10, q9, q15\n" + "vbif.s32 d22, d16, d30\n" + "subs %[i], %[i], #2\n" + "bgt 2b\n" + "sub %[inc], %[zero], %[inc], asr #1\n" + "vbif.s32 d23, d17, d31\n" + "vst1.32 {d20, d21}, [%[in1], :128]\n" + "vbif.s32 q2, q1, q15\n" + "vst1.32 {d22, d23}, [%[in0], :128]\n" + "vbif.s32 q3, q0, q15\n" + "vst1.32 {d4, d5}, [%[out1], :128]\n" + "vst1.32 {d6, d7}, [%[out0], :128]\n" + ".endm\n" + + "vmov.s32 q14, #1\n" + "vmov.s32 q13, %[c2]\n" + + "cmp %[i], #4\n" + "bne 8f\n" + + "4:\n" /* 4 subbands */ + "add %[in0], %[in], #0\n" + "add %[in1], %[in], #32\n" + "add %[out0], %[out], #0\n" + "add %[out1], %[out], #32\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 0, 1, 2 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* calculate and save to memory 'joint' variable */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 
q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + " vpadd.s32 d16, d16, d16\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vst1.32 {d16[0]}, [%[joint]]\n" + + "update_joint_stereo_samples\n" + "b 9f\n" + + "8:\n" /* 8 subbands */ + "add %[in0], %[in], #16\n\n" + "add %[in1], %[in], #48\n" + "add %[out0], %[out], #16\n\n" + "add %[out1], %[out], #48\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 4, 5, 6 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vmov.s32 d31[1], %[zero]\n" /* last subband -> no joint */ + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* calculate part of 'joint' variable and save it to d24 */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vpadd.s32 d24, d16, d16\n" + + "update_joint_stereo_samples\n" + + "add %[in0], %[in], #0\n" + "add %[in1], %[in], #32\n" + "add %[out0], %[out], #0\n\n" + "add %[out1], %[out], #32\n" + "vmov.s32 q0, %[c1]\n" + "vadd.s32 q0, q0, q14\n" + + "calc_scalefactors\n" + + /* check whether to use joint stereo for subbands 0, 1, 2, 3 */ + "vadd.s32 q15, q0, q1\n" + "vadd.s32 q9, q2, q3\n" + "vld1.32 {d16, d17}, [%[consts], :128]!\n" + "vcgt.s32 q15, q15, q9\n" + + /* combine last part of 'joint' with d24 and save to memory */ + /* update and save scale factors to memory */ + " vand.s32 q8, q8, q15\n" + "vbit.s32 q0, q2, q15\n" + " vpadd.s32 d16, d16, d17\n" + "vbit.s32 q1, q3, q15\n" + " vpadd.s32 d16, d16, d16\n" + "vst1.32 {d0, d1}, [%[out0], :128]\n" + " vadd.s32 d16, d16, d24\n" + "vst1.32 {d2, d3}, [%[out1], :128]\n" + " vst1.32 {d16[0]}, [%[joint]]\n" + + "update_joint_stereo_samples\n" + "9:\n" + ".purgem calc_scalefactors\n" + 
".purgem update_joint_stereo_samples\n" + : + [i] "+&r" (i), + [in] "+&r" (in), + [in0] "=&r" (in0), + [in1] "=&r" (in1), + [out] "+&r" (out), + [out0] "=&r" (out0), + [out1] "=&r" (out1), + [consts] "+&r" (consts) + : + [inc] "r" ((char *) &sb_sample_f[1][0][0] - + (char *) &sb_sample_f[0][0][0]), + [blocks] "r" (blocks), + [joint] "r" (&joint), + [c1] "i" (1 << SCALE_OUT_BITS), + [c2] "i" (31 - SCALE_OUT_BITS), + [zero] "r" (0) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", + "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31", "cc", "memory"); + + return joint; +} + void sbc_init_primitives_neon(struct sbc_encoder_state *state) { state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon; + state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j_neon; state->implementation_info = "NEON"; } -- cgit From 68bdf5526eeafa85308dcce6a1a8c33f307b6faf Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:28:31 -0300 Subject: sbc: ARM NEON optimizations for input permutation in SBC encoder Using SIMD optimizations for 'sbc_enc_process_input_*' functions provides a modest, but consistent speedup in all SBC encoding cases. 
Benchmarked on ARM Cortex-A8: == Before: == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m4.389s user 0m3.969s sys 0m0.422s samples % image name symbol name 26234 29.9625 sbcenc sbc_pack_frame 20057 22.9076 sbcenc sbc_analyze_4b_8s_neon 14306 16.3393 sbcenc sbc_calculate_bits 9866 11.2682 sbcenc sbc_enc_process_input_8s_be 8506 9.7149 no-vmlinux /no-vmlinux 5219 5.9608 sbcenc sbc_calc_scalefactors_j_neon 2280 2.6040 sbcenc sbc_encode 661 0.7549 libc-2.10.1.so memcpy == After: == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m3.989s user 0m3.602s sys 0m0.391s samples % image name symbol name 26057 32.6128 sbcenc sbc_pack_frame 20003 25.0357 sbcenc sbc_analyze_4b_8s_neon 14220 17.7977 sbcenc sbc_calculate_bits 8498 10.6361 no-vmlinux /no-vmlinux 5300 6.6335 sbcenc sbc_calc_scalefactors_j_neon 3235 4.0489 sbcenc sbc_enc_process_input_8s_be_neon 2172 2.7185 sbcenc sbc_encode --- src/modules/bluetooth/sbc/sbc_primitives_neon.c | 350 ++++++++++++++++++++++++ 1 file changed, 350 insertions(+) diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c index 46842008..1f39213e 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives_neon.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -535,12 +535,362 @@ int sbc_calc_scalefactors_j_neon( return joint; } +#define PERM_BE(a, b, c, d) { \ + (a * 2) + 1, (a * 2) + 0, \ + (b * 2) + 1, (b * 2) + 0, \ + (c * 2) + 1, (c * 2) + 0, \ + (d * 2) + 1, (d * 2) + 0 \ + } +#define PERM_LE(a, b, c, d) { \ + (a * 2) + 0, (a * 2) + 1, \ + (b * 2) + 0, (b * 2) + 1, \ + (c * 2) + 0, (c * 2) + 1, \ + (d * 2) + 0, (d * 2) + 1 \ + } + +static SBC_ALWAYS_INLINE int sbc_enc_process_input_4s_neon_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + static SBC_ALIGNED uint8_t perm_be[2][8] = { + PERM_BE(7, 3, 6, 4), + PERM_BE(0, 2, 1, 5) + }; + static SBC_ALIGNED uint8_t perm_le[2][8] = { + 
PERM_LE(7, 3, 6, 4), + PERM_LE(0, 2, 1, 5) + }; + /* handle X buffer wraparound */ + if (position < nsamples) { + int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 40]; + int16_t *src = &X[0][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0}, [%[src], :64]!\n" + "vst1.16 {d0}, [%[dst], :64]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + if (nchannels > 1) { + dst = &X[1][SBC_X_BUFFER_SIZE - 40]; + src = &X[1][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0}, [%[src], :64]!\n" + "vst1.16 {d0}, [%[dst], :64]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + } + position = SBC_X_BUFFER_SIZE - 40; + } + + if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { + /* poor 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[y], %[y], #16\n" + "sub %[position], %[position], #8\n" + "vld1.8 {d4, d5}, [%[pcm]]!\n" + "vuzp.16 d4, d5\n" + "vld1.8 {d20, d21}, [%[pcm]]!\n" + "vuzp.16 d20, d21\n" + "vswp d5, d20\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vtbl.8 d18, {d20, d21}, d0\n" + "vtbl.8 d19, {d20, d21}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "vst1.16 {d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? 
perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else if (nchannels > 1) { + /* proper 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[y], %[y], #16\n" + "sub %[position], %[position], #8\n" + "vld2.16 {d4, d5}, [%[pcm]]!\n" + "vld2.16 {d20, d21}, [%[pcm]]!\n" + "vswp d5, d20\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vtbl.8 d18, {d20, d21}, d0\n" + "vtbl.8 d19, {d20, d21}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "vst1.16 {d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else { + int16_t *x = &X[0][position]; + asm volatile ( + "vld1.8 {d0, d1}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #16\n" + "sub %[position], %[position], #8\n" + "vld1.8 {d4, d5}, [%[pcm]]!\n" + "vtbl.8 d16, {d4, d5}, d0\n" + "vtbl.8 d17, {d4, d5}, d1\n" + "vst1.16 {d16, d17}, [%[x], :128]\n" + "subs %[nsamples], %[nsamples], #8\n" + "bgt 1b\n" + : + [x] "+r" (x), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? 
perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19"); + } + return position; +} + +static SBC_ALWAYS_INLINE int sbc_enc_process_input_8s_neon_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + static SBC_ALIGNED uint8_t perm_be[4][8] = { + PERM_BE(15, 7, 14, 8), + PERM_BE(13, 9, 12, 10), + PERM_BE(11, 3, 6, 0), + PERM_BE(5, 1, 4, 2) + }; + static SBC_ALIGNED uint8_t perm_le[4][8] = { + PERM_LE(15, 7, 14, 8), + PERM_LE(13, 9, 12, 10), + PERM_LE(11, 3, 6, 0), + PERM_LE(5, 1, 4, 2) + }; + /* handle X buffer wraparound */ + if (position < nsamples) { + int16_t *dst = &X[0][SBC_X_BUFFER_SIZE - 72]; + int16_t *src = &X[0][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1}, [%[src], :128]!\n" + "vst1.16 {d0, d1}, [%[dst], :128]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + if (nchannels > 1) { + dst = &X[1][SBC_X_BUFFER_SIZE - 72]; + src = &X[1][position]; + asm volatile ( + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1, d2, d3}, [%[src], :128]!\n" + "vst1.16 {d0, d1, d2, d3}, [%[dst], :128]!\n" + "vld1.16 {d0, d1}, [%[src], :128]!\n" + "vst1.16 {d0, d1}, [%[dst], :128]!\n" + : + [dst] "+r" (dst), + [src] "+r" (src) + : : "memory", "d0", "d1", "d2", "d3"); + 
} + position = SBC_X_BUFFER_SIZE - 72; + } + + if ((nchannels > 1) && ((uintptr_t)pcm & 1)) { + /* poor 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[y], %[y], #32\n" + "sub %[position], %[position], #16\n" + "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vuzp.16 q2, q3\n" + "vld1.8 {d20, d21, d22, d23}, [%[pcm]]!\n" + "vuzp.16 q10, q11\n" + "vswp q3, q10\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" + "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" + "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" + "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? 
perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else if (nchannels > 1) { + /* proper 'pcm' alignment */ + int16_t *x = &X[0][position]; + int16_t *y = &X[1][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[y], %[y], #32\n" + "sub %[position], %[position], #16\n" + "vld2.16 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vld2.16 {d20, d21, d22, d23}, [%[pcm]]!\n" + "vswp q3, q10\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "vtbl.8 d16, {d20, d21, d22, d23}, d0\n" + "vtbl.8 d17, {d20, d21, d22, d23}, d1\n" + "vtbl.8 d18, {d20, d21, d22, d23}, d2\n" + "vtbl.8 d19, {d20, d21, d22, d23}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[y], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [y] "+r" (y), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23"); + } else { + int16_t *x = &X[0][position]; + asm volatile ( + "vld1.8 {d0, d1, d2, d3}, [%[perm], :128]\n" + "1:\n" + "sub %[x], %[x], #32\n" + "sub %[position], %[position], #16\n" + "vld1.8 {d4, d5, d6, d7}, [%[pcm]]!\n" + "vtbl.8 d16, {d4, d5, d6, d7}, d0\n" + "vtbl.8 d17, {d4, d5, d6, d7}, d1\n" + "vtbl.8 d18, {d4, d5, d6, d7}, d2\n" + "vtbl.8 d19, {d4, d5, d6, d7}, d3\n" + "vst1.16 {d16, d17, d18, d19}, [%[x], :128]\n" + "subs %[nsamples], %[nsamples], #16\n" + "bgt 1b\n" + : + [x] "+r" (x), + [pcm] "+r" (pcm), + [nsamples] "+r" (nsamples), + [position] "+r" (position) + : + [perm] "r" (big_endian ? 
perm_be : perm_le) + : "cc", "memory", "d0", "d1", "d2", "d3", "d4", + "d5", "d6", "d7", "d16", "d17", "d18", "d19"); + } + return position; +} + +#undef PERM_BE +#undef PERM_LE + +static int sbc_enc_process_input_4s_be_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_4s_neon_internal( + position, pcm, X, nsamples, nchannels, 1); +} + +static int sbc_enc_process_input_4s_le_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_4s_neon_internal( + position, pcm, X, nsamples, nchannels, 0); +} + +static int sbc_enc_process_input_8s_be_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_8s_neon_internal( + position, pcm, X, nsamples, nchannels, 1); +} + +static int sbc_enc_process_input_8s_le_neon(int position, const uint8_t *pcm, + int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + return sbc_enc_process_input_8s_neon_internal( + position, pcm, X, nsamples, nchannels, 0); +} + void sbc_init_primitives_neon(struct sbc_encoder_state *state) { state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; state->sbc_calc_scalefactors = sbc_calc_scalefactors_neon; state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j_neon; + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le_neon; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be_neon; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le_neon; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be_neon; state->implementation_info = "NEON"; } -- cgit From 899791700024a92619bc79ef3f03467d82f24c11 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:29:38 -0300 Subject: sbc: slightly faster 'sbc_calc_scalefactors_neon' Previous 
variant was basically derived from C and MMX implementations. Now new variant makes use of 'vmax' instruction, which is available in NEON and can do this job faster. The same method for calculating scale factors is also used in 'sbc_calc_scalefactors_j_neon'. Benchmarked without joint stereo on ARM Cortex-A8: == Before: == $ time ./sbcenc -b53 -s8 test.au > /dev/null real 0m3.851s user 0m3.375s sys 0m0.469s samples % image name symbol name 26260 34.2672 sbcenc sbc_pack_frame 20013 26.1154 sbcenc sbc_analyze_4b_8s_neon 13796 18.0027 sbcenc sbc_calculate_bits 8388 10.9457 no-vmlinux /no-vmlinux 3229 4.2136 sbcenc sbc_enc_process_input_8s_be_neon 2408 3.1422 sbcenc sbc_calc_scalefactors_neon 2093 2.7312 sbcenc sbc_encode == After: == $ time ./sbcenc -b53 -s8 test.au > /dev/null real 0m3.796s user 0m3.344s sys 0m0.438s samples % image name symbol name 26582 34.8726 sbcenc sbc_pack_frame 20032 26.2797 sbcenc sbc_analyze_4b_8s_neon 13808 18.1146 sbcenc sbc_calculate_bits 8374 10.9858 no-vmlinux /no-vmlinux 3187 4.1810 sbcenc sbc_enc_process_input_8s_be_neon 2027 2.6592 sbcenc sbc_encode 1766 2.3168 sbcenc sbc_calc_scalefactors_neon --- src/modules/bluetooth/sbc/sbc_primitives_neon.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c index 1f39213e..c233d3c6 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives_neon.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -247,8 +247,11 @@ static void sbc_calc_scalefactors_neon( int blk = blocks; int32_t *in = &sb_sample_f[0][ch][sb]; asm volatile ( - "vmov.s32 q0, %[c1]\n" + "vmov.s32 q0, #0\n" "vmov.s32 q1, %[c1]\n" + "vmov.s32 q14, #1\n" + "vmov.s32 q15, %[c2]\n" + "vadd.s32 q1, q1, q14\n" "1:\n" "vld1.32 {d16, d17}, [%[in], :128], %[inc]\n" "vabs.s32 q8, q8\n" @@ -258,22 +261,14 @@ static void sbc_calc_scalefactors_neon( "vabs.s32 q10, q10\n" "vld1.32 {d22, d23}, [%[in], :128], 
%[inc]\n" "vabs.s32 q11, q11\n" - "vcgt.s32 q12, q8, #0\n" - "vcgt.s32 q13, q9, #0\n" - "vcgt.s32 q14, q10, #0\n" - "vcgt.s32 q15, q11, #0\n" - "vadd.s32 q8, q8, q12\n" - "vadd.s32 q9, q9, q13\n" - "vadd.s32 q10, q10, q14\n" - "vadd.s32 q11, q11, q15\n" - "vorr.s32 q0, q0, q8\n" - "vorr.s32 q1, q1, q9\n" - "vorr.s32 q0, q0, q10\n" - "vorr.s32 q1, q1, q11\n" + "vmax.s32 q0, q0, q8\n" + "vmax.s32 q1, q1, q9\n" + "vmax.s32 q0, q0, q10\n" + "vmax.s32 q1, q1, q11\n" "subs %[blk], %[blk], #4\n" "bgt 1b\n" - "vorr.s32 q0, q0, q1\n" - "vmov.s32 q15, %[c2]\n" + "vmax.s32 q0, q0, q1\n" + "vsub.s32 q0, q0, q14\n" "vclz.s32 q0, q0\n" "vsub.s32 q0, q15, q0\n" "vst1.32 {d0, d1}, [%[out], :128]\n" -- cgit From 5423dc16449306ad429580faa4652b35dd82ff55 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:31:30 -0300 Subject: sbc: faster 'sbc_calculate_bits' function By using SBC_ALWAYS_INLINE trick, the implementation of 'sbc_calculate_bits' function is split into two branches, each having 'subbands' variable value known at compile time. It helps the compiler to generate more optimal code by saving at least one extra register, and also provides more obvious opportunities for loop unrolling.
Benchmarked on ARM Cortex-A8: == Before: == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m3.989s user 0m3.602s sys 0m0.391s samples % image name symbol name 26057 32.6128 sbcenc sbc_pack_frame 20003 25.0357 sbcenc sbc_analyze_4b_8s_neon 14220 17.7977 sbcenc sbc_calculate_bits 8498 10.6361 no-vmlinux /no-vmlinux 5300 6.6335 sbcenc sbc_calc_scalefactors_j_neon 3235 4.0489 sbcenc sbc_enc_process_input_8s_be_neon 2172 2.7185 sbcenc sbc_encode == After: == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m3.652s user 0m3.195s sys 0m0.445s samples % image name symbol name 26207 36.0095 sbcenc sbc_pack_frame 19820 27.2335 sbcenc sbc_analyze_4b_8s_neon 8629 11.8566 no-vmlinux /no-vmlinux 6988 9.6018 sbcenc sbc_calculate_bits 5094 6.9994 sbcenc sbc_calc_scalefactors_j_neon 3351 4.6044 sbcenc sbc_enc_process_input_8s_be_neon 2182 2.9982 sbcenc sbc_encode --- src/modules/bluetooth/sbc/sbc.c | 43 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c index ae744296..98b236bd 100644 --- a/src/modules/bluetooth/sbc/sbc.c +++ b/src/modules/bluetooth/sbc/sbc.c @@ -159,7 +159,8 @@ static uint8_t sbc_crc8(const uint8_t *data, size_t len) * Takes a pointer to the frame in question, a pointer to the bits array and * the sampling frequency (as 2 bit integer) */ -static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +static SBC_ALWAYS_INLINE void sbc_calculate_bits_internal( + const struct sbc_frame *frame, int (*bits)[8], int subbands) { uint8_t sf = frame->frequency; @@ -170,17 +171,17 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) for (ch = 0; ch < frame->channels; ch++) { max_bitneed = 0; if (frame->allocation == SNR) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { bitneed[ch][sb] = frame->scale_factor[ch][sb]; if (bitneed[ch][sb] > max_bitneed) max_bitneed = 
bitneed[ch][sb]; } } else { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (frame->scale_factor[ch][sb] == 0) bitneed[ch][sb] = -5; else { - if (frame->subbands == 4) + if (subbands == 4) loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; else loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; @@ -201,7 +202,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) bitslice--; bitcount += slicecount; slicecount = 0; - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) slicecount++; else if (bitneed[ch][sb] == bitslice + 1) @@ -214,7 +215,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) bitslice--; } - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (bitneed[ch][sb] < bitslice + 2) bits[ch][sb] = 0; else { @@ -224,7 +225,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } } - for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + for (sb = 0; bitcount < frame->bitpool && + sb < subbands; sb++) { if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { bits[ch][sb]++; bitcount++; @@ -234,7 +236,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } } - for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + for (sb = 0; bitcount < frame->bitpool && + sb < subbands; sb++) { if (bits[ch][sb] < 16) { bits[ch][sb]++; bitcount++; @@ -250,7 +253,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) max_bitneed = 0; if (frame->allocation == SNR) { for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { bitneed[ch][sb] = frame->scale_factor[ch][sb]; if (bitneed[ch][sb] > max_bitneed) max_bitneed = bitneed[ch][sb]; @@ -258,11 +261,11 @@ static 
void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } } else { for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (frame->scale_factor[ch][sb] == 0) bitneed[ch][sb] = -5; else { - if (frame->subbands == 4) + if (subbands == 4) loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; else loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; @@ -285,7 +288,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) bitcount += slicecount; slicecount = 0; for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) slicecount++; else if (bitneed[ch][sb] == bitslice + 1) @@ -300,7 +303,7 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } for (ch = 0; ch < 2; ch++) { - for (sb = 0; sb < frame->subbands; sb++) { + for (sb = 0; sb < subbands; sb++) { if (bitneed[ch][sb] < bitslice + 2) { bits[ch][sb] = 0; } else { @@ -324,7 +327,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) if (ch == 1) { ch = 0; sb++; - if (sb >= frame->subbands) break; + if (sb >= subbands) + break; } else ch = 1; } @@ -339,7 +343,8 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) if (ch == 1) { ch = 0; sb++; - if (sb >= frame->subbands) break; + if (sb >= subbands) + break; } else ch = 1; } @@ -348,6 +353,14 @@ static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) } +static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +{ + if (frame->subbands == 4) + sbc_calculate_bits_internal(frame, bits, 4); + else + sbc_calculate_bits_internal(frame, bits, 8); +} + /* * Unpacks a SBC frame at the beginning of the stream in data, * which has at most len bytes into frame. 
-- cgit From 51d5f3c9fda454e78e87e05029a34b56b0815186 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:36:07 -0300 Subject: sbc: added "cc" to the clobber list of mmx inline assembly In the case of scale factors calculation optimizations, the inline assembly code has instructions which update flags register, but "cc" was not mentioned in the clobber list. When optimizing code, gcc theoretically is allowed to do a comparison before the inline assembly block, and a conditional branch after it which would lead to a problem if the flags register gets clobbered. While this is apparently not happening in practice with the current versions of gcc, the clobber list needs to be corrected. Regarding the other inline assembly blocks. While most likely it is actually unnecessary based on quick review, "cc" is also added there to the clobber list because it should have no impact on performance in practice. It's kind of cargo cult, but relieves us from the need to track the potential updates of flags register in all these places. 
--- src/modules/bluetooth/sbc/sbc_primitives_mmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c index d8373b3a..ab89d074 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c @@ -100,7 +100,7 @@ static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, : : "r" (in), "r" (consts), "r" (&round_c), "r" (out), "i" (SBC_PROTO_FIXED4_SCALE) - : "memory"); + : "cc", "memory"); } static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, @@ -242,7 +242,7 @@ static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, : : "r" (in), "r" (consts), "r" (&round_c), "r" (out), "i" (SBC_PROTO_FIXED8_SCALE) - : "memory"); + : "cc", "memory"); } static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, @@ -322,7 +322,7 @@ static void sbc_calc_scalefactors_mmx( "r" (&scale_factor[ch][sb]), "r" (&consts), "i" (SCALE_OUT_BITS) - : "memory"); + : "cc", "memory"); } } asm volatile ("emms\n"); -- cgit From 82ef8346d8b962fedff58b6cd579b1bb25227c49 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:35:03 -0300 Subject: sbc: ARMv6 optimized version of analysis filter for SBC encoder The optimized filter gets enabled when the code is compiled with -mcpu=/-march options set to target the processors which support ARMv6 instructions. This code is also disabled when NEON is used (which is a lot better alternative). For additional safety ARM EABI is required and thumb mode should not be used. 
Benchmarks from ARM11: == 8 subbands == $ time ./sbcenc -b53 -s8 -j test.au > /dev/null real 0m 35.65s user 0m 34.17s sys 0m 1.28s $ time ./sbcenc.armv6 -b53 -s8 -j test.au > /dev/null real 0m 17.29s user 0m 15.47s sys 0m 0.67s == 4 subbands == $ time ./sbcenc -b53 -s4 -j test.au > /dev/null real 0m 25.28s user 0m 23.76s sys 0m 1.32s $ time ./sbcenc.armv6 -b53 -s4 -j test.au > /dev/null real 0m 18.64s user 0m 15.78s sys 0m 2.22s --- src/Makefile.am | 2 +- src/modules/bluetooth/sbc/sbc_primitives.c | 4 + src/modules/bluetooth/sbc/sbc_primitives_armv6.c | 299 +++++++++++++++++++++++ src/modules/bluetooth/sbc/sbc_primitives_armv6.h | 52 ++++ 4 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_armv6.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_armv6.h diff --git a/src/Makefile.am b/src/Makefile.am index d4a72832..77f6d657 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1820,7 +1820,7 @@ module_bluetooth_discover_la_LDFLAGS = $(MODULE_LDFLAGS) module_bluetooth_discover_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) libbluetooth-util.la module_bluetooth_discover_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) -libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h modules/bluetooth/sbc/sbc_tables.h modules/bluetooth/sbc/sbc_math.h modules/bluetooth/sbc/sbc_primitives.h modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives_mmx.h modules/bluetooth/sbc/sbc_primitives_neon.h modules/bluetooth/sbc/sbc_primitives_mmx.c modules/bluetooth/sbc/sbc_primitives_neon.c +libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h modules/bluetooth/sbc/sbc_tables.h modules/bluetooth/sbc/sbc_math.h modules/bluetooth/sbc/sbc_primitives.h modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives_mmx.h modules/bluetooth/sbc/sbc_primitives_neon.h modules/bluetooth/sbc/sbc_primitives_mmx.c 
modules/bluetooth/sbc/sbc_primitives_neon.c modules/bluetooth/sbc/sbc_primitives_armv6.c libbluetooth_sbc_la_LDFLAGS = -avoid-version libbluetooth_sbc_la_LIBADD = $(MODULE_LIBADD) libbluetooth_sbc_la_CFLAGS = $(AM_CFLAGS) diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c index 82cd399d..66e20a0a 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives.c +++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -33,6 +33,7 @@ #include "sbc_primitives.h" #include "sbc_primitives_mmx.h" #include "sbc_primitives_neon.h" +#include "sbc_primitives_armv6.h" /* * A reference C code of analysis filter with SIMD-friendly tables @@ -539,6 +540,9 @@ void sbc_init_primitives(struct sbc_encoder_state *state) #endif /* ARM optimizations */ +#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT + sbc_init_primitives_armv6(state); +#endif #ifdef SBC_BUILD_WITH_NEON_SUPPORT sbc_init_primitives_neon(state); #endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_armv6.c b/src/modules/bluetooth/sbc/sbc_primitives_armv6.c new file mode 100644 index 00000000..95860980 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_armv6.c @@ -0,0 +1,299 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_armv6.h" + +/* + * ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline. + */ + +#ifdef SBC_BUILD_WITH_ARMV6_SUPPORT + +static void __attribute__((naked)) sbc_analyze_four_armv6() +{ + /* r0 = in, r1 = out, r2 = consts */ + asm volatile ( + "push {r1, r4-r7, lr}\n" + "push {r8-r11}\n" + "ldrd r4, r5, [r0, #0]\n" + "ldrd r6, r7, [r2, #0]\n" + "ldrd r8, r9, [r0, #16]\n" + "ldrd r10, r11, [r2, #16]\n" + "mov r14, #0x8000\n" + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #32]\n" + "ldrd r6, r7, [r2, #32]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #48]\n" + "ldrd r10, r11, [r2, #48]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #64]\n" + "ldrd r6, r7, [r2, #64]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #8]\n" + "ldrd r10, r11, [r2, #8]\n" + "smlad r3, r4, r6, r3\n" /* t1[0] is done */ + "smlad r12, r5, r7, r12\n" /* t1[1] is done */ + "ldrd r4, r5, [r0, #24]\n" + "ldrd r6, r7, [r2, #24]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ + "smlad r12, r8, r10, r14\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #40]\n" + "ldrd r10, r11, [r2, #40]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r0, #56]\n" + "ldrd r6, r7, [r2, #56]\n" + "smlad r12, r8, r10, r12\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #72]\n" + "ldrd r10, r11, [r2, #72]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r2, #80]\n" /* start loading cos table */ + "smlad r12, r8, r10, r12\n" /* t1[2] is done */ +
"smlad r14, r9, r11, r14\n" /* t1[3] is done */ + "ldrd r6, r7, [r2, #88]\n" + "ldrd r8, r9, [r2, #96]\n" + "ldrd r10, r11, [r2, #104]\n" /* cos table fully loaded */ + "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ + "smuad r4, r3, r4\n" + "smuad r5, r3, r5\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "smuad r6, r3, r6\n" + "smuad r7, r3, r7\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "pop {r8-r11}\n" + "stmia r1, {r4, r5, r6, r7}\n" + "pop {r1, r4-r7, pc}\n" + ); +} + +#define sbc_analyze_four(in, out, consts) \ + ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ + sbc_analyze_four_armv6)((in), (out), (consts)) + +static void __attribute__((naked)) sbc_analyze_eight_armv6() +{ + /* r0 = in, r1 = out, r2 = consts */ + asm volatile ( + "push {r1, r4-r7, lr}\n" + "push {r8-r11}\n" + "ldrd r4, r5, [r0, #24]\n" + "ldrd r6, r7, [r2, #24]\n" + "ldrd r8, r9, [r0, #56]\n" + "ldrd r10, r11, [r2, #56]\n" + "mov r14, #0x8000\n" + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #88]\n" + "ldrd r6, r7, [r2, #88]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #120]\n" + "ldrd r10, r11, [r2, #120]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #152]\n" + "ldrd r6, r7, [r2, #152]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #16]\n" + "ldrd r10, r11, [r2, #16]\n" + "smlad r3, r4, r6, r3\n" /* t1[6] is done */ + "smlad r12, r5, r7, r12\n" /* t1[7] is done */ + "ldrd r4, r5, [r0, #48]\n" + "ldrd r6, r7, [r2, #48]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[6] and t1[7] */ + "str r3, [sp, #-4]!\n" /* save to stack */ + "smlad r3, r8, r10, r14\n" + "smlad r12, r9, r11, r14\n" + "ldrd r8, r9, [r0, #80]\n" + "ldrd r10, r11, [r2, #80]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #112]\n" + "ldrd r6, r7, [r2, #112]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, 
r11, r12\n" + "ldrd r8, r9, [r0, #144]\n" + "ldrd r10, r11, [r2, #144]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #0]\n" + "ldrd r6, r7, [r2, #0]\n" + "smlad r3, r8, r10, r3\n" /* t1[4] is done */ + "smlad r12, r9, r11, r12\n" /* t1[5] is done */ + "ldrd r8, r9, [r0, #32]\n" + "ldrd r10, r11, [r2, #32]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[4] and t1[5] */ + "str r3, [sp, #-4]!\n" /* save to stack */ + "smlad r3, r4, r6, r14\n" + "smlad r12, r5, r7, r14\n" + "ldrd r4, r5, [r0, #64]\n" + "ldrd r6, r7, [r2, #64]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #96]\n" + "ldrd r10, r11, [r2, #96]\n" + "smlad r3, r4, r6, r3\n" + "smlad r12, r5, r7, r12\n" + "ldrd r4, r5, [r0, #128]\n" + "ldrd r6, r7, [r2, #128]\n" + "smlad r3, r8, r10, r3\n" + "smlad r12, r9, r11, r12\n" + "ldrd r8, r9, [r0, #8]\n" + "ldrd r10, r11, [r2, #8]\n" + "smlad r3, r4, r6, r3\n" /* t1[0] is done */ + "smlad r12, r5, r7, r12\n" /* t1[1] is done */ + "ldrd r4, r5, [r0, #40]\n" + "ldrd r6, r7, [r2, #40]\n" + "pkhtb r3, r12, r3, asr #16\n" /* combine t1[0] and t1[1] */ + "smlad r12, r8, r10, r14\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #72]\n" + "ldrd r10, r11, [r2, #72]\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r0, #104]\n" + "ldrd r6, r7, [r2, #104]\n" + "smlad r12, r8, r10, r12\n" + "smlad r14, r9, r11, r14\n" + "ldrd r8, r9, [r0, #136]\n" + "ldrd r10, r11, [r2, #136]!\n" + "smlad r12, r4, r6, r12\n" + "smlad r14, r5, r7, r14\n" + "ldrd r4, r5, [r2, #(160 - 136 + 0)]\n" + "smlad r12, r8, r10, r12\n" /* t1[2] is done */ + "smlad r14, r9, r11, r14\n" /* t1[3] is done */ + "ldrd r6, r7, [r2, #(160 - 136 + 8)]\n" + "smuad r4, r3, r4\n" + "smuad r5, r3, r5\n" + "pkhtb r12, r14, r12, asr #16\n" /* combine t1[2] and t1[3] */ + /* r3 = t2[0:1] */ + /* r12 = t2[2:3] */ + "pop {r0, r14}\n" /* t2[4:5], t2[6:7] */ + "ldrd r8, r9, [r2, #(160 - 136 + 32)]\n" + "smuad r6, r3, r6\n" 
+ "smuad r7, r3, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 40)]\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 64)]\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 72)]\n" + "smlad r4, r0, r8, r4\n" + "smlad r5, r0, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 96)]\n" + "smlad r6, r0, r10, r6\n" + "smlad r7, r0, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 104)]\n" + "smlad r4, r14, r8, r4\n" + "smlad r5, r14, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 0)]\n" + "smlad r6, r14, r10, r6\n" + "smlad r7, r14, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 8)]\n" + "stmia r1!, {r4, r5}\n" + "smuad r4, r3, r8\n" + "smuad r5, r3, r9\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 32)]\n" + "stmia r1!, {r6, r7}\n" + "smuad r6, r3, r10\n" + "smuad r7, r3, r11\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 40)]\n" + "smlad r4, r12, r8, r4\n" + "smlad r5, r12, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 64)]\n" + "smlad r6, r12, r10, r6\n" + "smlad r7, r12, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 72)]\n" + "smlad r4, r0, r8, r4\n" + "smlad r5, r0, r9, r5\n" + "ldrd r8, r9, [r2, #(160 - 136 + 16 + 96)]\n" + "smlad r6, r0, r10, r6\n" + "smlad r7, r0, r11, r7\n" + "ldrd r10, r11, [r2, #(160 - 136 + 16 + 104)]\n" + "smlad r4, r14, r8, r4\n" + "smlad r5, r14, r9, r5\n" + "smlad r6, r14, r10, r6\n" + "smlad r7, r14, r11, r7\n" + "pop {r8-r11}\n" + "stmia r1!, {r4, r5, r6, r7}\n" + "pop {r1, r4-r7, pc}\n" + ); +} + +#define sbc_analyze_eight(in, out, consts) \ + ((void (*)(int16_t *, int32_t *, const FIXED_T*)) \ + sbc_analyze_eight_armv6)((in), (out), (consts)) + +static void sbc_analyze_4b_4s_armv6(int16_t *x, int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four(x 
+ 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static void sbc_analyze_4b_8s_armv6(int16_t *x, int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_armv6(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_armv6; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_armv6; + state->implementation_info = "ARMv6 SIMD"; +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_armv6.h b/src/modules/bluetooth/sbc/sbc_primitives_armv6.h new file mode 100644 index 00000000..1862aede --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_armv6.h @@ -0,0 +1,52 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_ARMV6_H +#define __SBC_PRIMITIVES_ARMV6_H + +#include "sbc_primitives.h" + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) +#define SBC_HAVE_ARMV6 1 +#endif + +#if !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) && \ + defined(__GNUC__) && defined(SBC_HAVE_ARMV6) && \ + defined(__ARM_EABI__) && !defined(__thumb__) && \ + !defined(__ARM_NEON__) + +#define SBC_BUILD_WITH_ARMV6_SUPPORT + +void sbc_init_primitives_armv6(struct sbc_encoder_state *encoder_state); + +#endif + +#endif -- cgit From ee93eff6b7a04193e7afa3c5aa4fe71558634b21 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 14 Mar 2011 15:37:42 -0300 Subject: sbc: add iwmmxt optimization for sbc for pxa series cpu Benchmarked on ARM PXA platform: === Before (4 bands) ==== $ time ./sbcenc_orig -s 4 long.au > /dev/null real 0m 2.44s user 0m 2.39s sys 0m 0.05s === After (4 bands) ==== $ time ./sbcenc -s 4 long.au > /dev/null real 0m 1.59s user 0m 1.49s sys 0m 0.10s === Before (8 bands) ==== $ time ./sbcenc_orig -s 8 long.au > /dev/null real 0m 4.05s user 0m 3.98s sys 0m 0.07s === After (8 bands) ==== $ time ./sbcenc -s 8 long.au > /dev/null real 0m 1.48s user 0m 1.41s sys 0m 0.06s === Before (a2dp usage) ==== $ time ./sbcenc_orig -b53 -s8 -j long.au > /dev/null real 0m 4.51s user 0m 4.41s sys 0m 0.10s === After (a2dp usage) ==== $ time ./sbcenc -b53 -s8 -j long.au > /dev/null real 0m 2.05s user 0m 1.99s sys 0m 0.06s --- src/Makefile.am | 2 +- 
src/modules/bluetooth/sbc/sbc_primitives.c | 4 + src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c | 304 ++++++++++++++++++++++ src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h | 42 +++ 4 files changed, 351 insertions(+), 1 deletion(-) create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h diff --git a/src/Makefile.am b/src/Makefile.am index 77f6d657..a60f1827 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1820,7 +1820,7 @@ module_bluetooth_discover_la_LDFLAGS = $(MODULE_LDFLAGS) module_bluetooth_discover_la_LIBADD = $(MODULE_LIBADD) $(DBUS_LIBS) libbluetooth-util.la module_bluetooth_discover_la_CFLAGS = $(AM_CFLAGS) $(DBUS_CFLAGS) -libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h modules/bluetooth/sbc/sbc_tables.h modules/bluetooth/sbc/sbc_math.h modules/bluetooth/sbc/sbc_primitives.h modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives_mmx.h modules/bluetooth/sbc/sbc_primitives_neon.h modules/bluetooth/sbc/sbc_primitives_mmx.c modules/bluetooth/sbc/sbc_primitives_neon.c modules/bluetooth/sbc/sbc_primitives_armv6.c +libbluetooth_sbc_la_SOURCES = modules/bluetooth/sbc/sbc.c modules/bluetooth/sbc/sbc.h modules/bluetooth/sbc/sbc_tables.h modules/bluetooth/sbc/sbc_math.h modules/bluetooth/sbc/sbc_primitives.h modules/bluetooth/sbc/sbc_primitives.c modules/bluetooth/sbc/sbc_primitives_mmx.h modules/bluetooth/sbc/sbc_primitives_neon.h modules/bluetooth/sbc/sbc_primitives_mmx.c modules/bluetooth/sbc/sbc_primitives_neon.c modules/bluetooth/sbc/sbc_primitives_armv6.c modules/bluetooth/sbc/sbc_primitives_iwmmxt.c libbluetooth_sbc_la_LDFLAGS = -avoid-version libbluetooth_sbc_la_LIBADD = $(MODULE_LIBADD) libbluetooth_sbc_la_CFLAGS = $(AM_CFLAGS) diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c index 66e20a0a..3a76a7a0 100644 --- a/src/modules/bluetooth/sbc/sbc_primitives.c 
+++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -32,6 +32,7 @@ #include "sbc_primitives.h" #include "sbc_primitives_mmx.h" +#include "sbc_primitives_iwmmxt.h" #include "sbc_primitives_neon.h" #include "sbc_primitives_armv6.h" @@ -543,6 +544,9 @@ void sbc_init_primitives(struct sbc_encoder_state *state) #ifdef SBC_BUILD_WITH_ARMV6_SUPPORT sbc_init_primitives_armv6(state); #endif +#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT + sbc_init_primitives_iwmmxt(state); +#endif #ifdef SBC_BUILD_WITH_NEON_SUPPORT sbc_init_primitives_neon(state); #endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c new file mode 100644 index 00000000..213967ef --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.c @@ -0,0 +1,304 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2010 Keith Mok + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_iwmmxt.h" + +/* + * IWMMXT optimizations + */ + +#ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT + +static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + asm volatile ( + "wldrd wr0, [%0]\n" + "tbcstw wr4, %2\n" + "wldrd wr2, [%1]\n" + "wldrd wr1, [%0, #8]\n" + "wldrd wr3, [%1, #8]\n" + "wmadds wr0, wr2, wr0\n" + " wldrd wr6, [%0, #16]\n" + "wmadds wr1, wr3, wr1\n" + " wldrd wr7, [%0, #24]\n" + "waddwss wr0, wr0, wr4\n" + " wldrd wr8, [%1, #16]\n" + "waddwss wr1, wr1, wr4\n" + " wldrd wr9, [%1, #24]\n" + " wmadds wr6, wr8, wr6\n" + " wldrd wr2, [%0, #32]\n" + " wmadds wr7, wr9, wr7\n" + " wldrd wr3, [%0, #40]\n" + " waddwss wr0, wr6, wr0\n" + " wldrd wr4, [%1, #32]\n" + " waddwss wr1, wr7, wr1\n" + " wldrd wr5, [%1, #40]\n" + " wmadds wr2, wr4, wr2\n" + "wldrd wr6, [%0, #48]\n" + " wmadds wr3, wr5, wr3\n" + "wldrd wr7, [%0, #56]\n" + " waddwss wr0, wr2, wr0\n" + "wldrd wr8, [%1, #48]\n" + " waddwss wr1, wr3, wr1\n" + "wldrd wr9, [%1, #56]\n" + "wmadds wr6, wr8, wr6\n" + " wldrd wr2, [%0, #64]\n" + "wmadds wr7, wr9, wr7\n" + " wldrd wr3, [%0, #72]\n" + "waddwss wr0, wr6, wr0\n" + " wldrd wr4, [%1, #64]\n" + "waddwss wr1, wr7, wr1\n" + " wldrd wr5, [%1, #72]\n" + " wmadds wr2, wr4, wr2\n" + "tmcr wcgr0, %4\n" + " wmadds wr3, wr5, wr3\n" + " waddwss wr0, wr2, wr0\n" + " waddwss wr1, wr3, wr1\n" + "\n" + "wsrawg wr0, wr0, wcgr0\n" + " wldrd wr4, [%1, #80]\n" + "wsrawg wr1, wr1, wcgr0\n" + " wldrd wr5, [%1, #88]\n" + "wpackwss wr0, wr0, wr0\n" + " wldrd wr6, [%1, #96]\n" + "wpackwss wr1, wr1, wr1\n" + "wmadds wr2, wr5, wr0\n" + " wldrd wr7, [%1, #104]\n" + "wmadds wr0, wr4, wr0\n" + "\n" +
" wmadds wr3, wr7, wr1\n" + " wmadds wr1, wr6, wr1\n" + " waddwss wr2, wr3, wr2\n" + " waddwss wr0, wr1, wr0\n" + "\n" + "wstrd wr0, [%3]\n" + "wstrd wr2, [%3, #8]\n" + : + : "r" (in), "r" (consts), + "r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out), + "r" (SBC_PROTO_FIXED4_SCALE) + : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", + "wr8", "wr9", "wcgr0", "memory"); +} + +static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + asm volatile ( + "wldrd wr0, [%0]\n" + "tbcstw wr15, %2\n" + "wldrd wr1, [%0, #8]\n" + "wldrd wr2, [%0, #16]\n" + "wldrd wr3, [%0, #24]\n" + "wldrd wr4, [%1]\n" + "wldrd wr5, [%1, #8]\n" + "wldrd wr6, [%1, #16]\n" + "wldrd wr7, [%1, #24]\n" + "wmadds wr0, wr0, wr4\n" + " wldrd wr8, [%1, #32]\n" + "wmadds wr1, wr1, wr5\n" + " wldrd wr9, [%1, #40]\n" + "wmadds wr2, wr2, wr6\n" + " wldrd wr10, [%1, #48]\n" + "wmadds wr3, wr3, wr7\n" + " wldrd wr11, [%1, #56]\n" + "waddwss wr0, wr0, wr15\n" + " wldrd wr4, [%0, #32]\n" + "waddwss wr1, wr1, wr15\n" + " wldrd wr5, [%0, #40]\n" + "waddwss wr2, wr2, wr15\n" + " wldrd wr6, [%0, #48]\n" + "waddwss wr3, wr3, wr15\n" + " wldrd wr7, [%0, #56]\n" + " wmadds wr4, wr4, wr8\n" + " wldrd wr12, [%0, #64]\n" + " wmadds wr5, wr5, wr9\n" + " wldrd wr13, [%0, #72]\n" + " wmadds wr6, wr6, wr10\n" + " wldrd wr14, [%0, #80]\n" + " wmadds wr7, wr7, wr11\n" + " wldrd wr15, [%0, #88]\n" + " waddwss wr0, wr4, wr0\n" + " wldrd wr8, [%1, #64]\n" + " waddwss wr1, wr5, wr1\n" + " wldrd wr9, [%1, #72]\n" + " waddwss wr2, wr6, wr2\n" + " wldrd wr10, [%1, #80]\n" + " waddwss wr3, wr7, wr3\n" + " wldrd wr11, [%1, #88]\n" + " wmadds wr12, wr12, wr8\n" + "wldrd wr4, [%0, #96]\n" + " wmadds wr13, wr13, wr9\n" + "wldrd wr5, [%0, #104]\n" + " wmadds wr14, wr14, wr10\n" + "wldrd wr6, [%0, #112]\n" + " wmadds wr15, wr15, wr11\n" + "wldrd wr7, [%0, #120]\n" + " waddwss wr0, wr12, wr0\n" + "wldrd wr8, [%1, #96]\n" + " waddwss wr1, wr13, wr1\n" + "wldrd wr9, [%1, #104]\n" + " 
waddwss wr2, wr14, wr2\n" + "wldrd wr10, [%1, #112]\n" + " waddwss wr3, wr15, wr3\n" + "wldrd wr11, [%1, #120]\n" + "wmadds wr4, wr4, wr8\n" + " wldrd wr12, [%0, #128]\n" + "wmadds wr5, wr5, wr9\n" + " wldrd wr13, [%0, #136]\n" + "wmadds wr6, wr6, wr10\n" + " wldrd wr14, [%0, #144]\n" + "wmadds wr7, wr7, wr11\n" + " wldrd wr15, [%0, #152]\n" + "waddwss wr0, wr4, wr0\n" + " wldrd wr8, [%1, #128]\n" + "waddwss wr1, wr5, wr1\n" + " wldrd wr9, [%1, #136]\n" + "waddwss wr2, wr6, wr2\n" + " wldrd wr10, [%1, #144]\n" + " waddwss wr3, wr7, wr3\n" + " wldrd wr11, [%1, #152]\n" + " wmadds wr12, wr12, wr8\n" + "tmcr wcgr0, %4\n" + " wmadds wr13, wr13, wr9\n" + " wmadds wr14, wr14, wr10\n" + " wmadds wr15, wr15, wr11\n" + " waddwss wr0, wr12, wr0\n" + " waddwss wr1, wr13, wr1\n" + " waddwss wr2, wr14, wr2\n" + " waddwss wr3, wr15, wr3\n" + "\n" + "wsrawg wr0, wr0, wcgr0\n" + "wsrawg wr1, wr1, wcgr0\n" + "wsrawg wr2, wr2, wcgr0\n" + "wsrawg wr3, wr3, wcgr0\n" + "\n" + "wpackwss wr0, wr0, wr0\n" + "wpackwss wr1, wr1, wr1\n" + " wldrd wr4, [%1, #160]\n" + "wpackwss wr2, wr2, wr2\n" + " wldrd wr5, [%1, #168]\n" + "wpackwss wr3, wr3, wr3\n" + " wldrd wr6, [%1, #192]\n" + " wmadds wr4, wr4, wr0\n" + " wldrd wr7, [%1, #200]\n" + " wmadds wr5, wr5, wr0\n" + " wldrd wr8, [%1, #224]\n" + " wmadds wr6, wr6, wr1\n" + " wldrd wr9, [%1, #232]\n" + " wmadds wr7, wr7, wr1\n" + " waddwss wr4, wr6, wr4\n" + " waddwss wr5, wr7, wr5\n" + " wmadds wr8, wr8, wr2\n" + "wldrd wr6, [%1, #256]\n" + " wmadds wr9, wr9, wr2\n" + "wldrd wr7, [%1, #264]\n" + "waddwss wr4, wr8, wr4\n" + " waddwss wr5, wr9, wr5\n" + "wmadds wr6, wr6, wr3\n" + "wmadds wr7, wr7, wr3\n" + "waddwss wr4, wr6, wr4\n" + "waddwss wr5, wr7, wr5\n" + "\n" + "wstrd wr4, [%3]\n" + "wstrd wr5, [%3, #8]\n" + "\n" + "wldrd wr6, [%1, #176]\n" + "wldrd wr5, [%1, #184]\n" + "wmadds wr5, wr5, wr0\n" + "wldrd wr8, [%1, #208]\n" + "wmadds wr0, wr6, wr0\n" + "wldrd wr9, [%1, #216]\n" + "wmadds wr9, wr9, wr1\n" + "wldrd wr6, [%1, #240]\n" + "wmadds 
wr1, wr8, wr1\n" + "wldrd wr7, [%1, #248]\n" + "waddwss wr0, wr1, wr0\n" + "waddwss wr5, wr9, wr5\n" + "wmadds wr7, wr7, wr2\n" + "wldrd wr8, [%1, #272]\n" + "wmadds wr2, wr6, wr2\n" + "wldrd wr9, [%1, #280]\n" + "waddwss wr0, wr2, wr0\n" + "waddwss wr5, wr7, wr5\n" + "wmadds wr9, wr9, wr3\n" + "wmadds wr3, wr8, wr3\n" + "waddwss wr0, wr3, wr0\n" + "waddwss wr5, wr9, wr5\n" + "\n" + "wstrd wr0, [%3, #16]\n" + "wstrd wr5, [%3, #24]\n" + : + : "r" (in), "r" (consts), + "r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out), + "r" (SBC_PROTO_FIXED8_SCALE) + : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7", + "wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15", + "wcgr0", "memory"); +} + +static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_iwmmxt(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_iwmmxt(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_iwmmxt(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_iwmmxt(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt; + state->implementation_info = "IWMMXT"; +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h 
b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h new file mode 100644 index 00000000..b535e686 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_iwmmxt.h @@ -0,0 +1,42 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2010 Keith Mok + * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2004-2010 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_IWMMXT_H +#define __SBC_PRIMITIVES_IWMMXT_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__IWMMXT__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_IWMMXT_SUPPORT + +void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *encoder_state); + +#endif + +#endif -- cgit