driver safety & efficiency improvements

- implement MIDI input batching for reduced ALSA core overhead
- minimize playback spinlock scope by moving memcpy outside the lock
- optimize capture bit-transposition using 8x8 Butterfly algorithm
- use put_unaligned_le32 for safe capture DMA writes
- add defensive checks for stream validity in URB completion handlers
This commit is contained in:
Šerif Rami 2026-01-19 14:56:02 +01:00
parent 2f53fd05bc
commit 6a39e6fd6d
4 changed files with 120 additions and 124 deletions

View File

@ -159,8 +159,10 @@ void tascam_stop_work_handler(struct work_struct *work)
usb_kill_anchored_urbs(&tascam->playback_anchor);
usb_kill_anchored_urbs(&tascam->feedback_anchor);
usb_kill_anchored_urbs(&tascam->capture_anchor);
}
static void tascam_card_private_free(struct snd_card *card)
{
struct tascam_card *tascam = card->private_data;

View File

@ -73,7 +73,7 @@ static snd_pcm_uframes_t tascam_capture_pointer(struct snd_pcm_substream *substr
static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
{
struct tascam_card *tascam = snd_pcm_substream_chip(substream);
int i, u, ret = 0;
int i, ret = 0;
bool start = false;
bool stop = false;
unsigned long flags;
@ -97,17 +97,9 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
ret = -EINVAL;
break;
}
spin_unlock_irqrestore(&tascam->lock, flags);
if (stop) {
smp_mb();
for (i = 0; i < NUM_CAPTURE_URBS; i++) {
if (tascam->capture_urbs[i])
usb_unlink_urb(tascam->capture_urbs[i]);
}
if (tascam->running_ghost_playback) {
atomic_set(&tascam->playback_active, 0);
tascam->running_ghost_playback = false;
for (i = 0; i < NUM_PLAYBACK_URBS; i++) {
@ -136,8 +128,8 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
atomic_inc(&tascam->active_urbs);
}
if (!atomic_read(&tascam->playback_active)) {
atomic_set(&tascam->playback_active, 1);
if (ret == 0 && !atomic_read(&tascam->playback_active)) {
int u;
tascam->running_ghost_playback = true;
tascam->phase_accum = 0;
tascam->freq_q16 = div_u64(((u64)tascam->current_rate << 16), 8000);
@ -155,7 +147,6 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
usb_anchor_urb(f_urb, &tascam->feedback_anchor);
if (usb_submit_urb(f_urb, GFP_ATOMIC) < 0) {
usb_unanchor_urb(f_urb);
atomic_dec(&tascam->active_urbs);
} else {
atomic_inc(&tascam->active_urbs);
}
@ -177,39 +168,41 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
usb_anchor_urb(urb, &tascam->playback_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0) {
usb_unanchor_urb(urb);
atomic_dec(&tascam->active_urbs);
} else {
atomic_inc(&tascam->active_urbs);
}
}
}
}
spin_unlock_irqrestore(&tascam->lock, flags);
return ret;
}
static inline void tascam_unpack_8bytes(const u8 *src, u8 *out_bit0, u8 *out_bit1)
{
u64 val = get_unaligned_le64(src);
/* The hardware sends bits in a layout that requires both transposition
* and bit-reversal within the result. swab64() + Butterfly Transpose
* achieves exactly the same mapping as the original bit-by-bit loop.
*/
u64 x = get_unaligned_le64(src);
u64 t;
*out_bit0 =
(((val >> 0) & 1) << 7) |
(((val >> 8) & 1) << 6) |
(((val >> 16) & 1) << 5) |
(((val >> 24) & 1) << 4) |
(((val >> 32) & 1) << 3) |
(((val >> 40) & 1) << 2) |
(((val >> 48) & 1) << 1) |
(((val >> 56) & 1) << 0);
/* Stage 0: Reverse byte order to handle the hardware's MSB-first nature */
x = __builtin_bswap64(x);
*out_bit1 =
(((val >> 1) & 1) << 7) |
(((val >> 9) & 1) << 6) |
(((val >> 17) & 1) << 5) |
(((val >> 25) & 1) << 4) |
(((val >> 33) & 1) << 3) |
(((val >> 41) & 1) << 2) |
(((val >> 49) & 1) << 1) |
(((val >> 57) & 1) << 0);
/* 8x8 Bit Transposition (Butterfly) */
t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AAULL;
x = x ^ t ^ (t << 7);
t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCULL;
x = x ^ t ^ (t << 14);
t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0ULL;
x = x ^ t ^ (t << 28);
/* Extract the untangled bits for the first two sample planes */
*out_bit0 = (u8)(x >> 0);
*out_bit1 = (u8)(x >> 8);
}
static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_decode)
@ -229,10 +222,10 @@ static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_d
tascam_unpack_8bytes(p_src_b + 8, &m[1], &m[3]);
tascam_unpack_8bytes(p_src_b + 16, &l[1], &l[3]);
*dst++ = (h[0] << 24) | (m[0] << 16) | (l[0] << 8);
*dst++ = (h[1] << 24) | (m[1] << 16) | (l[1] << 8);
*dst++ = (h[2] << 24) | (m[2] << 16) | (l[2] << 8);
*dst++ = (h[3] << 24) | (m[3] << 16) | (l[3] << 8);
put_unaligned_le32((h[0] << 24) | (m[0] << 16) | (l[0] << 8), dst++);
put_unaligned_le32((h[1] << 24) | (m[1] << 16) | (l[1] << 8), dst++);
put_unaligned_le32((h[2] << 24) | (m[2] << 16) | (l[2] << 8), dst++);
put_unaligned_le32((h[3] << 24) | (m[3] << 16) | (l[3] << 8), dst++);
}
}
@ -318,18 +311,20 @@ void capture_urb_complete(struct urb *urb)
need_period_elapsed = true;
}
}
spin_unlock_irqrestore(&tascam->lock, flags);
if (need_period_elapsed)
snd_pcm_period_elapsed(substream);
}
usb_anchor_urb(urb, &tascam->capture_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0) {
usb_unanchor_urb(urb);
atomic_dec(&tascam->active_urbs);
spin_unlock_irqrestore(&tascam->lock, flags);
return;
}
spin_unlock_irqrestore(&tascam->lock, flags);
if (need_period_elapsed && substream)
snd_pcm_period_elapsed(substream);
}
const struct snd_pcm_ops tascam_capture_ops = {

View File

@ -93,7 +93,6 @@ static void tascam_midi_in_complete(struct urb *urb)
struct tascam_card *tascam = urb->context;
struct snd_rawmidi_substream *substream;
unsigned long flags;
int i;
if (urb->status)
return;
@ -104,12 +103,14 @@ static void tascam_midi_in_complete(struct urb *urb)
if (urb->actual_length == MIDI_PACKET_SIZE && substream) {
u8 *data = urb->transfer_buffer;
int len = 0;
for (i = 0; i < MIDI_PAYLOAD_SIZE; i++) {
if (data[i] == 0xFD)
break;
snd_rawmidi_receive(substream, &data[i], 1);
}
/* Find the actual length of the MIDI message (stop at 0xFD padding) */
while (len < MIDI_PAYLOAD_SIZE && data[len] != 0xFD)
len++;
if (len > 0)
snd_rawmidi_receive(substream, data, len);
}
usb_anchor_urb(urb, &tascam->midi_anchor);
@ -123,10 +124,11 @@ static void tascam_midi_input_trigger(struct snd_rawmidi_substream *substream, i
unsigned long flags;
spin_lock_irqsave(&tascam->midi_lock, flags);
if (up)
if (up) {
tascam->midi_input = substream;
else
} else {
tascam->midi_input = NULL;
}
spin_unlock_irqrestore(&tascam->midi_lock, flags);
}

View File

@ -117,63 +117,42 @@ static int tascam_playback_trigger(struct snd_pcm_substream *substream, int cmd)
int i, ret = 0;
unsigned long flags;
spin_lock_irqsave(&tascam->lock, flags);
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
case SNDRV_PCM_TRIGGER_RESUME:
spin_lock_irqsave(&tascam->lock, flags);
if (atomic_read(&tascam->playback_active)) {
spin_unlock_irqrestore(&tascam->lock, flags);
return 0;
}
if (!atomic_read(&tascam->playback_active)) {
atomic_set(&tascam->playback_active, 1);
tascam->feedback_synced = false;
spin_unlock_irqrestore(&tascam->lock, flags);
tascam->running_ghost_playback = false;
for (i = 0; i < NUM_FEEDBACK_URBS; i++) {
usb_anchor_urb(tascam->feedback_urbs[i], &tascam->feedback_anchor);
if (usb_submit_urb(tascam->feedback_urbs[i], GFP_ATOMIC) < 0) {
usb_unanchor_urb(tascam->feedback_urbs[i]);
ret = -EIO;
goto error;
}
} else {
atomic_inc(&tascam->active_urbs);
}
}
for (i = 0; i < NUM_PLAYBACK_URBS; i++) {
usb_anchor_urb(tascam->playback_urbs[i], &tascam->playback_anchor);
if (usb_submit_urb(tascam->playback_urbs[i], GFP_ATOMIC) < 0) {
usb_unanchor_urb(tascam->playback_urbs[i]);
ret = -EIO;
goto error;
}
} else {
atomic_inc(&tascam->active_urbs);
}
}
}
break;
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
atomic_set(&tascam->playback_active, 0);
for (i = 0; i < NUM_PLAYBACK_URBS; i++) {
if (tascam->playback_urbs[i])
usb_unlink_urb(tascam->playback_urbs[i]);
}
for (i = 0; i < NUM_FEEDBACK_URBS; i++) {
if (tascam->feedback_urbs[i])
usb_unlink_urb(tascam->feedback_urbs[i]);
}
break;
default:
return -EINVAL;
ret = -EINVAL;
break;
}
spin_unlock_irqrestore(&tascam->lock, flags);
return 0;
error:
atomic_set(&tascam->playback_active, 0);
usb_kill_anchored_urbs(&tascam->playback_anchor);
usb_kill_anchored_urbs(&tascam->feedback_anchor);
return ret;
}
@ -201,12 +180,11 @@ void playback_urb_complete(struct urb *urb)
goto exit_clear;
}
if (!atomic_read(&tascam->playback_active))
if (!atomic_read(&tascam->playback_active) && !tascam->running_ghost_playback)
goto exit_clear;
substream = tascam->playback_substream;
spin_lock_irqsave(&tascam->lock, flags);
substream = tascam->playback_substream;
for (i = 0; i < urb->number_of_packets; i++) {
unsigned int frames_for_packet;
@ -224,15 +202,28 @@ void playback_urb_complete(struct urb *urb)
}
urb->transfer_buffer_length = total_bytes_for_urb;
if (total_bytes_for_urb > 0) {
if (substream) {
if (total_bytes_for_urb == 0) {
spin_unlock_irqrestore(&tascam->lock, flags);
goto resubmit;
}
if (!substream) {
memset(urb->transfer_buffer, 0, total_bytes_for_urb);
spin_unlock_irqrestore(&tascam->lock, flags);
goto resubmit;
}
runtime = substream->runtime;
size_t playback_pos = tascam->driver_playback_pos;
spin_unlock_irqrestore(&tascam->lock, flags);
/* Copy from ALSA DMA buffer - outside of the main device lock */
u8 *dst_buf = urb->transfer_buffer;
size_t ptr_bytes = frames_to_bytes(runtime, tascam->driver_playback_pos);
size_t ptr_bytes = frames_to_bytes(runtime, playback_pos);
frames_to_copy = bytes_to_frames(runtime, total_bytes_for_urb);
if (tascam->driver_playback_pos + frames_to_copy > runtime->buffer_size) {
size_t part1 = runtime->buffer_size - tascam->driver_playback_pos;
if (playback_pos + frames_to_copy > runtime->buffer_size) {
size_t part1 = runtime->buffer_size - playback_pos;
size_t part1_bytes = frames_to_bytes(runtime, part1);
memcpy(dst_buf, runtime->dma_area + ptr_bytes, part1_bytes);
@ -241,33 +232,38 @@ void playback_urb_complete(struct urb *urb)
memcpy(dst_buf, runtime->dma_area + ptr_bytes, total_bytes_for_urb);
}
/* Re-acquire lock to update shared stream state */
spin_lock_irqsave(&tascam->lock, flags);
tascam->driver_playback_pos += frames_to_copy;
if (tascam->driver_playback_pos >= runtime->buffer_size)
tascam->driver_playback_pos -= runtime->buffer_size;
tascam->playback_frames_consumed += frames_to_copy;
if (div_u64(tascam->playback_frames_consumed, runtime->period_size) > tascam->last_pb_period_pos) {
if (runtime->period_size > 0 &&
div_u64(tascam->playback_frames_consumed, runtime->period_size) > tascam->last_pb_period_pos) {
tascam->last_pb_period_pos = div_u64(tascam->playback_frames_consumed, runtime->period_size);
need_period_elapsed = true;
}
} else {
memset(urb->transfer_buffer, 0, total_bytes_for_urb);
}
}
spin_unlock_irqrestore(&tascam->lock, flags);
resubmit:
usb_anchor_urb(urb, &tascam->playback_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0) {
goto exit_clear_locked;
}
if (need_period_elapsed && substream)
snd_pcm_period_elapsed(substream);
usb_anchor_urb(urb, &tascam->playback_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0)
goto exit_clear;
return;
exit_clear_locked:
spin_lock_irqsave(&tascam->lock, flags);
exit_clear:
usb_unanchor_urb(urb);
spin_unlock_irqrestore(&tascam->lock, flags);
atomic_dec(&tascam->active_urbs);
}
@ -288,7 +284,8 @@ void feedback_urb_complete(struct urb *urb)
unsigned long flags;
int p;
if (urb->status || !tascam || !atomic_read(&tascam->playback_active)) {
if (urb->status || !tascam ||
(!atomic_read(&tascam->playback_active) && !tascam->running_ghost_playback)) {
usb_unanchor_urb(urb);
atomic_dec(&tascam->active_urbs);
return;