driver safety & efficiency improvements

- implement MIDI input batching for reduced ALSA core overhead
- minimize playback spinlock scope by moving memcpy outside the lock
- optimize capture bit-transposition using 8x8 Butterfly algorithm
- use put_unaligned_le32 for safe capture DMA writes
- add defensive checks for stream validity in URB completion handlers
This commit is contained in:
Šerif Rami 2026-01-19 14:56:02 +01:00
parent 2f53fd05bc
commit 6a39e6fd6d
4 changed files with 120 additions and 124 deletions

View File

@ -159,8 +159,10 @@ void tascam_stop_work_handler(struct work_struct *work)
usb_kill_anchored_urbs(&tascam->playback_anchor); usb_kill_anchored_urbs(&tascam->playback_anchor);
usb_kill_anchored_urbs(&tascam->feedback_anchor); usb_kill_anchored_urbs(&tascam->feedback_anchor);
usb_kill_anchored_urbs(&tascam->capture_anchor);
} }
static void tascam_card_private_free(struct snd_card *card) static void tascam_card_private_free(struct snd_card *card)
{ {
struct tascam_card *tascam = card->private_data; struct tascam_card *tascam = card->private_data;

View File

@ -73,7 +73,7 @@ static snd_pcm_uframes_t tascam_capture_pointer(struct snd_pcm_substream *substr
static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd) static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
{ {
struct tascam_card *tascam = snd_pcm_substream_chip(substream); struct tascam_card *tascam = snd_pcm_substream_chip(substream);
int i, u, ret = 0; int i, ret = 0;
bool start = false; bool start = false;
bool stop = false; bool stop = false;
unsigned long flags; unsigned long flags;
@ -97,17 +97,9 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
spin_unlock_irqrestore(&tascam->lock, flags);
if (stop) { if (stop) {
smp_mb();
for (i = 0; i < NUM_CAPTURE_URBS; i++) {
if (tascam->capture_urbs[i])
usb_unlink_urb(tascam->capture_urbs[i]);
}
if (tascam->running_ghost_playback) { if (tascam->running_ghost_playback) {
atomic_set(&tascam->playback_active, 0);
tascam->running_ghost_playback = false; tascam->running_ghost_playback = false;
for (i = 0; i < NUM_PLAYBACK_URBS; i++) { for (i = 0; i < NUM_PLAYBACK_URBS; i++) {
@ -136,8 +128,8 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
atomic_inc(&tascam->active_urbs); atomic_inc(&tascam->active_urbs);
} }
if (!atomic_read(&tascam->playback_active)) { if (ret == 0 && !atomic_read(&tascam->playback_active)) {
atomic_set(&tascam->playback_active, 1); int u;
tascam->running_ghost_playback = true; tascam->running_ghost_playback = true;
tascam->phase_accum = 0; tascam->phase_accum = 0;
tascam->freq_q16 = div_u64(((u64)tascam->current_rate << 16), 8000); tascam->freq_q16 = div_u64(((u64)tascam->current_rate << 16), 8000);
@ -155,7 +147,6 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
usb_anchor_urb(f_urb, &tascam->feedback_anchor); usb_anchor_urb(f_urb, &tascam->feedback_anchor);
if (usb_submit_urb(f_urb, GFP_ATOMIC) < 0) { if (usb_submit_urb(f_urb, GFP_ATOMIC) < 0) {
usb_unanchor_urb(f_urb); usb_unanchor_urb(f_urb);
atomic_dec(&tascam->active_urbs);
} else { } else {
atomic_inc(&tascam->active_urbs); atomic_inc(&tascam->active_urbs);
} }
@ -177,39 +168,41 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd)
usb_anchor_urb(urb, &tascam->playback_anchor); usb_anchor_urb(urb, &tascam->playback_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0) { if (usb_submit_urb(urb, GFP_ATOMIC) < 0) {
usb_unanchor_urb(urb); usb_unanchor_urb(urb);
atomic_dec(&tascam->active_urbs);
} else { } else {
atomic_inc(&tascam->active_urbs); atomic_inc(&tascam->active_urbs);
} }
} }
} }
} }
spin_unlock_irqrestore(&tascam->lock, flags);
return ret; return ret;
} }
static inline void tascam_unpack_8bytes(const u8 *src, u8 *out_bit0, u8 *out_bit1) static inline void tascam_unpack_8bytes(const u8 *src, u8 *out_bit0, u8 *out_bit1)
{ {
u64 val = get_unaligned_le64(src); /* The hardware sends bits in a layout that requires both transposition
* and bit-reversal within the result. swab64() + Butterfly Transpose
* achieves exactly the same mapping as the original bit-by-bit loop.
*/
u64 x = get_unaligned_le64(src);
u64 t;
*out_bit0 = /* Stage 0: Reverse byte order to handle the hardware's MSB-first nature */
(((val >> 0) & 1) << 7) | x = __builtin_bswap64(x);
(((val >> 8) & 1) << 6) |
(((val >> 16) & 1) << 5) |
(((val >> 24) & 1) << 4) |
(((val >> 32) & 1) << 3) |
(((val >> 40) & 1) << 2) |
(((val >> 48) & 1) << 1) |
(((val >> 56) & 1) << 0);
*out_bit1 = /* 8x8 Bit Transposition (Butterfly) */
(((val >> 1) & 1) << 7) | t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AAULL;
(((val >> 9) & 1) << 6) | x = x ^ t ^ (t << 7);
(((val >> 17) & 1) << 5) |
(((val >> 25) & 1) << 4) | t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCULL;
(((val >> 33) & 1) << 3) | x = x ^ t ^ (t << 14);
(((val >> 41) & 1) << 2) |
(((val >> 49) & 1) << 1) | t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0ULL;
(((val >> 57) & 1) << 0); x = x ^ t ^ (t << 28);
/* Extract the untangled bits for the first two sample planes */
*out_bit0 = (u8)(x >> 0);
*out_bit1 = (u8)(x >> 8);
} }
static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_decode) static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_decode)
@ -229,10 +222,10 @@ static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_d
tascam_unpack_8bytes(p_src_b + 8, &m[1], &m[3]); tascam_unpack_8bytes(p_src_b + 8, &m[1], &m[3]);
tascam_unpack_8bytes(p_src_b + 16, &l[1], &l[3]); tascam_unpack_8bytes(p_src_b + 16, &l[1], &l[3]);
*dst++ = (h[0] << 24) | (m[0] << 16) | (l[0] << 8); put_unaligned_le32((h[0] << 24) | (m[0] << 16) | (l[0] << 8), dst++);
*dst++ = (h[1] << 24) | (m[1] << 16) | (l[1] << 8); put_unaligned_le32((h[1] << 24) | (m[1] << 16) | (l[1] << 8), dst++);
*dst++ = (h[2] << 24) | (m[2] << 16) | (l[2] << 8); put_unaligned_le32((h[2] << 24) | (m[2] << 16) | (l[2] << 8), dst++);
*dst++ = (h[3] << 24) | (m[3] << 16) | (l[3] << 8); put_unaligned_le32((h[3] << 24) | (m[3] << 16) | (l[3] << 8), dst++);
} }
} }
@ -318,18 +311,20 @@ void capture_urb_complete(struct urb *urb)
need_period_elapsed = true; need_period_elapsed = true;
} }
} }
spin_unlock_irqrestore(&tascam->lock, flags);
if (need_period_elapsed)
snd_pcm_period_elapsed(substream);
} }
usb_anchor_urb(urb, &tascam->capture_anchor); usb_anchor_urb(urb, &tascam->capture_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0) { if (usb_submit_urb(urb, GFP_ATOMIC) < 0) {
usb_unanchor_urb(urb); usb_unanchor_urb(urb);
atomic_dec(&tascam->active_urbs); atomic_dec(&tascam->active_urbs);
spin_unlock_irqrestore(&tascam->lock, flags);
return; return;
} }
spin_unlock_irqrestore(&tascam->lock, flags);
if (need_period_elapsed && substream)
snd_pcm_period_elapsed(substream);
} }
const struct snd_pcm_ops tascam_capture_ops = { const struct snd_pcm_ops tascam_capture_ops = {

View File

@ -93,7 +93,6 @@ static void tascam_midi_in_complete(struct urb *urb)
struct tascam_card *tascam = urb->context; struct tascam_card *tascam = urb->context;
struct snd_rawmidi_substream *substream; struct snd_rawmidi_substream *substream;
unsigned long flags; unsigned long flags;
int i;
if (urb->status) if (urb->status)
return; return;
@ -104,12 +103,14 @@ static void tascam_midi_in_complete(struct urb *urb)
if (urb->actual_length == MIDI_PACKET_SIZE && substream) { if (urb->actual_length == MIDI_PACKET_SIZE && substream) {
u8 *data = urb->transfer_buffer; u8 *data = urb->transfer_buffer;
int len = 0;
for (i = 0; i < MIDI_PAYLOAD_SIZE; i++) { /* Find the actual length of the MIDI message (stop at 0xFD padding) */
if (data[i] == 0xFD) while (len < MIDI_PAYLOAD_SIZE && data[len] != 0xFD)
break; len++;
snd_rawmidi_receive(substream, &data[i], 1);
} if (len > 0)
snd_rawmidi_receive(substream, data, len);
} }
usb_anchor_urb(urb, &tascam->midi_anchor); usb_anchor_urb(urb, &tascam->midi_anchor);
@ -123,10 +124,11 @@ static void tascam_midi_input_trigger(struct snd_rawmidi_substream *substream, i
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&tascam->midi_lock, flags); spin_lock_irqsave(&tascam->midi_lock, flags);
if (up) if (up) {
tascam->midi_input = substream; tascam->midi_input = substream;
else } else {
tascam->midi_input = NULL; tascam->midi_input = NULL;
}
spin_unlock_irqrestore(&tascam->midi_lock, flags); spin_unlock_irqrestore(&tascam->midi_lock, flags);
} }

View File

@ -117,63 +117,42 @@ static int tascam_playback_trigger(struct snd_pcm_substream *substream, int cmd)
int i, ret = 0; int i, ret = 0;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&tascam->lock, flags);
switch (cmd) { switch (cmd) {
case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_START:
case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_RESUME:
spin_lock_irqsave(&tascam->lock, flags); if (!atomic_read(&tascam->playback_active)) {
if (atomic_read(&tascam->playback_active)) {
spin_unlock_irqrestore(&tascam->lock, flags);
return 0;
}
atomic_set(&tascam->playback_active, 1); atomic_set(&tascam->playback_active, 1);
tascam->feedback_synced = false; tascam->running_ghost_playback = false;
spin_unlock_irqrestore(&tascam->lock, flags);
for (i = 0; i < NUM_FEEDBACK_URBS; i++) { for (i = 0; i < NUM_FEEDBACK_URBS; i++) {
usb_anchor_urb(tascam->feedback_urbs[i], &tascam->feedback_anchor); usb_anchor_urb(tascam->feedback_urbs[i], &tascam->feedback_anchor);
if (usb_submit_urb(tascam->feedback_urbs[i], GFP_ATOMIC) < 0) { if (usb_submit_urb(tascam->feedback_urbs[i], GFP_ATOMIC) < 0) {
usb_unanchor_urb(tascam->feedback_urbs[i]); usb_unanchor_urb(tascam->feedback_urbs[i]);
ret = -EIO; } else {
goto error;
}
atomic_inc(&tascam->active_urbs); atomic_inc(&tascam->active_urbs);
} }
}
for (i = 0; i < NUM_PLAYBACK_URBS; i++) { for (i = 0; i < NUM_PLAYBACK_URBS; i++) {
usb_anchor_urb(tascam->playback_urbs[i], &tascam->playback_anchor); usb_anchor_urb(tascam->playback_urbs[i], &tascam->playback_anchor);
if (usb_submit_urb(tascam->playback_urbs[i], GFP_ATOMIC) < 0) { if (usb_submit_urb(tascam->playback_urbs[i], GFP_ATOMIC) < 0) {
usb_unanchor_urb(tascam->playback_urbs[i]); usb_unanchor_urb(tascam->playback_urbs[i]);
ret = -EIO; } else {
goto error;
}
atomic_inc(&tascam->active_urbs); atomic_inc(&tascam->active_urbs);
} }
}
}
break; break;
case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH: case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
atomic_set(&tascam->playback_active, 0); atomic_set(&tascam->playback_active, 0);
for (i = 0; i < NUM_PLAYBACK_URBS; i++) {
if (tascam->playback_urbs[i])
usb_unlink_urb(tascam->playback_urbs[i]);
}
for (i = 0; i < NUM_FEEDBACK_URBS; i++) {
if (tascam->feedback_urbs[i])
usb_unlink_urb(tascam->feedback_urbs[i]);
}
break; break;
default: default:
return -EINVAL; ret = -EINVAL;
break;
} }
spin_unlock_irqrestore(&tascam->lock, flags);
return 0;
error:
atomic_set(&tascam->playback_active, 0);
usb_kill_anchored_urbs(&tascam->playback_anchor);
usb_kill_anchored_urbs(&tascam->feedback_anchor);
return ret; return ret;
} }
@ -201,12 +180,11 @@ void playback_urb_complete(struct urb *urb)
goto exit_clear; goto exit_clear;
} }
if (!atomic_read(&tascam->playback_active)) if (!atomic_read(&tascam->playback_active) && !tascam->running_ghost_playback)
goto exit_clear; goto exit_clear;
substream = tascam->playback_substream;
spin_lock_irqsave(&tascam->lock, flags); spin_lock_irqsave(&tascam->lock, flags);
substream = tascam->playback_substream;
for (i = 0; i < urb->number_of_packets; i++) { for (i = 0; i < urb->number_of_packets; i++) {
unsigned int frames_for_packet; unsigned int frames_for_packet;
@ -224,15 +202,28 @@ void playback_urb_complete(struct urb *urb)
} }
urb->transfer_buffer_length = total_bytes_for_urb; urb->transfer_buffer_length = total_bytes_for_urb;
if (total_bytes_for_urb > 0) { if (total_bytes_for_urb == 0) {
if (substream) { spin_unlock_irqrestore(&tascam->lock, flags);
goto resubmit;
}
if (!substream) {
memset(urb->transfer_buffer, 0, total_bytes_for_urb);
spin_unlock_irqrestore(&tascam->lock, flags);
goto resubmit;
}
runtime = substream->runtime; runtime = substream->runtime;
size_t playback_pos = tascam->driver_playback_pos;
spin_unlock_irqrestore(&tascam->lock, flags);
/* Copy from ALSA DMA buffer - outside of the main device lock */
u8 *dst_buf = urb->transfer_buffer; u8 *dst_buf = urb->transfer_buffer;
size_t ptr_bytes = frames_to_bytes(runtime, tascam->driver_playback_pos); size_t ptr_bytes = frames_to_bytes(runtime, playback_pos);
frames_to_copy = bytes_to_frames(runtime, total_bytes_for_urb); frames_to_copy = bytes_to_frames(runtime, total_bytes_for_urb);
if (tascam->driver_playback_pos + frames_to_copy > runtime->buffer_size) { if (playback_pos + frames_to_copy > runtime->buffer_size) {
size_t part1 = runtime->buffer_size - tascam->driver_playback_pos; size_t part1 = runtime->buffer_size - playback_pos;
size_t part1_bytes = frames_to_bytes(runtime, part1); size_t part1_bytes = frames_to_bytes(runtime, part1);
memcpy(dst_buf, runtime->dma_area + ptr_bytes, part1_bytes); memcpy(dst_buf, runtime->dma_area + ptr_bytes, part1_bytes);
@ -241,33 +232,38 @@ void playback_urb_complete(struct urb *urb)
memcpy(dst_buf, runtime->dma_area + ptr_bytes, total_bytes_for_urb); memcpy(dst_buf, runtime->dma_area + ptr_bytes, total_bytes_for_urb);
} }
/* Re-acquire lock to update shared stream state */
spin_lock_irqsave(&tascam->lock, flags);
tascam->driver_playback_pos += frames_to_copy; tascam->driver_playback_pos += frames_to_copy;
if (tascam->driver_playback_pos >= runtime->buffer_size) if (tascam->driver_playback_pos >= runtime->buffer_size)
tascam->driver_playback_pos -= runtime->buffer_size; tascam->driver_playback_pos -= runtime->buffer_size;
tascam->playback_frames_consumed += frames_to_copy; tascam->playback_frames_consumed += frames_to_copy;
if (div_u64(tascam->playback_frames_consumed, runtime->period_size) > tascam->last_pb_period_pos) { if (runtime->period_size > 0 &&
div_u64(tascam->playback_frames_consumed, runtime->period_size) > tascam->last_pb_period_pos) {
tascam->last_pb_period_pos = div_u64(tascam->playback_frames_consumed, runtime->period_size); tascam->last_pb_period_pos = div_u64(tascam->playback_frames_consumed, runtime->period_size);
need_period_elapsed = true; need_period_elapsed = true;
} }
} else {
memset(urb->transfer_buffer, 0, total_bytes_for_urb);
}
}
spin_unlock_irqrestore(&tascam->lock, flags); spin_unlock_irqrestore(&tascam->lock, flags);
resubmit:
usb_anchor_urb(urb, &tascam->playback_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0) {
goto exit_clear_locked;
}
if (need_period_elapsed && substream) if (need_period_elapsed && substream)
snd_pcm_period_elapsed(substream); snd_pcm_period_elapsed(substream);
usb_anchor_urb(urb, &tascam->playback_anchor);
if (usb_submit_urb(urb, GFP_ATOMIC) < 0)
goto exit_clear;
return; return;
exit_clear_locked:
spin_lock_irqsave(&tascam->lock, flags);
exit_clear: exit_clear:
usb_unanchor_urb(urb); usb_unanchor_urb(urb);
spin_unlock_irqrestore(&tascam->lock, flags);
atomic_dec(&tascam->active_urbs); atomic_dec(&tascam->active_urbs);
} }
@ -288,7 +284,8 @@ void feedback_urb_complete(struct urb *urb)
unsigned long flags; unsigned long flags;
int p; int p;
if (urb->status || !tascam || !atomic_read(&tascam->playback_active)) { if (urb->status || !tascam ||
(!atomic_read(&tascam->playback_active) && !tascam->running_ghost_playback)) {
usb_unanchor_urb(urb); usb_unanchor_urb(urb);
atomic_dec(&tascam->active_urbs); atomic_dec(&tascam->active_urbs);
return; return;