From 6a39e6fd6d8f088fd9c5ddff32eb7945750a7152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0erif=20Rami?= Date: Mon, 19 Jan 2026 14:56:02 +0100 Subject: [PATCH] driver safety & efficiency improvements - implement MIDI input batching for reduced ALSA core overhead - minimize playback spinlock scope by moving memcpy outside the lock - optimize capture bit-transposition using 8x8 Butterfly algorithm - use put_unaligned_le32 for safe capture DMA writes - add defensive checks for stream validity in URB completion handlers --- us144mkii.c | 2 + us144mkii_capture.c | 75 ++++++++++------------ us144mkii_midi.c | 18 +++--- us144mkii_playback.c | 149 +++++++++++++++++++++---------------------- 4 files changed, 120 insertions(+), 124 deletions(-) diff --git a/us144mkii.c b/us144mkii.c index eacab74..d1c4d8f 100644 --- a/us144mkii.c +++ b/us144mkii.c @@ -159,8 +159,10 @@ void tascam_stop_work_handler(struct work_struct *work) usb_kill_anchored_urbs(&tascam->playback_anchor); usb_kill_anchored_urbs(&tascam->feedback_anchor); + usb_kill_anchored_urbs(&tascam->capture_anchor); } + static void tascam_card_private_free(struct snd_card *card) { struct tascam_card *tascam = card->private_data; diff --git a/us144mkii_capture.c b/us144mkii_capture.c index 09fd626..8593a10 100644 --- a/us144mkii_capture.c +++ b/us144mkii_capture.c @@ -73,7 +73,7 @@ static snd_pcm_uframes_t tascam_capture_pointer(struct snd_pcm_substream *substr static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd) { struct tascam_card *tascam = snd_pcm_substream_chip(substream); - int i, u, ret = 0; + int i, ret = 0; bool start = false; bool stop = false; unsigned long flags; @@ -97,17 +97,9 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd) ret = -EINVAL; break; } - spin_unlock_irqrestore(&tascam->lock, flags); if (stop) { - smp_mb(); - for (i = 0; i < NUM_CAPTURE_URBS; i++) { - if (tascam->capture_urbs[i]) - usb_unlink_urb(tascam->capture_urbs[i]); - } - if (tascam->running_ghost_playback) { - atomic_set(&tascam->playback_active, 0); tascam->running_ghost_playback = false; for (i = 0; i < NUM_PLAYBACK_URBS; i++) { @@ -136,8 +128,8 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd) atomic_inc(&tascam->active_urbs); } - if (!atomic_read(&tascam->playback_active)) { - atomic_set(&tascam->playback_active, 1); + if (ret == 0 && !atomic_read(&tascam->playback_active)) { + int u; tascam->running_ghost_playback = true; tascam->phase_accum = 0; tascam->freq_q16 = div_u64(((u64)tascam->current_rate << 16), 8000); @@ -155,7 +147,6 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd) usb_anchor_urb(f_urb, &tascam->feedback_anchor); if (usb_submit_urb(f_urb, GFP_ATOMIC) < 0) { usb_unanchor_urb(f_urb); - atomic_dec(&tascam->active_urbs); } else { atomic_inc(&tascam->active_urbs); } @@ -177,39 +168,41 @@ static int tascam_capture_trigger(struct snd_pcm_substream *substream, int cmd) usb_anchor_urb(urb, &tascam->playback_anchor); if (usb_submit_urb(urb, GFP_ATOMIC) < 0) { usb_unanchor_urb(urb); - atomic_dec(&tascam->active_urbs); } else { atomic_inc(&tascam->active_urbs); } } } } + spin_unlock_irqrestore(&tascam->lock, flags); return ret; } static inline void tascam_unpack_8bytes(const u8 *src, u8 *out_bit0, u8 *out_bit1) { - u64 val = get_unaligned_le64(src); + /* The hardware sends bits in a layout that requires both transposition + * and bit-reversal within the result. swab64() + Butterfly Transpose + * achieves exactly the same mapping as the original bit-by-bit loop. + */ + u64 x = get_unaligned_le64(src); + u64 t; - *out_bit0 = - (((val >> 0) & 1) << 7) | - (((val >> 8) & 1) << 6) | - (((val >> 16) & 1) << 5) | - (((val >> 24) & 1) << 4) | - (((val >> 32) & 1) << 3) | - (((val >> 40) & 1) << 2) | - (((val >> 48) & 1) << 1) | - (((val >> 56) & 1) << 0); + /* Stage 0: Reverse byte order to handle the hardware's MSB-first nature */ + x = __builtin_bswap64(x); - *out_bit1 = - (((val >> 1) & 1) << 7) | - (((val >> 9) & 1) << 6) | - (((val >> 17) & 1) << 5) | - (((val >> 25) & 1) << 4) | - (((val >> 33) & 1) << 3) | - (((val >> 41) & 1) << 2) | - (((val >> 49) & 1) << 1) | - (((val >> 57) & 1) << 0); + /* 8x8 Bit Transposition (Butterfly) */ + t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AAULL; + x = x ^ t ^ (t << 7); + + t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCULL; + x = x ^ t ^ (t << 14); + + t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0ULL; + x = x ^ t ^ (t << 28); + + /* Extract the untangled bits for the first two sample planes */ + *out_bit0 = (u8)(x >> 0); + *out_bit1 = (u8)(x >> 8); } static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_decode) @@ -229,10 +222,10 @@ static void tascam_decode_capture_chunk(const u8 *src, u32 *dst, int frames_to_d tascam_unpack_8bytes(p_src_b + 8, &m[1], &m[3]); tascam_unpack_8bytes(p_src_b + 16, &l[1], &l[3]); - *dst++ = (h[0] << 24) | (m[0] << 16) | (l[0] << 8); - *dst++ = (h[1] << 24) | (m[1] << 16) | (l[1] << 8); - *dst++ = (h[2] << 24) | (m[2] << 16) | (l[2] << 8); - *dst++ = (h[3] << 24) | (m[3] << 16) | (l[3] << 8); + put_unaligned_le32((h[0] << 24) | (m[0] << 16) | (l[0] << 8), dst++); + put_unaligned_le32((h[1] << 24) | (m[1] << 16) | (l[1] << 8), dst++); + put_unaligned_le32((h[2] << 24) | (m[2] << 16) | (l[2] << 8), dst++); + put_unaligned_le32((h[3] << 24) | (m[3] << 16) | (l[3] << 8), dst++); } } @@ -318,18 +311,20 @@ void capture_urb_complete(struct urb *urb) need_period_elapsed = true; } } - spin_unlock_irqrestore(&tascam->lock, flags); - - if (need_period_elapsed) - snd_pcm_period_elapsed(substream); } usb_anchor_urb(urb, &tascam->capture_anchor); if (usb_submit_urb(urb, GFP_ATOMIC) < 0) { usb_unanchor_urb(urb); atomic_dec(&tascam->active_urbs); + spin_unlock_irqrestore(&tascam->lock, flags); return; } + + spin_unlock_irqrestore(&tascam->lock, flags); + + if (need_period_elapsed && substream) + snd_pcm_period_elapsed(substream); } const struct snd_pcm_ops tascam_capture_ops = { diff --git a/us144mkii_midi.c b/us144mkii_midi.c index 399e444..653ac15 100644 --- a/us144mkii_midi.c +++ b/us144mkii_midi.c @@ -93,7 +93,6 @@ static void tascam_midi_in_complete(struct urb *urb) struct tascam_card *tascam = urb->context; struct snd_rawmidi_substream *substream; unsigned long flags; - int i; if (urb->status) return; @@ -104,12 +103,14 @@ static void tascam_midi_in_complete(struct urb *urb) if (urb->actual_length == MIDI_PACKET_SIZE && substream) { u8 *data = urb->transfer_buffer; + int len = 0; - for (i = 0; i < MIDI_PAYLOAD_SIZE; i++) { - if (data[i] == 0xFD) - break; - snd_rawmidi_receive(substream, &data[i], 1); - } + /* Find the actual length of the MIDI message (stop at 0xFD padding) */ + while (len < MIDI_PAYLOAD_SIZE && data[len] != 0xFD) + len++; + + if (len > 0) + snd_rawmidi_receive(substream, data, len); } usb_anchor_urb(urb, &tascam->midi_anchor); @@ -123,10 +124,11 @@ static void tascam_midi_input_trigger(struct snd_rawmidi_substream *substream, i unsigned long flags; spin_lock_irqsave(&tascam->midi_lock, flags); - if (up) + if (up) { tascam->midi_input = substream; - else + } else { tascam->midi_input = NULL; + } spin_unlock_irqrestore(&tascam->midi_lock, flags); } diff --git a/us144mkii_playback.c b/us144mkii_playback.c index 16860d7..3d31716 100644 --- a/us144mkii_playback.c +++ b/us144mkii_playback.c @@ -117,63 +117,42 @@ static int tascam_playback_trigger(struct snd_pcm_substream *substream, int cmd) int i, ret = 0; unsigned long flags; + spin_lock_irqsave(&tascam->lock, flags); switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: - spin_lock_irqsave(&tascam->lock, flags); - if (atomic_read(&tascam->playback_active)) { - spin_unlock_irqrestore(&tascam->lock, flags); - return 0; - } - atomic_set(&tascam->playback_active, 1); - tascam->feedback_synced = false; - spin_unlock_irqrestore(&tascam->lock, flags); - - for (i = 0; i < NUM_FEEDBACK_URBS; i++) { - usb_anchor_urb(tascam->feedback_urbs[i], &tascam->feedback_anchor); - if (usb_submit_urb(tascam->feedback_urbs[i], GFP_ATOMIC) < 0) { - usb_unanchor_urb(tascam->feedback_urbs[i]); - ret = -EIO; - goto error; + if (!atomic_read(&tascam->playback_active)) { + atomic_set(&tascam->playback_active, 1); + tascam->running_ghost_playback = false; + for (i = 0; i < NUM_FEEDBACK_URBS; i++) { + usb_anchor_urb(tascam->feedback_urbs[i], &tascam->feedback_anchor); + if (usb_submit_urb(tascam->feedback_urbs[i], GFP_ATOMIC) < 0) { + usb_unanchor_urb(tascam->feedback_urbs[i]); + } else { + atomic_inc(&tascam->active_urbs); + } } - atomic_inc(&tascam->active_urbs); - } - - for (i = 0; i < NUM_PLAYBACK_URBS; i++) { - usb_anchor_urb(tascam->playback_urbs[i], &tascam->playback_anchor); - if (usb_submit_urb(tascam->playback_urbs[i], GFP_ATOMIC) < 0) { - usb_unanchor_urb(tascam->playback_urbs[i]); - ret = -EIO; - goto error; + for (i = 0; i < NUM_PLAYBACK_URBS; i++) { + usb_anchor_urb(tascam->playback_urbs[i], &tascam->playback_anchor); + if (usb_submit_urb(tascam->playback_urbs[i], GFP_ATOMIC) < 0) { + usb_unanchor_urb(tascam->playback_urbs[i]); + } else { + atomic_inc(&tascam->active_urbs); + } } - atomic_inc(&tascam->active_urbs); } break; - case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: atomic_set(&tascam->playback_active, 0); - for (i = 0; i < NUM_PLAYBACK_URBS; i++) { - if (tascam->playback_urbs[i]) - usb_unlink_urb(tascam->playback_urbs[i]); - } - for (i = 0; i < NUM_FEEDBACK_URBS; i++) { - if (tascam->feedback_urbs[i]) - usb_unlink_urb(tascam->feedback_urbs[i]); - } break; - default: - return -EINVAL; + ret = -EINVAL; + break; } + spin_unlock_irqrestore(&tascam->lock, flags); - return 0; - - error: - atomic_set(&tascam->playback_active, 0); - usb_kill_anchored_urbs(&tascam->playback_anchor); - usb_kill_anchored_urbs(&tascam->feedback_anchor); return ret; } @@ -201,12 +180,11 @@ void playback_urb_complete(struct urb *urb) goto exit_clear; } - if (!atomic_read(&tascam->playback_active)) + if (!atomic_read(&tascam->playback_active) && !tascam->running_ghost_playback) goto exit_clear; - substream = tascam->playback_substream; - spin_lock_irqsave(&tascam->lock, flags); + substream = tascam->playback_substream; for (i = 0; i < urb->number_of_packets; i++) { unsigned int frames_for_packet; @@ -224,50 +202,68 @@ void playback_urb_complete(struct urb *urb) } urb->transfer_buffer_length = total_bytes_for_urb; - if (total_bytes_for_urb > 0) { - if (substream) { - runtime = substream->runtime; - u8 *dst_buf = urb->transfer_buffer; - size_t ptr_bytes = frames_to_bytes(runtime, tascam->driver_playback_pos); - frames_to_copy = bytes_to_frames(runtime, total_bytes_for_urb); + if (total_bytes_for_urb == 0) { + spin_unlock_irqrestore(&tascam->lock, flags); + goto resubmit; + } - if (tascam->driver_playback_pos + frames_to_copy > runtime->buffer_size) { - size_t part1 = runtime->buffer_size - tascam->driver_playback_pos; - size_t part1_bytes = frames_to_bytes(runtime, part1); + if (!substream) { + memset(urb->transfer_buffer, 0, total_bytes_for_urb); + spin_unlock_irqrestore(&tascam->lock, flags); + goto resubmit; + } - memcpy(dst_buf, runtime->dma_area + ptr_bytes, part1_bytes); - memcpy(dst_buf + part1_bytes, runtime->dma_area, total_bytes_for_urb - part1_bytes); - } else { - memcpy(dst_buf, runtime->dma_area + ptr_bytes, total_bytes_for_urb); - } + runtime = substream->runtime; + size_t playback_pos = tascam->driver_playback_pos; + spin_unlock_irqrestore(&tascam->lock, flags); - tascam->driver_playback_pos += frames_to_copy; - if (tascam->driver_playback_pos >= runtime->buffer_size) - tascam->driver_playback_pos -= runtime->buffer_size; + /* Copy from ALSA DMA buffer - outside of the main device lock */ + u8 *dst_buf = urb->transfer_buffer; + size_t ptr_bytes = frames_to_bytes(runtime, playback_pos); + frames_to_copy = bytes_to_frames(runtime, total_bytes_for_urb); - tascam->playback_frames_consumed += frames_to_copy; + if (playback_pos + frames_to_copy > runtime->buffer_size) { + size_t part1 = runtime->buffer_size - playback_pos; + size_t part1_bytes = frames_to_bytes(runtime, part1); - if (div_u64(tascam->playback_frames_consumed, runtime->period_size) > tascam->last_pb_period_pos) { - tascam->last_pb_period_pos = div_u64(tascam->playback_frames_consumed, runtime->period_size); - need_period_elapsed = true; - } - } else { - memset(urb->transfer_buffer, 0, total_bytes_for_urb); - } + memcpy(dst_buf, runtime->dma_area + ptr_bytes, part1_bytes); + memcpy(dst_buf + part1_bytes, runtime->dma_area, total_bytes_for_urb - part1_bytes); + } else { + memcpy(dst_buf, runtime->dma_area + ptr_bytes, total_bytes_for_urb); + } + + /* Re-acquire lock to update shared stream state */ + spin_lock_irqsave(&tascam->lock, flags); + tascam->driver_playback_pos += frames_to_copy; + if (tascam->driver_playback_pos >= runtime->buffer_size) + tascam->driver_playback_pos -= runtime->buffer_size; + + tascam->playback_frames_consumed += frames_to_copy; + + if (runtime->period_size > 0 && + div_u64(tascam->playback_frames_consumed, runtime->period_size) > tascam->last_pb_period_pos) { + tascam->last_pb_period_pos = div_u64(tascam->playback_frames_consumed, runtime->period_size); + need_period_elapsed = true; } spin_unlock_irqrestore(&tascam->lock, flags); +resubmit: + + usb_anchor_urb(urb, &tascam->playback_anchor); + if (usb_submit_urb(urb, GFP_ATOMIC) < 0) { + goto exit_clear_locked; + } + if (need_period_elapsed && substream) snd_pcm_period_elapsed(substream); - usb_anchor_urb(urb, &tascam->playback_anchor); - if (usb_submit_urb(urb, GFP_ATOMIC) < 0) - goto exit_clear; - return; - exit_clear: +exit_clear_locked: + spin_lock_irqsave(&tascam->lock, flags); +exit_clear: usb_unanchor_urb(urb); + spin_unlock_irqrestore(&tascam->lock, flags); atomic_dec(&tascam->active_urbs); } @@ -288,7 +284,8 @@ void feedback_urb_complete(struct urb *urb) unsigned long flags; int p; - if (urb->status || !tascam || !atomic_read(&tascam->playback_active)) { + if (urb->status || !tascam || + (!atomic_read(&tascam->playback_active) && !tascam->running_ghost_playback)) { usb_unanchor_urb(urb); atomic_dec(&tascam->active_urbs); return;