You can bootleg your own fast lossless codec by delta-encoding the raw PCM to get a lot of zeros and then feeding the result through an off-the-shelf fast compressor like snappy/lz4/zstandard/etc. It won't get remotely close to the dedicated audio algorithms, but I wouldn't be surprised if you cut your data size by a factor of 2-4 at essentially no CPU cost compared to raw uncompressed audio.
Looks like my initial estimate of 2-4x was way off (the fact that FLAC only achieves ~2x should have been a red flag), but you do get a ~1.36x reduction in space at basically memory read speed.
Using an encoding of the second-order differences that stores each value with -127 <= d <= 127 in 1 byte and every other value in 3 bytes (an escape byte followed by the 16-bit value, for an input of 16-bit audio), I got a ratio of ~1.50 for something that can still operate entirely at RAM speed:
# Bootleg lossless codec round-trip: second-order delta-encode the PCM samples,
# pack the differences into a variable-length byte stream, compress that stream
# with zstd, then decode everything again and check the result is bit-exact.
# NOTE(review): assumes `samples` is a 16-bit signed integer ndarray with time
# along axis 0 -- the 2-byte escape payload below cannot hold wider values.
orig = samples.tobytes()  # Raw PCM bytes (the uncompressed baseline).
deltas = np.diff(samples, prepend=samples.dtype.type(0), axis=0)  # Per-channel deltas.
delta_deltas = np.diff(deltas, prepend=samples.dtype.type(0), axis=0)  # Per-channel second-order differences.

# Many small differences, encode almost all 1-byte differences using 1 byte,
# using 3 bytes for larger differences. Interleave channels and encode.
#
# Byte stream layout:
#   0..254         -> difference d = byte - 127, i.e. -127 <= d <= 127
#   255, lo, hi    -> escape marker, then (d + 2**15) as little-endian 16 bits
flat = delta_deltas.ravel()
# Use explicit range comparisons rather than np.abs: the absolute value of the
# most negative integer wraps back to itself, which would miscount it as
# "small" and leave the output buffer two bytes short.
small = np.sum((flat >= -127) & (flat <= 127))
bootleg = np.zeros(small + (len(flat) - small) * 3, dtype=np.uint8)
i = 0
for raw in flat:
    # Work with Python ints so that adding the 2**15 bias cannot overflow the
    # narrow NumPy scalar type.
    dda = int(raw)
    if -127 <= dda <= 127:
        bootleg[i] = dda + 127
        i += 1
    else:
        biased = dda + 2**15  # Shift into 0..65535 so it splits into two bytes.
        bootleg[i] = 255
        bootleg[i + 1] = biased % 256
        bootleg[i + 2] = biased // 256
        i += 3

compressed_bootleg = zstd.ZSTD_compress(bootleg)
print(len(compressed_bootleg))

# Decode using ONLY the decompressed stream, so the final assertion really
# validates the compress/decompress round trip.
decompressed_bootleg = zstd.ZSTD_uncompress(compressed_bootleg)
result = []
i = 0
while i < len(decompressed_bootleg):
    code = decompressed_bootleg[i]
    if code < 255:
        result.append(code - 127)
        i += 1
    else:
        lo = decompressed_bootleg[i + 1]
        hi = decompressed_bootleg[i + 2]
        result.append(256 * hi + lo - 2**15)
        i += 3

decompressed_delta_deltas = np.array(result, dtype=samples.dtype).reshape(delta_deltas.shape)
# Two cumulative sums undo the two rounds of differencing; accumulating in the
# sample dtype reproduces any wrap-around introduced by np.diff.
decompressed_deltas = np.cumsum(decompressed_delta_deltas, axis=0, dtype=samples.dtype)
decompressed = np.cumsum(decompressed_deltas, axis=0, dtype=samples.dtype)
assert np.array_equal(samples, decompressed)