diff --git a/README.md b/README.md index a9d06c4..a605aa0 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,10 @@ The following features are present: The library provides the `BlockDecoder` class, which can be used to decode. A new instance is created by one of the static factory methods. You can let the library create a new buffer, or pass an existing one to save allocations. +After construction, decoders produce no heap allocations during decoding. Internal working state is pre-allocated and +reused across calls. Note that this also means instances are **not thread-safe**: each thread must use its own decoder +instance. + Given `src` is the compressed data, starting at offset `srcPos`, the following code snippet shows how to decode a BC1 texture. diff --git a/src/main/java/be/twofold/tinybcdec/BC1.java b/src/main/java/be/twofold/tinybcdec/BC1.java index d765db8..0801184 100644 --- a/src/main/java/be/twofold/tinybcdec/BC1.java +++ b/src/main/java/be/twofold/tinybcdec/BC1.java @@ -5,6 +5,7 @@ final class BC1 extends BlockDecoder { private static final int BPP = 4; + private final int[] colors = new int[4]; private final boolean bc2Or3; private final int color3; @@ -29,11 +30,10 @@ public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int g1 = (c1 >>> +5) & 0x3F; int b1 = (c1/* */) & 0x1F; - int[] colors = { - rgb(scale031(r0), scale063(g0), scale031(b0)), - rgb(scale031(r1), scale063(g1), scale031(b1)), - 0, 0 - }; + int[] colors = this.colors; + colors[0] = rgb(scale031(r0), scale063(g0), scale031(b0)); + colors[1] = rgb(scale031(r1), scale063(g1), scale031(b1)); + if (c0 > c1 || bc2Or3) { int r2 = scale093(2 * r0 + r1); int g2 = scale189(2 * g0 + g1); diff --git a/src/main/java/be/twofold/tinybcdec/BC2.java b/src/main/java/be/twofold/tinybcdec/BC2.java index eec2046..3e99d31 100644 --- a/src/main/java/be/twofold/tinybcdec/BC2.java +++ b/src/main/java/be/twofold/tinybcdec/BC2.java @@ -4,17 +4,16 @@ final class BC2 extends BlockDecoder { 
private static final int BPP = 4; - private static final BC1 COLOR_DECODER = new BC1(BC1Mode.BC2OR3); - static final BlockDecoder INSTANCE = new BC2(); + private final BC1 colorDecoder = new BC1(BC1Mode.BC2OR3); - private BC2() { + BC2() { super(BPP, 16); } @Override public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int stride) { - COLOR_DECODER.decodeBlock(src, srcPos + 8, dst, dstPos, stride); + colorDecoder.decodeBlock(src, srcPos + 8, dst, dstPos, stride); decodeAlpha(src, srcPos, dst, dstPos + 3, stride); } diff --git a/src/main/java/be/twofold/tinybcdec/BC3.java b/src/main/java/be/twofold/tinybcdec/BC3.java index e034a30..e1c06b2 100644 --- a/src/main/java/be/twofold/tinybcdec/BC3.java +++ b/src/main/java/be/twofold/tinybcdec/BC3.java @@ -4,18 +4,17 @@ final class BC3 extends BlockDecoder { private static final int BPP = 4; - private static final BC1 COLOR_DECODER = new BC1(BC1Mode.BC2OR3); - private static final BC4U ALPHA_DECODER = new BC4U(BPP); - static final BlockDecoder INSTANCE = new BC3(); + private final BC1 colorDecoder = new BC1(BC1Mode.BC2OR3); + private final BC4U alphaDecoder = new BC4U(BPP); - private BC3() { + BC3() { super(BPP, 16); } @Override public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int stride) { - COLOR_DECODER.decodeBlock(src, srcPos + 8, dst, dstPos/**/, stride); - ALPHA_DECODER.decodeBlock(src, srcPos/**/, dst, dstPos + 3, stride); + colorDecoder.decodeBlock(src, srcPos + 8, dst, dstPos/**/, stride); + alphaDecoder.decodeBlock(src, srcPos/**/, dst, dstPos + 3, stride); } } diff --git a/src/main/java/be/twofold/tinybcdec/BC4S.java b/src/main/java/be/twofold/tinybcdec/BC4S.java index 6a7f402..02f5616 100644 --- a/src/main/java/be/twofold/tinybcdec/BC4S.java +++ b/src/main/java/be/twofold/tinybcdec/BC4S.java @@ -3,6 +3,8 @@ import java.nio.*; final class BC4S extends BlockDecoder { + private final byte[] alphas = new byte[8]; + BC4S(int pixelStride) { super(pixelStride, 
8); } @@ -14,19 +16,24 @@ public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int a0 = Math.max(-127, (byte) (block/* */)); int a1 = Math.max(-127, (byte) (block >>> 8)); - byte[] alphas = {scale127(a0), scale127(a1), 0, 0, 0, 0, 0, (byte) 0xFF}; + byte[] alphas = this.alphas; + alphas[0] = scale127(a0); + alphas[1] = scale127(a1); + if (a0 > a1) { - alphas[2] = scale889(6 * a0 +/* */a1); + alphas[2] = scale889(6 * a0 + /**/a1); alphas[3] = scale889(5 * a0 + 2 * a1); alphas[4] = scale889(4 * a0 + 3 * a1); alphas[5] = scale889(3 * a0 + 4 * a1); alphas[6] = scale889(2 * a0 + 5 * a1); alphas[7] = scale889(/**/a0 + 6 * a1); } else { - alphas[2] = scale635(4 * a0 +/* */a1); + alphas[2] = scale635(4 * a0 + /**/a1); alphas[3] = scale635(3 * a0 + 2 * a1); alphas[4] = scale635(2 * a0 + 3 * a1); alphas[5] = scale635(/**/a0 + 4 * a1); + alphas[6] = (byte) 0x00; + alphas[7] = (byte) 0xFF; } long indices = block >>> 16; diff --git a/src/main/java/be/twofold/tinybcdec/BC4U.java b/src/main/java/be/twofold/tinybcdec/BC4U.java index 9158e12..8355c16 100644 --- a/src/main/java/be/twofold/tinybcdec/BC4U.java +++ b/src/main/java/be/twofold/tinybcdec/BC4U.java @@ -3,6 +3,8 @@ import java.nio.*; final class BC4U extends BlockDecoder { + private final byte[] alphas = new byte[8]; + BC4U(int pixelStride) { super(pixelStride, 8); } @@ -14,19 +16,24 @@ public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int a0 = (int) (block/* */) & 0xFF; int a1 = (int) (block >>> 8) & 0xFF; - byte[] alphas = {(byte) a0, (byte) a1, 0, 0, 0, 0, 0, (byte) 0xFF}; + byte[] alphas = this.alphas; + alphas[0] = (byte) a0; + alphas[1] = (byte) a1; + if (a0 > a1) { - alphas[2] = scale1785(6 * a0 +/* */a1); + alphas[2] = scale1785(6 * a0 + /**/a1); alphas[3] = scale1785(5 * a0 + 2 * a1); alphas[4] = scale1785(4 * a0 + 3 * a1); alphas[5] = scale1785(3 * a0 + 4 * a1); alphas[6] = scale1785(2 * a0 + 5 * a1); alphas[7] = scale1785(/**/a0 + 6 * a1); } else { - 
alphas[2] = scale1275(4 * a0 +/* */a1); + alphas[2] = scale1275(4 * a0 + /**/a1); alphas[3] = scale1275(3 * a0 + 2 * a1); alphas[4] = scale1275(2 * a0 + 3 * a1); alphas[5] = scale1275(/**/a0 + 4 * a1); + alphas[6] = (byte) 0x00; + alphas[7] = (byte) 0xFF; } long indices = block >>> 16; diff --git a/src/main/java/be/twofold/tinybcdec/BC5S.java b/src/main/java/be/twofold/tinybcdec/BC5S.java index 697f320..0fccd89 100644 --- a/src/main/java/be/twofold/tinybcdec/BC5S.java +++ b/src/main/java/be/twofold/tinybcdec/BC5S.java @@ -4,17 +4,16 @@ final class BC5S extends BlockDecoder { private static final int BPP = 2; - private static final BC4S DECODER = new BC4S(BPP); - static final BlockDecoder INSTANCE = new BC5S(); + private final BC4S decoder = new BC4S(BPP); - private BC5S() { + BC5S() { super(BPP, 16); } @Override public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int stride) { - DECODER.decodeBlock(src, srcPos/**/, dst, dstPos/**/, stride); - DECODER.decodeBlock(src, srcPos + 8, dst, dstPos + 1, stride); + decoder.decodeBlock(src, srcPos/**/, dst, dstPos/**/, stride); + decoder.decodeBlock(src, srcPos + 8, dst, dstPos + 1, stride); } } diff --git a/src/main/java/be/twofold/tinybcdec/BC5U.java b/src/main/java/be/twofold/tinybcdec/BC5U.java index 6d908b1..f4ce4d6 100644 --- a/src/main/java/be/twofold/tinybcdec/BC5U.java +++ b/src/main/java/be/twofold/tinybcdec/BC5U.java @@ -4,17 +4,16 @@ final class BC5U extends BlockDecoder { private static final int BPP = 2; - private static final BC4U DECODER = new BC4U(BPP); - static final BlockDecoder INSTANCE = new BC5U(); + private final BC4U decoder = new BC4U(BPP); - private BC5U() { + BC5U() { super(BPP, 16); } @Override public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int stride) { - DECODER.decodeBlock(src, srcPos/**/, dst, dstPos/**/, stride); - DECODER.decodeBlock(src, srcPos + 8, dst, dstPos + 1, stride); + decoder.decodeBlock(src, srcPos/**/, dst, dstPos/**/, 
stride); + decoder.decodeBlock(src, srcPos + 8, dst, dstPos + 1, stride); } } diff --git a/src/main/java/be/twofold/tinybcdec/BC6H.java b/src/main/java/be/twofold/tinybcdec/BC6H.java index acbffb8..f3dec01 100644 --- a/src/main/java/be/twofold/tinybcdec/BC6H.java +++ b/src/main/java/be/twofold/tinybcdec/BC6H.java @@ -23,6 +23,8 @@ final class BC6H extends BPTC { new Mode(T, F, 16, +4, +4, +4, new short[]{0x000A, 0x010A, 0x020A, 0x0404, 0x10A6, 0x0504, 0x11A6, 0x0604, 0x12A6}) ); + private final Bits bits = new Bits(); + private final int[] colors = new int[16]; private final boolean signed; BC6H(boolean signed) { @@ -32,7 +34,8 @@ final class BC6H extends BPTC { @Override public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int stride) { - Bits bits = Bits.from(src, srcPos); + Bits bits = this.bits; + bits.read(src, srcPos); int modeIndex = mode(bits); if (modeIndex >= MODES.size()) { @@ -41,7 +44,8 @@ public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, } Mode mode = MODES.get(modeIndex); - int[] colors = new int[16]; + int[] colors = this.colors; + Arrays.fill(colors, 0); for (short op : mode.ops) { readOp(bits, op, colors); } @@ -89,10 +93,10 @@ public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, int weight = weights[(int) (indexBits & mask)]; indexBits >>>= ib; - int pIndex = partitions & 3; - short r = finalUnquantize(interpolate(colors[pIndex * 8/**/], colors[pIndex * 8 + 4], weight), signed); - short g = finalUnquantize(interpolate(colors[pIndex * 8 + 1], colors[pIndex * 8 + 5], weight), signed); - short b = finalUnquantize(interpolate(colors[pIndex * 8 + 2], colors[pIndex * 8 + 6], weight), signed); + int index = (partitions & 3) * 8; + short r = finalUnquantize(interpolate(colors[index/**/], colors[index + 4], weight), signed); + short g = finalUnquantize(interpolate(colors[index + 1], colors[index + 5], weight), signed); + short b = finalUnquantize(interpolate(colors[index + 
2], colors[index + 6], weight), signed); partitions >>>= 2; int o = dstPos + x * BPP; diff --git a/src/main/java/be/twofold/tinybcdec/BC7.java b/src/main/java/be/twofold/tinybcdec/BC7.java index 8162c4b..c0b7732 100644 --- a/src/main/java/be/twofold/tinybcdec/BC7.java +++ b/src/main/java/be/twofold/tinybcdec/BC7.java @@ -17,9 +17,10 @@ final class BC7 extends BPTC { new Mode(2, 6, F, F, 5, 5, T, F, 2, 0) ); - static final BC7 INSTANCE = new BC7(); + private final Bits bits = new Bits(); + private final int[] colors = new int[3 * 2 * 4]; - private BC7() { + BC7() { super(BPP); } @@ -31,14 +32,16 @@ public void decodeBlock(ByteBuffer src, int srcPos, ByteBuffer dst, int dstPos, return; } - Bits bits = Bits.from(src, srcPos); + Bits bits = this.bits; + bits.read(src, srcPos); + bits.get(modeIndex + 1); // Skip mode bits Mode mode = MODES.get(modeIndex); int partition = mode.pb != 0 ? bits.get(mode.pb) : 0; int rotation = mode.rb ? bits.get(2) : 0; boolean selection = mode.isb && bits.get1() != 0; - int[] colors = new int[6 * 4]; + int[] colors = this.colors; // Read colors int numColors = mode.ns * 2; diff --git a/src/main/java/be/twofold/tinybcdec/BPTC.java b/src/main/java/be/twofold/tinybcdec/BPTC.java index 62029cb..c1ae789 100644 --- a/src/main/java/be/twofold/tinybcdec/BPTC.java +++ b/src/main/java/be/twofold/tinybcdec/BPTC.java @@ -119,15 +119,12 @@ static final class Bits { private long lo; private long hi; - private Bits(long lo, long hi) { - this.lo = lo; - this.hi = hi; + Bits() { } - static Bits from(ByteBuffer buffer, int index) { - long lo = ByteIO.getLong(buffer, index); - long hi = ByteIO.getLong(buffer, index + 8); - return new Bits(lo, hi); + void read(ByteBuffer buffer, int index) { + lo = ByteIO.getLong(buffer, index); + hi = ByteIO.getLong(buffer, index + 8); } int get(int count) { diff --git a/src/main/java/be/twofold/tinybcdec/BlockDecoder.java b/src/main/java/be/twofold/tinybcdec/BlockDecoder.java index 8bf26fd..fe0deb8 100644 --- 
a/src/main/java/be/twofold/tinybcdec/BlockDecoder.java +++ b/src/main/java/be/twofold/tinybcdec/BlockDecoder.java @@ -11,6 +11,8 @@ * Depending on if you want to allocate a new byte array or use an existing one. * <p> * To decode a single block, use the {@link #decodeBlock(ByteBuffer, int, ByteBuffer, int, int)} method. + * <p>
+ * Thread safety: instances are not thread-safe. Each thread should use its own decoder instance. */ public abstract class BlockDecoder { static final int BLOCK_WIDTH = 4; @@ -41,7 +43,7 @@ public static BlockDecoder bc1(boolean opaque) { * @return The block decoder. */ public static BlockDecoder bc2() { - return BC2.INSTANCE; + return new BC2(); } /** @@ -50,7 +52,7 @@ public static BlockDecoder bc2() { * @return The block decoder. */ public static BlockDecoder bc3() { - return BC3.INSTANCE; + return new BC3(); } /** @@ -70,7 +72,7 @@ public static BlockDecoder bc4(boolean signed) { * @return The block decoder. */ public static BlockDecoder bc5(boolean signed) { - return signed ? BC5S.INSTANCE : BC5U.INSTANCE; + return signed ? new BC5S() : new BC5U(); } /** @@ -89,7 +91,7 @@ public static BlockDecoder bc6h(boolean signed) { * @return The block decoder. */ public static BlockDecoder bc7() { - return BC7.INSTANCE; + return new BC7(); } /** @@ -197,12 +199,17 @@ public void decode( throw new IndexOutOfBoundsException("Not enough data in dst buffer"); } + ByteOrder srcOrder = src.order(); + ByteOrder dstOrder = dst.order(); + src.order(ByteOrder.LITTLE_ENDIAN); + dst.order(ByteOrder.LITTLE_ENDIAN); + int srcBlocksW = (srcWidth + (BLOCK_WIDTH - 1)) / BLOCK_WIDTH; int srcLineStride = srcBlocksW * bytesPerBlock; int dstLineStride = dstWidth * bytesPerPixel; - var srcBuf = src.slice().order(ByteOrder.LITTLE_ENDIAN); - var dstBuf = dst.slice().order(ByteOrder.LITTLE_ENDIAN); + int srcBase = src.position(); + int dstBase = dst.position(); for (int y = 0; y < height; ) { int srcRowStart = ((srcY + y) / BLOCK_HEIGHT * srcLineStride); int dstRowStart = ((dstY + y) * dstLineStride); @@ -210,23 +217,25 @@ public void decode( int blockH = Math.min(BLOCK_HEIGHT - blockY, height - y); for (int x = 0; x < width; ) { - int srcPosStart = srcRowStart + ((srcX + x) / BLOCK_WIDTH * bytesPerBlock); - int dstPosStart = dstRowStart + ((dstX + x) * bytesPerPixel); + int srcPosStart = 
srcBase + srcRowStart + ((srcX + x) / BLOCK_WIDTH * bytesPerBlock); + int dstPosStart = dstBase + dstRowStart + ((dstX + x) * bytesPerPixel); int blockX = (srcX + x) % BLOCK_WIDTH; int blockW = Math.min(BLOCK_WIDTH - blockX, width - x); if (blockX == 0 && x + BLOCK_WIDTH <= width && blockY == 0 && y + BLOCK_HEIGHT <= height) { - decodeBlock(srcBuf, srcPosStart, dstBuf, dstPosStart, dstLineStride); + decodeBlock(src, srcPosStart, dst, dstPosStart, dstLineStride); x += BLOCK_WIDTH; } else { partialBlock( - srcBuf, srcPosStart, dstBuf, dstPosStart, dstLineStride, + src, srcPosStart, dst, dstPosStart, dstLineStride, blockX, blockY, blockW, blockH); x += blockW; } } y += blockH; } + src.order(srcOrder); + dst.order(dstOrder); } /** @@ -269,8 +278,8 @@ private void partialBlock( int offset = blockY * stride + blockX * bytesPerPixel; for (int row = 0; row < blockH; row++) { - var srcOff = offset + (row * stride); - var dstOff = dstPos + (row * lineStride); + int srcOff = offset + (row * stride); + int dstOff = dstPos + (row * lineStride); ByteIO.copy(scratch, srcOff, dst, dstOff, blockW * bytesPerPixel); } } diff --git a/src/test/java/be/twofold/tinybcdec/BlockDecoderTest.java b/src/test/java/be/twofold/tinybcdec/BlockDecoderTest.java index bc3bd93..07514a5 100644 --- a/src/test/java/be/twofold/tinybcdec/BlockDecoderTest.java +++ b/src/test/java/be/twofold/tinybcdec/BlockDecoderTest.java @@ -31,7 +31,7 @@ void testPartialBlockCrop() throws IOException { int dstOffset = 31; ByteBuffer dst = ByteBuffer.allocate(8 * 8 + dstOffset); - var decoder = BlockDecoder.bc4(false); + BlockDecoder decoder = BlockDecoder.bc4(false); for (int h = 1; h <= 8; h++) { for (int w = 1; w <= 8; w++) { decoder.decode(src.position(BCTestUtils.DDS_HEADER_SIZE), srcWidth, srcHeight, dst.position(dstOffset), w, h); @@ -55,7 +55,7 @@ void testPartialBlockCropExtra() throws IOException { int dstOffset = 31; ByteBuffer dst = ByteBuffer.allocate(8 * 8 + dstOffset); - var decoder = 
BlockDecoder.bc4(false); + BlockDecoder decoder = BlockDecoder.bc4(false); // Test all offsets between 0 and 8 for (int srcY = 1; srcY < 8; srcY++) {