Avoid branching in convertBlackAndWhiteToRGBA

The function is now almost 8x faster than before.
And make the code in the file slightly more readable.
This commit is contained in:
calixteman 2026-02-08 18:33:13 +01:00
parent 2b95a8eb38
commit a9870adfa3
No known key found for this signature in database
GPG Key ID: 0C5442631EE0691F

View File

@ -41,30 +41,36 @@ function convertBlackAndWhiteToRGBA({
: [black, nonBlackColor]; : [black, nonBlackColor];
const widthInSource = width >> 3; const widthInSource = width >> 3;
const widthRemainder = width & 7; const widthRemainder = width & 7;
const xorMask = zeroMapping ^ oneMapping;
const srcLength = src.length; const srcLength = src.length;
dest = new Uint32Array(dest.buffer); dest = new Uint32Array(dest.buffer);
let destPos = 0; let destPos = 0;
for (let i = 0; i < height; i++) { for (let i = 0; i < height; ++i) {
for (const max = srcPos + widthInSource; srcPos < max; srcPos++) { for (
const elem = srcPos < srcLength ? src[srcPos] : 255; const max = srcPos + widthInSource;
dest[destPos++] = elem & 0b10000000 ? oneMapping : zeroMapping; srcPos < max;
dest[destPos++] = elem & 0b1000000 ? oneMapping : zeroMapping; ++srcPos, destPos += 8
dest[destPos++] = elem & 0b100000 ? oneMapping : zeroMapping; ) {
dest[destPos++] = elem & 0b10000 ? oneMapping : zeroMapping; const elem = src[srcPos];
dest[destPos++] = elem & 0b1000 ? oneMapping : zeroMapping; dest[destPos] = zeroMapping ^ (-((elem >> 7) & 1) & xorMask);
dest[destPos++] = elem & 0b100 ? oneMapping : zeroMapping; dest[destPos + 1] = zeroMapping ^ (-((elem >> 6) & 1) & xorMask);
dest[destPos++] = elem & 0b10 ? oneMapping : zeroMapping; dest[destPos + 2] = zeroMapping ^ (-((elem >> 5) & 1) & xorMask);
dest[destPos++] = elem & 0b1 ? oneMapping : zeroMapping; dest[destPos + 3] = zeroMapping ^ (-((elem >> 4) & 1) & xorMask);
dest[destPos + 4] = zeroMapping ^ (-((elem >> 3) & 1) & xorMask);
dest[destPos + 5] = zeroMapping ^ (-((elem >> 2) & 1) & xorMask);
dest[destPos + 6] = zeroMapping ^ (-((elem >> 1) & 1) & xorMask);
dest[destPos + 7] = zeroMapping ^ (-(elem & 1) & xorMask);
} }
if (widthRemainder === 0) { if (widthRemainder === 0) {
continue; continue;
} }
const elem = srcPos < srcLength ? src[srcPos++] : 255; const elem = srcPos < srcLength ? src[srcPos++] : 255;
for (let j = 0; j < widthRemainder; j++) { for (let j = 0; j < widthRemainder; ++j, ++destPos) {
dest[destPos++] = elem & (1 << (7 - j)) ? oneMapping : zeroMapping; dest[destPos] = zeroMapping ^ (-((elem >> (7 - j)) & 1) & xorMask);
} }
} }
return { srcPos, destPos }; return { srcPos, destPos };
} }
@ -80,40 +86,41 @@ function convertRGBToRGBA({
const len = width * height * 3; const len = width * height * 3;
const len32 = len >> 2; const len32 = len >> 2;
const src32 = new Uint32Array(src.buffer, srcPos, len32); const src32 = new Uint32Array(src.buffer, srcPos, len32);
const alphaMask = FeatureTest.isLittleEndian ? 0xff000000 : 0xff;
if (FeatureTest.isLittleEndian) { if (FeatureTest.isLittleEndian) {
// It's a way faster to do the shuffle manually instead of working // It's a way faster to do the shuffle manually instead of working
// component by component with some Uint8 arrays. // component by component with some Uint8 arrays.
for (; i < len32 - 2; i += 3, destPos += 4) { for (; i < len32 - 2; i += 3, destPos += 4) {
const s1 = src32[i]; // R2B1G1R1 const s1 = src32[i], // R2B1G1R1
const s2 = src32[i + 1]; // G3R3B2G2 s2 = src32[i + 1], // G3R3B2G2
const s3 = src32[i + 2]; // B4G4R4B3 s3 = src32[i + 2]; // B4G4R4B3
dest[destPos] = s1 | 0xff000000; dest[destPos] = s1 | alphaMask;
dest[destPos + 1] = (s1 >>> 24) | (s2 << 8) | 0xff000000; dest[destPos + 1] = (s1 >>> 24) | (s2 << 8) | alphaMask;
dest[destPos + 2] = (s2 >>> 16) | (s3 << 16) | 0xff000000; dest[destPos + 2] = (s2 >>> 16) | (s3 << 16) | alphaMask;
dest[destPos + 3] = (s3 >>> 8) | 0xff000000; dest[destPos + 3] = (s3 >>> 8) | alphaMask;
} }
for (let j = i * 4, jj = srcPos + len; j < jj; j += 3) { for (let j = i * 4, jj = srcPos + len; j < jj; j += 3) {
dest[destPos++] = dest[destPos++] =
src[j] | (src[j + 1] << 8) | (src[j + 2] << 16) | 0xff000000; src[j] | (src[j + 1] << 8) | (src[j + 2] << 16) | alphaMask;
} }
} else { } else {
for (; i < len32 - 2; i += 3, destPos += 4) { for (; i < len32 - 2; i += 3, destPos += 4) {
const s1 = src32[i]; // R1G1B1R2 const s1 = src32[i], // R1G1B1R2
const s2 = src32[i + 1]; // G2B2R3G3 s2 = src32[i + 1], // G2B2R3G3
const s3 = src32[i + 2]; // B3R4G4B4 s3 = src32[i + 2]; // B3R4G4B4
dest[destPos] = s1 | 0xff; dest[destPos] = s1 | alphaMask;
dest[destPos + 1] = (s1 << 24) | (s2 >>> 8) | 0xff; dest[destPos + 1] = (s1 << 24) | (s2 >>> 8) | alphaMask;
dest[destPos + 2] = (s2 << 16) | (s3 >>> 16) | 0xff; dest[destPos + 2] = (s2 << 16) | (s3 >>> 16) | alphaMask;
dest[destPos + 3] = (s3 << 8) | 0xff; dest[destPos + 3] = (s3 << 8) | alphaMask;
} }
for (let j = i * 4, jj = srcPos + len; j < jj; j += 3) { for (let j = i * 4, jj = srcPos + len; j < jj; j += 3) {
dest[destPos++] = dest[destPos++] =
(src[j] << 24) | (src[j + 1] << 16) | (src[j + 2] << 8) | 0xff; (src[j] << 24) | (src[j + 1] << 16) | (src[j + 2] << 8) | alphaMask;
} }
} }