GLEGram 12.5 — Initial public release

Based on Swiftgram 12.5 (Telegram iOS 12.5).
All GLEGram features are ported and organized in the GLEGram/ folder.

Features: Ghost Mode, Saved Deleted Messages, Content Protection Bypass,
Font Replacement, Fake Profile, Chat Export, Plugin System, and more.

See CHANGELOG_12.5.md for full details.
This commit is contained in:
Leeksov
2026-04-06 09:48:12 +03:00
commit 4647310322
39685 changed files with 11052678 additions and 0 deletions
+88
View File
@@ -0,0 +1,88 @@
load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library")
load(
"@build_bazel_rules_apple//apple:resources.bzl",
"apple_resource_bundle",
"apple_resource_group",
)
load("//build-system/bazel-utils:plist_fragment.bzl",
"plist_fragment",
)
# Every Metal shader source below Resources/; consumed by the
# AnimationCompressionBundle resource bundle defined in this package.
filegroup(
    name = "AnimationCompressionMetalResources",
    srcs = glob(["Resources/**/*.metal"]),
    visibility = ["//visibility:public"],
)
# Info.plist fragment for the resource bundle: identifier, development region
# and display name. The template text is emitted verbatim.
plist_fragment(
    name = "AnimationCompressionBundleInfoPlist",
    extension = "plist",
    template = """
<key>CFBundleIdentifier</key>
<string>org.telegram.AnimationCompression</string>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleName</key>
<string>AnimationCompression</string>
""",
)
# Resource bundle that packages the Metal shaders together with the
# Info.plist fragment declared above.
apple_resource_bundle(
    name = "AnimationCompressionBundle",
    infoplists = [":AnimationCompressionBundleInfoPlist"],
    resources = [":AnimationCompressionMetalResources"],
)
# Swift module. Depends on the local DctHuffman Objective-C++ library and on
# MetalImageView, and carries the shader bundle as runtime data.
swift_library(
    name = "AnimationCompression",
    module_name = "AnimationCompression",
    srcs = glob(["Sources/**/*.swift"]),
    copts = ["-warnings-as-errors"],
    data = [":AnimationCompressionBundle"],
    deps = [
        ":DctHuffman",
        "//submodules/Components/MetalImageView:MetalImageView",
    ],
    visibility = ["//visibility:public"],
)
# Objective-C / Objective-C++ library with the Huffman coder for DCT blocks.
# Public headers live in DctHuffman/PublicHeaders; everything under
# DctHuffman/Sources is private to the target.
objc_library(
    name = "DctHuffman",
    module_name = "DctHuffman",
    enable_modules = True,
    srcs = glob(
        [
            "DctHuffman/Sources/**/*.m",
            "DctHuffman/Sources/**/*.mm",
            "DctHuffman/Sources/**/*.h",
        ],
        allow_empty = True,
    ),
    hdrs = glob(["DctHuffman/PublicHeaders/**/*.h"]),
    includes = ["DctHuffman/PublicHeaders"],
    copts = [],
    deps = [],
    sdk_frameworks = ["Foundation"],
    visibility = ["//visibility:public"],
)
@@ -0,0 +1,17 @@
// Public C interface of the DctHuffman module: Huffman coding of 8x8 blocks of
// DCT coefficients. Both functions have C linkage so they can be called from
// C, Objective-C, C++ and (through the module map) Swift.
#ifndef DctHuffman_h
#define DctHuffman_h
#import <Foundation/Foundation.h>
#ifdef __cplusplus
extern "C" {
#endif
// Huffman-encodes a plane of coefficients.
//
// The plane is walked in 8x8 blocks (rows of blocks top to bottom, blocks left
// to right). `coefficients` is read row-major with `width` floats per row.
// Every coefficient is rounded to the nearest integer; the first coefficient
// of a block is stored as the difference to the previous block's first
// coefficient.
//
// width, height  Plane size in coefficients. The implementation always reads
//                whole 8x8 blocks, so both presumably have to be multiples of
//                8 - TODO confirm against the callers.
// coefficients   `width * height` input values.
//
// Returns the encoded byte stream. The implementation does not flush the last
// partially filled byte, so the final bits of the stream are not part of the
// returned data.
NSData * _Nullable writeDCTBlocks(int width, int height, float const * _Nonnull coefficients);
// Decodes a byte stream produced by writeDCTBlocks back into coefficients.
//
// width, height   Plane size in coefficients, as passed to writeDCTBlocks.
// blockData       Encoded byte stream.
// coefficients    Output buffer, written row-major with `elementsPerRow`
//                 floats per row; every decoded 8x8 block is written in full.
// elementsPerRow  Row stride of `coefficients`, counted in floats.
//
// After decoding, the 8x8 block whose top-left corner is
// (width - 8, height - 8) is overwritten with zeros.
void readDCTBlocks(int width, int height, NSData * _Nonnull blockData, float * _Nonnull coefficients, int elementsPerRow);
#ifdef __cplusplus
}
#endif
#endif /* DctHuffman_h */
@@ -0,0 +1,630 @@
#import <DctHuffman/DctHuffman.h>
#include <functional>
#include <vector>
namespace DctHuffman {
// Sink for encoded output: invoked once for every byte the encoder produces.
using WRITE_ONE_BYTE = std::function<void(unsigned char)>;
}
namespace
{
// Local fixed-width integer names used throughout this file.
typedef unsigned char  uint8_t;
typedef unsigned short uint16_t;
typedef short          int16_t;
typedef int            int32_t;
// ZigZagInv[k] is the row-major position (row * 8 + column) inside an 8x8
// block of the k-th coefficient in zig-zag scan order.
const uint8_t ZigZagInv[8*8] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

// Inverse mapping: ZigZag[p] is the zig-zag scan index of the coefficient
// stored at row-major position p, so ZigZag[ZigZagInv[k]] == k.
const uint8_t ZigZag[] = {
     0,  1,  5,  6, 14, 15, 27, 28,
     2,  4,  7, 13, 16, 26, 29, 42,
     3,  8, 12, 17, 25, 30, 41, 43,
     9, 11, 18, 24, 31, 40, 44, 53,
    10, 19, 23, 32, 39, 45, 52, 54,
    20, 22, 33, 38, 46, 51, 55, 60,
    21, 34, 37, 47, 50, 56, 59, 61,
    35, 36, 48, 49, 57, 58, 62, 63
};
// Huffman table definitions, consumed by generateHuffmanTable():
//   *CodesPerBitsize[k] is the number of codes that are k + 1 bits long,
//   *Values lists the symbols in the order in which codes are assigned.
// NOTE(review): only the luminance tables are referenced by the encoder and
// decoder visible in this file; the chrominance tables look unused here.
// Huffman definitions for first DC/AC tables (luminance / Y channel)
const uint8_t DcLuminanceCodesPerBitsize[16] = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 }; // sum = 12
const uint8_t DcLuminanceValues [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; // => 12 codes
const uint8_t AcLuminanceCodesPerBitsize[16] = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162
const uint8_t AcLuminanceValues [162] = // => 162 codes
{ 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08, // 16*10+2 symbols because
0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28, // upper 4 bits can be 0..F
0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59, // while lower 4 bits can be 1..A
0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89, // plus two special codes 0x00 and 0xF0
0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6, // order of these symbols was determined empirically by JPEG committee
0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,
0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
// Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels)
const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; // sum = 12
const uint8_t DcChrominanceValues [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; // => 12 codes (identical to DcLuminanceValues)
const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162
const uint8_t AcChrominanceValues [162] = // => 162 codes
{ 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, // same number of symbol, just different order
0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26, // (which is more efficient for AC coding)
0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,
0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,
0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
// Bound on coefficient magnitude: writeDCTBlocks() builds its codeword lookup
// table with 2 * CodeWordLimit entries, covering values -CodeWordLimit+1 .. CodeWordLimit-1.
const int16_t CodeWordLimit = 2048; // +/-2^11, maximum value after DCT
// A single Huffman code word: `numBits` significant bits, stored right-aligned
// in `code`.
struct BitCode {
    // Leaves both fields indeterminate; the owner has to fill them in (or
    // zero the storage) before the value is read.
    BitCode() = default;

    // Builds a code word from its bit pattern and its length in bits.
    BitCode(uint16_t code_, uint8_t numBits_)
        : code(code_),
          numBits(numBits_)
    {
    }

    uint16_t code;    // bit pattern; JPEG Huffman codes never exceed 16 bits
    uint8_t numBits;  // how many of the low bits of `code` are valid
};
// Wrapper for bit output operations: collects Huffman-coded bits and hands
// every completed byte to a user-supplied callback.
struct BitWriter {
    // user-supplied callback that writes/stores one byte
    DctHuffman::WRITE_ONE_BYTE output;

    // initialize writer
    explicit BitWriter(DctHuffman::WRITE_ONE_BYTE output_) : output(output_) {}

    // store the most recently encoded bits that are not written yet
    struct BitBuffer
    {
        int32_t data = 0;    // pending bits, right-aligned; at most 7 + 16 = 23 bits are in use
        uint8_t numBits = 0; // number of valid bits (the right-most bits)
    } buffer;

    // write Huffman bits stored in BitCode, keep excess bits in BitBuffer
    BitWriter& operator<<(const BitCode& data)
    {
        // append the new bits to those bits leftover from previous call(s)
        buffer.numBits += data.numBits;
        buffer.data <<= data.numBits;
        buffer.data |= data.code;

        // write all "full" bytes
        while (buffer.numBits >= 8)
        {
            // extract highest 8 bits
            buffer.numBits -= 8;
            auto oneByte = uint8_t(buffer.data >> buffer.numBits);
            output(oneByte);
            if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker)
                output(0);       // therefore pad a zero to indicate "nope, this one ain't a marker, it's just a coincidence"

            // Drop the bits that were just written. The emitted bytes do not
            // depend on this (only the low 8 bits of the shifted value are
            // used), but without it stale bits pile up in the signed
            // accumulator until a later <<= shifts into the sign bit, which
            // is undefined behaviour.
            buffer.data &= (1 << buffer.numBits) - 1;
        }
        return *this;
    }

    // write all non-yet-written bits, fill gaps with 1s (that's a strange JPEG thing)
    void flush()
    {
        // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111
        *this << BitCode(0x7F, 7);
        // whatever is left now is padding only; start byte-aligned again
        buffer.data = 0;
        buffer.numBits = 0;
    }

    // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output !

    // write a single byte
    BitWriter& operator<<(uint8_t oneByte)
    {
        output(oneByte);
        return *this;
    }

    // write an array of bytes
    template <typename T, int Size>
    BitWriter& operator<<(T (&manyBytes)[Size])
    {
        for (auto c : manyBytes)
            output(c);
        return *this;
    }

    // start a new JFIF block
    void addMarker(uint8_t id, uint16_t length)
    {
        output(0xFF); output(id);     // ID, always preceded by 0xFF
        output(uint8_t(length >> 8)); // length of the block (big-endian, includes the 2 length bytes as well)
        output(uint8_t(length & 0xFF));
    }
};
// ////////////////////////////////////////
// functions / templates
// Returns the smaller of the two arguments; `value` wins when both compare
// equal (same result as std::min()).
template <typename Number>
Number minimum(Number value, Number maximum)
{
    if (value <= maximum)
        return value;
    return maximum;
}
// Restricts `value` to the closed interval [minValue, maxValue]. The lower
// bound is tested first, so it wins if the bounds are given in the wrong order.
template <typename Number, typename Limit>
Number clamp(Number value, Limit minValue, Limit maxValue)
{
    return (value <= minValue) ? static_cast<Number>(minValue)
         : (value >= maxValue) ? static_cast<Number>(maxValue)
         : value;
}
// Huffman-encodes one 8x8 block of coefficients.
//
// writer     bit sink that receives the encoded block
// block64    the 64 coefficients in row-major order; every value is rounded
//            to the nearest integer before it is encoded
// lastDC     DC value returned for the previous block (0 for the first block)
// huffmanDC  Huffman code for every DC symbol (bit size of the DC difference)
// huffmanAC  Huffman code for every AC symbol (zero run << 4 | bit size)
// codewords  value -> raw bit pattern table, valid for indices
//            -(CodeWordLimit - 1) .. CodeWordLimit - 1 except 0
//
// Returns the DC value a decoder reconstructs for this block; pass it as
// `lastDC` when encoding the next block.
//
// Values that the tables cannot represent are limited instead of being
// encoded blindly:
//  - a DC difference outside +/-(CodeWordLimit - 1) would index outside
//    `codewords`;
//  - an AC magnitude above 1023 would need an 11-bit size symbol, for which
//    the AC table has no code, so nothing would be written for the symbol
//    and the decoder would lose sync.
int16_t encodeDCTBlock(BitWriter& writer, float block64[64], int16_t lastDC,
                       const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords) {
    const int maxDcValue = 32767;             // the running DC value is carried in an int16_t
    const int maxDcDelta = CodeWordLimit - 1; // largest magnitude that has a codeword
    const int maxAcValue = (1 << 10) - 1;     // AC size symbols exist for 1..10 bits only

    // round half away from zero and limit the magnitude; NaN becomes 0 so the
    // float -> int conversion is always defined
    auto roundToLimitedInt = [](float value, int limit) -> int {
        if (value != value)
            return 0;
        const float rounded = value + (value >= 0 ? +0.5f : -0.5f);
        if (rounded >= float(limit))
            return limit;
        if (rounded <= -float(limit))
            return -limit;
        return int(rounded);
    };

    // encode DC (the first coefficient is the "average color" of the 8x8 block)
    const int DC = roundToLimitedInt(block64[0], maxDcValue);

    // round and zigzag the other 63 coefficients
    int posNonZero = 0; // find last coefficient which is not zero (because trailing zeros are encoded differently)
    int16_t quantized[8*8];
    for (int i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed
    {
        quantized[i] = int16_t(roundToLimitedInt(block64[ZigZagInv[i]], maxAcValue));
        // remember offset of last non-zero coefficient
        if (quantized[i] != 0)
            posNonZero = i;
    }

    // difference to the previous block, limited to what the codeword table covers
    int diff = DC - lastDC;
    if (diff > maxDcDelta)
        diff = maxDcDelta;
    if (diff < -maxDcDelta)
        diff = -maxDcDelta;

    // same "average color" as previous block ?
    if (diff == 0)
        writer << huffmanDC[0x00]; // yes, write a special short symbol
    else
    {
        const BitCode bits = codewords[diff]; // nope, encode the difference to previous block's average color
        writer << huffmanDC[bits.numBits] << bits;
    }

    // encode ACs (quantized[1..63])
    int offset = 0; // upper 4 bits count the number of consecutive zeros
    for (int i = 1; i <= posNonZero; i++) // quantized[0] was already written, skip all trailing zeros, too
    {
        // zeros are encoded in a special way
        while (quantized[i] == 0) // found another zero ?
        {
            offset += 0x10; // add 1 to the upper 4 bits
            // split into blocks of at most 16 consecutive zeros
            if (offset > 0xF0) // remember, the counter is in the upper 4 bits, 0xF = 15
            {
                writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros"
                offset = 0;
            }
            i++;
        }

        const BitCode encoded = codewords[quantized[i]];
        // combine number of zeros with the number of bits of the next non-zero value
        writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself
        offset = 0;
    }

    // send end-of-block code (0x00), only needed if there are trailing zeros
    if (posNonZero < 8*8 - 1) // = 63
        writer << huffmanAC[0x00];

    // report what the decoder will reconstruct (differs from DC only when the
    // difference had to be limited), so encoder and decoder stay in step
    return int16_t(lastDC + diff);
}
// Builds the symbol -> code lookup table from a JPEG-style table definition.
//
// numCodes  numCodes[k] is the number of codes that are k + 1 bits long
// values    the symbols, in the order in which codes are handed out
// result    receives the code of every symbol at index `symbol`; entries of
//           symbols that do not occur in `values` are left untouched
void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256])
{
    int nextCode = 0;
    const uint8_t* symbol = values;

    // code lengths run from 1 to 16 bits; numCodes is indexed from zero
    for (int lengthIndex = 0; lengthIndex < 16; ++lengthIndex)
    {
        const int codeLength = lengthIndex + 1;

        // hand out consecutive codes to all symbols of this length
        for (int remaining = numCodes[lengthIndex]; remaining > 0; --remaining)
        {
            result[*symbol] = BitCode(nextCode, codeLength);
            ++symbol;
            ++nextCode;
        }

        // codes of the next length are one bit wider
        nextCode <<= 1;
    }
}
} // end of anonymous namespace
// -------------------- externally visible code --------------------
namespace DctHuffman {
// Appends the next byte of the stream to the bit accumulator.
//
// bytes              the encoded stream
// readPosition       index of the next unread byte; advanced past everything consumed
// data               bit accumulator; new bits are appended at the low end
// currentDataLength  number of valid bits in `data`
//
// Returns false once the stream is exhausted or an end-of-image marker
// (0xFF 0xD9) is found, true otherwise.
//
// Marker handling after a 0xFF byte:
//   0x00        stuffing byte, dropped (the 0xFF itself is data)
//   0xD0..0xD7  restart marker, the accumulator is cleared
//   0xD9        end of image
//   other       appended to the accumulator like ordinary data
bool readMoreData(std::vector<uint8_t> const &bytes, int &readPosition, unsigned int &data, unsigned int &currentDataLength) {
    // Detect errors
    if (currentDataLength > 24) { // Unsigned int can hold at most 32 = 24+8 bits
        // No code matched although plenty of bits are buffered: drop the
        // oldest bit in the hope that we will eventually find a correct code
        data = data - ((data >> (currentDataLength-1)) << (currentDataLength-1));
        currentDataLength--;
        return true;
    }

    const size_t byteCount = bytes.size();

    // Nothing left to read. (The check used to be `readPosition + 1 >= size`,
    // which refused to hand out the final byte of the stream.)
    if (readPosition < 0 || static_cast<size_t>(readPosition) >= byteCount) {
        return false;
    }
    unsigned char binaryData = bytes[readPosition];
    readPosition++;

    // Ordinary byte: put it in the low 8 bits of data
    if (binaryData != 0xFF) {
        data = (data << 8) + binaryData;
        currentDataLength += 8;
        return true;
    }

    // 0xFF: keep it for now, the following byte decides what it means
    data = (data << 8) + binaryData;
    currentDataLength += 8;
    if (static_cast<size_t>(readPosition) >= byteCount) {
        return false;
    }
    binaryData = bytes[readPosition];
    readPosition++;

    // End of Image marker
    if (binaryData == 0xd9) {
        // Drop 0xFF from data
        data = data >> 8;
        currentDataLength -= 8;
        return false;
    }

    // Restart marker means data goes blank
    if (binaryData >= 0xd0 && binaryData <= 0xd7) {
        data = 0;
        currentDataLength = 0;
    }
    // If after FF byte comes 0x00 byte, we ignore it, 0xFF is part of data (byte stuffing)
    else if (binaryData != 0) {
        data = (data << 8) + binaryData;
        currentDataLength += 8;
    }
    return true;
}
// Decodes one 8x8 block from the Huffman-coded stream.
//
// bytes, readPosition      encoded stream and index of the next unread byte
// dataBlock                receives the 64 coefficients in zig-zag scan order;
//                          positions that are not coded are set to 0
// data, currentDataLength  bit accumulator shared between calls (see readMoreData)
// currentComponent         unused; kept so existing callers keep compiling
// componentTablesDC/AC     symbol -> code tables with 256 entries each; entries
//                          with numBits == 0 are treated as "symbol has no code"
// previousDC               running DC value, updated with the decoded difference
//
// Returns true when the stream ran out before the block was complete, false
// when the block was decoded (or abandoned because of a malformed run length).
bool readHuffmanBlock(std::vector<uint8_t> const &bytes, int &readPosition, int *dataBlock, unsigned int &data, unsigned int &currentDataLength, int currentComponent, BitCode const *componentTablesDC, BitCode const *componentTablesAC, int &previousDC) {
    (void)currentComponent;

    // Description of the 8x8 block currently being read
    enum { AC, DC } ACDC = DC;

    // A block is one DC coefficient followed by up to 63 AC coefficients
    const int ACcount = 64 - 1;
    int m = 0; // Index into dataBlock

    // Fill block with zeros
    memset ((char*)dataBlock, 0, sizeof(int)*64);

    // Main loop
    do {
        // 3 bits is too small for a code
        if (currentDataLength<3) {
            continue;
        }

        // Current Huffman table
        BitCode const *htable = (ACDC == AC) ? componentTablesAC : componentTablesDC;

        // Every one of 256 elements of the current Huffman table potentially has value, so we must go through all of them
        for (int i = 0; i < 256; i++) {
            // A length of 0 means there is no Huffman code for i-th element
            if (htable[i].numBits == 0) {
                continue;
            }

            // Compare the first n buffered bits (n - length of current Huffman code)
            unsigned int n = htable[i].numBits;
            if (currentDataLength < n) {
                continue;
            }
            if (htable[i].code != data >> (currentDataLength - n)) {
                continue;
            }

            // Remove first n bits from data (shift done unsigned so it cannot overflow a signed int)
            currentDataLength -= n;
            data = data - ((unsigned int)htable[i].code << currentDataLength);

            // Reading of DC coefficients
            if (ACDC == DC) {
                unsigned char bitLength = i; // Next i bits represent DC coefficient value

                // Do we need to read more bits of data?
                while (currentDataLength<bitLength) {
                    if (!readMoreData(bytes, readPosition, data, currentDataLength)) {
                        // Stream ended inside the value. Stop here: extracting the value would
                        // shift by (currentDataLength - bitLength), which has wrapped around.
                        return true;
                    }
                }

                // Read out DC coefficient
                int DCCoeficient = data >> (currentDataLength-bitLength);
                currentDataLength -= bitLength;
                data = data - ((unsigned int)DCCoeficient << currentDataLength);

                // If MSB in DC coefficient starts with 0, then substract value of DC with 2^bitlength+1
                if ( bitLength != 0 && (DCCoeficient>>(bitLength-1)) == 0 ) {
                    DCCoeficient = DCCoeficient - (2 << (bitLength-1)) + 1;
                }

                previousDC = DCCoeficient + previousDC;
                dataBlock[m] = previousDC;
                m++;

                // We generated our DC coefficient, next one is AC coefficient
                ACDC = AC;
                if (currentDataLength < 3) // If currentData length is < than 3, we need to read new byte, so leave this for loop
                    break;
                i = -1; // CurrentDataLength is not zero, restart the search from the first element of the AC table
                htable = componentTablesAC;
            } else {
                // Reading of AC coefficients
                unsigned char ACElement=i;

                /* Every AC component is composite of 4 bits (RRRRSSSS). R bits tells us relative position of
                 non zero element from the previous non zero element (number of zeros between two non zero elements)
                 SSSS bits tels us magnitude range of AC element
                 Two special values:
                 00 is END OF BLOCK (all AC elements are zeros)
                 F0 is 16 zeroes */

                if (ACElement == 0x00) {
                    return false;
                }
                else if (ACElement == 0xF0) {
                    for (int k=0;k<16;k++) {
                        dataBlock[m] = 0;
                        m++;
                        if (m >= ACcount+1) {
                            // 16 AC zeros requested, but fewer are left in the block
                            return false;
                        }
                    }
                }
                else {
                    /* If AC element is 0xAB for example, then we have to separate it in two nibbles
                     First nible is RRRR bits, second are SSSS bits
                     RRRR bits told us how many zero elements are before this element
                     SSSS bits told us how many binary digits our AC element has (if 1001 then we have to read next 9 elements from file) */

                    // Let's separate byte to two nibles
                    unsigned char Rbits = ACElement >> 4;
                    unsigned char Sbits = ACElement & 0x0F;

                    // Before our element there is Rbits zero elements
                    for (int k=0; k<Rbits; k++) {
                        if (m >= ACcount) {
                            // More preceding zeros requested than are left in the block;
                            // going on would only produce more errors
                            return false;
                        }
                        dataBlock[m] = 0;
                        m++;
                    }

                    // Do we need to read more bits of data?
                    while (currentDataLength<Sbits) {
                        if (!readMoreData(bytes, readPosition, data, currentDataLength)) {
                            // End of stream inside a Huffman-coded value, see the DC case above
                            return true;
                        }
                    }

                    // Read out AC coefficient
                    int ACCoeficient = data >> (currentDataLength-Sbits);
                    currentDataLength -= Sbits;
                    data = data - ((unsigned int)ACCoeficient<<currentDataLength);

                    // If MSB in AC coefficient starts with 0, then substract value of AC with 2^bitLength+1
                    if ( Sbits != 0 && (ACCoeficient>>(Sbits-1)) == 0 ) {
                        ACCoeficient = ACCoeficient - (2 << (Sbits-1)) + 1;
                    }

                    dataBlock[m] = ACCoeficient;
                    m++;
                }

                // End of block
                if (m >= ACcount+1)
                    return false;

                if (currentDataLength<3) // If currentData length is < 3, we need to read new byte, so leave this for loop
                    break;
                i = -1; // currentDataLength is not zero, restart the search from the first element of the table
            }
        }
    } while(readMoreData(bytes, readPosition, data, currentDataLength));

    // We reached an end
    return true;
}
// Huffman-encodes a plane of coefficients, one 8x8 block at a time.
//
// width, height  plane size in coefficients; whole 8x8 blocks are read, so
//                both presumably have to be multiples of 8 - TODO confirm
//                against the callers
// coefficients   row-major input with `width` floats per row
//
// Returns the encoded bytes (empty when width or height is not positive).
//
// NOTE: the bit writer is deliberately not flushed, so the last, partially
// filled byte never reaches the output. readDCTBlocks() makes up for that by
// blanking the final block.
NSData * _Nullable writeDCTBlocks(int width, int height, float const *coefficients) {
    if (width <= 0 || height <= 0) {
        return [NSData data];
    }

    // size hint only; computed unsigned so large planes cannot overflow int
    NSMutableData *result = [[NSMutableData alloc] initWithCapacity:(NSUInteger)width * 4 * (NSUInteger)height];
    BitWriter bitWriter([result](unsigned char byte) {
        [result appendBytes:&byte length:1];
    });

    // value -> raw bit pattern; quantized[i] is found at codewordsArray[quantized[i] + CodeWordLimit]
    BitCode codewordsArray[2 * CodeWordLimit];
    // the loop below never assigns the entries for 0 and -CodeWordLimit; zero them so nothing is left indeterminate
    memset(codewordsArray, 0, sizeof(codewordsArray));
    BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so quantized[i] is at codewords[quantized[i]]

    uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined)
    int32_t mask = 1;    // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1
    for (int16_t value = 1; value < CodeWordLimit; value++)
    {
        // numBits = position of highest set bit (ignoring the sign)
        // mask = (2^numBits) - 1
        if (value > mask) // one more bit ?
        {
            numBits++;
            mask = (mask << 1) | 1; // append a set bit
        }
        codewords[-value] = BitCode(mask - value, numBits); // negative index => codewords[-value] = codewordsArray[CodeWordLimit - value]
        codewords[+value] = BitCode( value, numBits);
    }

    BitCode huffmanLuminanceDC[256];
    BitCode huffmanLuminanceAC[256];
    memset(huffmanLuminanceDC, 0, sizeof(BitCode) * 256);
    memset(huffmanLuminanceAC, 0, sizeof(BitCode) * 256);
    generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
    generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);

    int16_t lastYDC = 0;
    float Y[8 * 8];

    for (auto blockY = 0; blockY < height; blockY += 8) {
        for (auto blockX = 0; blockX < width; blockX += 8) {
            // gather the block into a contiguous row-major 8x8 array
            for (auto y = 0; y < 8; y++) {
                for (auto x = 0; x < 8; x++) {
                    Y[y * 8 + x] = coefficients[(blockY + y) * width + blockX + x];
                }
            }
            lastYDC = encodeDCTBlock(bitWriter, Y, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords);
        }
    }

    //bitWriter.flush();
    return result;
}
} // namespace DctHuffman
// C-linkage entry point declared in DctHuffman.h. All of the work happens in
// DctHuffman::writeDCTBlocks(); this wrapper only exposes it under an
// unmangled name.
//
// For round-trip debugging, the encoded bytes can be decoded again with
// DctHuffman::readHuffmanBlock(), mapped back through ZigZag[] and compared
// with the rounded input coefficients.
extern "C"
NSData * _Nullable writeDCTBlocks(int width, int height, float const *coefficients) {
    return DctHuffman::writeDCTBlocks(width, height, coefficients);
}
// C-linkage entry point declared in DctHuffman.h: decodes a stream produced by
// writeDCTBlocks() back into coefficients.
//
// width, height   plane size in coefficients
// blockData       encoded byte stream
// coefficients    output, row-major with `elementsPerRow` floats per row;
//                 every decoded 8x8 block is written in full
// elementsPerRow  row stride of `coefficients`, counted in floats
//
// Does nothing when width or height is not positive.
extern "C"
void readDCTBlocks(int width, int height, NSData * _Nonnull blockData, float *coefficients, int elementsPerRow) {
    if (width <= 0 || height <= 0) {
        return;
    }

    const uint8_t *encoded = (const uint8_t *)blockData.bytes;
    std::vector<uint8_t> bytes(encoded, encoded + blockData.length);

    int readPosition = 0;
    int Yzig[8 * 8];
    int previousDC = 0;
    unsigned int data = 0;
    unsigned int currentDataLength = 0;

    BitCode huffmanLuminanceDC[256];
    BitCode huffmanLuminanceAC[256];
    memset(huffmanLuminanceDC, 0, sizeof(BitCode) * 256);
    memset(huffmanLuminanceAC, 0, sizeof(BitCode) * 256);
    generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
    generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);

    for (auto blockY = 0; blockY < height; blockY += 8) {
        for (auto blockX = 0; blockX < width; blockX += 8) {
            DctHuffman::readHuffmanBlock(bytes, readPosition, Yzig, data, currentDataLength, 0, huffmanLuminanceDC, huffmanLuminanceAC, previousDC);
            // the decoder delivers zig-zag order; ZigZag[] maps each row-major position to its scan index
            for (int i = 0; i < 64; i++) {
                coefficients[(blockY + (i / 8)) * elementsPerRow + blockX + (i % 8)] = Yzig[ZigZag[i]];
            }
        }
    }

    // The encoder never flushes its last partial byte, so the final block
    // cannot be decoded completely; blank the 8x8 block in the bottom-right
    // corner. Skipped for planes smaller than one block, where width - 8 or
    // height - 8 would be negative and the writes would land before the
    // start of the buffer.
    if (width >= 8 && height >= 8) {
        const int lastBlockX = width - 8;
        const int lastBlockY = height - 8;
        for (int i = 0; i < 64; i++) {
            coefficients[(lastBlockY + (i / 8)) * elementsPerRow + lastBlockX + (i % 8)] = 0.0f;
        }
    }
}
@@ -0,0 +1,485 @@
#include <metal_stdlib>
using namespace metal;
// Converts an RGBA color to YUVA. Y gets an offset of 16/256, U and V an
// offset of 128/256; alpha is passed through unchanged.
half4 yuva(half4 rgba) {
    const half luma    =  (0.257f * rgba.r) + (0.504 * rgba.g) + (0.098 * rgba.b) + (16.0f / 256.0f);
    const half chromaU = -(0.148 * rgba.r) - (0.291 * rgba.g) + (0.439 * rgba.b) + (128.0f / 256.0f);
    const half chromaV =  (0.439 * rgba.r) - (0.368 * rgba.g) - (0.071 * rgba.b) + (128.0f / 256.0f);
    return half4(luma, chromaU, chromaV, rgba.a);
}
// Converts a YUVA color (offsets as produced by yuva()) back to RGBA; alpha
// is passed through unchanged.
half4 rgb(half4 yuva) {
    // remove the offsets
    const half luma    = yuva.r - 16.0f / 256.0f;
    const half chromaU = yuva.g - 128.0f / 256.0f;
    const half chromaV = yuva.b - 128.0f / 256.0f;

    const half red   = 1.164 * luma + 1.596 * chromaV;
    const half green = 1.164 * luma - 0.813 * chromaV - 0.391 * chromaU;
    const half blue  = 1.164 * luma + 2.018 * chromaU;
    return half4(red, green, blue, yuva.a);
}
// One corner of the quad: a 2D position and the texture coordinate sampled there.
typedef struct {
vector_float2 position;
vector_float2 textureCoordinate;
} Vertex;
// Two triangles (six vertices) forming a quad. Positions span 0..2 on both
// axes; vertexShader subtracts 1 to move them into the -1..1 range. The
// texture coordinate is flipped vertically relative to the position
// (position y = 0 carries v = 1).
constant Vertex quadVertices[6] = {
{{ 2.0, 0.0 }, { 1.0, 1.0 }},
{{ 0.0, 0.0 }, { 0.0, 1.0 }},
{{ 0.0, 2.0 }, { 0.0, 0.0 }},
{{ 2.0, 0.0 }, { 1.0, 1.0 }},
{{ 0.0, 2.0 }, { 0.0, 0.0 }},
{{ 2.0, 2.0 }, { 1.0, 0.0 }}
};
// Output of vertexShader and input ([[stage_in]]) of the fragment shaders.
struct RasterizerData {
float4 clipSpacePosition [[position]];
float2 textureCoordinate;
};
// Emits vertex `vid` of the quad: shifts the table position from 0..2 to
// -1..1 and forwards the texture coordinate.
vertex RasterizerData vertexShader(
    uint vid [[vertex_id]]
) {
    const float2 shifted = quadVertices[vid].position.xy - float2(1.0f, 1.0f);

    RasterizerData result;
    result.clipSpacePosition = float4(shifted.x, shifted.y, 0.0f, 1.0f);
    result.textureCoordinate = quadVertices[vid].textureCoordinate;
    return result;
}
// Samples four single-channel planes (Y, U, V, alpha in textures 0..3),
// converts them to RGB and returns the color premultiplied by alpha.
fragment float4 samplingIdctShader(
    RasterizerData in [[stage_in]],
    texture2d<half, access::sample> colorTexture0 [[texture(0)]],
    texture2d<half, access::sample> colorTexture1 [[texture(1)]],
    texture2d<half, access::sample> colorTexture2 [[texture(2)]],
    texture2d<half, access::sample> colorTexture3 [[texture(3)]]
) {
    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);
    const float2 uv = in.textureCoordinate;

    // the red channel of every plane carries the value
    const half4 planes = half4(
        colorTexture0.sample(textureSampler, uv).r,
        colorTexture1.sample(textureSampler, uv).r,
        colorTexture2.sample(textureSampler, uv).r,
        colorTexture3.sample(textureSampler, uv).r
    );

    const half4 color = rgb(planes);
    return float4(half4(color.rgb * color.a, color.a));
}
// Samples an RGBA texture and returns the color premultiplied by its alpha.
fragment float4 samplingRgbShader(
    RasterizerData in [[stage_in]],
    texture2d<half, access::sample> colorTexture [[texture(0)]]
) {
    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);

    half4 sampled = colorTexture.sample(textureSampler, in.textureCoordinate);
    sampled.rgb *= sampled.a;
    return float4(sampled);
}
// Samples a luma plane and a two-channel chroma plane at `texcoord` and
// converts the result to opaque RGB (alpha = 1).
half4 samplePoint(texture2d<half, access::sample> textureY, texture2d<half, access::sample> textureCbCr, sampler s, float2 texcoord) {
    const half luma = textureY.sample(s, texcoord).r;
    // chroma is stored with an offset of 0.5
    const half2 chroma = textureCbCr.sample(s, texcoord).rg - half2(0.5, 0.5);

    // Conversion for YUV to rgb from http://www.fourcc.org/fccyvrgb.php
    return half4(luma + 1.403 * chroma.y, luma - 0.344 * chroma.x - 0.714 * chroma.y, luma + 1.770 * chroma.x, 1.0);
}
// Samples Y and CbCr planes plus a packed 4-bit alpha plane and returns the
// premultiplied RGBA color.
//
// Every texel of `alphaTexture` holds the alpha of two horizontally adjacent
// pixels: the high nibble belongs to the even column, the low nibble to the
// odd one. `alphaSize` is the size of the alpha plane in pixels.
fragment float4 samplingYuvaShader(
    RasterizerData in [[stage_in]],
    texture2d<half, access::sample> yTexture [[texture(0)]],
    texture2d<half, access::sample> cbcrTexture [[texture(1)]],
    texture2d<uint, access::read> alphaTexture [[texture(2)]],
    constant uint2 &alphaSize [[buffer(3)]]
) {
    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);
    half4 color = samplePoint(yTexture, cbcrTexture, textureSampler, in.textureCoordinate);

    // pixel position inside the alpha plane
    const int alphaX = (int)(in.textureCoordinate.x * alphaSize.x);
    const int alphaY = (int)(in.textureCoordinate.y * alphaSize.y);
    const uint32_t packedAlpha = alphaTexture.read(uint2(alphaX / 2, alphaY)).r;

    // widen each nibble to 8 bits by repeating it (0xN -> 0xNN)
    const uint32_t highNibble = (packedAlpha & (0xf0U));
    const uint32_t lowNibble = (packedAlpha & (0x0fU)) << 4;
    const uint32_t left = (highNibble >> 4) | highNibble;
    const uint32_t right = (lowNibble >> 4) | lowNibble;
    const uint32_t resolvedAlpha = (alphaX % 2 == 0) ? left : right;

    const float alpha = resolvedAlpha / 255.0f;
    color.r *= alpha;
    color.g *= alpha;
    color.b *= alpha;
    color.a = alpha;
    return float4(color);
}
// Edge length of a DCT block, the number of elements in a block, and log2 of
// the edge length (used to turn a row index into an element offset by shifting).
#define BLOCK_SIZE 8
// parenthesized so the macro stays a single value inside larger expressions
// (e.g. `x / BLOCK_SIZE2`, `x % BLOCK_SIZE2`)
#define BLOCK_SIZE2 (BLOCK_SIZE * BLOCK_SIZE)
#define BLOCK_SIZE_LOG2 3
// Quality parameters per plane type, in the form adjustQ() takes as `qp`.
#define chromaQp 60
#define lumaQp 70
#define alphaQp 60
// 8x8 transform matrix used by DCT() and IDCT(), stored with BLOCK_SIZE floats
// per row. NOTE(review): the values look like the orthonormal 8-point DCT-II
// basis (first column is 1/sqrt(8)) - confirm before relying on it.
constant float DCTv8matrix[] = {
0.3535533905932738f, 0.4903926402016152f, 0.4619397662556434f, 0.4157348061512726f, 0.3535533905932738f, 0.2777851165098011f, 0.1913417161825449f, 0.0975451610080642f,
0.3535533905932738f, 0.4157348061512726f, 0.1913417161825449f, -0.0975451610080641f, -0.3535533905932737f, -0.4903926402016152f, -0.4619397662556434f, -0.2777851165098011f,
0.3535533905932738f, 0.2777851165098011f, -0.1913417161825449f, -0.4903926402016152f, -0.3535533905932738f, 0.0975451610080642f, 0.4619397662556433f, 0.4157348061512727f,
0.3535533905932738f, 0.0975451610080642f, -0.4619397662556434f, -0.2777851165098011f, 0.3535533905932737f, 0.4157348061512727f, -0.1913417161825450f, -0.4903926402016153f,
0.3535533905932738f, -0.0975451610080641f, -0.4619397662556434f, 0.2777851165098009f, 0.3535533905932738f, -0.4157348061512726f, -0.1913417161825453f, 0.4903926402016152f,
0.3535533905932738f, -0.2777851165098010f, -0.1913417161825452f, 0.4903926402016153f, -0.3535533905932733f, -0.0975451610080649f, 0.4619397662556437f, -0.4157348061512720f,
0.3535533905932738f, -0.4157348061512727f, 0.1913417161825450f, 0.0975451610080640f, -0.3535533905932736f, 0.4903926402016152f, -0.4619397662556435f, 0.2777851165098022f,
0.3535533905932738f, -0.4903926402016152f, 0.4619397662556433f, -0.4157348061512721f, 0.3535533905932733f, -0.2777851165098008f, 0.1913417161825431f, -0.0975451610080625f
};
// Base quantization step for every position of a block (row-major), used by
// adjustQ() when isChroma is false. NOTE(review): appears to be the example
// luminance table from the JPEG specification - confirm.
constant float baseQLuma[BLOCK_SIZE2] = {
16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f,
12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f,
14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f,
14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f,
18.0f, 22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 77.0f,
24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f,
49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f,
72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f
};
// Base quantization step for every position of a block (row-major), used by
// adjustQ() when isChroma is true. NOTE(review): appears to be the example
// chrominance table from the JPEG specification - confirm.
constant float baseQChroma[BLOCK_SIZE2] = {
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
// Returns the quantization step for block position `index` at quality `qp`.
//
// The base step comes from baseQChroma or baseQLuma. It is scaled by
// 5000 / qp for qp below 50 and by 200 - 2 * qp otherwise, then turned into
// floor(scale * base + 50) / 100.
float adjustQ(int qp, int index, bool isChroma) {
    const float baseValue = isChroma ? baseQChroma[index] : baseQLuma[index];

    float scale = 0.0f;
    if (qp >= 50) {
        scale = 200.0 - (2.0 * (float)qp);
    } else {
        scale = 5000.0f / (float)qp;
    }

    return floor(scale * baseValue + 50.0f) / 100.0f;
}
// Converts `inColorRgb` to YUVA and stores the channel selected by
// `colorPlane` (0 = Y, 1 = U, 2 = V, anything else = alpha) at
// `blockPosition` of the 8x8 `block`.
void copyTextureBlockIn(
    half4 inColorRgb,
    int colorPlane,
    uint2 blockPosition,
    threadgroup float *block
) {
    const half4 converted = yuva(inColorRgb);

    half channel;
    switch (colorPlane) {
        case 0:
            channel = converted.r;
            break;
        case 1:
            channel = converted.g;
            break;
        case 2:
            channel = converted.b;
            break;
        default:
            channel = converted.a;
            break;
    }

    block[(blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x] = channel;
}
// Reads the red channel of `texture` at `pixelPosition`, multiplies it by the
// quantization step for `blockPosition` and stores the result in `block`.
void copyTextureBlockInDequantize(
    texture2d<half, access::read> texture,
    uint2 pixelPosition,
    uint2 blockPosition,
    threadgroup float *block,
    int qp,
    bool isChroma
) {
    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
    const half quantized = (half)texture.read(pixelPosition).r;
    const float step = adjustQ(qp, slot, isChroma);
    block[slot] = quantized * step;
}
// Writes the block element at `blockPosition` to `texture` at `pixelPosition`
// as an opaque grey value (same value in r, g and b, alpha 1).
void copyTextureBlockOut(
    uint2 pixelPosition,
    uint2 blockPosition,
    threadgroup float *block,
    texture2d<half, access::write> texture
) {
    const half value = block[(blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x];
    texture.write(half4(half3(value), 1.0), pixelPosition);
}
// Writes the block element at `blockPosition` to `texture` at `pixelPosition`
// as a single value converted to half.
void copyTextureBlockOutFloat(
    uint2 pixelPosition,
    uint2 blockPosition,
    threadgroup float *block,
    texture2d<half, access::write> texture
) {
    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
    texture.write(half(block[slot]), pixelPosition);
}
// Copies the element at `blockPosition` from `blockIn` to the same position
// of `blockOut`. Despite the name, no reordering takes place: source and
// destination index are identical.
void reorderBlockZigzag(threadgroup float *blockIn, threadgroup float *blockOut, uint2 blockPosition) {
    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
    blockOut[slot] = blockIn[slot];
}
// Forward 2D transform of one block, one output element per thread.
//
// Input samples in CurBlockLocal1 are first mapped with x * 255 - 128.
// Pass 1 multiplies by the transposed DCTv8matrix (result in CurBlockLocal2),
// pass 2 multiplies that by DCTv8matrix; the final coefficients overwrite
// CurBlockLocal1. A threadgroup barrier separates the passes because pass 2
// reads values written by other threads.
void DCT(
    uint2 blockPosition,
    threadgroup float *CurBlockLocal1,
    threadgroup float *CurBlockLocal2
) {
    const int column = blockPosition.x;
    const int row = blockPosition.y;
    const int slot = (row << BLOCK_SIZE_LOG2) + column;

    // Pass 1: walk down column `row` of the matrix and column `column` of the input.
    float accumulator = 0;
    #pragma unroll
    for (int k = 0; k < BLOCK_SIZE; k++)
    {
        accumulator += DCTv8matrix[k * BLOCK_SIZE + row] * (CurBlockLocal1[k * BLOCK_SIZE + column] * 255.0f - 128.0f);
    }
    CurBlockLocal2[slot] = accumulator;

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Pass 2: walk along row `row` of the intermediate and down column `column` of the matrix.
    accumulator = 0;
    #pragma unroll
    for (int k = 0; k < BLOCK_SIZE; k++)
    {
        accumulator += CurBlockLocal2[(row << BLOCK_SIZE_LOG2) + k] * DCTv8matrix[k * BLOCK_SIZE + column];
    }
    CurBlockLocal1[slot] = accumulator;
}
// Inverse 2D transform of one block, one output element per thread.
//
// Pass 1 multiplies DCTv8matrix by the coefficients in CurBlockLocal1
// (result in CurBlockLocal2), pass 2 multiplies that by the transposed
// matrix. The result is mapped back with (x + 128) / 255 and overwrites
// CurBlockLocal1. A threadgroup barrier separates the passes because pass 2
// reads values written by other threads.
void IDCT(
    uint2 blockPosition,
    threadgroup float *CurBlockLocal1,
    threadgroup float *CurBlockLocal2
) {
    const int column = blockPosition.x;
    const int row = blockPosition.y;
    const int slot = (row << BLOCK_SIZE_LOG2) + column;

    // Pass 1: row `row` of the matrix against column `column` of the input.
    float accumulator = 0;
    #pragma unroll
    for (int k = 0; k < BLOCK_SIZE; k++)
    {
        accumulator += DCTv8matrix[(row << BLOCK_SIZE_LOG2) + k] * CurBlockLocal1[k * BLOCK_SIZE + column];
    }
    CurBlockLocal2[slot] = accumulator;

    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Pass 2: row `row` of the intermediate against row `column` of the matrix.
    accumulator = 0;
    #pragma unroll
    for (int k = 0; k < BLOCK_SIZE; k++)
    {
        accumulator += CurBlockLocal2[(row << BLOCK_SIZE_LOG2) + k] * DCTv8matrix[(column << BLOCK_SIZE_LOG2) + k];
    }
    CurBlockLocal1[slot] = (accumulator + 128.0f) / 255.0f;
}
// Quantizes a single coefficient: divides sourceBlock[index] by the step
// returned by adjustQ(qp, index, isChroma), rounds to the nearest integer
// value and stores it in destinationBlock[index].
void quantize(
    int qp,
    threadgroup float *sourceBlock,
    threadgroup float *destinationBlock,
    int index,
    bool isChroma
) {
    const float step = adjustQ(qp, index, isChroma);
    const float coefficient = sourceBlock[index];

    destinationBlock[index] = round(coefficient / step);
}
// Reverses quantize() for a single coefficient: multiplies
// sourceBlock[index] by the step returned by adjustQ(qp, index, isChroma)
// and stores the product in destinationBlock[index].
void dequantize(
    int qp,
    threadgroup float *sourceBlock,
    threadgroup float *destinationBlock,
    int index,
    bool isChroma
) {
    const float step = adjustQ(qp, index, isChroma);
    const float level = sourceBlock[index];

    destinationBlock[index] = level * step;
}
// Compresses one plane of the input image: each threadgroup handles one
// block, each thread one element of it.
//
// texture(0) - source RGBA image.
// texture(1) - destination plane receiving quantized coefficients.
// buffer(2)  - plane selector: 1 and 2 are treated as chroma (read at half
//              resolution and quantized with chromaQp), 3 uses alphaQp,
//              anything else uses lumaQp.
//
// Steps: load sample -> forward DCT -> quantize -> reorder -> store.
// Barriers separate the steps because the threadgroup arrays are shared
// between the threads of a block.
kernel void dctKernel(
    texture2d<half, access::read> inTexture [[texture(0)]],
    texture2d<half, access::write> outTexture [[texture(1)]],
    uint2 pixelPosition [[thread_position_in_grid]],
    uint2 blockPosition [[thread_position_in_threadgroup]],
    constant int &colorPlane [[buffer(2)]]
) {
    threadgroup float CurBlockLocal1[BLOCK_SIZE2];
    threadgroup float CurBlockLocal2[BLOCK_SIZE2];

    half4 rgbPixelIn;
    int imageQp;
    bool isChroma = false;

    if (colorPlane == 1 || colorPlane == 2) {
        imageQp = chromaQp;
        isChroma = true;

        // Each chroma sample covers a 2x2 footprint of the source image.
        half4 rgbPixelIn0 = inTexture.read(uint2(pixelPosition.x * 2, pixelPosition.y * 2));
        half4 rgbPixelNextX = inTexture.read(uint2(pixelPosition.x * 2 + 1, pixelPosition.y * 2));
        half4 rgbPixelNextY = inTexture.read(uint2(pixelPosition.x * 2, pixelPosition.y * 2 + 1));
        half4 rgbPixelNextXY = inTexture.read(uint2(pixelPosition.x * 2 + 1, pixelPosition.y * 2 + 1));

        // Average the two rows separately, then average the rows, so every
        // texel contributes exactly 1/4. Chaining three mix() calls on a
        // running value would weight the texels 1/8, 1/8, 1/4 and 1/2.
        half4 firstRow = mix(rgbPixelIn0, rgbPixelNextX, 0.5);
        half4 secondRow = mix(rgbPixelNextY, rgbPixelNextXY, 0.5);
        rgbPixelIn = mix(firstRow, secondRow, 0.5);
    } else {
        if (colorPlane == 3) {
            imageQp = alphaQp;
        } else {
            imageQp = lumaQp;
        }
        rgbPixelIn = inTexture.read(pixelPosition);
    }

    copyTextureBlockIn(rgbPixelIn, colorPlane, blockPosition, CurBlockLocal1);
    threadgroup_barrier(mem_flags::mem_threadgroup);

    DCT(
        blockPosition,
        CurBlockLocal1,
        CurBlockLocal2
    );
    threadgroup_barrier(mem_flags::mem_threadgroup);

    int index = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
    quantize(imageQp, CurBlockLocal1, CurBlockLocal2, index, isChroma);
    threadgroup_barrier(mem_flags::mem_threadgroup);

    reorderBlockZigzag(CurBlockLocal2, CurBlockLocal1, blockPosition);
    threadgroup_barrier(mem_flags::mem_threadgroup);

    copyTextureBlockOutFloat(
        pixelPosition,
        blockPosition,
        CurBlockLocal1,
        outTexture
    );
}
// Decompresses one plane: each threadgroup handles one block, each thread
// one element of it.
//
// texture(0) - plane of quantized coefficients.
// texture(1) - destination plane receiving reconstructed samples.
// buffer(2)  - plane selector: 1 and 2 are chroma (chromaQp), 3 uses
//              alphaQp, anything else uses lumaQp.
//
// Steps: load + dequantize -> inverse DCT -> store.
kernel void idctKernel(
    texture2d<half, access::read> inTexture [[texture(0)]],
    texture2d<half, access::write> outTexture [[texture(1)]],
    uint2 pixelPosition [[thread_position_in_grid]],
    uint2 blockPosition [[thread_position_in_threadgroup]],
    constant int &colorPlane [[buffer(2)]]
) {
    threadgroup float CurBlockLocal1[BLOCK_SIZE2];
    threadgroup float CurBlockLocal2[BLOCK_SIZE2];

    const bool isChroma = (colorPlane == 1 || colorPlane == 2);

    int imageQp;
    if (isChroma) {
        imageQp = chromaQp;
    } else if (colorPlane == 3) {
        imageQp = alphaQp;
    } else {
        imageQp = lumaQp;
    }

    copyTextureBlockInDequantize(inTexture, pixelPosition, blockPosition, CurBlockLocal1, imageQp, isChroma);
    // Every thread's coefficient must be in place before the transform reads the block.
    threadgroup_barrier(mem_flags::mem_threadgroup);

    IDCT(blockPosition, CurBlockLocal1, CurBlockLocal2);
    threadgroup_barrier(mem_flags::mem_threadgroup);

    copyTextureBlockOut(pixelPosition, blockPosition, CurBlockLocal1, outTexture);
}
@@ -0,0 +1,393 @@
import Foundation
import Metal
import DctHuffman
/// Empty marker class. It is passed to `Bundle(for:)` to locate the bundle
/// that contains this module's code.
private final class BundleHelper: NSObject {
}
/// Rounds `size` up to the nearest multiple of `align`.
///
/// - Parameters:
///   - size: The value to round up.
///   - align: The alignment; must be a power of two (checked with `precondition`).
/// - Returns: The smallest multiple of `align` that is not less than `size`.
private func alignUp(size: Int, align: Int) -> Int {
    precondition(align & (align - 1) == 0, "Align must be a power of two")
    let lowBits = align - 1
    return (size + lowBits) & ~lowBits
}
/// A 2D Metal texture together with its dimensions and, when possible, a
/// CPU-visible buffer that backs the texture's storage directly.
final class Texture {
    /// Page-aligned memory wrapped in an `MTLBuffer` without copying, used as
    /// linear backing storage for a texture.
    final class DirectBuffer {
        let buffer: MTLBuffer
        let bytesPerRow: Int

        /// Allocates `bytesPerRow * height` bytes (rounded up to a whole page)
        /// and wraps them in a shared-storage buffer.
        ///
        /// Returns `nil` on the simulator, before iOS 12, or when either the
        /// allocation or the buffer creation fails.
        init?(device: MTLDevice, width: Int, height: Int, bytesPerRow: Int) {
            #if targetEnvironment(simulator)
            return nil
            #else
            if #available(iOS 12.0, *) {
                let pagesize = Int(getpagesize())
                let allocationSize = alignUp(size: bytesPerRow * height, align: pagesize)
                var data: UnsafeMutableRawPointer? = nil
                let result = posix_memalign(&data, pagesize, allocationSize)
                guard result == noErr, let data = data else {
                    return nil
                }
                guard let buffer = device.makeBuffer(
                    bytesNoCopy: data,
                    length: allocationSize,
                    options: .storageModeShared,
                    deallocator: { _, _ in
                        free(data)
                    }
                ) else {
                    // No buffer took ownership of the allocation, so the
                    // deallocator above will never run; release it here.
                    free(data)
                    return nil
                }
                self.bytesPerRow = bytesPerRow
                self.buffer = buffer
            } else {
                return nil
            }
            #endif
        }
    }

    let width: Int
    let height: Int
    let texture: MTLTexture
    /// Non-nil when the texture's storage is a buffer the CPU can write directly.
    let directBuffer: DirectBuffer?

    /// Creates a texture of the given size and format.
    ///
    /// A buffer-backed texture is attempted only when `isShared` is true,
    /// `usage` contains `.shaderRead`, iOS 12 is available and the pixel
    /// format is one of the formats with a known byte size below. In every
    /// other case (or when the direct buffer cannot be created) the texture
    /// is allocated by the device.
    ///
    /// Returns `nil` when Metal fails to create the texture.
    init?(
        device: MTLDevice,
        width: Int,
        height: Int,
        pixelFormat: MTLPixelFormat,
        usage: MTLTextureUsage,
        isShared: Bool
    ) {
        self.width = width
        self.height = height

        if #available(iOS 12.0, *), isShared, usage.contains(.shaderRead) {
            let bytesPerPixel: Int?
            switch pixelFormat {
            case .r32Float, .bgra8Unorm:
                bytesPerPixel = 4
            case .rg8Unorm:
                bytesPerPixel = 2
            case .r8Unorm, .r8Uint:
                bytesPerPixel = 1
            default:
                bytesPerPixel = nil
            }

            if let bytesPerPixel = bytesPerPixel {
                // Rows must respect the device's linear-texture alignment.
                let pixelRowAlignment = device.minimumTextureBufferAlignment(for: pixelFormat)
                let bytesPerRow = alignUp(size: width * bytesPerPixel, align: pixelRowAlignment)
                self.directBuffer = DirectBuffer(device: device, width: width, height: height, bytesPerRow: bytesPerRow)
            } else {
                self.directBuffer = nil
            }
        } else {
            self.directBuffer = nil
        }

        let textureDescriptor = MTLTextureDescriptor()
        textureDescriptor.textureType = .type2D
        textureDescriptor.pixelFormat = pixelFormat
        textureDescriptor.width = width
        textureDescriptor.height = height
        textureDescriptor.usage = usage

        if let directBuffer = self.directBuffer {
            textureDescriptor.storageMode = directBuffer.buffer.storageMode
            guard let texture = directBuffer.buffer.makeTexture(descriptor: textureDescriptor, offset: 0, bytesPerRow: directBuffer.bytesPerRow) else {
                return nil
            }
            self.texture = texture
        } else {
            guard let texture = device.makeTexture(descriptor: textureDescriptor) else {
                return nil
            }
            self.texture = texture
        }
    }

    /// Uploads `image` into the texture.
    ///
    /// The image must have exactly the texture's dimensions; otherwise the
    /// call asserts in debug builds and does nothing. When the texture is
    /// buffer-backed and the row strides match, the bytes are copied straight
    /// into the buffer; otherwise Metal's region replace is used.
    func replace(with image: AnimationCompressor.ImageData) {
        if image.width != self.width || image.height != self.height {
            assert(false, "Image size does not match")
            return
        }
        let region = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: image.width, height: image.height, depth: 1))

        if let directBuffer = self.directBuffer, directBuffer.bytesPerRow == image.bytesPerRow {
            image.data.withUnsafeBytes { bytes in
                guard let source = bytes.baseAddress else {
                    return
                }
                // Never read past the end of the supplied data.
                let byteCount = min(bytes.count, image.bytesPerRow * self.height)
                let _ = memcpy(directBuffer.buffer.contents(), source, byteCount)
            }
        } else {
            image.data.withUnsafeBytes { bytes in
                guard let source = bytes.baseAddress else {
                    return
                }
                self.texture.replace(region: region, mipmapLevel: 0, withBytes: source, bytesPerRow: image.bytesPerRow)
            }
        }
    }

    /// Fills the texture using a caller-supplied producer.
    ///
    /// `read` is called exactly once. When the texture is buffer-backed and
    /// `width`, `height` and `bytesPerRow` all match it, `read` receives the
    /// buffer's memory and is expected to write into it (its return value is
    /// ignored). Otherwise `read` receives `nil` and must return a pointer to
    /// `bytesPerRow`-strided pixel data, which is uploaded with a region replace.
    func readDirect(width: Int, height: Int, bytesPerRow: Int, read: (UnsafeMutableRawPointer?) -> UnsafeRawPointer) {
        if let directBuffer = self.directBuffer, width == self.width, height == self.height, bytesPerRow == directBuffer.bytesPerRow {
            let _ = read(directBuffer.buffer.contents())
        } else {
            let region = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: width, height: height, depth: 1))
            self.texture.replace(region: region, mipmapLevel: 0, withBytes: read(nil), bytesPerRow: bytesPerRow)
        }
    }
}
/// A group of textures ("planes") derived from one nominal image size.
final class TextureSet {
    /// Describes one plane: its size is the set's size divided by the given
    /// fractions (integer division), in the given pixel format.
    struct Description {
        let fractionWidth: Int
        let fractionHeight: Int
        let pixelFormat: MTLPixelFormat
    }

    let width: Int
    let height: Int
    /// One texture per description, in the order the descriptions were given.
    let textures: [Texture]

    /// Creates every plane up front; returns `nil` as soon as one of them
    /// cannot be created.
    init?(
        device: MTLDevice,
        width: Int,
        height: Int,
        descriptions: [Description],
        usage: MTLTextureUsage,
        isShared: Bool
    ) {
        self.width = width
        self.height = height

        var planes: [Texture] = []
        for description in descriptions {
            guard let plane = Texture(
                device: device,
                width: width / description.fractionWidth,
                height: height / description.fractionHeight,
                pixelFormat: description.pixelFormat,
                usage: usage,
                isShared: isShared
            ) else {
                return nil
            }
            planes.append(plane)
        }
        self.textures = planes
    }
}
/// Compresses images on the GPU: a compute kernel transforms and quantizes
/// four planes, and the resulting coefficients are entropy-coded on the CPU.
public final class AnimationCompressor {
    /// An uncompressed input image (pixel bytes plus layout).
    public final class ImageData {
        public let width: Int
        public let height: Int
        public let bytesPerRow: Int
        public let data: Data

        public init(width: Int, height: Int, bytesPerRow: Int, data: Data) {
            self.width = width
            self.height = height
            self.bytesPerRow = bytesPerRow
            self.data = data
        }
    }

    /// The serialized output of a compression pass.
    public final class CompressedImageData {
        public let data: Data

        public init(data: Data) {
            self.data = data
        }
    }

    /// Metal objects shared by all compressors: device, shader library,
    /// the "dctKernel" pipeline and a command queue.
    public final class SharedContext {
        /// Process-wide instance. Force-unwrapped: traps if the context
        /// cannot be created.
        public static let shared: SharedContext = SharedContext()!

        public let device: MTLDevice
        let defaultLibrary: MTLLibrary
        private let computeDctPipelineState: MTLComputePipelineState
        private let commandQueue: MTLCommandQueue

        /// Returns `nil` when there is no Metal device, the resource bundle
        /// or its shader library cannot be loaded, or any pipeline object
        /// cannot be created.
        public init?() {
            guard let device = MTLCreateSystemDefaultDevice() else {
                return nil
            }

            // The shaders live in a resource bundle next to this module's code.
            let hostBundle = Bundle(for: BundleHelper.self)
            guard let bundlePath = hostBundle.path(forResource: "AnimationCompressionBundle", ofType: "bundle") else {
                return nil
            }
            guard let resourceBundle = Bundle(path: bundlePath) else {
                return nil
            }

            let library: MTLLibrary
            if #available(iOS 10.0, *) {
                guard let loaded = try? device.makeDefaultLibrary(bundle: resourceBundle) else {
                    return nil
                }
                library = loaded
            } else {
                preconditionFailure()
            }

            guard let dctFunction = library.makeFunction(name: "dctKernel") else {
                return nil
            }
            guard let dctPipeline = try? device.makeComputePipelineState(function: dctFunction) else {
                return nil
            }
            guard let queue = device.makeCommandQueue() else {
                return nil
            }

            self.device = device
            self.defaultLibrary = library
            self.computeDctPipelineState = dctPipeline
            self.commandQueue = queue
        }

        /// Encodes and submits the compression of `image`.
        ///
        /// Textures are cached on `compressor` and reused while the image
        /// size stays the same. `completion` is invoked on the main queue
        /// once the GPU work has finished and the output has been serialized.
        /// If a Metal object cannot be created the method returns early and
        /// `completion` is not invoked.
        func compress(compressor: AnimationCompressor, image: ImageData, completion: @escaping (CompressedImageData) -> Void) {
            let threadgroupSize = MTLSize(width: 8, height: 8, depth: 1)

            assert(image.width % 8 == 0)
            assert(image.height % 8 == 0)

            let inputTexture: Texture
            if let cached = compressor.inputTexture, cached.width == image.width, cached.height == image.height {
                inputTexture = cached
            } else if let created = Texture(
                device: self.device,
                width: image.width,
                height: image.height,
                pixelFormat: .bgra8Unorm,
                usage: .shaderRead,
                isShared: true
            ) {
                inputTexture = created
                compressor.inputTexture = created
            } else {
                return
            }

            inputTexture.replace(with: image)

            let compressedTextures: TextureSet
            if let cached = compressor.compressedTextures, cached.width == image.width, cached.height == image.height {
                compressedTextures = cached
            } else {
                // Planes 0 and 3 are full size, planes 1 and 2 half size in both directions.
                let planeDescriptions = [1, 2, 2, 1].map { fraction in
                    TextureSet.Description(fractionWidth: fraction, fractionHeight: fraction, pixelFormat: .r32Float)
                }
                guard let created = TextureSet(
                    device: self.device,
                    width: image.width,
                    height: image.height,
                    descriptions: planeDescriptions,
                    usage: [.shaderWrite],
                    isShared: false
                ) else {
                    return
                }
                compressedTextures = created
                compressor.compressedTextures = created
            }

            guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
                return
            }
            commandBuffer.label = "ImageCompressor"

            guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
                return
            }
            computeEncoder.setComputePipelineState(self.computeDctPipelineState)
            computeEncoder.setTexture(inputTexture.texture, index: 0)

            // One dispatch per plane; the plane index is passed to the kernel in buffer slot 2.
            for colorPlane in 0 ..< 4 {
                let plane = compressedTextures.textures[colorPlane]
                computeEncoder.setTexture(plane.texture, index: 1)

                var colorPlaneInt32 = Int32(colorPlane)
                computeEncoder.setBytes(&colorPlaneInt32, length: 4, index: 2)

                let groupsWide = (plane.width + threadgroupSize.width - 1) / threadgroupSize.width
                let groupsHigh = (plane.height + threadgroupSize.height - 1) / threadgroupSize.height
                computeEncoder.dispatchThreadgroups(MTLSize(width: groupsWide, height: groupsHigh, depth: 1), threadsPerThreadgroup: threadgroupSize)
            }
            computeEncoder.endEncoding()

            commandBuffer.addCompletedHandler { _ in
                // Output layout: two header words, the size of plane 0, then
                // for every plane its width, height, row stride, encoded
                // length and the encoded bytes.
                let output = WriteBuffer()
                output.writeInt32(0x543ee445)
                output.writeInt32(4)
                output.writeInt32(Int32(compressedTextures.textures[0].width))
                output.writeInt32(Int32(compressedTextures.textures[0].height))

                for planeIndex in 0 ..< 4 {
                    let plane = compressedTextures.textures[planeIndex]
                    let rowBytes = 4 * plane.width
                    let planeBytes = rowBytes * plane.height
                    let fullRegion = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: plane.width, height: plane.height, depth: 1))

                    output.writeInt32(Int32(plane.width))
                    output.writeInt32(Int32(plane.height))
                    output.writeInt32(Int32(rowBytes))

                    var coefficients = Data(count: planeBytes)
                    coefficients.withUnsafeMutableBytes { bytes in
                        plane.texture.getBytes(bytes.baseAddress!, bytesPerRow: rowBytes, bytesPerImage: planeBytes, from: fullRegion, mipmapLevel: 0, slice: 0)

                        let huffmanData = writeDCTBlocks(Int32(plane.width), Int32(plane.height), bytes.baseAddress!.assumingMemoryBound(to: Float32.self))!
                        output.writeInt32(Int32(huffmanData.count))
                        output.write(huffmanData)
                    }
                }

                DispatchQueue.main.async {
                    completion(CompressedImageData(data: output.makeData()))
                }
            }
            commandBuffer.commit()
        }
    }

    private let sharedContext: SharedContext
    // Cached between calls; replaced when the input size changes.
    private var inputTexture: Texture?
    private var compressedTextures: TextureSet?

    public init(sharedContext: SharedContext) {
        self.sharedContext = sharedContext
    }

    /// Compresses `image` asynchronously; see `SharedContext.compress`.
    public func compress(image: ImageData, completion: @escaping (CompressedImageData) -> Void) {
        self.sharedContext.compress(compressor: self, image: image, completion: completion)
    }
}
@@ -0,0 +1,110 @@
import Foundation
/// Base class for the byte buffers below: a `Data` value plus a separately
/// tracked length, initialised to the data's byte count.
class MemoryBuffer {
    var data: Data
    var length: Int

    init(data: Data) {
        self.length = data.count
        self.data = data
    }
}
/// Append-style byte writer. The backing `data` grows in steps that include
/// 256 bytes of slack, so `data.count` is usually larger than the number of
/// bytes actually written; `offset` is the current write position.
final class WriteBuffer: MemoryBuffer {
    var offset = 0

    init() {
        super.init(data: Data())
    }

    /// Returns a copy of the bytes up to the current write position.
    ///
    /// The growth slack at the end of the backing store is not part of the
    /// result, so the output contains only bytes that were written.
    func makeData() -> Data {
        let end = max(0, min(self.offset, self.data.count))
        return self.data.subdata(in: 0 ..< end)
    }

    /// Moves the write position back to the start; stored bytes are kept
    /// until they are overwritten.
    func reset() {
        self.offset = 0
    }

    /// Copies `length` bytes starting at `data + offset` to the current
    /// write position, growing the backing store when needed, and advances
    /// the position. Writing zero (or fewer) bytes is a no-op.
    func write(_ data: UnsafeRawPointer, offset: Int = 0, length: Int) {
        guard length > 0 else {
            // Nothing to copy; also avoids touching the (possibly nil) base
            // address of an empty backing store.
            return
        }
        if self.offset + length > self.data.count {
            self.data.count = self.offset + length + 256
        }
        self.data.withUnsafeMutableBytes { bytes in
            let _ = memcpy(bytes.baseAddress!.advanced(by: self.offset), data + offset, length)
        }
        self.offset += length
        self.length = self.offset
    }

    /// Appends the contents of `data`; empty data is ignored.
    func write(_ data: Data) {
        data.withUnsafeBytes { bytes in
            guard let base = bytes.baseAddress else {
                return
            }
            self.write(base, length: bytes.count)
        }
    }

    func writeInt8(_ value: Int8) {
        var value = value
        self.write(&value, length: 1)
    }

    /// Writes the 4 bytes of `value` in the host's byte order.
    func writeInt32(_ value: Int32) {
        var value = value
        self.write(&value, length: 4)
    }

    /// Writes the 4 bytes of `value` in the host's byte order.
    func writeFloat(_ value: Float) {
        var value: Float32 = value
        self.write(&value, length: 4)
    }

    /// Sets the write position; no bytes are written or removed.
    func seek(offset: Int) {
        self.offset = offset
    }
}
/// Sequential byte reader over `data`; `offset` is the current read position.
///
/// Reads that would run past the end of the data (or have a negative
/// length) are rejected instead of trapping: the destination is left
/// untouched and the position does not move.
final class ReadBuffer: MemoryBuffer {
    var offset = 0

    override init(data: Data) {
        super.init(data: data)
    }

    /// True when `length` bytes are available at the current position.
    private func canRead(_ length: Int) -> Bool {
        return length >= 0 && self.offset >= 0 && self.offset <= self.data.count && length <= self.data.count - self.offset
    }

    /// Copies `length` bytes to `data` and advances the position.
    /// Does nothing when the bytes are not available.
    func read(_ data: UnsafeMutableRawPointer, length: Int) {
        guard self.canRead(length) else {
            return
        }
        self.data.copyBytes(to: data.assumingMemoryBound(to: UInt8.self), from: self.offset ..< (self.offset + length))
        self.offset += length
    }

    /// Returns `length` bytes at the current position without copying them
    /// and advances the position. Returns empty data when the bytes are not
    /// available.
    ///
    /// The result points into this buffer's storage and does not keep it
    /// alive: it must not be used after this `ReadBuffer` is released or its
    /// `data` is modified.
    func readDataNoCopy(length: Int) -> Data {
        guard length > 0, self.canRead(length) else {
            return Data()
        }
        let result = self.data.withUnsafeBytes { bytes -> Data in
            return Data(bytesNoCopy: UnsafeMutableRawPointer(mutating: bytes.baseAddress!.advanced(by: self.offset)), count: length, deallocator: .none)
        }
        self.offset += length
        return result
    }

    /// Reads one byte; returns 0 when no byte is available.
    func readInt8() -> Int8 {
        var result: Int8 = 0
        self.read(&result, length: 1)
        return result
    }

    /// Reads 4 bytes in the host's byte order; returns 0 when they are not available.
    func readInt32() -> Int32 {
        var result: Int32 = 0
        self.read(&result, length: 4)
        return result
    }

    /// Reads 4 bytes in the host's byte order; returns 0 when they are not available.
    func readFloat() -> Float {
        var result: Float32 = 0
        self.read(&result, length: 4)
        return result
    }

    /// Advances the position by `length` bytes without reading.
    func skip(_ length: Int) {
        self.offset += length
    }

    /// Moves the position back to the start.
    func reset() {
        self.offset = 0
    }
}
@@ -0,0 +1,545 @@
import Foundation
import UIKit
import Metal
import MetalKit
import simd
import DctHuffman
import MetalImageView
/// A 2D position paired with a 2D texture coordinate.
// NOTE(review): not referenced in the visible part of this file; presumably
// mirrors a vertex layout used by the shaders - confirm before changing.
private struct Vertex {
var position: vector_float2
var textureCoordinate: vector_float2
}
public final class CompressedImageRenderer {
/// Pipeline states used by every renderer instance, built once from the
/// compressor's shared Metal context.
private final class Shared {
    /// Process-wide instance. Force-unwrapped: traps if any pipeline
    /// cannot be created.
    static let shared: Shared = {
        return Shared(sharedContext: AnimationCompressor.SharedContext.shared)!
    }()

    let sharedContext: AnimationCompressor.SharedContext
    let computeIdctPipelineState: MTLComputePipelineState
    let renderIdctPipelineState: MTLRenderPipelineState
    let renderRgbPipelineState: MTLRenderPipelineState
    let renderYuvaPipelineState: MTLRenderPipelineState

    /// Looks up the shader functions by name and builds one compute and
    /// three render pipelines. Returns `nil` if any function is missing or
    /// any pipeline fails to build.
    init?(sharedContext: AnimationCompressor.SharedContext) {
        self.sharedContext = sharedContext

        let device = sharedContext.device
        let library = sharedContext.defaultLibrary

        guard let idctFunction = library.makeFunction(name: "idctKernel") else {
            return nil
        }
        guard let idctComputePipeline = try? device.makeComputePipelineState(function: idctFunction) else {
            return nil
        }
        self.computeIdctPipelineState = idctComputePipeline

        guard let vertexShader = library.makeFunction(name: "vertexShader") else {
            return nil
        }
        guard let samplingIdctShader = library.makeFunction(name: "samplingIdctShader") else {
            return nil
        }
        guard let samplingRgbShader = library.makeFunction(name: "samplingRgbShader") else {
            return nil
        }
        guard let samplingYuvaShader = library.makeFunction(name: "samplingYuvaShader") else {
            return nil
        }

        // All three render pipelines share the vertex stage and target
        // format; only the label and fragment function differ.
        func makeRenderPipeline(label: String, fragmentFunction: MTLFunction) -> MTLRenderPipelineState? {
            let descriptor = MTLRenderPipelineDescriptor()
            descriptor.label = label
            descriptor.vertexFunction = vertexShader
            descriptor.fragmentFunction = fragmentFunction
            descriptor.colorAttachments[0].pixelFormat = .bgra8Unorm
            return try? device.makeRenderPipelineState(descriptor: descriptor)
        }

        guard let idctRenderPipeline = makeRenderPipeline(label: "Render IDCT Pipeline", fragmentFunction: samplingIdctShader) else {
            return nil
        }
        self.renderIdctPipelineState = idctRenderPipeline

        guard let rgbRenderPipeline = makeRenderPipeline(label: "Render RGB Pipeline", fragmentFunction: samplingRgbShader) else {
            return nil
        }
        self.renderRgbPipelineState = rgbRenderPipeline

        guard let yuvaRenderPipeline = makeRenderPipeline(label: "Render YUVA Pipeline", fragmentFunction: samplingYuvaShader) else {
            return nil
        }
        self.renderYuvaPipelineState = yuvaRenderPipeline
    }
}
private let sharedContext: AnimationCompressor.SharedContext
private let shared: Shared
private var compressedTextures: TextureSet?
private var outputTextures: TextureSet?
private var rgbTexture: Texture?
private var yuvaTextures: TextureSet?
private let commandQueue: MTLCommandQueue
private var isRendering: Bool = false
/// Creates a renderer with its own command queue on the shared device.
/// Returns `nil` when the queue cannot be created.
public init?(sharedContext: AnimationCompressor.SharedContext) {
    self.sharedContext = sharedContext
    // Touching `Shared.shared` builds the shared pipelines on first use.
    self.shared = Shared.shared

    guard let queue = sharedContext.device.makeCommandQueue() else {
        return nil
    }
    self.commandQueue = queue
}
private var drawableRequestTimestamp: Double?
/// Sets the layer renderer's drawable size and asks it for the next drawable.
/// Returns whatever `nextDrawable()` returns, which may be `nil`.
private func getNextDrawable(layer: MetalImageLayer, drawableSize: CGSize) -> MetalImageLayer.Drawable? {
// The size is applied before the request so the drawable is produced with it.
layer.renderer.drawableSize = drawableSize
return layer.renderer.nextDrawable()
}
/// Parses `compressedImage` and fills the four coefficient textures.
///
/// Expected layout: two header words (0x543ee445 and 4), the image width
/// and height, then for each of the four planes its width, height, row
/// stride in bytes, encoded length and the encoded bytes.
///
/// The input is validated before use; on any inconsistency the method
/// returns early, leaving previously created textures in place. The
/// RGB and YUVA textures are always released first.
private func updateIdctTextures(compressedImage: AnimationCompressor.CompressedImageData) {
    self.rgbTexture = nil
    self.yuvaTextures = nil

    // Four 32-bit words: two header words, width, height.
    let headerSize = 4 * 4
    guard compressedImage.data.count >= headerSize else {
        return
    }

    let readBuffer = ReadBuffer(data: compressedImage.data)
    if readBuffer.readInt32() != 0x543ee445 {
        return
    }
    if readBuffer.readInt32() != 4 {
        return
    }

    let width = Int(readBuffer.readInt32())
    let height = Int(readBuffer.readInt32())
    // Two planes are half size in each direction, so anything below 2
    // would produce an empty texture.
    guard width >= 2, height >= 2 else {
        return
    }

    let compressedTextures: TextureSet
    if let current = self.compressedTextures, current.width == width, current.height == height {
        compressedTextures = current
    } else {
        let planeDescriptions = [1, 2, 2, 1].map { fraction in
            TextureSet.Description(fractionWidth: fraction, fractionHeight: fraction, pixelFormat: .r32Float)
        }
        guard let textures = TextureSet(
            device: self.sharedContext.device,
            width: width,
            height: height,
            descriptions: planeDescriptions,
            usage: .shaderRead,
            isShared: true
        ) else {
            return
        }
        self.compressedTextures = textures
        compressedTextures = textures
    }

    // Four 32-bit words per plane: width, height, row stride, encoded length.
    let planeHeaderSize = 4 * 4

    for i in 0 ..< 4 {
        guard readBuffer.data.count - readBuffer.offset >= planeHeaderSize else {
            return
        }
        let planeWidth = Int(readBuffer.readInt32())
        let planeHeight = Int(readBuffer.readInt32())
        let bytesPerRow = Int(readBuffer.readInt32())
        let planeSize = Int(readBuffer.readInt32())

        let plane = compressedTextures.textures[i]
        // The plane must match the texture it is uploaded into, and a row
        // of 4-byte texels must fit inside the declared stride.
        guard planeWidth == plane.width, planeHeight == plane.height, bytesPerRow >= planeWidth * 4 else {
            return
        }
        guard planeSize >= 0, planeSize <= readBuffer.data.count - readBuffer.offset else {
            return
        }

        let planeData = readBuffer.readDataNoCopy(length: planeSize)

        // Scratch memory for the non-direct upload path. It has to stay
        // valid until `readDirect` has returned, so it is owned here
        // rather than by a closure-scoped buffer.
        var scratch: UnsafeMutableRawPointer? = nil
        defer {
            scratch?.deallocate()
        }

        plane.readDirect(width: planeWidth, height: planeHeight, bytesPerRow: bytesPerRow, read: { destinationBytes in
            let target: UnsafeMutableRawPointer
            if let destinationBytes = destinationBytes {
                target = destinationBytes
            } else {
                let byteCount = bytesPerRow * planeHeight
                let allocated = UnsafeMutableRawPointer.allocate(byteCount: byteCount, alignment: MemoryLayout<Float32>.alignment)
                allocated.initializeMemory(as: UInt8.self, repeating: 0, count: byteCount)
                scratch = allocated
                target = allocated
            }
            readDCTBlocks(Int32(planeWidth), Int32(planeHeight), planeData, target.assumingMemoryBound(to: Float32.self), Int32(bytesPerRow / 4))
            return UnsafeRawPointer(target)
        })
    }
}
/// Decodes `compressedImage` and draws it into `layer`.
///
/// Parsing and texture upload run on a global queue; the GPU work is
/// encoded on the main queue. `completion` is always invoked on the main
/// queue: after the drawable has been presented on success, or immediately
/// when any step fails.
public func renderIdct(layer: MetalImageLayer, compressedImage: AnimationCompressor.CompressedImageData, completion: @escaping () -> Void) {
    DispatchQueue.global().async {
        self.updateIdctTextures(compressedImage: compressedImage)

        DispatchQueue.main.async {
            guard let compressedTextures = self.compressedTextures else {
                completion()
                return
            }

            // Resolve the output planes before any encoder exists, so a
            // failure here cannot leave an encoder without `endEncoding`.
            let outputTextures: TextureSet
            if let current = self.outputTextures, current.width == compressedTextures.textures[0].width, current.height == compressedTextures.textures[0].height {
                outputTextures = current
            } else {
                let planeDescriptions = [1, 2, 2, 1].map { fraction in
                    TextureSet.Description(fractionWidth: fraction, fractionHeight: fraction, pixelFormat: .r8Unorm)
                }
                guard let textures = TextureSet(
                    device: self.sharedContext.device,
                    width: compressedTextures.textures[0].width,
                    height: compressedTextures.textures[0].height,
                    descriptions: planeDescriptions,
                    usage: [.shaderRead, .shaderWrite],
                    isShared: false
                ) else {
                    completion()
                    return
                }
                self.outputTextures = textures
                outputTextures = textures
            }

            guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
                completion()
                return
            }
            commandBuffer.label = "MyCommand"

            guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
                completion()
                return
            }
            computeEncoder.setComputePipelineState(self.shared.computeIdctPipelineState)

            // One inverse-transform dispatch per plane; the plane index is
            // passed to the kernel in buffer slot 2.
            for i in 0 ..< 4 {
                computeEncoder.setTexture(compressedTextures.textures[i].texture, index: 0)
                computeEncoder.setTexture(outputTextures.textures[i].texture, index: 1)

                var colorPlaneInt32 = Int32(i)
                computeEncoder.setBytes(&colorPlaneInt32, length: 4, index: 2)

                let threadgroupSize = MTLSize(width: 8, height: 8, depth: 1)
                let threadgroupCount = MTLSize(width: (compressedTextures.textures[i].width + threadgroupSize.width - 1) / threadgroupSize.width, height: (compressedTextures.textures[i].height + threadgroupSize.height - 1) / threadgroupSize.height, depth: 1)
                computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
            }
            computeEncoder.endEncoding()

            let drawableSize = CGSize(width: CGFloat(outputTextures.textures[0].width), height: CGFloat(outputTextures.textures[0].height))
            guard let drawable = self.getNextDrawable(layer: layer, drawableSize: drawableSize) else {
                commandBuffer.commit()
                completion()
                return
            }

            let renderPassDescriptor = MTLRenderPassDescriptor()
            renderPassDescriptor.colorAttachments[0].texture = drawable.texture
            renderPassDescriptor.colorAttachments[0].loadAction = .clear
            renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)

            guard let renderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) else {
                completion()
                return
            }
            renderEncoder.label = "MyRenderEncoder"
            renderEncoder.setRenderPipelineState(self.shared.renderIdctPipelineState)
            for i in 0 ..< 4 {
                renderEncoder.setFragmentTexture(outputTextures.textures[i].texture, index: i)
            }
            renderEncoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6)
            renderEncoder.endEncoding()

            // Held until the GPU has finished, then presented and released on the main queue.
            var storedDrawable: MetalImageLayer.Drawable? = drawable
            commandBuffer.addCompletedHandler { _ in
                DispatchQueue.main.async {
                    autoreleasepool {
                        storedDrawable?.present(completion: completion)
                        storedDrawable = nil
                    }
                }
            }
            commandBuffer.commit()
        }
    }
}
/// Uploads raw pixel `data` into the (cached) BGRA texture.
///
/// The other texture sets are released first. Nothing is uploaded when
/// `data` is empty or the texture cannot be created.
private func updateRgbTexture(width: Int, height: Int, bytesPerRow: Int, data: Data) {
    self.compressedTextures = nil
    self.outputTextures = nil
    self.yuvaTextures = nil

    // Empty data has no base address to read from.
    guard !data.isEmpty else {
        return
    }

    let rgbTexture: Texture
    if let current = self.rgbTexture, current.width == width, current.height == height {
        rgbTexture = current
    } else {
        guard let texture = Texture(device: self.sharedContext.device, width: width, height: height, pixelFormat: .bgra8Unorm, usage: .shaderRead, isShared: true) else {
            return
        }
        self.rgbTexture = texture
        rgbTexture = texture
    }

    rgbTexture.readDirect(width: width, height: height, bytesPerRow: bytesPerRow, read: { destinationBytes in
        return data.withUnsafeBytes { bytes -> UnsafeRawPointer in
            if let destinationBytes = destinationBytes {
                // The direct path is taken only when size and stride match
                // the texture, so the destination holds at least
                // bytesPerRow * height bytes; never copy more than that.
                memcpy(destinationBytes, bytes.baseAddress!, min(bytes.count, bytesPerRow * height))
                return UnsafeRawPointer(destinationBytes)
            } else {
                return bytes.baseAddress!
            }
        }
    })
}
/// Uploads raw BGRA pixels and draws them into `layer`.
///
/// `completion` is always invoked: after the drawable has been presented
/// on success, or immediately on the calling thread when any step fails.
public func renderRgb(layer: MetalImageLayer, width: Int, height: Int, bytesPerRow: Int, data: Data, completion: @escaping () -> Void) {
    self.updateRgbTexture(width: width, height: height, bytesPerRow: bytesPerRow, data: data)

    guard let rgbTexture = self.rgbTexture else {
        completion()
        return
    }

    guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
        completion()
        return
    }
    commandBuffer.label = "MyCommand"

    let drawableSize = CGSize(width: CGFloat(rgbTexture.width), height: CGFloat(rgbTexture.height))
    guard let drawable = self.getNextDrawable(layer: layer, drawableSize: drawableSize) else {
        commandBuffer.commit()
        completion()
        return
    }

    let renderPassDescriptor = MTLRenderPassDescriptor()
    renderPassDescriptor.colorAttachments[0].texture = drawable.texture
    renderPassDescriptor.colorAttachments[0].loadAction = .clear
    renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)

    guard let renderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) else {
        completion()
        return
    }
    renderEncoder.label = "MyRenderEncoder"
    renderEncoder.setRenderPipelineState(self.shared.renderRgbPipelineState)
    renderEncoder.setFragmentTexture(rgbTexture.texture, index: 0)
    renderEncoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6)
    renderEncoder.endEncoding()

    // Held until the GPU has finished, then presented and released on the main queue.
    var storedDrawable: MetalImageLayer.Drawable? = drawable
    commandBuffer.addCompletedHandler { _ in
        DispatchQueue.main.async {
            autoreleasepool {
                storedDrawable?.present(completion: completion)
                storedDrawable = nil
            }
        }
    }
    commandBuffer.commit()
}
/// Uploads a planar buffer into three textures.
///
/// Byte ranges read from `data`:
/// - texture 0: `width * height` bytes at offset 0;
/// - texture 1: `width * height / 2` bytes at offset `width * height`;
/// - texture 2: `width / 2 * height` bytes at offset `width * height * 2`.
///
/// Nothing is changed when a dimension is odd or not positive, or when
/// `data` is too short to cover the ranges above.
private func updateYuvaTextures(width: Int, height: Int, data: Data) {
    if width % 2 != 0 || height % 2 != 0 {
        return
    }
    // The last range read ends at this offset; shorter input would be read out of bounds.
    guard width > 0, height > 0, data.count >= width * height * 2 + width / 2 * height else {
        return
    }

    self.compressedTextures = nil
    self.outputTextures = nil
    self.rgbTexture = nil

    let yuvaTextures: TextureSet
    if let current = self.yuvaTextures, current.width == width, current.height == height {
        yuvaTextures = current
    } else {
        guard let textures = TextureSet(
            device: self.sharedContext.device,
            width: width,
            height: height,
            descriptions: [
                TextureSet.Description(
                    fractionWidth: 1, fractionHeight: 1,
                    pixelFormat: .r8Unorm
                ),
                TextureSet.Description(
                    fractionWidth: 2, fractionHeight: 2,
                    pixelFormat: .rg8Unorm
                ),
                TextureSet.Description(
                    fractionWidth: 2, fractionHeight: 1,
                    pixelFormat: .r8Uint
                )
            ],
            usage: .shaderRead,
            isShared: true
        ) else {
            return
        }
        self.yuvaTextures = textures
        yuvaTextures = textures
    }

    data.withUnsafeBytes { yuvaBuffer in
        guard let yuva = yuvaBuffer.baseAddress?.assumingMemoryBound(to: UInt8.self) else {
            return
        }

        // In each upload, a non-nil destination means the texture's backing
        // buffer can be written directly; otherwise the source pointer is
        // handed back for a regular texture upload.
        yuvaTextures.textures[0].readDirect(width: width, height: height, bytesPerRow: width, read: { destinationBytes in
            if let destinationBytes = destinationBytes {
                memcpy(destinationBytes, yuva.advanced(by: 0), width * height)
                return UnsafeRawPointer(destinationBytes)
            } else {
                return UnsafeRawPointer(yuva.advanced(by: 0))
            }
        })

        yuvaTextures.textures[1].readDirect(width: width / 2, height: height / 2, bytesPerRow: width, read: { destinationBytes in
            if let destinationBytes = destinationBytes {
                memcpy(destinationBytes, yuva.advanced(by: width * height), width * height / 2)
                return UnsafeRawPointer(destinationBytes)
            } else {
                return UnsafeRawPointer(yuva.advanced(by: width * height))
            }
        })

        yuvaTextures.textures[2].readDirect(width: width / 2, height: height, bytesPerRow: width / 2, read: { destinationBytes in
            if let destinationBytes = destinationBytes {
                memcpy(destinationBytes, yuva.advanced(by: width * height * 2), width / 2 * height)
                return UnsafeRawPointer(destinationBytes)
            } else {
                return UnsafeRawPointer(yuva.advanced(by: width * height * 2))
            }
        })
    }
}
/// Uploads a YUVA frame and draws it into the next drawable obtained for `layer`.
///
/// All work is dispatched to a global queue. `completion` is invoked on the main queue
/// whenever a step fails (no textures, no command buffer, no drawable, no encoder);
/// on success it is handed to the drawable's `present(completion:)`, which is called on
/// the main queue after the command buffer has completed.
///
/// - Parameters:
///   - layer: Layer that supplies the drawable to render into.
///   - width: Frame width in pixels, forwarded to `updateYuvaTextures`.
///   - height: Frame height in pixels, forwarded to `updateYuvaTextures`.
///   - data: Frame bytes, forwarded to `updateYuvaTextures`.
///   - completion: Called once rendering has finished or has been abandoned.
public func renderYuva(layer: MetalImageLayer, width: Int, height: Int, data: Data, completion: @escaping () -> Void) {
    DispatchQueue.global().async {
        autoreleasepool {
            // Any stage taking longer than one 60 Hz frame is logged as lag.
            let lagThreshold = 1.0 / 60.0

            // Shared bail-out: report completion on the main queue without presenting.
            let completeOnMain: () -> Void = {
                DispatchQueue.main.async {
                    completion()
                }
            }

            // Stage 1: texture upload.
            let uploadStartTime = CFAbsoluteTimeGetCurrent()
            self.updateYuvaTextures(width: width, height: height, data: data)
            var duration = CFAbsoluteTimeGetCurrent() - uploadStartTime
            if duration > lagThreshold {
                print("update textures lag \(duration * 1000.0)")
            }

            guard let frameTextures = self.yuvaTextures else {
                completeOnMain()
                return
            }

            // Stage 2: encode and commit.
            let encodeStartTime = CFAbsoluteTimeGetCurrent()

            guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
                completeOnMain()
                return
            }
            commandBuffer.label = "MyCommand"

            let targetSize = CGSize(width: CGFloat(frameTextures.width), height: CGFloat(frameTextures.height))
            guard let drawable = self.getNextDrawable(layer: layer, drawableSize: targetSize) else {
                // The (empty) command buffer is still committed before giving up.
                commandBuffer.commit()
                completeOnMain()
                return
            }

            let passDescriptor = MTLRenderPassDescriptor()
            passDescriptor.colorAttachments[0].texture = drawable.texture
            passDescriptor.colorAttachments[0].loadAction = .clear
            passDescriptor.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)

            guard let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: passDescriptor) else {
                completeOnMain()
                return
            }
            encoder.label = "MyRenderEncoder"
            encoder.setRenderPipelineState(self.shared.renderYuvaPipelineState)

            // Bind the three planes to fragment texture slots 0...2.
            for planeIndex in 0 ..< 3 {
                encoder.setFragmentTexture(frameTextures.textures[planeIndex].texture, index: planeIndex)
            }

            // The size passed to the shader is taken from texture 0 (the full-size plane).
            let fullSizePlane = frameTextures.textures[0].texture
            var alphaSize = simd_uint2(UInt32(fullSizePlane.width), UInt32(fullSizePlane.height))
            encoder.setFragmentBytes(&alphaSize, length: MemoryLayout<simd_uint2>.size, index: 3)

            // Six vertices: two triangles.
            encoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6)
            encoder.endEncoding()

            // Held in a mutable optional so the reference can be dropped right after presenting.
            var pendingDrawable: MetalImageLayer.Drawable? = drawable
            commandBuffer.addCompletedHandler { _ in
                DispatchQueue.main.async {
                    autoreleasepool {
                        pendingDrawable?.present(completion: completion)
                        pendingDrawable = nil
                    }
                }
            }
            commandBuffer.commit()

            duration = CFAbsoluteTimeGetCurrent() - encodeStartTime
            if duration > lagThreshold {
                print("commit lag \(duration * 1000.0)")
            }
        }
    }
}
}