GLEGram 12.5 — Initial public release

Based on Swiftgram 12.5 (Telegram iOS 12.5). All GLEGram features ported and organized in GLEGram/ folder. Features: Ghost Mode, Saved Deleted Messages, Content Protection Bypass, Font Replacement, Fake Profile, Chat Export, Plugin System, and more. See CHANGELOG_12.5.md for full details.
2026-06-25 07:20:05 +02:00 · 2026-04-06 09:48:12 +03:00
commit 4647310322
39685 changed files with 11052678 additions and 0 deletions
@@ -0,0 +1,88 @@
+load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library")
+load(
+    "@build_bazel_rules_apple//apple:resources.bzl",
+    "apple_resource_bundle",
+    "apple_resource_group",
+)
+load("//build-system/bazel-utils:plist_fragment.bzl",
+    "plist_fragment",
+)
+
+# Every Metal shader source under Resources/, grouped so the resource bundle
+# rule below can reference them by a single label.
+filegroup(
+    name = "AnimationCompressionMetalResources",
+    srcs = glob([
+        "Resources/**/*.metal",
+    ]),
+    visibility = ["//visibility:public"],
+)
+
+# Info.plist fragment for the resource bundle: bundle identifier,
+# development region and bundle name.
+plist_fragment(
+    name = "AnimationCompressionBundleInfoPlist",
+    extension = "plist",
+    template =
+    """
+    <key>CFBundleIdentifier</key>
+    <string>org.telegram.AnimationCompression</string>
+    <key>CFBundleDevelopmentRegion</key>
+    <string>en</string>
+    <key>CFBundleName</key>
+    <string>AnimationCompression</string>
+    """
+)
+
+# Resource bundle that packages the Metal sources together with the
+# Info.plist fragment declared above.
+apple_resource_bundle(
+    name = "AnimationCompressionBundle",
+    infoplists = [
+        ":AnimationCompressionBundleInfoPlist",
+    ],
+    resources = [
+        ":AnimationCompressionMetalResources",
+    ],
+)
+
+# Swift module. Compiles with warnings treated as errors, carries the
+# resource bundle as runtime data and links against the DctHuffman
+# Objective-C/C++ target defined later in this file.
+swift_library(
+    name = "AnimationCompression",
+    module_name = "AnimationCompression",
+    srcs = glob([
+        "Sources/**/*.swift",
+    ]),
+    copts = [
+        "-warnings-as-errors",
+    ],
+    data = [
+        ":AnimationCompressionBundle",
+    ],
+    deps = [
+        ":DctHuffman",
+        "//submodules/Components/MetalImageView:MetalImageView",
+    ],
+    visibility = [
+        "//visibility:public",
+    ],
+)
+
+# Objective-C/C++ Huffman coder for DCT blocks. Headers under
+# DctHuffman/PublicHeaders are exported (and form the include root);
+# the srcs glob tolerates matching nothing (allow_empty).
+objc_library(
+    name = "DctHuffman",
+    enable_modules = True,
+    module_name = "DctHuffman",
+    srcs = glob([
+        "DctHuffman/Sources/**/*.m",
+        "DctHuffman/Sources/**/*.mm",
+        "DctHuffman/Sources/**/*.h",
+    ], allow_empty=True),
+    copts = [],
+    hdrs = glob([
+        "DctHuffman/PublicHeaders/**/*.h",
+    ]),
+    includes = [
+        "DctHuffman/PublicHeaders",
+    ],
+    deps = [
+    ],
+    sdk_frameworks = [
+        "Foundation",
+    ],
+    visibility = [
+        "//visibility:public",
+    ],
+)
@@ -0,0 +1,17 @@
+#ifndef DctHuffman_h
+#define DctHuffman_h
+
+#import <Foundation/Foundation.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Huffman-encodes a plane of DCT coefficients as consecutive 8x8 blocks
+/// (blocks are visited left to right, then top to bottom).
+/// @param width Number of coefficients per row of `coefficients`; stepped through in blocks of 8.
+/// @param height Number of rows; stepped through in blocks of 8.
+/// @param coefficients Row-major input plane; each value is rounded to the nearest integer before coding.
+/// @return The coded byte stream. Bits that do not fill a whole trailing byte are not written.
+/// NOTE(review): presumably width and height are expected to be multiples of 8 — confirm with callers.
+NSData * _Nullable writeDCTBlocks(int width, int height, float const * _Nonnull coefficients);
+/// Decodes a stream produced by writeDCTBlocks back into 8x8 blocks of coefficients.
+/// @param width Width of the coded plane, walked in blocks of 8.
+/// @param height Height of the coded plane, walked in blocks of 8.
+/// @param blockData The coded byte stream.
+/// @param coefficients Output plane; every decoded block is stored as a full 8x8 tile.
+/// @param elementsPerRow Row stride of `coefficients`, counted in floats.
+/// After decoding, the bottom-right 8x8 block is overwritten with zeros.
+void readDCTBlocks(int width, int height, NSData * _Nonnull blockData, float * _Nonnull coefficients, int elementsPerRow);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DctHuffman_h */
@@ -0,0 +1,630 @@
+#import <DctHuffman/DctHuffman.h>
+
+#include <functional>
+#include <vector>
+
+namespace DctHuffman {
+    // Byte sink used by the bit writer: called once for every output byte, in stream order.
+    typedef std::function<void(unsigned char)> WRITE_ONE_BYTE;
+}
+
+namespace
+{
+
+// File-private integer aliases. The names assume the usual 8/16/32-bit
+// widths of char/short/int.
+using uint8_t  = unsigned char;
+using uint16_t = unsigned short;
+using int16_t = short;
+using int32_t = int;
+
+// ZigZagInv[i] is the row-major index (within an 8x8 block) of the i-th
+// coefficient in zigzag scan order. The encoder uses it to read a row-major
+// block in zigzag order.
+const uint8_t ZigZagInv[8*8] = {
+    0, 1, 8,16, 9, 2, 3, 10,
+    17,24,32,25,18,11, 4, 5,
+    12,19,26,33,40,48,41,34,
+    27,20,13, 6, 7,14,21,28,
+    35,42,49,56,57,50,43,36,
+    29,22,15,23,30,37,44,51,
+    58,59,52,45,38,31,39,46,
+    53,60,61,54,47,55,62,63
+};
+
+// ZigZag[p] is the zigzag scan position of row-major index p, i.e. the
+// inverse permutation of ZigZagInv. The decoder uses it to place
+// zigzag-ordered coefficients back into row-major order.
+const uint8_t ZigZag[] = {
+    0, 1, 5, 6,14,15,27,28,
+    2, 4, 7,13,16,26,29,42,
+    3, 8,12,17,25,30,41,43,
+    9,11,18,24,31,40,44,53,
+    10,19,23,32,39,45,52,54,
+    20,22,33,38,46,51,55,60,
+    21,34,37,47,50,56,59,61,
+    35,36,48,49,57,58,62,63
+};
+
+// Table layout (as consumed by generateHuffmanTable):
+//   *CodesPerBitsize[k] = number of codes that are (k + 1) bits long
+//   *Values             = the symbols, listed in order of increasing code length
+// Only the luminance tables are referenced by the encoder/decoder in this file.
+//
+// Huffman definitions for first DC/AC tables (luminance / Y channel)
+const uint8_t DcLuminanceCodesPerBitsize[16]   = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };   // sum = 12
+const uint8_t DcLuminanceValues         [12]   = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes
+const uint8_t AcLuminanceCodesPerBitsize[16]   = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162
+const uint8_t AcLuminanceValues        [162]   =                                        // => 162 codes
+{ 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08, // 16*10+2 symbols because
+    0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28, // upper 4 bits can be 0..F
+    0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59, // while lower 4 bits can be 1..A
+    0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89, // plus two special codes 0x00 and 0xF0
+    0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6, // order of these symbols was determined empirically by JPEG committee
+    0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,
+    0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
+// Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels)
+const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };   // sum = 12
+const uint8_t DcChrominanceValues         [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes (identical to DcLuminanceValues)
+const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162
+const uint8_t AcChrominanceValues        [162] =                                        // => 162 codes
+{ 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, // same number of symbols, just different order
+    0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26, // (which is more efficient for AC coding)
+    0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,
+    0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
+    0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,
+    0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
+    0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
+// Size of one half of the value -> codeword lookup built in writeDCTBlocks;
+// entries are filled for magnitudes 1 .. CodeWordLimit - 1.
+const int16_t CodeWordLimit = 2048; // +/-2^11, maximum value after DCT
+
+// One Huffman codeword: the code bits (right-aligned) and how many of them count.
+struct BitCode {
+    uint16_t code;       // codeword bits; JPEG limits Huffman codes to 16 bits
+    uint8_t  numBits;    // how many low bits of `code` are meaningful
+
+    // Leaves both fields indeterminate; the owner must assign them (or zero
+    // the storage) before the value is read.
+    BitCode() = default;
+
+    BitCode(uint16_t bits, uint8_t bitCount)
+    : code(bits), numBits(bitCount) {}
+};
+
+// Packs variable-length Huffman codes into bytes and hands every finished
+// byte to a user-supplied callback.
+struct BitWriter {
+    // user-supplied callback that writes/stores one byte
+    DctHuffman::WRITE_ONE_BYTE output;
+    // initialize writer
+    explicit BitWriter(DctHuffman::WRITE_ONE_BYTE output_) : output(output_) {}
+
+    // bits that have been encoded but do not fill a whole byte yet
+    struct BitBuffer
+    {
+        int32_t data    = 0; // pending bits, right-aligned; at most 7 + 16 bits are ever live
+        uint8_t numBits = 0; // number of valid bits (the right-most bits)
+    } buffer;
+
+    // write Huffman bits stored in BitCode, keep excess bits in BitBuffer
+    BitWriter& operator<<(const BitCode& data)
+    {
+        // append the new bits to those bits leftover from previous call(s)
+        buffer.numBits += data.numBits;
+        buffer.data   <<= data.numBits;
+        buffer.data    |= data.code;
+
+        // write all "full" bytes
+        while (buffer.numBits >= 8)
+        {
+            // extract highest 8 bits
+            buffer.numBits -= 8;
+            auto oneByte = uint8_t(buffer.data >> buffer.numBits);
+            output(oneByte);
+
+            if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker)
+                output(0);         // therefore pad a zero to indicate "this one is data, not a marker"
+        }
+
+        // Drop the bits that were just written. Leaving them in place would let
+        // stale high bits pile up until the next left shift overflows the
+        // signed accumulator, which is undefined behaviour. The emitted bytes
+        // are unaffected because only the low `numBits` bits are ever read back.
+        buffer.data &= (int32_t(1) << buffer.numBits) - 1;
+        return *this;
+    }
+
+    // write all non-yet-written bits, fill gaps with 1s (that's a JPEG convention)
+    void flush()
+    {
+        // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111
+        // (up to seven padding bits stay buffered afterwards; nothing may be written after flush())
+        *this << BitCode(0x7F, 7);
+    }
+
+    // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output !
+    // write a single byte
+    BitWriter& operator<<(uint8_t oneByte)
+    {
+        output(oneByte);
+        return *this;
+    }
+
+    // write an array of bytes
+    template <typename T, int Size>
+    BitWriter& operator<<(T (&manyBytes)[Size])
+    {
+        for (auto c : manyBytes)
+            output(c);
+        return *this;
+    }
+
+    // start a new JFIF block
+    void addMarker(uint8_t id, uint16_t length)
+    {
+        output(0xFF); output(id);     // ID, always preceded by 0xFF
+        output(uint8_t(length >> 8)); // length of the block (big-endian, includes the 2 length bytes as well)
+        output(uint8_t(length & 0xFF));
+    }
+};
+
+// ////////////////////////////////////////
+// functions / templates
+
+// Smaller of the two arguments; `value` is returned when they compare equal
+// (comparable to std::min()).
+template <typename Number>
+Number minimum(Number value, Number maximum)
+{
+    if (value <= maximum)
+        return value;
+    return maximum;
+}
+
+// Limits `value` to the closed interval [minValue, maxValue].
+// The lower bound is tested first, so it wins if the bounds are crossed.
+template <typename Number, typename Limit>
+Number clamp(Number value, Limit minValue, Limit maxValue)
+{
+    Number limited = value;       // inside the interval: unchanged
+    if (value <= minValue)
+        limited = minValue;       // below (or at) the lower bound
+    else if (value >= maxValue)
+        limited = maxValue;       // above (or at) the upper bound
+    return limited;
+}
+
+// Huffman-encodes one 8x8 block of DCT coefficients.
+//   block64    coefficients in row-major order; block64[0] is the DC term
+//   lastDC     DC value produced for the previous block (DC is coded as a difference)
+//   huffmanDC  DC code table, indexed by the bit size of the DC difference
+//   huffmanAC  AC code table, indexed by (zero run << 4) | bit size
+//   codewords  value -> BitCode lookup, valid for indices -(CodeWordLimit-1) .. +(CodeWordLimit-1)
+// Returns the DC value a decoder reconstructs for this block; pass it as
+// lastDC when encoding the next block.
+//
+// Values outside the codable range are saturated instead of being used as
+// out-of-range table indices. In-range input is encoded exactly as before.
+int16_t encodeDCTBlock(BitWriter& writer, float block64[64], int16_t lastDC,
+                       const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords) {
+    // Round half away from zero, saturating to [-limit, +limit]; NaN becomes 0.
+    auto roundAndLimit = [](float value, int limit) -> int {
+        if (value != value)
+            return 0;
+        auto rounded = value + (value >= 0 ? +0.5f : -0.5f);
+        if (rounded >= float(limit))
+            return limit;
+        if (rounded <= -float(limit))
+            return -limit;
+        return int(rounded);
+    };
+
+    const int maxDC     = 32767;             // keeps the running DC inside the int16_t return type
+    const int maxDCDiff = CodeWordLimit - 1; // largest magnitude present in `codewords`
+    const int maxAC     = (1 << 10) - 1;     // AC size nibble goes up to 0xA = 10 bits
+
+    // encode DC (the first coefficient is the "average color" of the 8x8 block)
+    auto DC = roundAndLimit(block64[0], maxDC);
+
+    // round and zigzag the other 63 coefficients
+    auto posNonZero = 0; // last coefficient which is not zero (trailing zeros are encoded differently)
+    int16_t quantized[8*8];
+    for (auto i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed
+    {
+        quantized[i] = int16_t(roundAndLimit(block64[ZigZagInv[i]], maxAC));
+        if (quantized[i] != 0)
+            posNonZero = i;
+    }
+
+    // DC is sent as the difference to the previous block; keep it codable
+    auto diff = DC - lastDC;
+    if (diff > maxDCDiff)
+        diff = maxDCDiff;
+    else if (diff < -maxDCDiff)
+        diff = -maxDCDiff;
+
+    if (diff == 0)
+        writer << huffmanDC[0x00];   // same DC as before: a special short symbol
+    else
+    {
+        auto bits = codewords[diff];
+        writer << huffmanDC[bits.numBits] << bits;
+    }
+
+    // encode ACs (quantized[1..63])
+    auto offset = 0; // upper 4 bits count the number of consecutive zeros
+    for (auto i = 1; i <= posNonZero; i++) // trailing zeros are covered by the end-of-block code
+    {
+        // quantized[posNonZero] is non-zero, so this scan always stops inside the array
+        while (quantized[i] == 0)
+        {
+            offset    += 0x10; // add 1 to the upper 4 bits
+            // split into blocks of at most 16 consecutive zeros
+            if (offset > 0xF0) // the counter is in the upper 4 bits, 0xF = 15
+            {
+                writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros"
+                offset = 0;
+            }
+            i++;
+        }
+
+        auto encoded = codewords[quantized[i]];
+        // combine number of zeros with the number of bits of the next non-zero value
+        writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself
+        offset = 0;
+    }
+
+    // send end-of-block code (0x00), only needed if there are trailing zeros
+    if (posNonZero < 8*8 - 1) // = 63
+        writer << huffmanAC[0x00];
+
+    // lastDC + diff is what the decoder ends up with; it equals DC unless the
+    // difference had to be saturated.
+    return int16_t(lastDC + diff);
+}
+
+// Builds the symbol -> code lookup for one Huffman table instead of relying
+// on pre-generated constants.
+//   numCodes[k]  how many codes are (k + 1) bits long
+//   values       the symbols, in order of increasing code length
+//   result       written only at the indices listed in `values`
+void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256])
+{
+    auto nextCode = 0;
+    const uint8_t* symbol = values;
+
+    // code lengths run from 1 to 16 bits; no JPEG Huffman code may be longer
+    for (auto length = 1; length <= 16; length++)
+    {
+        const auto count = numCodes[length - 1]; // the array is zero-based, lengths start at 1
+        for (auto assigned = 0; assigned < count; assigned++)
+        {
+            result[*symbol] = BitCode(nextCode, length);
+            symbol++;
+            nextCode++;
+        }
+
+        // codes of the next length are one bit wider
+        nextCode <<= 1;
+    }
+}
+
+} // end of anonymous namespace
+
+// -------------------- externally visible code --------------------
+
+namespace DctHuffman {
+
+// Moves the next byte of the entropy-coded stream into the bit accumulator.
+//   bytes / readPosition      source stream and index of the next unread byte (advanced here)
+//   data / currentDataLength  accumulator; its low `currentDataLength` bits are the pending bits
+// Returns false when no further byte is available or an end-of-image marker
+// (0xFF 0xD9) was read, true otherwise.
+bool readMoreData(std::vector<uint8_t> const &bytes, int &readPosition, unsigned int &data, unsigned int &currentDataLength) {
+    unsigned char binaryData;
+
+    // Detect errors
+    if (currentDataLength > 24) { // Unsigned int can hold at most 32 = 24+8 bits
+        // No code matched the pending bits. Drop the oldest bit in the hope
+        // that decoding eventually finds a valid code again.
+        data = data - ((data >> (currentDataLength-1)) << (currentDataLength-1));
+        currentDataLength--;
+        return true;
+    }
+
+    // Every byte up to and including the last one is payload, so only stop
+    // once the position has actually moved past the end of the buffer.
+    if (readPosition < 0 || size_t(readPosition) >= bytes.size()) {
+        return false;
+    }
+    binaryData = bytes[readPosition];
+    readPosition++;
+
+    // The byte goes into the low 8 bits of `data`
+    if (binaryData == 0xFF) {
+        data = (data << 8) + binaryData;
+        currentDataLength += 8;
+
+        // 0xFF is always followed by a second byte that says what it means
+        if (size_t(readPosition) >= bytes.size()) {
+            return false;
+        }
+        binaryData = bytes[readPosition];
+        readPosition++;
+
+        // End of Image marker
+        if (binaryData == 0xd9) {
+            // Drop 0xFF from data
+            data = data >> 8;
+            currentDataLength -= 8;
+            return false;
+        }
+
+        // Restart marker means data goes blank
+        if (binaryData >= 0xd0 && binaryData <= 0xd7) {
+            data = 0;
+            currentDataLength = 0;
+        }
+
+        // 0xFF 0x00 is byte stuffing: the 0xFF is data and the 0x00 is skipped.
+        // Any other follower is kept as data as well.
+        else if (binaryData != 0) {
+            data = (data << 8) + binaryData;
+            currentDataLength += 8;
+        }
+    }
+    else {
+        data = (data << 8) + binaryData;
+        currentDataLength += 8;
+    }
+    return true;
+}
+
+// Decodes one 8x8 block of Huffman-coded coefficients.
+//   bytes / readPosition      source stream and index of the next unread byte
+//   dataBlock                 receives 64 ints in zigzag order (zeroed first)
+//   data / currentDataLength  bit accumulator shared across calls
+//   currentComponent          unused
+//   componentTablesDC / AC    symbol -> code tables (an entry with numBits == 0 is unused)
+//   previousDC                running DC value; updated with the decoded difference
+// Returns true when the stream ended before the block was complete,
+// false when a full block was decoded.
+//
+// If the stream ends in the middle of a coefficient the function returns
+// immediately instead of extracting bits that are not there.
+bool readHuffmanBlock(std::vector<uint8_t> const &bytes, int &readPosition, int *dataBlock, unsigned int &data, unsigned int &currentDataLength, int currentComponent, BitCode const *componentTablesDC, BitCode const *componentTablesAC, int &previousDC) {
+    // Kind of symbol expected next; every block starts with its DC term
+    enum { AC, DC } ACDC = DC;
+
+    // How many AC elements should we read?
+    int ACcount = 64 - 1;
+
+    int m = 0; // Index into dataBlock
+
+    // Fill block with zeros
+    memset ((char*)dataBlock, 0, sizeof(int)*64);
+
+    bool endOfFile = false;
+
+    // Main loop: each pass tries to match codes, then pulls one more byte
+    do {
+        // Wait until at least 3 bits are buffered before matching codes
+        if (currentDataLength<3) {
+            continue;
+        }
+
+        // Current Huffman table
+        BitCode const *htable = componentTablesDC;
+        if (ACDC == AC) {
+            htable = componentTablesAC;
+        }
+
+        // Any of the 256 symbols may have a code, so all of them are tried
+        for (int i = 0; i < 256; i++) {
+            // numBits == 0 marks a symbol without a Huffman code
+            if (htable[i].numBits == 0) {
+                continue;
+            }
+
+            // Compare the first n pending bits with the n-bit code of symbol i
+            uint n = htable[i].numBits;
+
+            if (currentDataLength < n) {
+                continue;
+            }
+
+            if (htable[i].code == data >> (currentDataLength - n)) {
+                // Remove first n bits from data
+                currentDataLength -= n;
+                data = data - (htable[i].code << currentDataLength);
+
+                // Reading of DC coefficients
+                if (ACDC == DC) {
+                    unsigned char bitLength = i; // Next i bits represent DC coefficient value
+
+                    // Do we need to read more bits of data?
+                    while (currentDataLength<bitLength) {
+                        if (!readMoreData(bytes, readPosition, data, currentDataLength)) {
+                            endOfFile = true;
+                            break;
+                        }
+                    }
+
+                    // Stream ended inside the value: the shift below would use a
+                    // wrapped-around (huge) amount, so stop here.
+                    if (currentDataLength < bitLength) {
+                        return true;
+                    }
+
+                    // Read out DC coefficient
+                    int DCCoeficient = data >> (currentDataLength-bitLength);
+                    currentDataLength -= bitLength;
+                    data = data - (DCCoeficient << currentDataLength);
+
+                    // A leading 0 bit marks a negative value: subtract 2^bitLength - 1
+                    if ( bitLength != 0 && (DCCoeficient>>(bitLength-1)) == 0 ) {
+                        DCCoeficient = DCCoeficient - (2 << (bitLength-1)) + 1;
+                    }
+
+                    previousDC = DCCoeficient + previousDC;
+                    dataBlock[m] = previousDC;
+
+                    m++;
+
+                    // No AC coefficients required?
+                    if (ACcount == 0) {
+                        return endOfFile;
+                    }
+
+                    // The DC term is done, AC terms follow
+                    ACDC = AC;
+                    if (currentDataLength < 3) // Too few bits left: leave the for loop to read a new byte
+                        break;
+                    i = -1; // Restart the symbol scan (the loop increment makes this 0)
+                    htable = componentTablesAC;
+                } else {
+                    // Reading of AC coefficients
+                    unsigned char ACElement=i;
+
+                    /* Every AC symbol is a byte RRRRSSSS. RRRR is the number of zeros between the
+                     previous non-zero element and this one, SSSS is the bit size of this element.
+                     Two special values:
+                     00 is END OF BLOCK (all remaining AC elements are zeros)
+                     F0 is 16 zeroes */
+
+                    if (ACElement == 0x00) {
+                        return endOfFile;
+                    }
+
+                    else if (ACElement == 0xF0) {
+                        for (int k=0;k<16;k++) {
+                            dataBlock[m] = 0;
+                            m++;
+                            if (m >= ACcount+1) {
+                                // 16 zeros requested but the block is already full
+                                return endOfFile;
+                            }
+                        }
+                    }
+                    else {
+                        // Split the symbol into its two nibbles
+                        unsigned char Rbits = ACElement >> 4;
+                        unsigned char Sbits = ACElement & 0x0F;
+
+                        // Before our element there are Rbits zero elements
+                        for (int k=0; k<Rbits; k++) {
+                            if (m >= ACcount) {
+                                // More leading zeros requested than fit in the block;
+                                // continuing would only produce more errors
+                                return endOfFile;
+                            }
+                            dataBlock[m] = 0;
+                            m++;
+                        }
+
+                        // Do we need to read more bits of data?
+                        while (currentDataLength<Sbits) {
+                            if (!readMoreData(bytes, readPosition, data, currentDataLength)) {
+                                endOfFile = true;
+                                break;
+                            }
+                        }
+
+                        // Stream ended inside the value: see the DC case above
+                        if (currentDataLength < Sbits) {
+                            return true;
+                        }
+
+                        // Read out AC coefficient
+                        int ACCoeficient = data >> (currentDataLength-Sbits);
+                        currentDataLength -= Sbits;
+                        data = data - (ACCoeficient<<currentDataLength);
+
+                        // A leading 0 bit marks a negative value: subtract 2^Sbits - 1
+                        if ( Sbits != 0 && (ACCoeficient>>(Sbits-1)) == 0 ) {
+                            ACCoeficient = ACCoeficient - (2 << (Sbits-1)) + 1;
+                        }
+                        dataBlock[m] = ACCoeficient;
+                        m++;
+                    }
+
+                    // End of block
+                    if (m >= ACcount+1)
+                        return endOfFile;
+
+                    if (currentDataLength<3) // Too few bits left: leave the for loop to read a new byte
+                        break;
+                    i = -1; // Restart the symbol scan (the loop increment makes this 0)
+                }
+
+            }
+        }
+    } while(readMoreData(bytes, readPosition, data, currentDataLength));
+
+    endOfFile = true; // We reached an end
+    return endOfFile;
+}
+
+// Encodes a width x height plane of DCT coefficients as a sequence of
+// Huffman-coded 8x8 blocks (left to right, then top to bottom), using the
+// luminance tables.
+//   width, height  plane size in coefficients
+//   coefficients   row-major plane with `width` floats per row
+// Returns the coded bytes; the result is empty when either dimension is not
+// positive. Edge blocks that extend past the plane are padded with zeros
+// instead of reading outside `coefficients`.
+NSData * _Nullable writeDCTBlocks(int width, int height, float const *coefficients) {
+    if (width <= 0 || height <= 0) {
+        return [[NSMutableData alloc] init];
+    }
+
+    NSMutableData *result = [[NSMutableData alloc] initWithCapacity:(NSUInteger)width * 4 * (NSUInteger)height];
+    BitWriter bitWriter([result](unsigned char byte) {
+        [result appendBytes:&byte length:1];
+    });
+
+    // value -> codeword lookup; zero-initialised so the slots that are never
+    // assigned (index 0 and -CodeWordLimit) hold a defined value
+    BitCode  codewordsArray[2 * CodeWordLimit] = {};     // note: value v is found at codewordsArray[v + CodeWordLimit]
+    BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so value v is at codewords[v]
+    uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined)
+    int32_t mask    = 1; // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1
+    for (int16_t value = 1; value < CodeWordLimit; value++)
+    {
+        // numBits = position of highest set bit (ignoring the sign)
+        // mask    = (2^numBits) - 1
+        if (value > mask) // one more bit ?
+        {
+            numBits++;
+            mask = (mask << 1) | 1; // append a set bit
+        }
+        codewords[-value] = BitCode(mask - value, numBits); // negative index => codewordsArray[CodeWordLimit - value]
+        codewords[+value] = BitCode(       value, numBits);
+    }
+
+    BitCode huffmanLuminanceDC[256];
+    BitCode huffmanLuminanceAC[256];
+    memset(huffmanLuminanceDC, 0, sizeof(BitCode) * 256);
+    memset(huffmanLuminanceAC, 0, sizeof(BitCode) * 256);
+    generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
+    generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);
+
+    int16_t lastYDC = 0;
+    float Y[8 * 8];
+
+    for (auto blockY = 0; blockY < height; blockY += 8) {
+        for (auto blockX = 0; blockX < width; blockX += 8) {
+            // gather one 8x8 tile; samples beyond the plane count as 0
+            for (auto y = 0; y < 8; y++)  {
+                for (auto x = 0; x < 8; x++)  {
+                    const auto row = blockY + y;
+                    const auto column = blockX + x;
+                    const bool inside = row < height && column < width;
+                    Y[y * 8 + x] = inside ? coefficients[(size_t)row * (size_t)width + (size_t)column] : 0.0f;
+                }
+            }
+
+            lastYDC = encodeDCTBlock(bitWriter, Y, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords);
+        }
+    }
+
+    // The writer is not flushed, so bits that do not fill a whole trailing
+    // byte are left out of the result.
+    //bitWriter.flush();
+
+    return result;
+}
+
+} // namespace DctHuffman
+
+// C-linkage entry point declared in DctHuffman.h; forwards to the C++
+// implementation in namespace DctHuffman.
+// (A disabled round-trip self-check used to sit here: it decoded the result
+// block by block with readHuffmanBlock, undid the zigzag order and printed
+// "fail" for every coefficient that differed from the input.)
+extern "C"
+NSData * _Nullable writeDCTBlocks(int width, int height, float const *coefficients) {
+    return DctHuffman::writeDCTBlocks(width, height, coefficients);
+}
+
+// C-linkage entry point declared in DctHuffman.h. Decodes a stream produced
+// by writeDCTBlocks into `coefficients`.
+//   width, height   size of the coded plane; blocks are visited in steps of 8
+//   blockData       the coded bytes
+//   coefficients    output plane; every block is stored as a full 8x8 tile
+//   elementsPerRow  row stride of `coefficients`, in floats
+// Nothing is written when either dimension is not positive.
+extern "C"
+void readDCTBlocks(int width, int height, NSData * _Nonnull blockData, float *coefficients, int elementsPerRow) {
+    if (width <= 0 || height <= 0) {
+        return;
+    }
+
+    std::vector<uint8_t> bytes((uint8_t *)blockData.bytes, ((uint8_t *)blockData.bytes) + blockData.length);
+    int readPosition = 0;
+
+    int Yzig[8 * 8];
+    int previousDC = 0;
+
+    unsigned int data = 0;
+    unsigned int currentDataLength = 0;
+
+    BitCode huffmanLuminanceDC[256];
+    BitCode huffmanLuminanceAC[256];
+    memset(huffmanLuminanceDC, 0, sizeof(BitCode) * 256);
+    memset(huffmanLuminanceAC, 0, sizeof(BitCode) * 256);
+    generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
+    generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);
+
+    for (auto blockY = 0; blockY < height; blockY += 8) {
+        for (auto blockX = 0; blockX < width; blockX += 8) {
+            DctHuffman::readHuffmanBlock(bytes, readPosition, Yzig, data, currentDataLength, 0, huffmanLuminanceDC, huffmanLuminanceAC, previousDC);
+            // the decoder delivers zigzag order; store the tile row-major
+            for (int i = 0; i < 64; i++) {
+                coefficients[(blockY + (i / 8)) * elementsPerRow + blockX + (i % 8)] = Yzig[ZigZag[i]];
+            }
+        }
+    }
+
+    // Clear the last decoded block (bottom-right). Its origin is computed from
+    // the block grid, which equals (height - 8, width - 8) for sizes that are
+    // multiples of 8 and can never be negative.
+    // NOTE(review): presumably needed because the encoder does not flush its
+    // final partial byte, leaving the last block incomplete — confirm.
+    const int lastBlockY = ((height - 1) / 8) * 8;
+    const int lastBlockX = ((width - 1) / 8) * 8;
+    for (int i = 0; i < 64; i++) {
+        coefficients[(lastBlockY + (i / 8)) * elementsPerRow + lastBlockX + (i % 8)] = 0.0f;
+    }
+}
@@ -0,0 +1,485 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+/// Converts an RGBA color into (Y, U, V, A) order.
+/// Each of Y, U and V is a weighted sum of R, G and B plus a fixed offset
+/// (16/256 for Y, 128/256 for U and V); alpha is passed through unchanged.
+half4 yuva(half4 rgba) {
+    const half luma = (0.257f * rgba.r) + (0.504 * rgba.g) + (0.098 * rgba.b) + (16.0f / 256.0f);
+    const half chromaU = -(0.148 * rgba.r) - (0.291 * rgba.g) + (0.439 * rgba.b) + (128.0f / 256.0f);
+    const half chromaV = (0.439 * rgba.r) - (0.368 * rgba.g) - (0.071 * rgba.b) + (128.0f / 256.0f);
+
+    return half4(luma, chromaU, chromaV, rgba.a);
+}
+
+/// Converts a (Y, U, V, A) color, as produced by `yuva`, back to RGBA.
+/// The Y offset (16/256) and the chroma offsets (128/256) are removed first,
+/// then the three channels are recombined; alpha is passed through unchanged.
+half4 rgb(half4 yuva) {
+    const half luma = yuva.r - 16.0f / 256.0f;
+    const half chromaU = yuva.g - 128.0f / 256.0f;
+    const half chromaV = yuva.b - 128.0f / 256.0f;
+
+    const half red = 1.164 * luma + 1.596 * chromaV;
+    const half green = 1.164 * luma - 0.813 * chromaV - 0.391 * chromaU;
+    const half blue = 1.164 * luma + 2.018 * chromaU;
+
+    return half4(red, green, blue, yuva.a);
+}
+
+/// One corner of the full-screen quad: a position and the texture coordinate sampled there.
+struct Vertex {
+    vector_float2 position;
+    vector_float2 textureCoordinate;
+};
+
+// Six vertices forming two triangles that together cover a square.
+// Positions are given in the range [0, 2]; vertexShader subtracts 1 from each axis.
+// Texture coordinates are flipped vertically relative to position (y = 0 maps to v = 1).
+constant Vertex quadVertices[6] = {
+    {{ 2.0, 0.0 }, { 1.0, 1.0 }},
+    {{ 0.0, 0.0 }, { 0.0, 1.0 }},
+    {{ 0.0, 2.0 }, { 0.0, 0.0 }},
+    {{ 2.0, 0.0 }, { 1.0, 1.0 }},
+    {{ 0.0, 2.0 }, { 0.0, 0.0 }},
+    {{ 2.0, 2.0 }, { 1.0, 0.0 }}
+};
+
+// Output of vertexShader and interpolated input of the fragment shaders in this file.
+struct RasterizerData {
+    // Clip-space position consumed by the rasterizer.
+    float4 clipSpacePosition [[position]];
+    // Texture coordinate used by the fragment shaders for sampling.
+    float2 textureCoordinate;
+};
+
+/// Emits one vertex of the quad described by `quadVertices`.
+/// The table position is shifted by -1 on both axes to obtain the clip-space x/y;
+/// z is fixed at 0 and w at 1. The texture coordinate is forwarded as-is.
+vertex RasterizerData vertexShader(
+    uint vid [[vertex_id]]
+) {
+    constant Vertex &source = quadVertices[vid];
+
+    float2 shifted = source.position.xy;
+    shifted -= float2(1.0f, 1.0f);
+
+    RasterizerData out;
+    out.clipSpacePosition = float4(shifted.x, shifted.y, 0.0f, 1.0f);
+    out.textureCoordinate = source.textureCoordinate;
+    return out;
+}
+
+/// Samples four single-channel planes (textures 0-3, read as Y, U, V and alpha in that order),
+/// converts them to RGB via `rgb` and returns the color with RGB premultiplied by alpha.
+fragment float4 samplingIdctShader(
+    RasterizerData in [[stage_in]],
+    texture2d<half, access::sample> colorTexture0 [[texture(0)]],
+    texture2d<half, access::sample> colorTexture1 [[texture(1)]],
+    texture2d<half, access::sample> colorTexture2 [[texture(2)]],
+    texture2d<half, access::sample> colorTexture3 [[texture(3)]]
+) {
+    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);
+    const float2 coordinate = in.textureCoordinate;
+
+    // Only the red channel of each plane carries data.
+    const half4 planes = half4(
+        colorTexture0.sample(textureSampler, coordinate).r,
+        colorTexture1.sample(textureSampler, coordinate).r,
+        colorTexture2.sample(textureSampler, coordinate).r,
+        colorTexture3.sample(textureSampler, coordinate).r
+    );
+
+    const half4 color = rgb(planes);
+    const half3 premultiplied = color.rgb * color.a;
+
+    return float4(premultiplied.r, premultiplied.g, premultiplied.b, color.a);
+}
+
+/// Samples an RGBA texture and returns the color with RGB premultiplied by its alpha.
+fragment float4 samplingRgbShader(
+    RasterizerData in [[stage_in]],
+    texture2d<half, access::sample> colorTexture [[texture(0)]]
+) {
+    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);
+
+    const half4 sampled = colorTexture.sample(textureSampler, in.textureCoordinate);
+    const half3 premultiplied = sampled.rgb * sampled.a;
+
+    return float4(premultiplied.r, premultiplied.g, premultiplied.b, sampled.a);
+}
+
+/// Samples a luma plane and a two-channel chroma plane at `texcoord` and converts the
+/// result to an opaque RGB color (alpha is always 1.0).
+half4 samplePoint(texture2d<half, access::sample> textureY, texture2d<half, access::sample> textureCbCr, sampler s, float2 texcoord) {
+    const half y = textureY.sample(s, texcoord).r;
+    // Chroma is stored biased by 0.5; remove the bias so it is centred on zero.
+    const half2 uv = textureCbCr.sample(s, texcoord).rg - half2(0.5, 0.5);
+
+    // Conversion for YUV to rgb from http://www.fourcc.org/fccyvrgb.php
+    return half4(y + 1.403 * uv.y, y - 0.344 * uv.x - 0.714 * uv.y, y + 1.770 * uv.x, 1.0);
+}
+
+/// Samples a Y plane and a CbCr plane for color and reads a packed 4-bit alpha plane,
+/// returning the color with RGB premultiplied by the unpacked alpha.
+///
+/// `alphaSize` is the alpha resolution in pixels. Each texel of `alphaTexture` holds two
+/// horizontally adjacent alpha values: the high nibble for even x, the low nibble for odd x.
+fragment float4 samplingYuvaShader(
+    RasterizerData in [[stage_in]],
+    texture2d<half, access::sample> yTexture [[texture(0)]],
+    texture2d<half, access::sample> cbcrTexture [[texture(1)]],
+    texture2d<uint, access::read> alphaTexture [[texture(2)]],
+    constant uint2 &alphaSize [[buffer(3)]]
+) {
+    constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);
+
+    half4 color = samplePoint(yTexture, cbcrTexture, textureSampler, in.textureCoordinate);
+
+    // Integer alpha pixel addressed by this fragment.
+    const int alphaX = (int)(in.textureCoordinate.x * alphaSize.x);
+    const int alphaY = (int)(in.textureCoordinate.y * alphaSize.y);
+
+    const uint32_t packedAlpha = alphaTexture.read(uint2(alphaX / 2, alphaY)).r;
+    const uint32_t highNibble = (packedAlpha & (0xf0U));
+    const uint32_t lowNibble = (packedAlpha & (0x0fU)) << 4;
+
+    // Duplicate each nibble into both halves of a byte so 0xF expands to 0xFF.
+    const uint32_t evenAlpha = (highNibble >> 4) | highNibble;
+    const uint32_t oddAlpha = (lowNibble >> 4) | lowNibble;
+
+    const bool isEvenColumn = (alphaX % 2 == 0);
+    const uint32_t resolvedAlpha = isEvenColumn ? evenAlpha : oddAlpha;
+
+    const float alpha = resolvedAlpha / 255.0f;
+
+    color.r *= alpha;
+    color.g *= alpha;
+    color.b *= alpha;
+    color.a = alpha;
+
+    return float4(color);
+}
+
+// Edge length of one transform block, its element count, and log2 of the edge length.
+#define BLOCK_SIZE 8
+// Parenthesized so the product stays a single operand wherever the macro is expanded
+// (e.g. `x / BLOCK_SIZE2` must divide by 64, not compute `x / 8 * 8`).
+#define BLOCK_SIZE2 (BLOCK_SIZE * BLOCK_SIZE)
+#define BLOCK_SIZE_LOG2 3
+
+// Quality parameters handed to adjustQ for the chroma, luma and alpha planes.
+#define chromaQp 60
+#define lumaQp 70
+#define alphaQp 60
+
+// 8x8 transform matrix used by DCT and IDCT, stored row-major (index = row * 8 + column).
+// The first column is constant (1/sqrt(8)); entries match c(k) * cos((2n + 1) * k * pi / 16)
+// with n = row, k = column, c(0) = 1/sqrt(8) and c(k) = 1/2 otherwise.
+constant float DCTv8matrix[] = {
+    0.3535533905932738f,  0.4903926402016152f,  0.4619397662556434f,  0.4157348061512726f,  0.3535533905932738f,  0.2777851165098011f,  0.1913417161825449f,  0.0975451610080642f,
+    0.3535533905932738f,  0.4157348061512726f,  0.1913417161825449f, -0.0975451610080641f, -0.3535533905932737f, -0.4903926402016152f, -0.4619397662556434f, -0.2777851165098011f,
+    0.3535533905932738f,  0.2777851165098011f, -0.1913417161825449f, -0.4903926402016152f, -0.3535533905932738f,  0.0975451610080642f,  0.4619397662556433f,  0.4157348061512727f,
+    0.3535533905932738f,  0.0975451610080642f, -0.4619397662556434f, -0.2777851165098011f,  0.3535533905932737f,  0.4157348061512727f, -0.1913417161825450f, -0.4903926402016153f,
+    0.3535533905932738f, -0.0975451610080641f, -0.4619397662556434f,  0.2777851165098009f,  0.3535533905932738f, -0.4157348061512726f, -0.1913417161825453f,  0.4903926402016152f,
+    0.3535533905932738f, -0.2777851165098010f, -0.1913417161825452f,  0.4903926402016153f, -0.3535533905932733f, -0.0975451610080649f,  0.4619397662556437f, -0.4157348061512720f,
+    0.3535533905932738f, -0.4157348061512727f,  0.1913417161825450f,  0.0975451610080640f, -0.3535533905932736f,  0.4903926402016152f, -0.4619397662556435f,  0.2777851165098022f,
+    0.3535533905932738f, -0.4903926402016152f,  0.4619397662556433f, -0.4157348061512721f,  0.3535533905932733f, -0.2777851165098008f,  0.1913417161825431f, -0.0975451610080625f
+};
+
+// Base quantization step for each of the 64 coefficients of a non-chroma block,
+// row-major; adjustQ scales these by the quality parameter.
+// The values coincide with the example luminance table of the JPEG specification (Annex K).
+constant float baseQLuma[BLOCK_SIZE2] = {
+    16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f,
+    12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f,
+    14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f,
+    14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f,
+    18.0f, 22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 77.0f,
+    24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f,
+    49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f,
+    72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f
+};
+
+// Base quantization step for each of the 64 coefficients of a chroma block,
+// row-major; adjustQ scales these by the quality parameter.
+// The values coincide with the example chrominance table of the JPEG specification (Annex K).
+constant float baseQChroma[BLOCK_SIZE2] = {
+    17, 18, 24, 47, 99, 99, 99, 99,
+    18, 21, 26, 66, 99, 99, 99, 99,
+    24, 26, 56, 99, 99, 99, 99, 99,
+    47, 66, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99,
+    99, 99, 99, 99, 99, 99, 99, 99
+};
+
+/// Returns the quantization step for coefficient `index` at quality parameter `qp`.
+///
+/// The base step comes from `baseQChroma` when `isChroma` is true and from `baseQLuma`
+/// otherwise. It is scaled by `5000 / qp` for `qp < 50` and by `200 - 2 * qp` for larger
+/// values, then rounded as `floor(scale * base + 50) / 100`.
+float adjustQ(int qp, int index, bool isChroma) {
+    const float baseValue = isChroma ? baseQChroma[index] : baseQLuma[index];
+
+    float scale;
+    if (qp >= 50) {
+        scale = 200.0 - (2.0 * (float)qp);
+    } else {
+        scale = 5000.0f / (float)qp;
+    }
+
+    return floor(scale * baseValue + 50.0f) / 100.0f;
+}
+
+/// Converts `inColorRgb` with `yuva` and stores the channel selected by `colorPlane`
+/// (0, 1, 2 pick the first three channels; any other value picks alpha) into `block`
+/// at the row-major slot for `blockPosition`.
+void copyTextureBlockIn(
+    half4 inColorRgb,
+    int colorPlane,
+    uint2 blockPosition,
+    threadgroup float *block
+) {
+    const half4 converted = yuva(inColorRgb);
+
+    half selected;
+    switch (colorPlane) {
+        case 0:
+            selected = converted.r;
+            break;
+        case 1:
+            selected = converted.g;
+            break;
+        case 2:
+            selected = converted.b;
+            break;
+        default:
+            selected = converted.a;
+            break;
+    }
+
+    block[(blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x] = selected;
+}
+
+/// Reads the red channel of `texture` at `pixelPosition`, multiplies it by the quantization
+/// step returned by `adjustQ` for this slot, and stores the product into `block` at the
+/// row-major slot for `blockPosition`.
+void copyTextureBlockInDequantize(
+    texture2d<half, access::read> texture,
+    uint2 pixelPosition,
+    uint2 blockPosition,
+    threadgroup float *block,
+    int qp,
+    bool isChroma
+) {
+    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
+
+    const half quantized = (half)texture.read(pixelPosition).r;
+    const float step = adjustQ(qp, slot, isChroma);
+
+    block[slot] = quantized * step;
+}
+
+/// Writes the `block` value for `blockPosition` to `texture` at `pixelPosition`,
+/// replicated into the R, G and B channels with alpha set to 1.0.
+void copyTextureBlockOut(
+    uint2 pixelPosition,
+    uint2 blockPosition,
+    threadgroup float *block,
+    texture2d<half, access::write> texture
+) {
+    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
+    const half value = block[slot];
+
+    texture.write(half4(value, value, value, 1.0), pixelPosition);
+}
+
+/// Writes the `block` value for `blockPosition`, converted to half, to `texture`
+/// at `pixelPosition`.
+void copyTextureBlockOutFloat(
+    uint2 pixelPosition,
+    uint2 blockPosition,
+    threadgroup float *block,
+    texture2d<half, access::write> texture
+) {
+    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
+    const half value = block[slot];
+
+    texture.write(half(value), pixelPosition);
+}
+
+/// Copies this thread's element from `blockIn` to the same slot of `blockOut`.
+/// Despite the name, source and destination indices are identical, so no permutation
+/// is applied here.
+void reorderBlockZigzag(threadgroup float *blockIn, threadgroup float *blockOut, uint2 blockPosition) {
+    const int slot = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
+    blockOut[slot] = blockIn[slot];
+}
+
+/// Forward 8x8 transform of the block held in `CurBlockLocal1`, one output element per thread.
+///
+/// Pass 1 maps each input value through `v * 255 - 128`, multiplies the block by the
+/// transpose of `DCTv8matrix` along the column direction and stores the intermediate result
+/// in `CurBlockLocal2`. After a threadgroup barrier, pass 2 multiplies the intermediate rows
+/// by `DCTv8matrix` and writes the final coefficient back into `CurBlockLocal1`.
+/// Callers must synchronize before the first read of `CurBlockLocal1` and after return.
+void DCT(
+    uint2 blockPosition,
+    threadgroup float *CurBlockLocal1,
+    threadgroup float *CurBlockLocal2
+) {
+    const int column = blockPosition.x;
+    const int row = blockPosition.y;
+    const int slot = (row << BLOCK_SIZE_LOG2) + column;
+
+    // Pass 1: walk down column `row` of the matrix and column `column` of the input block.
+    float accumulator = 0;
+#pragma unroll
+    for (int i = 0; i < BLOCK_SIZE; i++)
+    {
+        accumulator += DCTv8matrix[i * BLOCK_SIZE + row] * (CurBlockLocal1[i * BLOCK_SIZE + column] * 255.0f - 128.0f);
+    }
+
+    CurBlockLocal2[slot] = accumulator;
+
+    // Every thread must finish pass 1 before any thread reads the intermediate block.
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+
+    // Pass 2: walk along row `row` of the intermediate block and down column `column` of the matrix.
+    accumulator = 0;
+#pragma unroll
+    for (int i = 0; i < BLOCK_SIZE; i++)
+    {
+        accumulator += CurBlockLocal2[(row << BLOCK_SIZE_LOG2) + i] * DCTv8matrix[i * BLOCK_SIZE + column];
+    }
+
+    CurBlockLocal1[slot] = accumulator;
+}
+
+/// Inverse 8x8 transform of the coefficients held in `CurBlockLocal1`, one output element
+/// per thread.
+///
+/// Pass 1 multiplies `DCTv8matrix` by the coefficient block and stores the intermediate
+/// result in `CurBlockLocal2`. After a threadgroup barrier, pass 2 multiplies the intermediate
+/// rows by the rows of `DCTv8matrix` and writes `(value + 128) / 255` back into
+/// `CurBlockLocal1`, undoing the input mapping applied by `DCT`.
+/// Callers must synchronize before the first read of `CurBlockLocal1` and after return.
+void IDCT(
+    uint2 blockPosition,
+    threadgroup float *CurBlockLocal1,
+    threadgroup float *CurBlockLocal2
+) {
+    const int column = blockPosition.x;
+    const int row = blockPosition.y;
+    const int slot = (row << BLOCK_SIZE_LOG2) + column;
+
+    // Pass 1: walk along row `row` of the matrix and down column `column` of the coefficients.
+    float accumulator = 0;
+#pragma unroll
+    for (int i = 0; i < BLOCK_SIZE; i++)
+    {
+        accumulator += DCTv8matrix[(row << BLOCK_SIZE_LOG2) + i] * CurBlockLocal1[i * BLOCK_SIZE + column];
+    }
+
+    CurBlockLocal2[slot] = accumulator;
+
+    // Every thread must finish pass 1 before any thread reads the intermediate block.
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+
+    // Pass 2: walk along row `row` of the intermediate block and row `column` of the matrix.
+    accumulator = 0;
+#pragma unroll
+    for (int i = 0; i < BLOCK_SIZE; i++)
+    {
+        accumulator += CurBlockLocal2[(row << BLOCK_SIZE_LOG2) + i] * DCTv8matrix[(column << BLOCK_SIZE_LOG2) + i];
+    }
+
+    CurBlockLocal1[slot] = (accumulator + 128.0f) / 255.0f;
+}
+
+/// Divides `sourceBlock[index]` by the quantization step for that slot, rounds to the
+/// nearest integer value and stores the result in `destinationBlock[index]`.
+void quantize(
+    int qp,
+    threadgroup float *sourceBlock,
+    threadgroup float *destinationBlock,
+    int index,
+    bool isChroma
+) {
+    const float step = adjustQ(qp, index, isChroma);
+    const float coefficient = sourceBlock[index];
+
+    destinationBlock[index] = round(coefficient / step);
+}
+
+/// Multiplies `sourceBlock[index]` by the quantization step for that slot and stores the
+/// product in `destinationBlock[index]`; the inverse scaling of `quantize`.
+void dequantize(
+    int qp,
+    threadgroup float *sourceBlock,
+    threadgroup float *destinationBlock,
+    int index,
+    bool isChroma
+) {
+    const float step = adjustQ(qp, index, isChroma);
+    const float level = sourceBlock[index];
+
+    destinationBlock[index] = level * step;
+}
+
+/// Encodes one color plane of `inTexture` into quantized transform coefficients.
+///
+/// Each 8x8 threadgroup handles one block and each thread one element of it.
+/// `colorPlane` selects the channel after conversion by `yuva`: 0 = Y, 1 = U, 2 = V,
+/// anything else = alpha. Planes 1 and 2 are treated as chroma: they use `chromaQp`, the
+/// chroma quantization table, and are produced at half resolution by averaging each 2x2
+/// group of source pixels. Plane 3 uses `alphaQp`; every other plane uses `lumaQp`.
+///
+/// NOTE(review): assumes the dispatch uses 8x8 threadgroups, since both scratch blocks hold
+/// BLOCK_SIZE2 elements — confirm at the call site.
+kernel void dctKernel(
+    texture2d<half, access::read> inTexture [[texture(0)]],
+    texture2d<half, access::write> outTexture [[texture(1)]],
+    uint2 pixelPosition [[thread_position_in_grid]],
+    uint2 blockPosition [[thread_position_in_threadgroup]],
+    constant int &colorPlane [[buffer(2)]]
+) {
+    // Two scratch blocks that the stages below ping-pong between.
+    threadgroup float CurBlockLocal1[BLOCK_SIZE2];
+    threadgroup float CurBlockLocal2[BLOCK_SIZE2];
+    
+    half4 rgbPixelIn;
+    int imageQp;
+    bool isChroma = false;
+    if (colorPlane == 1 || colorPlane == 2) {
+        imageQp = chromaQp;
+        isChroma = true;
+        
+        // The chroma planes are half-size, so one output pixel covers a 2x2 source group.
+        half4 rgbPixelIn0 = inTexture.read(uint2(pixelPosition.x * 2, pixelPosition.y * 2));
+        half4 rgbPixelNextX = inTexture.read(uint2(pixelPosition.x * 2 + 1, pixelPosition.y * 2));
+        half4 rgbPixelNextY = inTexture.read(uint2(pixelPosition.x * 2, pixelPosition.y * 2 + 1));
+        half4 rgbPixelNextXY = inTexture.read(uint2(pixelPosition.x * 2 + 1, pixelPosition.y * 2 + 1));
+        
+        // Average each row pair first, then the two row averages, so that every source
+        // pixel contributes exactly 1/4. Chaining three mixes in sequence would weight
+        // the pixels 1/8, 1/8, 1/4 and 1/2 instead.
+        half4 rgbPixelTopRow = mix(rgbPixelIn0, rgbPixelNextX, 0.5);
+        half4 rgbPixelBottomRow = mix(rgbPixelNextY, rgbPixelNextXY, 0.5);
+        rgbPixelIn = mix(rgbPixelTopRow, rgbPixelBottomRow, 0.5);
+    } else {
+        if (colorPlane == 3) {
+            imageQp = alphaQp;
+        } else {
+            imageQp = lumaQp;
+        }
+        
+        rgbPixelIn = inTexture.read(pixelPosition);
+    }
+    
+    copyTextureBlockIn(rgbPixelIn, colorPlane, blockPosition, CurBlockLocal1);
+    
+    // The whole block must be loaded before DCT reads other threads' elements.
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    
+    DCT(
+        blockPosition,
+        CurBlockLocal1,
+        CurBlockLocal2
+    );
+    
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    
+    // Quantize CurBlockLocal1 into CurBlockLocal2, one element per thread.
+    int index = (blockPosition.y << BLOCK_SIZE_LOG2) + blockPosition.x;
+    quantize(imageQp, CurBlockLocal1, CurBlockLocal2, index, isChroma);
+    
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    
+    reorderBlockZigzag(CurBlockLocal2, CurBlockLocal1, blockPosition);
+    
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    
+    copyTextureBlockOutFloat(
+        pixelPosition,
+        blockPosition,
+        CurBlockLocal1,
+        outTexture
+    );
+}
+
+/// Decodes one plane of quantized coefficients from `inTexture` into sample values in
+/// `outTexture`.
+///
+/// Each 8x8 threadgroup handles one block and each thread one element of it. `colorPlane`
+/// picks the quantization settings: planes 1 and 2 are chroma (`chromaQp`, chroma table),
+/// plane 3 uses `alphaQp`, and every other plane uses `lumaQp`.
+kernel void idctKernel(
+    texture2d<half, access::read> inTexture [[texture(0)]],
+    texture2d<half, access::write> outTexture [[texture(1)]],
+    uint2 pixelPosition [[thread_position_in_grid]],
+    uint2 blockPosition [[thread_position_in_threadgroup]],
+    constant int &colorPlane [[buffer(2)]]
+) {
+    // Two scratch blocks: coefficients/result and the intermediate of the transform.
+    threadgroup float CurBlockLocal1[BLOCK_SIZE2];
+    threadgroup float CurBlockLocal2[BLOCK_SIZE2];
+    
+    const bool isChroma = (colorPlane == 1 || colorPlane == 2);
+    
+    int imageQp;
+    if (isChroma) {
+        imageQp = chromaQp;
+    } else if (colorPlane == 3) {
+        imageQp = alphaQp;
+    } else {
+        imageQp = lumaQp;
+    }
+    
+    copyTextureBlockInDequantize(inTexture, pixelPosition, blockPosition, CurBlockLocal1, imageQp, isChroma);
+    
+    // The whole block must be loaded before IDCT reads other threads' elements.
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    
+    IDCT(blockPosition, CurBlockLocal1, CurBlockLocal2);
+    
+    threadgroup_barrier(mem_flags::mem_threadgroup);
+    
+    copyTextureBlockOut(pixelPosition, blockPosition, CurBlockLocal1, outTexture);
+}
@@ -0,0 +1,393 @@
+import Foundation
+import Metal
+import DctHuffman
+
+/// Empty marker class; `Bundle(for: BundleHelper.self)` resolves the bundle that contains this module.
+private final class BundleHelper: NSObject {}
+
+/// Rounds `size` up to the next multiple of `align`.
+///
+/// - Parameters:
+///   - size: The value to round up.
+///   - align: The alignment; must be a positive power of two.
+/// - Returns: The smallest multiple of `align` that is greater than or equal to `size`.
+private func alignUp(size: Int, align: Int) -> Int {
+    // Zero also satisfies `(align - 1) & align == 0`, but it is not a power of two and
+    // would collapse every size to 0, so it is rejected explicitly.
+    precondition(align > 0 && ((align - 1) & align) == 0, "Align must be a power of two")
+
+    let alignmentMask = align - 1
+    return (size + alignmentMask) & ~alignmentMask
+}
+
+/// A 2D Metal texture that, when possible, is backed by a page-aligned CPU-visible buffer
+/// so pixel data can be transferred with a plain memory copy.
+final class Texture {
+    /// A page-aligned heap allocation wrapped, without copying, in a shared-storage `MTLBuffer`.
+    final class DirectBuffer {
+        let buffer: MTLBuffer
+        let bytesPerRow: Int
+        
+        /// Allocates `bytesPerRow * height` bytes rounded up to a whole number of pages.
+        /// Returns nil on the simulator, before iOS 12, and when the allocation or the
+        /// buffer creation fails.
+        init?(device: MTLDevice, width: Int, height: Int, bytesPerRow: Int) {
+            #if targetEnvironment(simulator)
+            return nil
+            #else
+            if #available(iOS 12.0, *) {
+                let pagesize = Int(getpagesize())
+                let allocationSize = alignUp(size: bytesPerRow * height, align: pagesize)
+                var data: UnsafeMutableRawPointer? = nil
+                let result = posix_memalign(&data, pagesize, allocationSize)
+                if result == noErr, let data = data {
+                    self.bytesPerRow = bytesPerRow
+                    
+                    guard let buffer = device.makeBuffer(
+                        bytesNoCopy: data,
+                        length: allocationSize,
+                        options: .storageModeShared,
+                        deallocator: { _, _ in
+                            free(data)
+                        }
+                    ) else {
+                        // No buffer took ownership of the allocation, so the deallocator
+                        // will never run; release the memory here to avoid leaking it.
+                        free(data)
+                        return nil
+                    }
+                    
+                    self.buffer = buffer
+                } else {
+                    return nil
+                }
+            } else {
+                return nil
+            }
+            #endif
+        }
+    }
+    
+    let width: Int
+    let height: Int
+    let texture: MTLTexture
+    
+    /// Backing storage when the texture is a view onto a CPU-visible buffer; nil otherwise.
+    let directBuffer: DirectBuffer?
+    
+    /// Creates a `width` x `height` texture of `pixelFormat`.
+    ///
+    /// A `DirectBuffer` is attempted only when `isShared` is true, `usage` contains
+    /// `.shaderRead`, iOS 12 is available and the pixel format has a known size
+    /// (4 bytes for `.r32Float`/`.bgra8Unorm`, 1 for `.r8Unorm`/`.r8Uint`, 2 for `.rg8Unorm`).
+    /// Otherwise, or if the buffer cannot be created, a regular device texture is made.
+    /// Returns nil when Metal fails to create the texture.
+    init?(
+        device: MTLDevice,
+        width: Int,
+        height: Int,
+        pixelFormat: MTLPixelFormat,
+        usage: MTLTextureUsage,
+        isShared: Bool
+    ) {
+        self.width = width
+        self.height = height
+        
+        let bytesPerPixel: Int?
+        switch pixelFormat {
+        case .r32Float, .bgra8Unorm:
+            bytesPerPixel = 4
+        case .r8Unorm, .r8Uint:
+            bytesPerPixel = 1
+        case .rg8Unorm:
+            bytesPerPixel = 2
+        default:
+            bytesPerPixel = nil
+        }
+        
+        if #available(iOS 12.0, *), isShared, usage.contains(.shaderRead), let bytesPerPixel = bytesPerPixel {
+            // Rows of a buffer-backed texture must honour the device's alignment for this format.
+            let pixelRowAlignment = device.minimumTextureBufferAlignment(for: pixelFormat)
+            let bytesPerRow = alignUp(size: width * bytesPerPixel, align: pixelRowAlignment)
+            self.directBuffer = DirectBuffer(device: device, width: width, height: height, bytesPerRow: bytesPerRow)
+        } else {
+            self.directBuffer = nil
+        }
+        
+        let textureDescriptor = MTLTextureDescriptor()
+        textureDescriptor.textureType = .type2D
+        textureDescriptor.pixelFormat = pixelFormat
+        textureDescriptor.width = width
+        textureDescriptor.height = height
+        textureDescriptor.usage = usage
+        
+        if let directBuffer = self.directBuffer {
+            textureDescriptor.storageMode = directBuffer.buffer.storageMode
+            guard let texture = directBuffer.buffer.makeTexture(descriptor: textureDescriptor, offset: 0, bytesPerRow: directBuffer.bytesPerRow) else {
+                return nil
+            }
+            self.texture = texture
+        } else {
+            guard let texture = device.makeTexture(descriptor: textureDescriptor) else {
+                return nil
+            }
+            self.texture = texture
+        }
+    }
+    
+    /// Replaces the whole texture content with `image`.
+    ///
+    /// Does nothing (and asserts in debug builds) when the image dimensions differ from the
+    /// texture's, or when `image.data` is empty or too small for the copy being performed.
+    func replace(with image: AnimationCompressor.ImageData) {
+        if image.width != self.width || image.height != self.height {
+            assert(false, "Image size does not match")
+            return
+        }
+        let region = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: image.width, height: image.height, depth: 1))
+        
+        if let directBuffer = self.directBuffer, directBuffer.bytesPerRow == image.bytesPerRow {
+            // Row strides match, so the pixels can be copied straight into the backing buffer.
+            let byteCount = image.bytesPerRow * self.height
+            image.data.withUnsafeBytes { (bytes: UnsafeRawBufferPointer) -> Void in
+                // Never read past the end of the caller's data.
+                guard let baseAddress = bytes.baseAddress, bytes.count >= byteCount else {
+                    assert(false, "Image data is smaller than bytesPerRow * height")
+                    return
+                }
+                let _ = memcpy(directBuffer.buffer.contents(), baseAddress, byteCount)
+            }
+        } else {
+            image.data.withUnsafeBytes { (bytes: UnsafeRawBufferPointer) -> Void in
+                // Empty data has no base address to upload from.
+                guard let baseAddress = bytes.baseAddress else {
+                    assert(false, "Image data is empty")
+                    return
+                }
+                self.texture.replace(region: region, mipmapLevel: 0, withBytes: baseAddress, bytesPerRow: image.bytesPerRow)
+            }
+        }
+    }
+    
+    /// Fills the texture through `read`.
+    ///
+    /// When a direct buffer exists and `width`, `height` and `bytesPerRow` all match it,
+    /// `read` receives the buffer's memory and its return value is ignored. Otherwise
+    /// `read` receives nil and the pointer it returns is uploaded with `bytesPerRow` as the
+    /// row stride.
+    func readDirect(width: Int, height: Int, bytesPerRow: Int, read: (UnsafeMutableRawPointer?) -> UnsafeRawPointer) {
+        if let directBuffer = self.directBuffer, width == self.width, height == self.height, bytesPerRow == directBuffer.bytesPerRow {
+            let _ = read(directBuffer.buffer.contents())
+        } else {
+            let region = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: width, height: height, depth: 1))
+            self.texture.replace(region: region, mipmapLevel: 0, withBytes: read(nil), bytesPerRow: bytesPerRow)
+        }
+    }
+}
+
+/// A group of textures (planes) derived from one base size.
+final class TextureSet {
+    /// Describes one plane: the divisors applied to the base width/height and its pixel format.
+    struct Description {
+        let fractionWidth: Int
+        let fractionHeight: Int
+        let pixelFormat: MTLPixelFormat
+    }
+    
+    let width: Int
+    let height: Int
+    
+    /// One texture per description, in the order the descriptions were given.
+    let textures: [Texture]
+    
+    /// Creates one texture per entry of `descriptions`, sized `width / fractionWidth` by
+    /// `height / fractionHeight`. Returns nil as soon as any texture fails to be created.
+    init?(
+        device: MTLDevice,
+        width: Int,
+        height: Int,
+        descriptions: [Description],
+        usage: MTLTextureUsage,
+        isShared: Bool
+    ) {
+        self.width = width
+        self.height = height
+        
+        var planes: [Texture] = []
+        for description in descriptions {
+            guard let plane = Texture(
+                device: device,
+                width: width / description.fractionWidth,
+                height: height / description.fractionHeight,
+                pixelFormat: description.pixelFormat,
+                usage: usage,
+                isShared: isShared
+            ) else {
+                return nil
+            }
+            planes.append(plane)
+        }
+        
+        self.textures = planes
+    }
+}
+
+/// Compresses images on the GPU with the `dctKernel` compute shader and Huffman-codes
+/// the resulting coefficient planes.
+public final class AnimationCompressor {
+    /// An uncompressed input image: dimensions, row stride in bytes and the pixel bytes.
+    public final class ImageData {
+        public let width: Int
+        public let height: Int
+        public let bytesPerRow: Int
+        public let data: Data
+        
+        public init(width: Int, height: Int, bytesPerRow: Int, data: Data) {
+            self.width = width
+            self.height = height
+            self.bytesPerRow = bytesPerRow
+            self.data = data
+        }
+    }
+    
+    /// The serialized output of one compression.
+    public final class CompressedImageData {
+        public let data: Data
+        
+        public init(data: Data) {
+            self.data = data
+        }
+    }
+    
+    /// Holds the Metal device, shader library, compute pipeline and command queue used
+    /// for compression.
+    public final class SharedContext {
+        // Force-unwrapped: first access traps if the failable initializer returns nil.
+        public static let shared: SharedContext = SharedContext()!
+        
+        public let device: MTLDevice
+        let defaultLibrary: MTLLibrary
+        private let computeDctPipelineState: MTLComputePipelineState
+        private let commandQueue: MTLCommandQueue
+        
+        /// Returns nil when there is no Metal device, the resource bundle or its shader
+        /// library cannot be loaded, `dctKernel` is missing, or the pipeline or command
+        /// queue cannot be created.
+        public init?() {
+            guard let device = MTLCreateSystemDefaultDevice() else {
+                return nil
+            }
+            self.device = device
+            
+            // The bundle that contains this class, not necessarily the app's main bundle.
+            let mainBundle = Bundle(for: BundleHelper.self)
+
+            guard let path = mainBundle.path(forResource: "AnimationCompressionBundle", ofType: "bundle") else {
+                return nil
+            }
+            guard let bundle = Bundle(path: path) else {
+                return nil
+            }
+            
+            if #available(iOS 10.0, *) {
+                guard let defaultLibrary = try? device.makeDefaultLibrary(bundle: bundle) else {
+                    return nil
+                }
+                self.defaultLibrary = defaultLibrary
+            } else {
+                preconditionFailure()
+            }
+            
+            guard let dctFunction = self.defaultLibrary.makeFunction(name: "dctKernel") else {
+                return nil
+            }
+            
+            guard let computeDctPipelineState = try? self.device.makeComputePipelineState(function: dctFunction) else {
+                return nil
+            }
+            self.computeDctPipelineState = computeDctPipelineState
+            
+            guard let commandQueue = self.device.makeCommandQueue() else {
+                return nil
+            }
+            self.commandQueue = commandQueue
+        }
+        
+        /// Encodes `image` into four coefficient planes on the GPU, serializes them and
+        /// delivers the result to `completion` on the main queue.
+        ///
+        /// Textures cached on `compressor` are reused when their size matches the image.
+        /// NOTE(review): every early `return` below exits without calling `completion` —
+        /// confirm callers tolerate a completion that never fires.
+        /// NOTE(review): the cached textures are also captured by the completed handler;
+        /// a second call before the first finishes would presumably overwrite them — verify.
+        func compress(compressor: AnimationCompressor, image: ImageData, completion: @escaping (CompressedImageData) -> Void) {
+            // One thread per element of an 8x8 block.
+            let threadgroupSize = MTLSize(width: 8, height: 8, depth: 1)
+            
+            assert(image.width % 8 == 0)
+            assert(image.height % 8 == 0)
+            
+            let inputTexture: Texture
+            if let current = compressor.inputTexture, current.width == image.width, current.height == image.height {
+                inputTexture = current
+            } else {
+                guard let texture = Texture(
+                    device: self.device,
+                    width: image.width,
+                    height: image.height,
+                    pixelFormat: .bgra8Unorm,
+                    usage: .shaderRead,
+                    isShared: true
+                ) else {
+                    return
+                }
+                inputTexture = texture
+                compressor.inputTexture = texture
+            }
+            
+            inputTexture.replace(with: image)
+            
+            let compressedTextures: TextureSet
+            if let current = compressor.compressedTextures, current.width == image.width, current.height == image.height {
+                compressedTextures = current
+            } else {
+                // Four output planes; the second and third are half-size in both dimensions.
+                guard let textures = TextureSet(
+                    device: self.device,
+                    width: image.width,
+                    height: image.height,
+                    descriptions: [
+                        TextureSet.Description(
+                            fractionWidth: 1, fractionHeight: 1,
+                            pixelFormat: .r32Float
+                        ),
+                        TextureSet.Description(
+                            fractionWidth: 2, fractionHeight: 2,
+                            pixelFormat: .r32Float
+                        ),
+                        TextureSet.Description(
+                            fractionWidth: 2, fractionHeight: 2,
+                            pixelFormat: .r32Float
+                        ),
+                        TextureSet.Description(
+                            fractionWidth: 1, fractionHeight: 1,
+                            pixelFormat: .r32Float
+                        )
+                    ],
+                    usage: [.shaderWrite],
+                    isShared: false
+                ) else {
+                    return
+                }
+                compressedTextures = textures
+                compressor.compressedTextures = textures
+            }
+            
+            guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
+                return
+            }
+            commandBuffer.label = "ImageCompressor"
+            
+            guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+                return
+            }
+            computeEncoder.setComputePipelineState(self.computeDctPipelineState)
+            
+            computeEncoder.setTexture(inputTexture.texture, index: 0)
+            
+            // One dispatch per plane: output texture at index 1, plane number at buffer index 2.
+            for colorPlane in 0 ..< 4 {
+                computeEncoder.setTexture(compressedTextures.textures[colorPlane].texture, index: 1)
+                
+                var colorPlaneInt32 = Int32(colorPlane)
+                computeEncoder.setBytes(&colorPlaneInt32, length: 4, index: 2)
+                
+                // Round up so that partial blocks at the right/bottom edge are still covered.
+                let threadgroupCount = MTLSize(width: (compressedTextures.textures[colorPlane].width + threadgroupSize.width - 1) / threadgroupSize.width, height: (compressedTextures.textures[colorPlane].height + threadgroupSize.height - 1) / threadgroupSize.height, depth: 1)
+                
+                computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
+            }
+            
+            computeEncoder.endEncoding()
+            
+            commandBuffer.addCompletedHandler { _ in
+                let buffer = WriteBuffer()
+                
+                // Header: a fixed marker value, the number 4, then the first plane's width and height.
+                buffer.writeInt32(0x543ee445)
+                buffer.writeInt32(4)
+                buffer.writeInt32(Int32(compressedTextures.textures[0].width))
+                buffer.writeInt32(Int32(compressedTextures.textures[0].height))
+                
+                // Per plane: width, height, row stride, then the length-prefixed Huffman data.
+                for i in 0 ..< 4 {
+                    let region = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: compressedTextures.textures[i].width, height: compressedTextures.textures[i].height, depth: 1))
+                    let bytesPerRow = 4 * compressedTextures.textures[i].width
+                    
+                    buffer.writeInt32(Int32(compressedTextures.textures[i].width))
+                    buffer.writeInt32(Int32(compressedTextures.textures[i].height))
+                    buffer.writeInt32(Int32(bytesPerRow))
+                    
+                    var textureBytes = Data(count: bytesPerRow * compressedTextures.textures[i].height)
+                    textureBytes.withUnsafeMutableBytes { bytes in
+                        compressedTextures.textures[i].texture.getBytes(bytes.baseAddress!, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerRow * compressedTextures.textures[i].height, from: region, mipmapLevel: 0, slice: 0)
+                        
+                        // Force-unwrapped: traps if writeDCTBlocks returns nil.
+                        let huffmanData = writeDCTBlocks(Int32(compressedTextures.textures[i].width), Int32(compressedTextures.textures[i].height), bytes.baseAddress!.assumingMemoryBound(to: Float32.self))!
+                        buffer.writeInt32(Int32(huffmanData.count))
+                        buffer.write(huffmanData)
+                    }
+                }
+                
+                DispatchQueue.main.async {
+                    completion(CompressedImageData(data: buffer.makeData()))
+                }
+            }
+            
+            commandBuffer.commit()
+        }
+    }
+    
+    private let sharedContext: SharedContext
+    
+    // Textures kept between calls and reused by SharedContext.compress when sizes match.
+    private var inputTexture: Texture?
+    private var compressedTextures: TextureSet?
+    
+    public init(sharedContext: SharedContext) {
+        self.sharedContext = sharedContext
+    }
+    
+    /// Compresses `image` using the shared context; see `SharedContext.compress`.
+    public func compress(image: ImageData, completion: @escaping (CompressedImageData) -> Void) {
+        self.sharedContext.compress(compressor: self, image: image, completion: completion)
+    }
+}
@@ -0,0 +1,110 @@
+import Foundation
+
+/// Base class pairing a `Data` payload with a separately tracked `length`.
+class MemoryBuffer {
+    // Backing storage.
+    var data: Data
+    // Starts out as `data.count`; it is a plain stored value and is not kept in sync with `data` automatically.
+    var length: Int
+    
+    /// Wraps `data` and records its current byte count as `length`.
+    init(data: Data) {
+        self.data = data
+        self.length = data.count
+    }
+}
+
+/// Append-style buffer that serialises raw bytes and fixed-width scalars into a growable `Data`.
+///
+/// Scalars are written by copying their in-memory representation, i.e. in the host's native byte order.
+final class WriteBuffer: MemoryBuffer {
+    /// Position at which the next write lands.
+    var offset = 0
+
+    init() {
+        super.init(data: Data())
+    }
+
+    /// Returns the backing storage as is.
+    ///
+    /// The storage grows with 256 bytes of slack (see `write(_:offset:length:)`), so the result can be
+    /// longer than the number of bytes actually written.
+    func makeData() -> Data {
+        return self.data
+    }
+
+    /// Rewinds the write position to the start; already written bytes and `length` are left untouched.
+    func reset() {
+        self.offset = 0
+    }
+
+    /// Copies `length` bytes starting at `data + offset` to the current write position and advances it.
+    ///
+    /// - Parameters:
+    ///   - data: Source memory; must be readable for `offset + length` bytes.
+    ///   - offset: Byte offset into the source.
+    ///   - length: Number of bytes to copy. Zero or negative values copy nothing.
+    func write(_ data: UnsafeRawPointer, offset: Int = 0, length: Int) {
+        guard length > 0 else {
+            // Nothing to copy. Bailing out here also avoids touching the base address of a
+            // still-empty backing store, which is nil and used to be force-unwrapped.
+            self.length = self.offset
+            return
+        }
+        if self.offset + length > self.data.count {
+            // Grow with some slack so that a run of small writes does not reallocate every time.
+            self.data.count = self.offset + length + 256
+        }
+        let destinationOffset = self.offset
+        self.data.withUnsafeMutableBytes { bytes in
+            guard let baseAddress = bytes.baseAddress else {
+                return
+            }
+            let _ = memcpy(baseAddress.advanced(by: destinationOffset), data + offset, length)
+        }
+        self.offset += length
+        self.length = self.offset
+    }
+
+    /// Appends the full contents of `data`. An empty `Data` is a no-op.
+    func write(_ data: Data) {
+        data.withUnsafeBytes { bytes in
+            // An empty `Data` exposes a nil base address; there is nothing to append in that case.
+            guard let baseAddress = bytes.baseAddress, bytes.count > 0 else {
+                return
+            }
+            self.write(baseAddress, length: bytes.count)
+        }
+    }
+
+    /// Appends one byte.
+    func writeInt8(_ value: Int8) {
+        var value = value
+        self.write(&value, length: 1)
+    }
+
+    /// Appends four bytes holding `value`.
+    func writeInt32(_ value: Int32) {
+        var value = value
+        self.write(&value, length: 4)
+    }
+
+    /// Appends four bytes holding `value` as a 32-bit float.
+    func writeFloat(_ value: Float) {
+        var value: Float32 = value
+        self.write(&value, length: 4)
+    }
+
+    /// Moves the write position to `offset` without touching the stored bytes.
+    func seek(offset: Int) {
+        self.offset = offset
+    }
+}
+
+/// Sequential reader over a `Data` payload, the counterpart of `WriteBuffer`.
+///
+/// Scalars are read by copying raw bytes into the result, i.e. in the host's native byte order.
+/// Requests that do not fit into the remaining payload are answered with zero / empty values
+/// instead of touching memory outside the payload.
+final class ReadBuffer: MemoryBuffer {
+    /// Position of the next byte to read.
+    var offset = 0
+
+    override init(data: Data) {
+        super.init(data: data)
+    }
+
+    /// Returns true when `length` bytes are available starting at the current position.
+    private func canRead(_ length: Int) -> Bool {
+        // Written as a subtraction so that a huge `length` cannot overflow the comparison.
+        return length >= 0 && self.offset >= 0 && length <= self.data.count - self.offset
+    }
+
+    /// Copies `length` bytes from the current position into `data` and advances the position.
+    ///
+    /// When fewer than `length` bytes remain nothing is copied, so the destination keeps its
+    /// previous contents; the position still advances so that later reads keep failing the same way.
+    func read(_ data: UnsafeMutableRawPointer, length: Int) {
+        guard length > 0 else {
+            return
+        }
+        if self.canRead(length) {
+            self.data.copyBytes(to: data.assumingMemoryBound(to: UInt8.self), from: self.offset ..< (self.offset + length))
+        }
+        self.offset += length
+    }
+
+    /// Returns `length` bytes starting at the current position without copying them, then advances.
+    ///
+    /// The result aliases this buffer's storage (no deallocator is attached), so it must not outlive
+    /// the buffer. An empty `Data` is returned when the requested range is not fully available.
+    func readDataNoCopy(length: Int) -> Data {
+        let start = self.offset
+        let isAvailable = length > 0 && self.canRead(length)
+        let result = self.data.withUnsafeBytes { bytes -> Data in
+            // `baseAddress` is nil for an empty payload; never force-unwrap it.
+            guard isAvailable, let baseAddress = bytes.baseAddress else {
+                return Data()
+            }
+            return Data(bytesNoCopy: UnsafeMutableRawPointer(mutating: baseAddress.advanced(by: start)), count: length, deallocator: .none)
+        }
+        self.offset += length
+        return result
+    }
+
+    /// Reads one byte; yields 0 when the payload is exhausted.
+    func readInt8() -> Int8 {
+        var result: Int8 = 0
+        self.read(&result, length: 1)
+        return result
+    }
+
+    /// Reads four bytes as an `Int32`; yields 0 when the payload is exhausted.
+    func readInt32() -> Int32 {
+        var result: Int32 = 0
+        self.read(&result, length: 4)
+        return result
+    }
+
+    /// Reads four bytes as a 32-bit float; yields 0 when the payload is exhausted.
+    func readFloat() -> Float {
+        var result: Float32 = 0
+        self.read(&result, length: 4)
+        return result
+    }
+
+    /// Advances the read position by `length` bytes without reading them.
+    func skip(_ length: Int) {
+        self.offset += length
+    }
+
+    /// Rewinds the read position to the start of the payload.
+    func reset() {
+        self.offset = 0
+    }
+}
@@ -0,0 +1,545 @@
+import Foundation
+import UIKit
+import Metal
+import MetalKit
+import simd
+import DctHuffman
+import MetalImageView
+
+/// Vertex layout consisting of a 2D position and a 2D texture coordinate.
+// NOTE(review): no use of this type is visible in this part of the file — presumably it mirrors a shader-side struct; confirm before removing.
+private struct Vertex {
+    var position: vector_float2
+    var textureCoordinate: vector_float2
+}
+
+public final class CompressedImageRenderer {
+    /// Lazily created set of Metal pipeline states that every renderer instance picks up.
+    private final class Shared {
+        // Force-unwrapped: failing to build any of the pipeline states traps on first access.
+        static let shared: Shared = {
+            return Shared(sharedContext: AnimationCompressor.SharedContext.shared)!
+        }()
+
+        let sharedContext: AnimationCompressor.SharedContext
+
+        let computeIdctPipelineState: MTLComputePipelineState
+        let renderIdctPipelineState: MTLRenderPipelineState
+        let renderRgbPipelineState: MTLRenderPipelineState
+        let renderYuvaPipelineState: MTLRenderPipelineState
+
+        /// Builds the IDCT compute pipeline and the three render pipelines (IDCT, RGB, YUVA).
+        ///
+        /// Returns nil as soon as a shader function cannot be found in the context's default
+        /// library or a pipeline state cannot be created.
+        init?(sharedContext: AnimationCompressor.SharedContext) {
+            self.sharedContext = sharedContext
+
+            let device = sharedContext.device
+            let library = sharedContext.defaultLibrary
+
+            // Compute pipeline first.
+            guard let idctKernel = library.makeFunction(name: "idctKernel"),
+                  let computeState = try? device.makeComputePipelineState(function: idctKernel) else {
+                return nil
+            }
+            self.computeIdctPipelineState = computeState
+
+            // All shader functions are looked up before any render pipeline is built.
+            guard let vertexFunction = library.makeFunction(name: "vertexShader"),
+                  let idctFragment = library.makeFunction(name: "samplingIdctShader"),
+                  let rgbFragment = library.makeFunction(name: "samplingRgbShader"),
+                  let yuvaFragment = library.makeFunction(name: "samplingYuvaShader") else {
+                return nil
+            }
+
+            // The three render pipelines differ only in label and fragment function.
+            func makeRenderState(label: String, fragmentFunction: MTLFunction) -> MTLRenderPipelineState? {
+                let descriptor = MTLRenderPipelineDescriptor()
+                descriptor.label = label
+                descriptor.vertexFunction = vertexFunction
+                descriptor.fragmentFunction = fragmentFunction
+                descriptor.colorAttachments[0].pixelFormat = .bgra8Unorm
+                return try? device.makeRenderPipelineState(descriptor: descriptor)
+            }
+
+            guard let idctState = makeRenderState(label: "Render IDCT Pipeline", fragmentFunction: idctFragment) else {
+                return nil
+            }
+            self.renderIdctPipelineState = idctState
+
+            guard let rgbState = makeRenderState(label: "Render RGB Pipeline", fragmentFunction: rgbFragment) else {
+                return nil
+            }
+            self.renderRgbPipelineState = rgbState
+
+            guard let yuvaState = makeRenderState(label: "Render YUVA Pipeline", fragmentFunction: yuvaFragment) else {
+                return nil
+            }
+            self.renderYuvaPipelineState = yuvaState
+        }
+    }
+    
+    private let sharedContext: AnimationCompressor.SharedContext
+    private let shared: Shared
+    
+    private var compressedTextures: TextureSet?
+    private var outputTextures: TextureSet?
+    
+    private var rgbTexture: Texture?
+    
+    private var yuvaTextures: TextureSet?
+    
+    private let commandQueue: MTLCommandQueue
+    
+    private var isRendering: Bool = false
+
+    /// Stores the context, picks up the shared pipeline states and creates a command queue of its own.
+    ///
+    /// Returns nil when the context's device cannot create a command queue.
+    public init?(sharedContext: AnimationCompressor.SharedContext) {
+        self.sharedContext = sharedContext
+        // The first access builds the shared pipelines; that initializer is force-unwrapped, so a
+        // pipeline creation failure traps there instead of making this initializer return nil.
+        self.shared = Shared.shared
+        
+        guard let commandQueue = self.sharedContext.device.makeCommandQueue() else {
+            return nil
+        }
+        self.commandQueue = commandQueue
+    }
+    
+    private var drawableRequestTimestamp: Double?
+    
+    /// Sets the layer renderer's drawable size to `drawableSize`, then returns whatever its `nextDrawable()` yields.
+    private func getNextDrawable(layer: MetalImageLayer, drawableSize: CGSize) -> MetalImageLayer.Drawable? {
+        layer.renderer.drawableSize = drawableSize
+        return layer.renderer.nextDrawable()
+    }
+    
+    /// Decodes the four DCT planes stored in `compressedImage` into `self.compressedTextures`.
+    ///
+    /// The stream starts with four Int32 values: a marker that must equal 0x543ee445, a value that
+    /// must equal 4 (NOTE(review): presumably a format version or plane count — confirm against the
+    /// writer), then width and height. Each of the four planes follows as width, height,
+    /// bytesPerRow, payload size and the Huffman-coded payload itself.
+    ///
+    /// All sizes come from serialized data, so they are validated before being used for pointer
+    /// arithmetic or allocation; on any inconsistency the method returns early and leaves the
+    /// remaining planes untouched. The RGB and YUVA textures are always dropped.
+    private func updateIdctTextures(compressedImage: AnimationCompressor.CompressedImageData) {
+        self.rgbTexture = nil
+        self.yuvaTextures = nil
+
+        let readBuffer = ReadBuffer(data: compressedImage.data)
+        // The fixed header is four Int32 values.
+        guard readBuffer.data.count >= 16 else {
+            return
+        }
+        if readBuffer.readInt32() != 0x543ee445 {
+            return
+        }
+        if readBuffer.readInt32() != 4 {
+            return
+        }
+
+        let width = Int(readBuffer.readInt32())
+        let height = Int(readBuffer.readInt32())
+        guard width > 0, height > 0 else {
+            return
+        }
+
+        let compressedTextures: TextureSet
+        if let current = self.compressedTextures, current.width == width, current.height == height {
+            // Same dimensions as the previous frame: reuse the existing textures.
+            compressedTextures = current
+        } else {
+            guard let textures = TextureSet(
+                device: self.sharedContext.device,
+                width: width,
+                height: height,
+                descriptions: [
+                    TextureSet.Description(
+                        fractionWidth: 1, fractionHeight: 1,
+                        pixelFormat: .r32Float
+                    ),
+                    TextureSet.Description(
+                        fractionWidth: 2, fractionHeight: 2,
+                        pixelFormat: .r32Float
+                    ),
+                    TextureSet.Description(
+                        fractionWidth: 2, fractionHeight: 2,
+                        pixelFormat: .r32Float
+                    ),
+                    TextureSet.Description(
+                        fractionWidth: 1, fractionHeight: 1,
+                        pixelFormat: .r32Float
+                    )
+                ],
+                usage: .shaderRead,
+                isShared: true
+            ) else {
+                return
+            }
+            self.compressedTextures = textures
+            compressedTextures = textures
+        }
+
+        for i in 0 ..< 4 {
+            // Per-plane header: width, height, bytesPerRow, payload size.
+            guard readBuffer.data.count - readBuffer.offset >= 16 else {
+                return
+            }
+            let planeWidth = Int(readBuffer.readInt32())
+            let planeHeight = Int(readBuffer.readInt32())
+            let bytesPerRow = Int(readBuffer.readInt32())
+
+            let planeSize = Int(readBuffer.readInt32())
+            // Each sample is one Float32, so a row needs at least 4 bytes per pixel, and the
+            // payload has to lie entirely inside the remaining data.
+            guard planeWidth > 0, planeHeight > 0, bytesPerRow >= planeWidth * 4,
+                  planeSize >= 0, planeSize <= readBuffer.data.count - readBuffer.offset else {
+                return
+            }
+            let planeData = readBuffer.readDataNoCopy(length: planeSize)
+
+            // Keeps the fallback buffer alive for the duration of the `readDirect` call.
+            var tempData: Data?
+            compressedTextures.textures[i].readDirect(width: planeWidth, height: planeHeight, bytesPerRow: bytesPerRow, read: { destinationBytes in
+                if let destinationBytes = destinationBytes {
+                    // A destination was provided: decode straight into it.
+                    readDCTBlocks(Int32(planeWidth), Int32(planeHeight), planeData, destinationBytes.assumingMemoryBound(to: Float32.self), Int32(bytesPerRow / 4))
+                    return UnsafeRawPointer(destinationBytes)
+                } else {
+                    // No destination: decode into a temporary buffer and hand its address back.
+                    tempData = Data(count: bytesPerRow * planeHeight)
+                    return tempData!.withUnsafeMutableBytes { bytes -> UnsafeRawPointer in
+                        readDCTBlocks(Int32(planeWidth), Int32(planeHeight), planeData, bytes.baseAddress!.assumingMemoryBound(to: Float32.self), Int32(bytesPerRow / 4))
+                        return UnsafeRawPointer(bytes.baseAddress!)
+                    }
+                }
+            })
+        }
+    }
+    
+    /// Decodes `compressedImage` on a global queue, then runs the IDCT compute pass and draws the
+    /// result into the layer's next drawable on the main queue.
+    ///
+    /// `completion` is always invoked on the main queue: via the drawable's `present(completion:)`
+    /// on success, directly on every failure path (missing textures, no command buffer / encoder,
+    /// no drawable).
+    public func renderIdct(layer: MetalImageLayer, compressedImage: AnimationCompressor.CompressedImageData, completion: @escaping () -> Void) {
+        DispatchQueue.global().async {
+            self.updateIdctTextures(compressedImage: compressedImage)
+
+            DispatchQueue.main.async {
+                guard let compressedTextures = self.compressedTextures else {
+                    completion()
+                    return
+                }
+
+                // Resolve the output textures before any encoder exists, so that a failure here
+                // cannot leave a command encoder without a matching `endEncoding()`.
+                let outputTextures: TextureSet
+                if let current = self.outputTextures, current.width == compressedTextures.textures[0].width, current.height == compressedTextures.textures[0].height {
+                    outputTextures = current
+                } else {
+                    guard let textures = TextureSet(
+                        device: self.sharedContext.device,
+                        width: compressedTextures.textures[0].width,
+                        height: compressedTextures.textures[0].height,
+                        descriptions: [
+                            TextureSet.Description(
+                                fractionWidth: 1, fractionHeight: 1,
+                                pixelFormat: .r8Unorm
+                            ),
+                            TextureSet.Description(
+                                fractionWidth: 2, fractionHeight: 2,
+                                pixelFormat: .r8Unorm
+                            ),
+                            TextureSet.Description(
+                                fractionWidth: 2, fractionHeight: 2,
+                                pixelFormat: .r8Unorm
+                            ),
+                            TextureSet.Description(
+                                fractionWidth: 1, fractionHeight: 1,
+                                pixelFormat: .r8Unorm
+                            )
+                        ],
+                        usage: [.shaderRead, .shaderWrite],
+                        isShared: false
+                    ) else {
+                        completion()
+                        return
+                    }
+                    self.outputTextures = textures
+                    outputTextures = textures
+                }
+
+                guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
+                    completion()
+                    return
+                }
+                commandBuffer.label = "MyCommand"
+
+                guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
+                    completion()
+                    return
+                }
+
+                computeEncoder.setComputePipelineState(self.shared.computeIdctPipelineState)
+
+                // One dispatch per plane: input coefficients at index 0, output at index 1,
+                // the plane index at buffer slot 2.
+                for i in 0 ..< 4 {
+                    computeEncoder.setTexture(compressedTextures.textures[i].texture, index: 0)
+                    computeEncoder.setTexture(outputTextures.textures[i].texture, index: 1)
+
+                    var colorPlaneInt32 = Int32(i)
+                    computeEncoder.setBytes(&colorPlaneInt32, length: 4, index: 2)
+
+                    // 8x8 threads per group; the group count is rounded up to cover the plane.
+                    let threadgroupSize = MTLSize(width: 8, height: 8, depth: 1)
+                    let threadgroupCount = MTLSize(width: (compressedTextures.textures[i].width + threadgroupSize.width - 1) / threadgroupSize.width, height: (compressedTextures.textures[i].height + threadgroupSize.height - 1) / threadgroupSize.height, depth: 1)
+
+                    computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
+                }
+
+                computeEncoder.endEncoding()
+
+                let drawableSize = CGSize(width: CGFloat(outputTextures.textures[0].width), height: CGFloat(outputTextures.textures[0].height))
+
+                guard let drawable = self.getNextDrawable(layer: layer, drawableSize: drawableSize) else {
+                    // Nothing to draw into: still run the compute pass and report completion.
+                    commandBuffer.commit()
+                    completion()
+                    return
+                }
+
+                let renderPassDescriptor = MTLRenderPassDescriptor()
+                renderPassDescriptor.colorAttachments[0].texture = drawable.texture
+                renderPassDescriptor.colorAttachments[0].loadAction = .clear
+                renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)
+
+                guard let renderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) else {
+                    completion()
+                    return
+                }
+                renderEncoder.label = "MyRenderEncoder"
+
+                renderEncoder.setRenderPipelineState(self.shared.renderIdctPipelineState)
+
+                for i in 0 ..< 4 {
+                    renderEncoder.setFragmentTexture(outputTextures.textures[i].texture, index: i)
+                }
+
+                renderEncoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6)
+
+                renderEncoder.endEncoding()
+
+                // The drawable is presented on the main queue once the GPU work has finished,
+                // and released right after.
+                var storedDrawable: MetalImageLayer.Drawable? = drawable
+                commandBuffer.addCompletedHandler { _ in
+                    DispatchQueue.main.async {
+                        autoreleasepool {
+                            storedDrawable?.present(completion: completion)
+                            storedDrawable = nil
+                        }
+                    }
+                }
+
+                commandBuffer.commit()
+            }
+        }
+    }
+    
+    /// Makes `self.rgbTexture` a BGRA texture of the given size and feeds `data` into it.
+    ///
+    /// The IDCT and YUVA texture sets are dropped. An existing RGB texture is reused when its
+    /// dimensions match; if a new one cannot be created the method returns without uploading.
+    private func updateRgbTexture(width: Int, height: Int, bytesPerRow: Int, data: Data) {
+        self.compressedTextures = nil
+        self.outputTextures = nil
+        self.yuvaTextures = nil
+
+        let destination: Texture
+        if let reusable = self.rgbTexture, reusable.width == width && reusable.height == height {
+            destination = reusable
+        } else if let created = Texture(device: self.sharedContext.device, width: width, height: height, pixelFormat: .bgra8Unorm, usage: .shaderRead, isShared: true) {
+            self.rgbTexture = created
+            destination = created
+        } else {
+            return
+        }
+
+        destination.readDirect(width: width, height: height, bytesPerRow: bytesPerRow, read: { target in
+            return data.withUnsafeBytes { source -> UnsafeRawPointer in
+                // Without a destination pointer the source bytes are handed back as they are.
+                guard let target = target else {
+                    return source.baseAddress!
+                }
+                // Otherwise the whole payload is copied into the provided destination.
+                memcpy(target, source.baseAddress!, source.count)
+                return UnsafeRawPointer(target)
+            }
+        })
+    }
+    
+    /// Uploads a BGRA image and draws it into the layer's next drawable, synchronously on the calling thread.
+    ///
+    /// `completion` is always invoked: through the drawable's `present(completion:)` on the main
+    /// queue on success, or directly from this call when the texture, command buffer, drawable or
+    /// render encoder cannot be obtained.
+    public func renderRgb(layer: MetalImageLayer, width: Int, height: Int, bytesPerRow: Int, data: Data, completion: @escaping () -> Void) {
+        self.updateRgbTexture(width: width, height: height, bytesPerRow: bytesPerRow, data: data)
+
+        guard let rgbTexture = self.rgbTexture else {
+            completion()
+            return
+        }
+
+        guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
+            completion()
+            return
+        }
+        commandBuffer.label = "MyCommand"
+
+        let drawableSize = CGSize(width: CGFloat(rgbTexture.width), height: CGFloat(rgbTexture.height))
+
+        guard let drawable = self.getNextDrawable(layer: layer, drawableSize: drawableSize) else {
+            commandBuffer.commit()
+            completion()
+            return
+        }
+
+        let renderPassDescriptor = MTLRenderPassDescriptor()
+        renderPassDescriptor.colorAttachments[0].texture = drawable.texture
+        renderPassDescriptor.colorAttachments[0].loadAction = .clear
+        renderPassDescriptor.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)
+
+        guard let renderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) else {
+            completion()
+            return
+        }
+        renderEncoder.label = "MyRenderEncoder"
+
+        renderEncoder.setRenderPipelineState(self.shared.renderRgbPipelineState)
+        renderEncoder.setFragmentTexture(rgbTexture.texture, index: 0)
+
+        renderEncoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6)
+
+        renderEncoder.endEncoding()
+
+        // The drawable is presented on the main queue once the GPU work has finished,
+        // and released right after.
+        var storedDrawable: MetalImageLayer.Drawable? = drawable
+        commandBuffer.addCompletedHandler { _ in
+            DispatchQueue.main.async {
+                autoreleasepool {
+                    storedDrawable?.present(completion: completion)
+                    storedDrawable = nil
+                }
+            }
+        }
+
+        commandBuffer.commit()
+    }
+    
+    /// Feeds the three planes packed in `data` into `self.yuvaTextures`.
+    ///
+    /// Does nothing unless both dimensions are even. Otherwise the IDCT and RGB textures are
+    /// dropped and the texture set is reused when its size matches or recreated when it does not.
+    /// Plane layout inside `data`, as read here:
+    ///  - plane 0: `width * height` bytes at offset 0
+    ///  - plane 1: `width * height / 2` bytes at offset `width * height`
+    ///  - plane 2: `width / 2 * height` bytes at offset `width * height * 2`
+    private func updateYuvaTextures(width: Int, height: Int, data: Data) {
+        guard width % 2 == 0 && height % 2 == 0 else {
+            return
+        }
+
+        self.compressedTextures = nil
+        self.outputTextures = nil
+        self.rgbTexture = nil
+
+        let planes: TextureSet
+        if let reusable = self.yuvaTextures, reusable.width == width && reusable.height == height {
+            planes = reusable
+        } else {
+            let descriptions = [
+                TextureSet.Description(fractionWidth: 1, fractionHeight: 1, pixelFormat: .r8Unorm),
+                TextureSet.Description(fractionWidth: 2, fractionHeight: 2, pixelFormat: .rg8Unorm),
+                TextureSet.Description(fractionWidth: 2, fractionHeight: 1, pixelFormat: .r8Uint)
+            ]
+            guard let created = TextureSet(
+                device: self.sharedContext.device,
+                width: width,
+                height: height,
+                descriptions: descriptions,
+                usage: .shaderRead,
+                isShared: true
+            ) else {
+                return
+            }
+            self.yuvaTextures = created
+            planes = created
+        }
+
+        data.withUnsafeBytes { rawBytes in
+            guard let base = rawBytes.baseAddress?.assumingMemoryBound(to: UInt8.self) else {
+                return
+            }
+
+            // Hands one plane to its texture: copies into the destination when one is provided,
+            // otherwise returns the source address itself.
+            func upload(plane index: Int, planeWidth: Int, planeHeight: Int, bytesPerRow: Int, sourceOffset: Int, byteCount: Int) {
+                let source = base.advanced(by: sourceOffset)
+                planes.textures[index].readDirect(width: planeWidth, height: planeHeight, bytesPerRow: bytesPerRow, read: { destinationBytes in
+                    guard let destinationBytes = destinationBytes else {
+                        return UnsafeRawPointer(source)
+                    }
+                    memcpy(destinationBytes, source, byteCount)
+                    return UnsafeRawPointer(destinationBytes)
+                })
+            }
+
+            upload(plane: 0, planeWidth: width, planeHeight: height, bytesPerRow: width, sourceOffset: 0, byteCount: width * height)
+            upload(plane: 1, planeWidth: width / 2, planeHeight: height / 2, bytesPerRow: width, sourceOffset: width * height, byteCount: width * height / 2)
+            upload(plane: 2, planeWidth: width / 2, planeHeight: height, bytesPerRow: width / 2, sourceOffset: width * height * 2, byteCount: width / 2 * height)
+        }
+    }
+    
+    /// Uploads a YUVA frame and draws it into the layer's next drawable, entirely on a global queue.
+    ///
+    /// `completion` always runs on the main queue: through the drawable's `present(completion:)`
+    /// on success, or directly when the textures, command buffer, drawable or render encoder are
+    /// unavailable. Texture upload and encoding/commit are timed separately and a message is
+    /// printed whenever either takes longer than one 60 Hz frame.
+    public func renderYuva(layer: MetalImageLayer, width: Int, height: Int, data: Data, completion: @escaping () -> Void) {
+        // Failure paths all report back the same way.
+        func completeOnMain() {
+            DispatchQueue.main.async {
+                completion()
+            }
+        }
+
+        DispatchQueue.global().async {
+            autoreleasepool {
+                let frameBudget = 1.0 / 60.0
+
+                let uploadStart = CFAbsoluteTimeGetCurrent()
+                self.updateYuvaTextures(width: width, height: height, data: data)
+                let uploadDuration = CFAbsoluteTimeGetCurrent() - uploadStart
+                if uploadDuration > frameBudget {
+                    print("update textures lag \(uploadDuration * 1000.0)")
+                }
+
+                guard let planes = self.yuvaTextures else {
+                    completeOnMain()
+                    return
+                }
+
+                let encodeStart = CFAbsoluteTimeGetCurrent()
+
+                guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
+                    completeOnMain()
+                    return
+                }
+                commandBuffer.label = "MyCommand"
+
+                let targetSize = CGSize(width: CGFloat(planes.width), height: CGFloat(planes.height))
+                guard let drawable = self.getNextDrawable(layer: layer, drawableSize: targetSize) else {
+                    commandBuffer.commit()
+                    completeOnMain()
+                    return
+                }
+
+                let passDescriptor = MTLRenderPassDescriptor()
+                let colorAttachment = passDescriptor.colorAttachments[0]
+                colorAttachment?.texture = drawable.texture
+                colorAttachment?.loadAction = .clear
+                colorAttachment?.clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 0)
+
+                guard let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: passDescriptor) else {
+                    completeOnMain()
+                    return
+                }
+                encoder.label = "MyRenderEncoder"
+
+                encoder.setRenderPipelineState(self.shared.renderYuvaPipelineState)
+                for planeIndex in 0 ..< 3 {
+                    encoder.setFragmentTexture(planes.textures[planeIndex].texture, index: planeIndex)
+                }
+
+                // The dimensions of the first plane's texture are passed to the fragment shader at index 3.
+                let firstPlane = planes.textures[0].texture
+                var alphaSize = simd_uint2(UInt32(firstPlane.width), UInt32(firstPlane.height))
+                encoder.setFragmentBytes(&alphaSize, length: 8, index: 3)
+
+                encoder.drawPrimitives(type: .triangle, vertexStart: 0, vertexCount: 6)
+                encoder.endEncoding()
+
+                // The drawable is presented on the main queue once the GPU work has finished,
+                // and released right after.
+                var pendingDrawable: MetalImageLayer.Drawable? = drawable
+                commandBuffer.addCompletedHandler { _ in
+                    DispatchQueue.main.async {
+                        autoreleasepool {
+                            pendingDrawable?.present(completion: completion)
+                            pendingDrawable = nil
+                        }
+                    }
+                }
+
+                commandBuffer.commit()
+
+                let encodeDuration = CFAbsoluteTimeGetCurrent() - encodeStart
+                if encodeDuration > frameBudget {
+                    print("commit lag \(encodeDuration * 1000.0)")
+                }
+            }
+        }
+    }
+}