Handle gzipped UTF8 encoded byte array of Customer xml data

I am currently getting output in the format below.

<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <soap:Body>
    <Customer_GetResponse xmlns="http://www.magentaretail.com.au/">
      <Customer_GetResult>
        H4sIAAAAAAAACs1XW3PiNhR+z0z+g8fPdWzZZsEZ8NQ2l9IhkMWk2310bCWosS1GEiH8+x4Z4xubnTQPbQlk+I
        7O5ZPORWYY7LmgGWYhFspblub89o2TkboVYner64fD4eZg3VD2rJuGgfQ/7xZhvMVZpJGciyiPsapM3gSLYjHb
        k2SkTsdTZPiOr5l2z9LsycDX/IE91bygNxgE07H/xQlU9/pKUYbnyPxzcQsnDTclbkgCmmDXKF7mUG9JL3TXOK
        Ysub5qao3U0lhVFvQYpeK43GePmI1UVQn3LI8yUPmd5pirypQwLpaF5C5iR1CI0r2IBKE5SBhoeEnCMOdopPYM
        ZYZha1gJRSU3pdfyuwXfr69KYI/U8JjkGHzeUy7ighgcigExIACAZfhNVQK6zwU7jlQPdsCilESqMsYpecXsCD
        FrUAQ6AxmpAhCpBnUsoHIWlgEh51lE0pGawVZv/pIn8GtOdzRPbmKaqfdbkEitO/pI0uoY5Qv8r/yRipxBTzMQ
        vDeGcVu8ZeSMPrNotyVxSbjCZsmiEpS8K1xSr3Cvg790cL+DBx3sAG7yMTrrCHUZofJca0GXI+qSRF2WqEsTdX
        miLlHkdImYBdcZzhNZqYqsjJzjXMwgg1CLRiUYQy43RFYscgzjIh2rnVjtRZnoClcpBRZ0SQX4EtD8UtBoHvD8
        TNnx1ESL1XdvsfkO5RsxCNvuMGBzHkD7x7ZdMVkwtFk6T4pqijjAjvn1VVvscU5jUvSd3N87e7tnJMY+pS/nSD
        6DmbI9IceBvYFtFYlhQEDBNBC4cjTD3CDz1kK3dg/il6MBWoMVR1rrmb1GyIdNsACmdzSplOwLpXuGnzAcUoz5
        SD0BrmzJ81Z7hXGCE4UInMEsOVMbEx49ppIbGBepgizNy53/iK7i+cuiH5XyU/tan0PL03asYGD0LFPzDHOs2Y
        FnaAMHTTUDprs96U+s/tiXCT8NndXTGnOSSGOYPw8yVewFC5I/Ax0CRYfaopqhiQqGg8YZzPlsj7kojOb8VG9/
        YEaeiNwnalYFtBxPnvgT/IMZrddTvdEOX6UzqAauyH3ZZr+vKmeZlJjg0cv5QTbLNEo5/qAjy3LajhDcU590ZR
        sXrqza1YbtP+4JXXiyP0vKvHDV/5mr8rrCvBLVwoaoCMpjRqAyaO7+BqU31JuSlmp5zbrFLTvUz7ClU129rrx5
        h3qNW2ollQXJMXJRTwkhsrJOhnpr4T0Ty52tJ5NluFjdT8KWjdW2mXMKkywhr4TDboqHDe9B+7oYD/UfLHVNy4
        YqzQqTpqilLm/oQmqjHhrqFWwrybvYNfoIhoH8A71C0lI6NZlr2LVWKWqk8rznf5Teb5S9/DvpDWi2i/Kj6/kB
        XHBsR1lxCcBzX7nwk2I41UIoPlgLprvArzhVzF/gMRAmsmLC8bfW368ifz0PfW85+b+VEFwF/0EJVZJqatTP6a
        cfCRWUCjWCXyvu3/Hcf8K7DAAA
      </Customer_GetResult>
    </Customer_GetResponse>
  </soap:Body>
</soap:Envelope>

Is there a function within Celigo (one that Celigo supports) that I can use? A gzip helper does not seem to be available in Celigo.

Ideally we would be able to use the zlib library, but we're not able to as of now. For now, you could do something like this in a script. You then could pull the xml out into JSON format to use. You may be able to use our xml to json api as well: https://docs.celigo.com/hc/en-us/articles/38955365263771-XML-to-JSON-API-endpoints.

/**
 * preSavePage hook: for every record whose `data` property is a non-empty
 * string, treats that string as base64-encoded gzip data, decompresses it and
 * stores the UTF-8 decoded text on the record as `decodedXml`.
 *
 * Everything (base64, gzip header parsing, DEFLATE, UTF-8) is implemented in
 * plain ES5 so the script does not depend on atob, Buffer or zlib.
 *
 * @param {Object} [options]
 * @param {Array|*} [options.data]   Records of the current page. A non-array
 *                                   value is returned untouched.
 * @param {Array}  [options.errors]  Existing errors; decode failures are
 *                                   appended to this same array.
 * @returns {{data: *, errors: Array, abort: boolean, newErrorsAndRetryData: Array}}
 *          Records (mutated in place with `decodedXml` when decoding
 *          succeeded) plus the error list. A record that fails to decode is
 *          still returned in `data` and additionally reported in `errors`
 *          with code `gzip_decode_failed`.
 */
function preSavePage(options) {
    // ========= Base64 (plain JS; no atob/Buffer) =========
    var B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    var b64Lookup = (function () {
        var table = {};
        for (var i = 0; i < B64_ALPHABET.length; i++) table[B64_ALPHABET.charAt(i)] = i;
        return table;
    })();

    /**
     * Decodes standard or URL-safe base64 into an array of byte values.
     * Whitespace is ignored and trailing '=' padding is optional.
     *
     * @param {*} b64 value to decode (coerced with String()).
     * @returns {number[]} decoded bytes (0-255).
     * @throws {Error} 'base64 length not multiple of 4' for an impossible
     *         length, 'bad base64' for any character outside the alphabet
     *         (including '=' anywhere but the end).
     */
    function b64ToBytes(b64) {
        var s = String(b64).replace(/\s+/g, '').replace(/-/g, '+').replace(/_/g, '/');

        // Peel off at most two trailing '=' so only real alphabet characters remain.
        var end = s.length;
        var pad = 0;
        while (pad < 2 && end > 0 && s.charAt(end - 1) === '=') { end--; pad++; }

        // One leftover character can never encode a byte. When padding is
        // present it must complete the final quad; when absent (common for
        // URL-safe base64) a 2- or 3-character tail is accepted.
        if (end % 4 === 1 || (pad > 0 && (end + pad) % 4 !== 0)) {
            throw new Error('base64 length not multiple of 4');
        }

        var out = [];
        var acc = 0, nbits = 0;
        for (var i = 0; i < end; i++) {
            var v = b64Lookup[s.charAt(i)];
            if (v === undefined) throw new Error('bad base64');
            acc = (acc << 6) | v;
            nbits += 6;
            if (nbits >= 8) {
                nbits -= 8;
                out.push((acc >>> nbits) & 255);
                acc &= (1 << nbits) - 1; // keep only the bits not yet emitted
            }
        }
        return out;
    }

    // ========= UTF-8 bytes -> JS string (plain JS) =========
    /**
     * Decodes UTF-8 bytes into a JS (UTF-16) string. A byte that cannot start
     * a sequence, or a sequence with a missing/invalid continuation byte, is
     * replaced by U+FFFD and decoding resumes at the following byte.
     *
     * @param {number[]} bytes
     * @returns {string}
     */
    function utf8FromBytes(bytes) {
        var out = '';
        var i = 0;
        var n = bytes.length;
        while (i < n) {
            var lead = bytes[i];
            var extra; // number of continuation bytes expected, -1 = invalid lead
            var cp;
            if (lead < 0x80) { extra = 0; cp = lead; }
            else if ((lead & 0xe0) === 0xc0) { extra = 1; cp = lead & 0x1f; }
            else if ((lead & 0xf0) === 0xe0) { extra = 2; cp = lead & 0x0f; }
            else if ((lead & 0xf8) === 0xf0) { extra = 3; cp = lead & 0x07; }
            else { extra = -1; cp = 0; }

            var valid = extra >= 0;
            for (var k = 1; valid && k <= extra; k++) {
                var cont = bytes[i + k];
                if (cont === undefined || (cont & 0xc0) !== 0x80) valid = false;
                else cp = (cp << 6) | (cont & 0x3f);
            }

            if (!valid || cp > 0x10ffff) {
                out += '\uFFFD';
                i += 1;
                continue;
            }

            i += extra + 1;
            if (cp < 0x10000) {
                out += String.fromCharCode(cp);
            } else {
                // Outside the BMP: emit a UTF-16 surrogate pair.
                cp -= 0x10000;
                out += String.fromCharCode(0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3ff));
            }
        }
        return out;
    }

    // ========= GZIP (header skip) + raw DEFLATE (plain JS) =========
    /**
     * Skips the gzip member header and inflates the DEFLATE stream behind it.
     * The CRC32/ISIZE trailer is not verified.
     *
     * @param {number[]} bytes complete gzip member.
     * @returns {number[]} decompressed bytes.
     * @throws {Error} on bad magic, unsupported method, truncated header or
     *         any DEFLATE error raised by inflateRaw.
     */
    function gunzip(bytes) {
        var n = bytes.length;

        // Returns the position just after the NUL that ends the string at `pos`.
        // Bounded by the input length so a truncated header cannot loop forever.
        function skipZeroTerminated(pos) {
            while (pos < n && bytes[pos] !== 0) pos++;
            if (pos >= n) throw new Error('truncated gzip header');
            return pos + 1;
        }

        if (bytes[0] !== 0x1f || bytes[1] !== 0x8b) throw new Error('not gzip (magic)');
        if (bytes[2] !== 8) throw new Error('unsupported gzip method');
        if (n < 10) throw new Error('truncated gzip header');

        var flg = bytes[3];
        var p = 10; // magic(2) + method(1) + flags(1) + mtime(4) + xfl(1) + os(1)

        if (flg & 4) { // FEXTRA: 2-byte little-endian length, then that many bytes
            if (p + 2 > n) throw new Error('truncated gzip header');
            p += 2 + (bytes[p] | (bytes[p + 1] << 8));
        }
        if (flg & 8) p = skipZeroTerminated(p);  // FNAME
        if (flg & 16) p = skipZeroTerminated(p); // FCOMMENT
        if (flg & 2) p += 2;                     // FHCRC
        if (p > n) throw new Error('truncated gzip header');

        // optional: CRC32/ISIZE follow (ignored)
        return inflateRaw(bytes, p).out;
    }

    // ---- Deflate constants ----
    // Base value and number of extra bits for length symbols 257..285 and
    // distance symbols 0..29.
    var LEN_BASE  = [3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258];
    var LEN_EXTRA = [0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0];
    var DST_BASE  = [1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577];
    var DST_EXTRA = [0,0,0,0,1,1,2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,  9,  9,  10, 10, 11, 11, 12,   12,   13,   13];

    /**
     * Builds a binary decoding tree for a canonical Huffman code.
     *
     * @param {number[]} lengths code length per symbol (0 = symbol unused).
     * @param {number} maxBits largest code length that may occur.
     * @returns {{l: Object, r: Object, sym: (number|undefined)}} tree root;
     *          `l` is followed on a 0 bit, `r` on a 1 bit, leaves carry `sym`.
     * @throws {Error} 'invalid huffman code lengths' when two codes collide.
     */
    function buildHuffmanTree(lengths, maxBits) {
        var n = lengths.length, count = [], i, len;
        for (i = 0; i <= maxBits; i++) count[i] = 0;
        for (i = 0; i < n; i++) {
            if (lengths[i]) count[lengths[i]] = (count[lengths[i]] || 0) + 1;
        }

        // First code value of each length.
        var nextCode = [], first = 0;
        for (len = 1; len <= maxBits; len++) {
            first = (first + (count[len - 1] || 0)) << 1;
            nextCode[len] = first;
        }

        var root = { l: null, r: null, sym: undefined };
        for (i = 0; i < n; i++) {
            len = lengths[i];
            if (!len) continue;
            var code = nextCode[len]++;
            var node = root;
            // Codes are consumed most-significant bit first, so walk from the top bit.
            for (var b = len - 1; b >= 0; b--) {
                if (node.sym !== undefined) throw new Error('invalid huffman code lengths');
                var side = ((code >> b) & 1) ? 'r' : 'l';
                if (!node[side]) node[side] = { l: null, r: null, sym: undefined };
                node = node[side];
            }
            if (node.sym !== undefined || node.l || node.r) throw new Error('invalid huffman code lengths');
            node.sym = i;
        }
        return root;
    }

    // Trees for fixed-Huffman blocks never change; build them once on first use.
    var fixedTrees = null;
    function getFixedTrees() {
        if (!fixedTrees) {
            var ll = [], dd = [], i;
            for (i = 0; i <= 287; i++) {
                if (i <= 143) ll[i] = 8;
                else if (i <= 255) ll[i] = 9;
                else if (i <= 279) ll[i] = 7;
                else ll[i] = 8;
            }
            for (i = 0; i <= 31; i++) dd[i] = 5;
            fixedTrees = { lit: buildHuffmanTree(ll, 9), dist: buildHuffmanTree(dd, 5) };
        }
        return fixedTrees;
    }

    /**
     * Inflates a raw DEFLATE stream (stored, fixed and dynamic blocks).
     *
     * @param {number[]} bytes input bytes.
     * @param {number} [start=0] index of the first DEFLATE byte.
     * @returns {{out: number[], pos: number}} decompressed bytes and the index
     *          of the first input byte after the stream.
     * @throws {Error} when the stream is malformed or ends prematurely.
     */
    function inflateRaw(bytes, start) {
        var p = start || 0;
        var end = bytes.length;
        var bitbuf = 0, bitcnt = 0;
        var out = [];

        // Reading past the input is an error; padding with zero bits instead
        // could keep decoding a truncated stream indefinitely.
        function nextByte() {
            if (p >= end) throw new Error('unexpected end of deflate stream');
            return bytes[p++];
        }
        // Returns the next n bits (least-significant bit first); n may be 0.
        function readBits(n) {
            while (bitcnt < n) {
                bitbuf |= nextByte() << bitcnt;
                bitcnt += 8;
            }
            var v = bitbuf & ((1 << n) - 1);
            bitbuf >>>= n;
            bitcnt -= n;
            return v;
        }
        function decodeSym(tree) {
            var node = tree;
            while (node.sym === undefined) {
                node = readBits(1) ? node.r : node.l;
                if (!node) throw new Error('invalid huffman code');
            }
            return node.sym;
        }

        // Decodes literals and length/distance pairs until the end-of-block symbol.
        function decodeBlock(litTree, distTree) {
            for (;;) {
                var sym = decodeSym(litTree);
                if (sym < 256) { out.push(sym); continue; }
                if (sym === 256) return; // end of block

                var lenSym = sym - 257;
                if (lenSym >= LEN_BASE.length) throw new Error('bad length symbol');
                var length = LEN_BASE[lenSym] + readBits(LEN_EXTRA[lenSym]);

                var distSym = decodeSym(distTree);
                if (distSym >= DST_BASE.length) throw new Error('bad distance symbol');
                var distance = DST_BASE[distSym] + readBits(DST_EXTRA[distSym]);
                if (distance > out.length) throw new Error('invalid distance');

                // Copy byte by byte: source and destination may overlap.
                var from = out.length - distance;
                for (var i = 0; i < length; i++) out.push(out[from + i]);
            }
        }

        // Reads the code-length tables of a dynamic block and builds both trees.
        function readDynamicTrees() {
            var hlit  = readBits(5) + 257;
            var hdist = readBits(5) + 1;
            var hclen = readBits(4) + 4;

            var order = [16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15];
            var clen = [], m;
            for (m = 0; m < 19; m++) clen[m] = 0;
            for (m = 0; m < hclen; m++) clen[order[m]] = readBits(3);
            var clenTree = buildHuffmanTree(clen, 7);

            var total = hlit + hdist;
            var lens = [];
            var idx = 0;
            while (idx < total) {
                var sym = decodeSym(clenTree);
                if (sym < 16) { lens[idx++] = sym; continue; }

                var fill = 0, rep;
                if (sym === 16) {          // repeat previous length 3-6 times
                    if (idx === 0) throw new Error('bad code length sym');
                    fill = lens[idx - 1];
                    rep = readBits(2) + 3;
                } else if (sym === 17) {   // 3-10 zeros
                    rep = readBits(3) + 3;
                } else if (sym === 18) {   // 11-138 zeros
                    rep = readBits(7) + 11;
                } else {
                    throw new Error('bad code length sym');
                }
                if (idx + rep > total) throw new Error('bad code length sym');
                while (rep-- > 0) lens[idx++] = fill;
            }

            return {
                lit: buildHuffmanTree(lens.slice(0, hlit), 15),
                dist: buildHuffmanTree(lens.slice(hlit, total), 15)
            };
        }

        var last = 0;
        while (!last) {
            last = readBits(1);
            var btype = readBits(2);
            if (btype === 0) {
                // Stored block: drop the rest of the current byte, then LEN/NLEN.
                bitbuf = 0; bitcnt = 0;
                var len  = nextByte() | (nextByte() << 8);
                var nlen = nextByte() | (nextByte() << 8);
                if ((len ^ 0xFFFF) !== nlen) throw new Error('stored block LEN/NLEN mismatch');
                if (p + len > end) throw new Error('unexpected end of deflate stream');
                for (var k = 0; k < len; k++) out.push(bytes[p++]);
            } else if (btype === 1) {
                var fixed = getFixedTrees();
                decodeBlock(fixed.lit, fixed.dist);
            } else if (btype === 2) {
                var dyn = readDynamicTrees();
                decodeBlock(dyn.lit, dyn.dist);
            } else {
                throw new Error('unsupported btype');
            }
        }

        return { out: out, pos: p };
    }

    /**
     * base64 text -> gzip bytes -> decompressed bytes -> UTF-8 decoded string.
     *
     * @param {string} b64
     * @returns {string}
     * @throws {Error} when the input is not valid base64 or not a gzip member.
     */
    function gunzipBase64ToString(b64) {
        var bytes = b64ToBytes(b64);
        // quick gzip magic check
        if (!(bytes[0] === 0x1f && bytes[1] === 0x8b)) {
            throw new Error('input is not gzip (missing 0x1f8b)');
        }
        return utf8FromBytes(gunzip(bytes));
    }

    // ===== main preSavePage =====
    var inData = options && options.data ? options.data : [];
    var errors = options && options.errors ? options.errors : [];
    var outData = [];

    if (Array.isArray(inData)) {
        for (var i = 0; i < inData.length; i++) {
            var rec = inData[i];
            try {
                if (rec && typeof rec.data === 'string' && rec.data.length > 0) {
                    rec.decodedXml = gunzipBase64ToString(rec.data);
                }
            } catch (e) {
                // Report the failure but keep processing the rest of the page.
                errors.push({
                    code: 'gzip_decode_failed',
                    message: e && e.message ? e.message : String(e),
                    source: 'preSavePage',
                    record: rec
                });
            }
            outData.push(rec);
        }
    } else {
        outData = inData;
    }

    return {
        data: outData,
        errors: errors,
        abort: false,
        newErrorsAndRetryData: []
    };
}

1 Like

Thank you Tyler! This solves my problem! Was thinking this is not a possibility.

1 Like