How to compress a C++ string using Zstd?

I'm very new to C++ and I want to compress a std::string object using the Zstd compression library, but so far I couldn't find any C++ sample code for this purpose by googling. I've found example C code, but it works with C-style character arrays instead of a std::string object.

Example C code: https://github.com/facebook/zstd/blob/dev/examples/simple_compression.c

/*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include <stdio.h>     // printf
#include <stdlib.h>    // free
#include <string.h>    // strlen, strcat, memset
#include <zstd.h>      // presumes zstd library is installed
#include "common.h"    // Helper functions, CHECK(), and CHECK_ZSTD()

/* Compresses the file `fname` in a single ZSTD_compress() call (level 1) and
 * writes the result to `oname`. The *_orDie helpers and CHECK_ZSTD() come from
 * common.h. */
static void compress_orDie(const char* fname, const char* oname)
{
    /* Load the whole input; the helper stores the loaded size in srcSize. */
    size_t srcSize;
    void* const src = mallocAndLoadFile_orDie(fname, &srcSize);

    /* Size the destination from ZSTD_compressBound() so one call suffices. */
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc_orDie(dstCapacity);

    /* One-shot compression. When compressing many inputs, reusing a context
     * is preferable (see the multiple_simple_compression.c example). */
    size_t const dstSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 1);
    CHECK_ZSTD(dstSize);

    saveFile_orDie(oname, dst, dstSize);

    /* Report: input name, original size, compressed size, output name. */
    printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)srcSize, (unsigned)dstSize, oname);

    free(src);
    free(dst);
}

/* Returns a newly allocated string holding `filename` followed by ".zst".
 * The caller releases it with free(). */
static char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";
    size_t const nameLen = strlen(filename);
    /* sizeof(suffix) is 5: the four suffix characters plus the terminating NUL. */
    size_t const total = nameLen + sizeof(suffix);
    char* const result = (char*)malloc_orDie(total);

    memcpy(result, filename, nameLen);
    memcpy(result + nameLen, suffix, sizeof(suffix));
    return result;
}

/* Entry point: expects exactly one argument (the file to compress) and writes
 * the compressed data next to it as FILE.zst. */
int main(int argc, const char** argv)
{
    const char* const program = argv[0];

    /* Anything other than exactly one FILE argument is a usage error. */
    if (argc != 2) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE\n", program);
        return 1;
    }

    const char* const source = argv[1];
    char* const target = createOutFilename_orDie(source);

    compress_orDie(source, target);

    free(target);
    return 0;
}

My question is: could anyone direct me to sample code or a snippet showing how to compress a C++ string using Zstd?



Solution 1:[1]

It seems that the newest version of the Boost library (version 1.70.0) has added support for Zstd compression to its iostreams submodule. I managed to put together the following C++ code snippet, but older versions of Boost don't seem to support Zstd compression (I'm using Boost 1.67.0 on Debian 10, which does not have Zstd support).

The code I could manage to assemble at the moment is like this (it is based on the code from here):

    #include <iostream>
    #include <algorithm>
    #include <string>

    #include <sstream>
    #include <boost/iostreams/filtering_streambuf.hpp>
    #include <boost/iostreams/copy.hpp>
    #include <boost/iostreams/filter/zstd.hpp>


    std::string compress(std::string& data)
        {
            namespace bio = boost::iostreams;

            std::stringstream compressed;
            std::stringstream origin(data);

            bio::filtering_streambuf<bio::input> out;
            out.push(bio::zstd_compressor(bio::zstd_params(bio::zstd::default_compression)));

            out.push(origin);
            bio::copy(out, compressed);

            return compressed.str();
    }

Solution 2:[2]

Using string::c_str() you can get a pointer to an array that contains a null-terminated sequence of characters (a C-string) representing the current value of the string object.

Solution 3:[3]

namespace util {

int Util::CompressString(const string& src, string& dst, int compressionlevel) {
  size_t const cBuffSize = ZSTD_compressBound(src.size());
  dst.resize(cBuffSize);
  auto dstp = const_cast<void*>(static_cast<const void*>(dst.c_str()));
  auto srcp = static_cast<const void*>(src.c_str());
  size_t const cSize = ZSTD_compress(dstp, cBuffSize, srcp, src.size(), compressionlevel);
  auto code = ZSTD_isError(cSize);
  if (code) {
    return code;
  }
  dst.resize(cSize);
  return code;
}

int Util::DecompressString(const string& src, string& dst) {
  size_t const cBuffSize = ZSTD_getFrameContentSize(src.c_str(), src.size());

  if (0 == cBuffSize) {
    return cBuffSize;
  }

  if (ZSTD_CONTENTSIZE_UNKNOWN == cBuffSize) {
    return StreamDecompressString(src, dst);
  }

  if (ZSTD_CONTENTSIZE_ERROR == cBuffSize) {
    return -2;
  }

  dst.resize(cBuffSize);
  auto dstp = const_cast<void*>(static_cast<const void*>(dst.c_str()));
  auto srcp = static_cast<const void*>(src.c_str());
  size_t const cSize = ZSTD_decompress(dstp, cBuffSize, srcp, src.size());
  auto code = ZSTD_isError(cSize);
  if (code) {
    return code;
  }
  dst.resize(cSize);
  return code;
}

int Util::StreamCompressString(const string& src, string& dst, int compressionlevel) {
  size_t const buffInSize = ZSTD_CStreamInSize();
  string buffInTmp;
  buffInTmp.reserve(buffInSize);
  auto buffIn = const_cast<void*>(static_cast<const void*>(buffInTmp.c_str()));

  auto buffOutSize = ZSTD_CStreamOutSize();
  string buffOutTmp;
  buffOutTmp.reserve(buffOutSize);
  auto buffOut = const_cast<void*>(static_cast<const void*>(buffOutTmp.c_str()));

  ZSTD_CCtx* const cctx = ZSTD_createCCtx();
  ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionlevel);

  size_t const toRead = buffInSize;
  auto local_pos = 0;
  auto buff_tmp = const_cast<char*>(buffInTmp.c_str());
  for (;;) {
    size_t read = src.copy(buff_tmp, toRead, local_pos);
    local_pos += read;

    int const lastChunk = (read < toRead);
    ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;

    ZSTD_inBuffer input = {buffIn, read, 0};
    int finished;

    do {
      ZSTD_outBuffer output = {buffOut, buffOutSize, 0};
      size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
      dst.insert(dst.end(), buffOutTmp.begin(), buffOutTmp.begin() + output.pos);
      finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
    } while (!finished);

    if (lastChunk) {
      break;
    }
  }

  ZSTD_freeCCtx(cctx);

  return 0;
}

int Util::StreamDecompressString(const string& src, string& dst, int compressionlevel) {
  size_t const buffInSize = ZSTD_DStreamInSize();
  string buffInTmp;
  buffInTmp.reserve(buffInSize);
  auto buffIn = const_cast<void*>(static_cast<const void*>(buffInTmp.c_str()));

  auto buffOutSize = ZSTD_DStreamOutSize();
  string buffOutTmp;
  buffOutTmp.reserve(buffOutSize);
  auto buffOut = const_cast<void*>(static_cast<const void*>(buffOutTmp.c_str()));

  ZSTD_DCtx* const dctx = ZSTD_createDCtx();

  size_t const toRead = buffInSize;
  size_t read;
  size_t last_ret = 0;
  size_t local_pos = 0;
  auto buff_tmp = const_cast<char*>(buffInTmp.c_str());

  while ((read = src.copy(buff_tmp, toRead, local_pos))) {
    local_pos += read;
    ZSTD_inBuffer input = {buffIn, read, 0};
    while (input.pos < input.size) {
      ZSTD_outBuffer output = {buffOut, buffOutSize, 0};
      size_t const ret = ZSTD_decompressStream(dctx, &output, &input);
      dst.insert(dst.end(), buffOutTmp.begin(), buffOutTmp.begin() + output.pos);
      last_ret = ret;
    }
  }

  ZSTD_freeDCtx(dctx);

  if(last_ret != 0) {
    return -3;
  }

  return 0;
}

}  // namespace util

Solution 4:[4]

Here's the STL-functor style wrapper:

#ifndef ZSTD_UTILITY_H
#define ZSTD_UTILITY_H

#include <cassert>
#include <iterator>
#include <stdexcept>
#include <string>
#include <type_traits>

namespace zstd {
#include <zstd.h>
} // namespace zstd

// Inclusive bounds for the `level` argument of ZstdCompression::operator(),
// which asserts that the requested level lies within [kMinCLevel, kMaxCLevel].
static constexpr int kMinCLevel = 1;
static constexpr int kMaxCLevel = 22;
// Level used when the caller does not pass one explicitly.
static constexpr int kDefaultCLevel = kMinCLevel;

template<typename ChType>
struct ZstdCompression final {
    using char_type = typename std::char_traits<ChType>::char_type;
    using ContainerType = std::basic_string<char_type>;

    inline auto operator()(const char_type* input, const size_t inputSize, const int level = kDefaultCLevel) {
        assert(0 < inputSize);
        assert(kMinCLevel <= level);
        assert(kMaxCLevel >= level);

        const size_t required = zstd::ZSTD_compressBound(inputSize);
        ContainerType block(required, 0x0);
        const size_t actual = zstd::ZSTD_compress(block.data(), block.size(), input, inputSize, level);
        assert(!zstd::ZSTD_isError(actual));
        block.erase(block.begin() + actual, block.end());

        return block;
    }

    template<typename Container, typename IteratorValueType = typename std::iterator_traits<typename Container::iterator>::value_type>
    inline auto operator()(const Container& input, const int level = kDefaultCLevel) -> std::enable_if_t<std::is_same_v<char_type, IteratorValueType>, ContainerType> {
        return this->operator()(input.data(), input.size(), level);
    }
};

using ZstdCompressor = ZstdCompression<char>;

template<typename ChType>
struct ZstdDeCompression final {
    using char_type = typename std::char_traits<ChType>::char_type;
    using ContainerType = std::basic_string<char_type>;

    inline auto operator()(const char_type* input, const size_t inputSize) {
        assert(0 < inputSize);

        const size_t size = zstd::ZSTD_getFrameContentSize(input, inputSize);
        assert(ZSTD_CONTENTSIZE_ERROR != size);
        assert(ZSTD_CONTENTSIZE_UNKNOWN != size);

        ContainerType block(size, 0x0);
        const size_t actual = zstd::ZSTD_decompress(block.data(), block.size(), input, inputSize);
        assert(!zstd::ZSTD_isError(actual));
        assert(actual == size);

        return block;
    }

    template<typename Container, typename IteratorValueType = typename std::iterator_traits<typename Container::iterator>::value_type>
    inline auto operator()(const Container& input) -> std::enable_if_t<std::is_same_v<char_type, IteratorValueType>, ContainerType> {
        return this->operator()(input.data(), input.size());
    }
};

using ZstdDeCompressor = ZstdDeCompression<char>;

#endif // ZSTD_UTILITY_H

Solution 5:[5]

habedi's answer is concise and does the job beautifully.

For the sake of completeness, below you'll find the decompress function using boost 1.74.0-14 on Debian Bookworm.

//g++  test.cc -lboost_iostreams -o test
#include <iostream>
#include <string>
#include <sstream>

#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/zstd.hpp>


std::string compress(std::string& data)
    {
        namespace bio = boost::iostreams;

        std::stringstream compressed;
        std::stringstream origin(data);

        bio::filtering_streambuf<bio::input> out;
        out.push(bio::zstd_compressor(bio::zstd_params(bio::zstd::default_compression)));

        out.push(origin);
        bio::copy(out, compressed);

        return compressed.str();
}

std::string decompress(std::string& data)
    {
      namespace bio = boost::iostreams;

      std::stringstream decompressed;
      std::stringstream origin(data);

      bio::filtering_streambuf<bio::input> out;
       out.push(bio::zstd_decompressor(bio::zstd_params(bio::zstd::default_compression)));
      out.push(origin);
      bio::copy(out, decompressed);

      return decompressed.str();
}


// Demo driver: compresses a sample text, prints the compressed bytes, then
// decompresses them again, prints the result and reports a successful round trip.
int main(int argc, char *argv[])
{
  std::string text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

  // Compress and dump the raw (binary) result to stdout.
  std::string packed = compress(text);
  std::cout << packed << std::endl;

  // Decompress and dump the restored text.
  std::string unpacked = decompress(packed);
  std::cout << unpacked << std::endl;

  const bool roundTripOk = (text == unpacked);
  if (roundTripOk) {
    std::cout << "Successfully compressed/decompressed\n";
  }

  return 0;
}

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Hassan Abedi
Solution 2 πάντα ῥεῖ
Solution 3 fattycoder
Solution 4 IMAN4K
Solution 5 Gregorio Toscano