Check if a file is compressed (browser side)

Is there any way to check whether a file is compressed by reading its first bytes?

E.g., by reading the first 4 bytes, this function returns true if a file is compressed with zip:

/**
 * Tell whether a buffer starts with one of the ZIP magic numbers.
 *
 * Recognised signatures (all start with "PK"):
 *   50 4B 03 04, 50 4B 05 06 and 50 4B 07 08.
 *
 * @param buffer ArrayBuffer holding (at least) the first bytes of the file.
 * @returns true when the first 4 bytes are a ZIP signature, false otherwise
 *          (including when the buffer is missing or shorter than 4 bytes).
 */
function isZip(buffer) {
    // A signature is 4 bytes long; a shorter buffer cannot match, and building
    // a 4-byte view over it would throw a RangeError.
    if (!buffer || buffer.byteLength < 4) {
        return false;
    }
    const bytes = new Uint8Array(buffer, 0, 4);
    if (bytes[0] !== 0x50 || bytes[1] !== 0x4b) {
        return false;
    }
    // Bytes 3 and 4 must be checked as a pair: testing them independently would
    // also accept combinations such as 03 06 that are not ZIP signatures.
    return (bytes[2] === 0x03 && bytes[3] === 0x04)
        || (bytes[2] === 0x05 && bytes[3] === 0x06)
        || (bytes[2] === 0x07 && bytes[3] === 0x08);
}
    
/**
 * Handler for the file input: reads the start of the selected file and logs
 * the file name together with the result of isZip().
 */
function read() {
    const input = document.getElementById("file");
    const file = input.files[0];
    // files[0] is undefined when no file is selected (e.g. the selection was
    // cleared); readAsArrayBuffer would throw a TypeError in that case.
    if (!file) {
        return;
    }
    const reader = new FileReader();
    reader.onload = function () {
        console.log(file.name, isZip(reader.result));
    };
    // Only the 4 signature bytes are inspected, so avoid loading the whole
    // file into memory.
    reader.readAsArrayBuffer(file.slice(0, 4));
}
<input type="file" id="file" onchange="read()">

But how can I detect every (or at least the most common) compression format (deflate, gzip, 7zip)?



Solution 1:[1]

/** A magic number: the `bytes` expected at `offset` from the start of a file. */
interface CompressionSignature {
  readonly offset: number;
  readonly bytes: readonly number[];
}

/**
 * Known signatures of compressed files and archives.
 * Based on: https://en.wikipedia.org/wiki/List_of_file_signatures
 */
const COMPRESSION_SIGNATURES: readonly CompressionSignature[] = [
  // Compressed file (often tar zip) using Lempel-Ziv-Welch algorithm
  { offset: 0, bytes: [0x1f, 0x9d] },
  // Compressed file (often tar zip) using LZH algorithm
  { offset: 0, bytes: [0x1f, 0xa0] },
  // Compressed file using Bzip2 algorithm
  { offset: 0, bytes: [0x42, 0x5a, 0x68] },
  // Compressed file using Rob Northen Compression algorithm version 1
  { offset: 0, bytes: [0x52, 0x4e, 0x43, 0x01] },
  // Compressed file using Rob Northen Compression algorithm version 2
  { offset: 0, bytes: [0x52, 0x4e, 0x43, 0x02] },
  // lzip compressed file
  { offset: 0, bytes: [0x4c, 0x5a, 0x49, 0x50] },
  // Roshal ARchive compressed archive v1.50 onwards
  { offset: 0, bytes: [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x00] },
  // Roshal ARchive compressed archive v5.00 onwards
  { offset: 0, bytes: [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00] },
  // 7-Zip File Format
  { offset: 0, bytes: [0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c] },
  // GZIP compressed file
  { offset: 0, bytes: [0x1f, 0x8b] },
  // XZ compression utility using LZMA2 compression
  { offset: 0, bytes: [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00] },
  // Lepton compressed JPEG image
  { offset: 0, bytes: [0xcf, 0x84, 0x01] },
  // Zstandard compressed file
  { offset: 0, bytes: [0x28, 0xb5, 0x2f, 0xfd] },
  // QuickZip rs compressed archive
  { offset: 0, bytes: [0x52, 0x53, 0x56, 0x4b, 0x44, 0x41, 0x54, 0x41] },
  // ACE (compressed file format); the "**ACE**" marker sits at offset 7, not
  // at the start of the file.
  { offset: 7, bytes: [0x2a, 0x2a, 0x41, 0x43, 0x45, 0x2a, 0x2a] },
  // Windows 3.1x Compressed File
  { offset: 0, bytes: [0x4b, 0x57, 0x41, 0x4a] },
  // Windows 9x Compressed File ("SZDD"). This prefix also covers the longer
  // Microsoft Quantum signature 53 5A 44 44 88 F0 27 33.
  { offset: 0, bytes: [0x53, 0x5a, 0x44, 0x44] },
  // Compressed ISO image
  { offset: 0, bytes: [0x49, 0x73, 0x5a, 0x21] },
  // 3D model compressed with Google Draco
  { offset: 0, bytes: [0x44, 0x52, 0x41, 0x43, 0x4f] },
  // Slob (sorted list of blobs) is a read-only, compressed data store
  { offset: 0, bytes: [0x21, 0x2d, 0x31, 0x53, 0x4c, 0x4f, 0x42, 0x1f] },
  // zip file format and formats based on it, such as EPUB, JAR, ODF, OOXML
  { offset: 0, bytes: [0x50, 0x4b, 0x03, 0x04] },
  // zip file format empty archive
  { offset: 0, bytes: [0x50, 0x4b, 0x05, 0x06] },
  // zip file format spanned archive
  { offset: 0, bytes: [0x50, 0x4b, 0x07, 0x08] },
];

/** Number of leading bytes needed to test every signature in the table. */
const SIGNATURE_HEADER_LENGTH = Math.max(
  ...COMPRESSION_SIGNATURES.map(({ offset, bytes }) => offset + bytes.length),
);

/**
 * Return true if the file carries the byte signature of a known compressed
 * format.
 *
 * Only the first few bytes of the file are read. A signature matches when all
 * of its bytes are present at its offset; files too short to contain a given
 * signature simply do not match it, so short files are still checked against
 * the short signatures.
 *
 * Based on: https://en.wikipedia.org/wiki/List_of_file_signatures
 *
 * @param file File (or Blob-like object) to inspect.
 * @returns Promise resolving to true when a known signature is found, false
 *          otherwise (including for a missing or empty file).
 */
export async function isCompressedSignature(file: File): Promise<boolean> {
  if (!file || file.size === 0) {
    return false;
  }

  const buffer = await file.slice(0, SIGNATURE_HEADER_LENGTH).arrayBuffer();
  const header = new Uint8Array(buffer);

  return COMPRESSION_SIGNATURES.some(
    ({ offset, bytes }) =>
      // Skip signatures that do not fit in what was read instead of throwing.
      offset + bytes.length <= header.length &&
      bytes.every((expected, index) => header[offset + index] === expected),
  );
}

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 DarkBee