Check if a file is compressed, browser side
Is there any way to check whether a file is compressed by reading its first bytes?
E.g., by reading the first 4 bytes, this function returns true if the file is compressed with zip:
/**
 * Report whether a buffer starts with one of the ZIP record signatures.
 *
 * Recognised signatures (first four bytes):
 *   50 4B 03 04 - regular archive
 *   50 4B 05 06 - empty archive
 *   50 4B 07 08 - spanned archive
 *
 * @param {ArrayBuffer} buffer Raw bytes taken from the start of the file.
 * @returns {boolean} true when the first four bytes form a ZIP signature;
 *   false otherwise, including when fewer than four bytes are available.
 */
function isZip(buffer) {
  // Creating a 4-byte view over a shorter buffer would throw a RangeError.
  if (!buffer || buffer.byteLength < 4) {
    return false;
  }
  const bytes = new Uint8Array(buffer, 0, 4);
  // Every ZIP signature starts with the ASCII letters "PK".
  if (bytes[0] !== 0x50 || bytes[1] !== 0x4b) {
    return false;
  }
  // Bytes 2 and 3 must match as a pair; checking them independently would
  // also accept combinations such as 03 06 that are not ZIP signatures.
  return (
    (bytes[2] === 0x03 && bytes[3] === 0x04) ||
    (bytes[2] === 0x05 && bytes[3] === 0x06) ||
    (bytes[2] === 0x07 && bytes[3] === 0x08)
  );
}
/**
 * Change handler for the file input with id "file".
 *
 * Reads the leading bytes of the first selected file and logs the file name
 * together with the result of isZip() for those bytes.
 */
function read() {
  const input = document.getElementById("file");
  const file = input.files[0];
  // files[0] is undefined when no file is selected; readAsArrayBuffer would
  // throw on it, so there is nothing to do in that case.
  if (!file) {
    return;
  }
  const reader = new FileReader();
  reader.onload = function () {
    console.log(file.name, isZip(reader.result));
  };
  reader.onerror = function () {
    // Surface read failures instead of dropping them silently.
    console.error(file.name, reader.error);
  };
  // Only the first four bytes are inspected, so avoid loading the whole file.
  reader.readAsArrayBuffer(file.slice(0, 4));
}
<!-- File picker: its change event invokes read() -->
<input type="file" id="file" onchange="read()">
But how can I detect every (or at least the most popular) compression format (deflate, gzip, 7zip)?
Solution 1:[1]
/** A magic-byte pattern and the file offset at which it must appear. */
interface CompressionSignature {
  readonly offset: number;
  readonly magic: readonly number[];
}

/**
 * Byte signatures of compressed / archive formats.
 * Based on: https://en.wikipedia.org/wiki/List_of_file_signatures
 */
const COMPRESSION_SIGNATURES: readonly CompressionSignature[] = [
  // Compressed file (often tar zip) using Lempel-Ziv-Welch algorithm
  { offset: 0, magic: [0x1f, 0x9d] },
  // Compressed file (often tar zip) using LZH algorithm
  { offset: 0, magic: [0x1f, 0xa0] },
  // Compressed file using Bzip2 algorithm
  { offset: 0, magic: [0x42, 0x5a, 0x68] },
  // Compressed file using Rob Northen Compression algorithm version 1
  { offset: 0, magic: [0x52, 0x4e, 0x43, 0x01] },
  // Compressed file using Rob Northen Compression algorithm version 2
  { offset: 0, magic: [0x52, 0x4e, 0x43, 0x02] },
  // lzip compressed file
  { offset: 0, magic: [0x4c, 0x5a, 0x49, 0x50] },
  // Roshal ARchive compressed archive v1.50 onwards
  { offset: 0, magic: [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x00] },
  // Roshal ARchive compressed archive v5.00 onwards
  { offset: 0, magic: [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00] },
  // 7-Zip File Format
  { offset: 0, magic: [0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c] },
  // GZIP compressed file
  { offset: 0, magic: [0x1f, 0x8b] },
  // XZ compression utility using LZMA2 compression
  { offset: 0, magic: [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00] },
  // Lepton compressed JPEG image
  { offset: 0, magic: [0xcf, 0x84, 0x01] },
  // Zstandard compressed file
  { offset: 0, magic: [0x28, 0xb5, 0x2f, 0xfd] },
  // QuickZip rs compressed archive
  { offset: 0, magic: [0x52, 0x53, 0x56, 0x4b, 0x44, 0x41, 0x54, 0x41] },
  // ACE (compressed file format): the "**ACE**" marker follows a 7-byte
  // header prefix, so it is matched at offset 7 rather than offset 0.
  { offset: 7, magic: [0x2a, 0x2a, 0x41, 0x43, 0x45, 0x2a, 0x2a] },
  // Windows 3.1x Compressed File
  { offset: 0, magic: [0x4b, 0x57, 0x41, 0x4a] },
  // Windows 9x Compressed File ("SZDD"); this 4-byte prefix also matches the
  // longer 53 5A 44 44 88 F0 27 33 Microsoft compressed-file signature.
  { offset: 0, magic: [0x53, 0x5a, 0x44, 0x44] },
  // Compressed ISO image
  { offset: 0, magic: [0x49, 0x73, 0x5a, 0x21] },
  // 3D model compressed with Google Draco
  { offset: 0, magic: [0x44, 0x52, 0x41, 0x43, 0x4f] },
  // Slob (sorted list of blobs) is a read-only, compressed data store
  { offset: 0, magic: [0x21, 0x2d, 0x31, 0x53, 0x4c, 0x4f, 0x42, 0x1f] },
  // zip file format and formats based on it, such as EPUB, JAR, ODF, OOXML
  { offset: 0, magic: [0x50, 0x4b, 0x03, 0x04] },
  // zip file format empty archive
  { offset: 0, magic: [0x50, 0x4b, 0x05, 0x06] },
  // zip file format spanned archive
  { offset: 0, magic: [0x50, 0x4b, 0x07, 0x08] },
];

/** Number of leading bytes needed to test every signature in the table. */
const SIGNATURE_PROBE_LENGTH = Math.max(
  ...COMPRESSION_SIGNATURES.map(({ offset, magic }) => offset + magic.length),
);

/**
 * Return true if the file carries the byte signature of a known compressed
 * or archive format.
 *
 * Only the leading bytes of the file are read and compared against the
 * signature table, so a match means "starts like a compressed file"; the rest
 * of the content is not validated.
 *
 * @param file File (or any Blob-like object) to inspect.
 * @returns A promise resolving to true when any known signature matches, and
 *   to false for a missing/empty file or when nothing matches. Files shorter
 *   than a given signature simply do not match that signature.
 */
export async function isCompressedSignature(file: File): Promise<boolean> {
  if (!file || file.size === 0) {
    return false;
  }
  // slice() clamps to the file size, so short files yield a short buffer.
  const buffer = await file.slice(0, SIGNATURE_PROBE_LENGTH).arrayBuffer();
  const bytes = new Uint8Array(buffer);

  return COMPRESSION_SIGNATURES.some(
    ({ offset, magic }) =>
      // A signature that does not fit in the available bytes cannot match.
      offset + magic.length <= bytes.length &&
      magic.every((expected, index) => bytes[offset + index] === expected),
  );
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | DarkBee |
