235 lines
7.7 KiB
Rust
235 lines
7.7 KiB
Rust
use std::sync::LazyLock;
|
|
|
|
/// Upload size ceiling in bytes (100 MiB); used as the default `max_size` of the validation config.
const MAX_FILE_SIZE: usize = 100 << 20;
|
|
|
|
/// Leading-byte signatures paired with the MIME type each one is reported as.
///
/// The table is consulted front to back and the first signature that prefixes
/// the data wins, so the order of the entries is significant.
///
/// NOTE(review): the `audio/mp4` entries starting with `00 00 00 20 66 74 79 70`
/// and `00 00 00 1C 66 74 79 70` share their first eight bytes with the shorter
/// `video/mp4` entries listed before them, so under first-match lookup they can
/// never be selected. The order is kept as-is to preserve current results.
static MAGIC_BYTES: LazyLock<Vec<(&'static [u8], &'static str)>> = LazyLock::new(|| {
    const SIGNATURES: &[(&[u8], &str)] = &[
        // Images
        (&[0xFF, 0xD8, 0xFF], "image/jpeg"),
        (&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A], "image/png"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
        (b"BM", "image/bmp"),
        (b"II*\x00", "image/tiff"),
        (b"MM\x00*", "image/tiff"),
        // Documents
        (b"%PDF-", "application/pdf"),
        // Archives and compressed streams
        (b"PK\x03\x04", "application/zip"),
        (b"PK\x05\x06", "application/zip"),
        (b"PK\x07\x08", "application/zip"),
        (b"Rar!\x1A\x07", "application/vnd.rar"),
        (&[0x1F, 0x8B, 0x08], "application/gzip"),
        (b"BZh", "application/x-bzip2"),
        (&[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], "application/x-xz"),
        (&[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C], "application/7z"),
        // Video
        (b"ftyp", "video/mp4"),
        (&[0x1A, 0x45, 0xDF, 0xA3], "video/webm"),
        (
            &[
                0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9, 0x00, 0xAA, 0x00,
                0x62, 0xCE, 0x6C,
            ],
            "video/asf",
        ),
        (&[0x00, 0x00, 0x00, 0x1C, 0x66, 0x74, 0x79, 0x70], "video/mp4"),
        (&[0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70], "video/mp4"),
        // Audio
        (b"ID3", "audio/mpeg"),
        (&[0xFF, 0xFB], "audio/mpeg"),
        (&[0xFF, 0xFA], "audio/mpeg"),
        (&[0xFF, 0xF3], "audio/mpeg"),
        (&[0xFF, 0xF2], "audio/mpeg"),
        (b"OggS", "audio/ogg"),
        (b"fLaC", "audio/flac"),
        (
            &[0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6F, 0x6D],
            "audio/mp4",
        ),
        (
            &[0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32],
            "audio/mp4",
        ),
        (
            &[0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32],
            "audio/mp4",
        ),
        (
            &[0x00, 0x00, 0x00, 0x1C, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6F, 0x6D],
            "audio/mp4",
        ),
        (b"RIFF", "audio/wav"),
        (&[0xE0, 0x00, 0x00, 0x00], "audio/aiff"),
    ];

    SIGNATURES.to_vec()
});
|
|
|
|
/// Settings that control which checks `validate_file_upload` performs.
#[derive(Debug, Clone)]
pub struct FileValidationConfig {
    /// Largest accepted payload, in bytes; anything longer is rejected.
    pub max_size: usize,
    /// MIME types the sniffed content type may have. An empty list disables
    /// the allow-list check.
    pub allowed_types: Vec<String>,
    /// When `true`, content that looks like an executable or script is rejected.
    pub block_executables: bool,
    /// When `true`, the content type is sniffed from the data and compared with
    /// the allow-list and with the declared Content-Type.
    pub check_magic_bytes: bool,
    /// When `true`, PDF uploads are scanned for risky constructs and a warning
    /// is recorded if any are found. The file itself is not modified.
    ///
    /// Public like every other field so callers outside this module can build
    /// a config with a struct literal or `..Default::default()` and toggle it.
    pub defang_pdf: bool,
}
|
|
|
|
impl Default for FileValidationConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
max_size: MAX_FILE_SIZE,
|
|
allowed_types: vec![
|
|
"image/jpeg".into(),
|
|
"image/png".into(),
|
|
"image/gif".into(),
|
|
"application/pdf".into(),
|
|
"text/plain".into(),
|
|
"application/zip".into(),
|
|
],
|
|
block_executables: true,
|
|
check_magic_bytes: true,
|
|
defang_pdf: true,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Outcome of validating a single upload.
#[derive(Debug, Clone)]
pub struct FileValidationResult {
    /// `false` as soon as at least one check produced an error.
    pub is_valid: bool,
    /// MIME type sniffed from the file content, when sniffing ran and
    /// recognised the data.
    pub detected_type: Option<String>,
    /// Human-readable reasons the upload was rejected.
    pub errors: Vec<String>,
    /// Human-readable findings that do not reject the upload on their own.
    pub warnings: Vec<String>,
}
|
|
|
|
pub fn validate_file_upload(
|
|
filename: &str,
|
|
content_type: &str,
|
|
data: &[u8],
|
|
config: &FileValidationConfig,
|
|
) -> FileValidationResult {
|
|
let mut result = FileValidationResult {
|
|
is_valid: true,
|
|
detected_type: None,
|
|
errors: Vec::new(),
|
|
warnings: Vec::new(),
|
|
};
|
|
|
|
if data.len() > config.max_size {
|
|
result.is_valid = false;
|
|
result.errors.push(format!(
|
|
"File size {} bytes exceeds maximum allowed size of {} bytes",
|
|
data.len(),
|
|
config.max_size
|
|
));
|
|
}
|
|
|
|
if let Some(extensions) = get_blocked_extensions() {
|
|
if let Some(ext) = filename.split('.').next_back() {
|
|
if extensions.contains(&ext.to_lowercase().as_str()) {
|
|
result.is_valid = false;
|
|
result.errors.push(format!(
|
|
"File extension .{} is blocked for security reasons",
|
|
ext
|
|
));
|
|
}
|
|
}
|
|
}
|
|
|
|
if config.check_magic_bytes {
|
|
if let Some(detected) = detect_file_type(data) {
|
|
result.detected_type = Some(detected.clone());
|
|
|
|
if !config.allowed_types.is_empty() && !config.allowed_types.contains(&detected) {
|
|
result.is_valid = false;
|
|
result.errors.push(format!(
|
|
"Detected file type '{}' is not in the allowed types list",
|
|
detected
|
|
));
|
|
}
|
|
|
|
if content_type != detected && !content_type.starts_with("text/plain") && !content_type.starts_with("application/octet-stream") {
|
|
result.warnings.push(format!(
|
|
"Content-Type header '{}' does not match detected file type '{}'",
|
|
content_type, detected
|
|
));
|
|
}
|
|
}
|
|
}
|
|
|
|
if config.block_executables && is_potentially_executable(data) {
|
|
result.is_valid = false;
|
|
result.errors.push(
|
|
"File appears to be executable or contains executable code, which is blocked".into(),
|
|
);
|
|
}
|
|
|
|
if config.defang_pdf && content_type == "application/pdf"
|
|
&& has_potential_malicious_pdf_content(data) {
|
|
result.warnings.push(
|
|
"PDF file may contain potentially malicious content (JavaScript, forms, or embedded files)".into(),
|
|
);
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
fn detect_file_type(data: &[u8]) -> Option<String> {
|
|
for (magic, mime_type) in MAGIC_BYTES.iter() {
|
|
if data.starts_with(magic) {
|
|
return Some(mime_type.to_string());
|
|
}
|
|
}
|
|
|
|
if data.starts_with(b"<") || data.starts_with(b"<!DOCTYPE") {
|
|
if data.to_ascii_lowercase().windows(5).any(|w| w == b"<html") {
|
|
return Some("text/html".into());
|
|
}
|
|
if data.windows(5).any(|w| w == b"<?xml") {
|
|
return Some("text/xml".into());
|
|
}
|
|
return Some("text/plain".into());
|
|
}
|
|
|
|
if data.iter().all(|&b| b.is_ascii() && !b.is_ascii_control()) {
|
|
return Some("text/plain".into());
|
|
}
|
|
|
|
None
|
|
}
|
|
|
|
/// Returns the lowercase file extensions (without the leading dot) that are
/// refused regardless of content. Always returns `Some`.
fn get_blocked_extensions() -> Option<Vec<&'static str>> {
    const BLOCKED: &[&str] = &[
        // Native binaries, installers and packages
        "exe", "dll", "so", "dylib", "app", "deb", "rpm", "dmg", "pkg", "msi",
        // Windows scripting and shell hosts
        "scr", "bat", "cmd", "com", "pif", "vbs", "vbe", "js", "jse", "ws", "wsf", "wsc", "wsh",
        // PowerShell / Monad shell
        "ps1", "ps1xml", "ps2", "ps2xml", "psc1", "psc2", "msh", "msh1", "msh2", "mshxml",
        "msh1xml", "msh2xml",
        // Shell shortcuts and system configuration
        "scf", "lnk", "inf", "reg",
        // Macro-enabled Office documents
        "docm", "dotm", "xlsm", "xltm", "xlam", "pptm", "potm", "ppam", "ppsm", "sldm",
        // Java and Windows app packages
        "jar", "appx", "appxbundle", "msix", "msixbundle",
        // Unix shell scripts
        "sh", "csh", "bash", "zsh", "fish",
    ];

    Some(BLOCKED.to_vec())
}
|
|
|
|
/// Heuristically decides whether `data` is a native executable or a script.
///
/// Returns `true` when the data
/// * starts with a known executable signature (DOS/PE `MZ`, ELF, any Mach-O
///   variant, or a Mach-O universal binary / Java class file),
/// * starts with a `#!` interpreter line, or
/// * contains `#!/bin/` or `#!/usr/bin/` (ASCII case-insensitive) anywhere in
///   its first 4096 bytes.
///
/// Inputs shorter than two bytes are never considered executable.
fn is_potentially_executable(data: &[u8]) -> bool {
    // Only the format magic is compared. Mach-O headers continue with a CPU
    // type that differs per architecture, so matching bytes beyond the magic
    // would miss ordinary x86_64 and arm64 binaries.
    const EXECUTABLE_MAGICS: &[&[u8]] = &[
        b"MZ",                     // DOS / Windows PE
        b"\x7FELF",                // ELF
        &[0xFE, 0xED, 0xFA, 0xCE], // Mach-O 32-bit, big-endian
        &[0xFE, 0xED, 0xFA, 0xCF], // Mach-O 64-bit, big-endian
        &[0xCE, 0xFA, 0xED, 0xFE], // Mach-O 32-bit, little-endian
        &[0xCF, 0xFA, 0xED, 0xFE], // Mach-O 64-bit, little-endian
        &[0xCA, 0xFE, 0xBA, 0xBE], // Mach-O universal binary / Java class file
    ];
    const SHEBANG_PATHS: &[&[u8]] = &[b"#!/bin/", b"#!/usr/bin/"];
    const SCAN_LIMIT: usize = 4096;

    if data.len() < 2 {
        return false;
    }

    if EXECUTABLE_MAGICS.iter().any(|magic| data.starts_with(magic)) {
        return true;
    }

    // Any leading `#!` is an interpreter line, whatever path follows it
    // (`#! /bin/sh`, `#!/opt/bin/python`, ...).
    if data.starts_with(b"#!") {
        return true;
    }

    // Also catch a shebang that is not the very first bytes (for example
    // after a byte-order mark or blank lines) within the head of the file.
    let head = &data[..data.len().min(SCAN_LIMIT)];
    SHEBANG_PATHS.iter().any(|needle| {
        head.windows(needle.len())
            .any(|window| window.eq_ignore_ascii_case(needle))
    })
}
|
|
|
|
/// Reports whether `data` contains PDF name tokens associated with active or
/// embedded content.
///
/// The search is a raw, ASCII case-insensitive byte scan for names that start
/// with one of: `/JavaScript`, `/Action`, `/OpenAction`, `/Launch`,
/// `/EmbeddedFile`, `/EFilename`, `/AcroForm`, `/XFA`, `/RichMedia`.
/// It does not parse the document, so names hidden inside compressed object
/// streams or written with `#xx` escapes are not seen, and a match is only an
/// indicator, not proof of malicious content.
fn has_potential_malicious_pdf_content(data: &[u8]) -> bool {
    // Every entry must be lowercase-insensitive ASCII and begin with `/`,
    // because the scan below only inspects positions holding a slash.
    const RISKY_NAMES: &[&[u8]] = &[
        b"/javascript",
        b"/action",
        // `/action` does not cover this one: the slash is followed by `open`.
        b"/openaction",
        b"/launch",
        b"/embeddedfile",
        b"/efilename",
        b"/acroform",
        b"/xfa",
        b"/richmedia",
    ];

    // Single pass over the raw bytes: no lossy UTF-8 decoding and no lowercase
    // copy of the file, which can be as large as the upload limit.
    data.iter().enumerate().any(|(offset, &byte)| {
        byte == b'/'
            && RISKY_NAMES.iter().any(|name| {
                data.get(offset..offset + name.len())
                    .is_some_and(|candidate| candidate.eq_ignore_ascii_case(name))
            })
    })
}
|
|
|