# Input Sanitization Expert

Expert in input validation, sanitization, and encoding for secure applications.

## Core Principles

### Validation vs Sanitization vs Encoding

- **Validation**: Reject invalid input entirely (preferred)
- **Sanitization**: Clean/modify input to make it safe
- **Encoding**: Transform input for safe use in specific contexts
- Apply in order: validate first, sanitize if needed, encode for the output context

### Defense in Depth

- Never rely on client-side validation alone
- Implement validation at multiple layers (input, business logic, data access)
- Use allowlists over denylists when possible
- Fail securely - reject invalid input rather than attempting to fix it

## Input Validation Strategies
### Strict Validation Patterns (Python)

```python
import re
from typing import Optional

class InputValidator:
    """Strict input validation using allowlist patterns."""

    PATTERNS = {
        'email': r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$',
        'username': r'^[a-zA-Z0-9_]{3,20}$',
        'phone': r'^\+?1?[0-9]{10,14}$',
        'alphanumeric': r'^[a-zA-Z0-9]+$',
        'safe_filename': r'^[a-zA-Z0-9._-]+$',
        'uuid': r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$',
        'slug': r'^[a-z0-9]+(?:-[a-z0-9]+)*$'
    }

    @staticmethod
    def validate_input(value: str, pattern_type: str, max_length: int = 255) -> Optional[str]:
        """Validate input against allowlist pattern."""
        if not value or len(value) > max_length:
            return None

        pattern = InputValidator.PATTERNS.get(pattern_type)
        if pattern and re.match(pattern, value):
            return value.strip()
        return None

    @staticmethod
    def validate_integer(value: str, min_val: int = None, max_val: int = None) -> Optional[int]:
        """Validate and convert string to integer with bounds checking."""
        try:
            num = int(value)
            if min_val is not None and num < min_val:
                return None
            if max_val is not None and num > max_val:
                return None
            return num
        except (ValueError, TypeError):
            return None

    @staticmethod
    def validate_enum(value: str, allowed_values: set) -> Optional[str]:
        """Validate value against allowed set."""
        if value in allowed_values:
            return value
        return None
```

### JavaScript/TypeScript Validation

```typescript
class InputValidator {
  private static readonly PATTERNS: Record<string, RegExp> = {
    email: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
    username: /^[a-zA-Z0-9_]{3,20}$/,
    phone: /^\+?1?[0-9]{10,14}$/,
    alphanumeric: /^[a-zA-Z0-9]+$/,
    uuid: /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/
  };

  static validate(value: string, type: string, maxLength = 255): string | null {
    if (!value || value.length > maxLength) {
      return null;
    }

    const pattern = this.PATTERNS[type];
    if (pattern && pattern.test(value)) {
      return value.trim();
    }
    return null;
  }

  static validateInteger(value: string, min?: number, max?: number): number | null {
    const num = parseInt(value, 10);
    if (isNaN(num)) return null;
    if (min !== undefined && num < min) return null;
    if (max !== undefined && num > max) return null;
    return num;
  }

  static sanitizeHtml(input: string): string {
    const map: Record<string, string> = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;',
      "'": '&#x27;',
      '/': '&#x2F;'
    };
    return input.replace(/[&<>"'\/]/g, char => map[char]);
  }
}
```

## Context-Specific Encoding

### HTML Output Encoding

```python
import html
from markupsafe import escape

def safe_html_output(user_input: str) -> str:
    """Encode for HTML context."""
    return html.escape(user_input, quote=True)

def safe_html_attribute(user_input: str) -> str:
    """Encode for HTML attribute context - more restrictive."""
    encoded = html.escape(user_input, quote=True)
    # Additional encoding for attribute-specific risks
    encoded = encoded.replace("'", "&#x27;").replace("`", "&#x60;")
    return encoded

def safe_html_url(user_input: str) -> str:
    """Encode URL for use in href/src attributes."""
    from urllib.parse import quote
    # Only allow safe URL schemes
    if not user_input.lower().startswith(('http://', 'https://', '/')):
        return '#'
    return quote(user_input, safe=':/?&=#')
```

### JavaScript Context Encoding

```javascript
class JSEncoder {
  static encodeForJS(input) {
    if (typeof input !== 'string') {
      input = String(input);
    }

    return input
      .replace(/\\/g, '\\\\')
      .replace(/'/g, "\\'")
      .replace(/"/g, '\\"')
      .replace(/\n/g, '\\n')
      .replace(/\r/g, '\\r')
      .replace(/\t/g, '\\t')
      .replace(/</g, '\\u003c')
      .replace(/>/g, '\\u003e');
  }

  static safeJSONStringify(obj) {
    return JSON.stringify(obj)
      .replace(/</g, '\\u003c')
      .replace(/>/g, '\\u003e')
      .replace(/&/g, '\\u0026');
  }

  static encodeForHTMLAttribute(input) {
    return input
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#x27;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }
}
```

### SQL Context - Parameterized Queries

```python
import sqlite3
from typing import List, Any, Optional

class SafeDatabaseAccess:
    """Always use parameterized queries - never string concatenation."""

    def __init__(self, db_path: str):
        self.db_path = db_path

    def safe_query(self, query: str, params: tuple = ()) -> List[Any]:
        """Execute query with parameters - prevents SQL injection."""
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            # Parameters are automatically escaped
            cursor.execute(query, params)
            return cursor.fetchall()

    def get_user_by_email(self, email: str) -> Optional[dict]:
        """Example: safe parameterized query."""
        # Validate email first
        if not InputValidator.validate_input(email, 'email'):
            return None

        # Use ? placeholder - NEVER f-string or .format()
        query = "SELECT id, username, email FROM users WHERE email = ?"
        results = self.safe_query(query, (email,))

        if results:
            return dict(zip(['id', 'username', 'email'], results[0]))
        return None

# WRONG - SQL Injection vulnerable:
# query = f"SELECT * FROM users WHERE email = '{email}'"
# query = "SELECT * FROM users WHERE email = '%s'" % email
```
## File Upload Sanitization

```python
import os
import hashlib
import magic
from pathlib import Path
from typing import Optional

class FileUploadSanitizer:
    """Secure file upload handling."""

    ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.pdf', '.txt', '.docx'}
    ALLOWED_MIME_TYPES = {
        'image/jpeg', 'image/png', 'image/gif',
        'application/pdf', 'text/plain',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    }
    MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """Generate safe filename."""
        # Remove path components (directory traversal prevention)
        filename = os.path.basename(filename)

        # Remove dangerous characters - allowlist approach
        safe_chars = "-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
        filename = ''.join(c for c in filename if c in safe_chars)

        # Limit length
        if len(filename) > 100:
            name, ext = os.path.splitext(filename)
            filename = name[:95] + ext

        return filename or "unnamed_file"

    @classmethod
    def validate_upload(cls, file_data: bytes, filename: str, content_type: str) -> dict:
        """Comprehensive file upload validation."""
        result = {'valid': False, 'errors': [], 'safe_filename': None}

        # Check file size
        if len(file_data) > cls.MAX_FILE_SIZE:
            result['errors'].append(f"File too large: {len(file_data)} bytes (max: {cls.MAX_FILE_SIZE})")

        # Check extension
        ext = Path(filename).suffix.lower()
        if ext not in cls.ALLOWED_EXTENSIONS:
            result['errors'].append(f"Extension not allowed: {ext}")

        # Check declared MIME type
        if content_type not in cls.ALLOWED_MIME_TYPES:
            result['errors'].append(f"MIME type not allowed: {content_type}")

        # Verify actual content type matches (magic bytes)
        try:
            detected_mime = magic.from_buffer(file_data, mime=True)
            if detected_mime != content_type:
                result['errors'].append(f"MIME type mismatch: declared={content_type}, detected={detected_mime}")
        except Exception:
            result['errors'].append("Could not verify file content type")

        result['valid'] = len(result['errors']) == 0
        result['safe_filename'] = cls.sanitize_filename(filename)
        return result

    @staticmethod
    def generate_safe_path(base_dir: str, filename: str) -> str:
        """Generate unique, safe file path."""
        safe_name = FileUploadSanitizer.sanitize_filename(filename)
        # Add hash to prevent overwriting
        hash_prefix = hashlib.md5(os.urandom(16)).hexdigest()[:8]
        name, ext = os.path.splitext(safe_name)
        return os.path.join(base_dir, f"{name}_{hash_prefix}{ext}")
```

## URL and Path Sanitization

```python
from urllib.parse import urlparse, quote
import os.path

class URLSanitizer:
    @staticmethod
    def validate_redirect_url(url: str, allowed_hosts: set) -> Optional[str]:
        """Validate redirect URLs to prevent open redirects."""
        try:
            parsed = urlparse(url)

            # Only allow specific schemes
            if parsed.scheme not in ('http', 'https', ''):
                return None

            # For relative URLs
            if not parsed.netloc:
                # Prevent protocol-relative URLs (//evil.com)
                if url.startswith('//'):
                    return None
                return url

            # Check against allowlist of hosts
            if parsed.netloc not in allowed_hosts:
                return None

            return url
        except Exception:
            return None

    @staticmethod
    def sanitize_path_parameter(path: str, base_dir: str) -> Optional[str]:
        """Prevent directory traversal attacks."""
        # Normalize the path
        normalized = os.path.normpath(path)

        # Check for directory traversal attempts
        if '..' in normalized or normalized.startswith('/') or normalized.startswith('\\'):
            return None

        # Ensure path stays within base directory
        full_path = os.path.abspath(os.path.join(base_dir, normalized))
        base_abs = os.path.abspath(base_dir)

        if not full_path.startswith(base_abs + os.sep):
            return None

        return normalized
```

## Content Security Policy Headers

```python
from flask import Flask, Response

def apply_security_headers(response: Response) -> Response:
    """Apply comprehensive security headers."""
    response.headers.update({
        # Content Security Policy - prevent XSS
        'Content-Security-Policy': '; '.join([
            "default-src 'self'",
            "script-src 'self' https://trusted-cdn.com",
            "style-src 'self' 'unsafe-inline' https://fonts.googleapis.com",
            "img-src 'self' data: https:",
            "font-src 'self' https://fonts.gstatic.com",
            "connect-src 'self' https://api.example.com",
            "frame-ancestors 'none'",
            "base-uri 'self'",
            "form-action 'self'"
        ]),
        # Prevent MIME type sniffing
        'X-Content-Type-Options': 'nosniff',
        # Clickjacking protection
        'X-Frame-Options': 'DENY',
        # XSS filter (legacy browsers)
        'X-XSS-Protection': '1; mode=block',
        # Force HTTPS
        'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload',
        # Referrer policy
        'Referrer-Policy': 'strict-origin-when-cross-origin',
        # Permissions policy
        'Permissions-Policy': 'geolocation=(), microphone=(), camera=()'
    })
    return response
```
```python
# Express.js equivalent
'''
const helmet = require('helmet');
app.use(helmet({
  contentSecurityPolicy: {
    directives: {
      defaultSrc: ["'self'"],
      scriptSrc: ["'self'", "https://trusted-cdn.com"],
      styleSrc: ["'self'", "'unsafe-inline'"],
      imgSrc: ["'self'", "data:", "https:"],
      frameAncestors: ["'none'"]
    }
  },
  hsts: {
    maxAge: 31536000,
    includeSubDomains: true,
    preload: true
  }
}));
'''
```

## XSS Prevention Checklist

```yaml
Output Encoding:
  - HTML body: Use HTML entity encoding
  - HTML attributes: Use attribute encoding, always quote values
  - JavaScript: Use JavaScript encoding, prefer JSON.stringify
  - CSS: Use CSS encoding
  - URLs: Use URL encoding

Input Validation:
  - Validate type, length, format, range
  - Use allowlists over denylists
  - Reject invalid input, don't sanitize

Security Headers:
  - Implement Content Security Policy
  - Set X-Content-Type-Options: nosniff
  - Set X-Frame-Options: DENY
  - Enable HSTS

Framework Protections:
  - Use auto-escaping template engines
  - Enable CSRF protection
  - Use HttpOnly and Secure cookie flags
  - Implement SameSite cookie attribute
```

## Лучшие практики

- **Validate at server** — никогда не доверяйте client-side валидации
- **Allowlist approach** — определяйте допустимое, не запрещённое
- **Context-aware encoding** — разные контексты требуют разного encoding
- **Parameterized queries** — никогда не конкатенируйте SQL
- **Defense in depth** — валидация на каждом уровне
- **Fail securely** — отклоняйте invalid input, не пытайтесь исправить