""" Strict ALLOW-LIST redaction for telemetry data. PRIVACY GUARANTEE: If a field is not explicitly allowed, it is removed. This module ensures NO sensitive data ever reaches the Hub database. """ from typing import Any # ONLY these fields can be stored in metadata ALLOWED_METADATA_FIELDS = frozenset({ "tool_name", "duration_ms", "status", "error_code", "component", "version", }) # Patterns that indicate sensitive data (defense in depth) SENSITIVE_PATTERNS = frozenset({ "password", "secret", "token", "key", "credential", "auth", "cookie", "session", "bearer", "content", "body", "payload", "data", "file", "env", "environment", "config", "setting", "screenshot", "image", "base64", "binary", "private", "cert", "certificate", }) def redact_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]: """ Filter metadata to ONLY allowed fields. Uses allow-list approach: if not explicitly allowed, it's removed. This provides defense against accidentally storing sensitive data. Args: metadata: Raw metadata from telemetry Returns: Filtered metadata with only safe fields """ if metadata is None: return {} redacted: dict[str, Any] = {} for key, value in metadata.items(): # Must be in allow list if key not in ALLOWED_METADATA_FIELDS: continue # Defense in depth: reject if key contains sensitive pattern key_lower = key.lower() if any(pattern in key_lower for pattern in SENSITIVE_PATTERNS): continue # Only primitive types (no nested objects that could hide data) if isinstance(value, (str, int, float, bool)): # String length limit to prevent large data blobs if isinstance(value, str) and len(value) > 100: continue redacted[key] = value return redacted def validate_tool_name(tool_name: str) -> bool: """ Validate tool name format. Tool names must: - Start with a known prefix (sysadmin., browser., gateway.) - Be reasonably short - Not contain suspicious characters Args: tool_name: Tool name to validate Returns: True if valid, False otherwise """ # Must match known prefixes valid_prefixes = ("sysadmin.", "browser.", "gateway.", "llm.") if not tool_name.startswith(valid_prefixes): return False # Length limit if len(tool_name) > 100: return False # No suspicious content suspicious_chars = {";", "'", '"', "\\", "\n", "\r", "\t", "\0"} if any(c in tool_name for c in suspicious_chars): return False return True def sanitize_error_code(error_code: str | None) -> str | None: """ Sanitize an error code to ensure it doesn't contain sensitive data. Args: error_code: Raw error code Returns: Sanitized error code or None if invalid """ if error_code is None: return None # Length limit if len(error_code) > 50: return None # Must be alphanumeric with underscores/dashes allowed = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-") if not all(c in allowed for c in error_code): return None return error_code