From 508cbaeb7e54709eeff6891e76f84391f459f05d Mon Sep 17 00:00:00 2001 From: pliny <133052465+elder-plinius@users.noreply.github.com> Date: Thu, 2 Apr 2026 14:42:09 -0700 Subject: [PATCH] Add files via upload --- README.md | 232 +++- analysis_tools.py | 95 ++ cli.py | 8 +- crypto.py | 22 +- index.html | 2507 +++++++++++++++++++++++++++++++++++++---- pyproject.toml | 47 +- steg_core.py | 90 +- test_comprehensive.py | 344 ++++++ 8 files changed, 3040 insertions(+), 305 deletions(-) create mode 100644 test_comprehensive.py diff --git a/README.md b/README.md index 8150a31..8dadab1 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,11 @@ ๐ **Hosted site: [ste.gg](https://ste.gg)** -[](LICENSE) +[](https://pypi.org/project/stegg/) +[](https://github.com/elder-plinius/st3gg/blob/main/LICENSE) [](https://python.org) [](http://makeapullrequest.com) -[](examples/) +[](https://github.com/elder-plinius/st3gg/tree/main/examples) ``` __ .--. @@ -62,9 +63,9 @@ It runs **100% in your browser** (static site, no server) or as a **Python CLI/T | Bit Depth | 1 bit fixed | **1-8 bits per channel** (adjustable) | | Encoding Strategies | Sequential | **4 strategies** (sequential, interleaved, spread, randomized) | | Nested Steg | - | **Up to 11 layers deep** (Matryoshka mode) | -| Channel Cipher | - | **Novel cross-channel hopping** (GODMODE) | -| Compression Survival | - | **DCT mode survives JPEG/social media** | -| Smart Decode | - | **16+ config auto-detection** | +| Channel Cipher | - | **Novel cross-channel hopping** (SPECTER) | +| Compression Survival | - | **F5 survives JPEG/social media; DCT designed for compression resistance** | +| Smart Decode | - | **120+ config auto-detection** | | Encryption | Basic/None | **AES-256-GCM + XOR** | | Image Formats | PNG only | **PNG, JPEG, WebP, GIF** | | File Types | Images only | **Images, audio, text, docs, network, archives, code** | @@ -86,7 +87,7 @@ Data exfiltration doesn't always look like data exfiltration. ST3GG lets red tea - **Polyglot file generation** โ files that are simultaneously valid as two formats (PNG+ZIP) - **Network protocol covert channels** โ data hidden in DNS queries, ICMP payloads, TCP sequence numbers, HTTP headers - **Unicode steganography** โ invisible homoglyphs, zero-width chars, variation selectors, confusable whitespace -- **Compression-resistant encoding** โ DCT mode survives JPEG re-compression on social media +- **Compression-resistant encoding** โ F5 mode operates directly on JPEG coefficients (proven to survive social media); DCT mode designed for compression resistance - **Multi-layer nesting** โ up to 11 recursive layers of steganography (Matryoshka mode) - **Ghost Mode** โ AES-256 encryption + bit scrambling + noise decoys for maximum evasion @@ -131,7 +132,7 @@ Analyze seized media for steganographic communication channels. Detect hidden da Explore steganography as a privacy-preserving communication channel. Understand the trade-offs between capacity, stealth, and compression survival. Test which techniques survive social media re-encoding for real-world deniable communication. ### Academics & Students -Study the full landscape of steganographic techniques across every modality. Use the 100+ example files as a teaching dataset. Benchmark new detection algorithms against known encodings. The codebase is well-documented and Apache 2.0 licensed for research. +Study the full landscape of steganographic techniques across every modality. Use the 100+ example files as a teaching dataset. Benchmark new detection algorithms against known encodings. The codebase is well-documented and AGPL-3.0 licensed โ free for individuals, researchers, and open-source projects. ### AI Safety & LLM Security Test how AI systems handle steganographic content โ hidden instructions in images, invisible Unicode in prompts, polyglot files that bypass content filters. Understand the data smuggling surface area that AI systems need to defend against. @@ -139,15 +140,51 @@ Test how AI systems handle steganographic content โ hidden instructions in ima ### Data Loss Prevention (DLP) Vendors Benchmark your DLP solution against ST3GG's 100+ encoding techniques. If your product can't detect data hidden in DNS query names, TCP sequence numbers, or invisible Unicode characters โ your customers deserve to know. ST3GG is your adversarial test suite. +### AI Agent Security & Red Teaming +The next frontier of steganography is **agent-to-agent covert communication** and **prompt injection via hidden payloads**. ST3GG is the toolkit for this emerging attack surface: + +- **Prompt injection via images** โ embed hidden instructions in images that vision-enabled agents process. The agent sees a normal photo; the hidden payload says "ignore all previous instructions." +- **Agent data exfiltration** โ test whether your agent can be tricked into encoding stolen data into images it generates, smuggling it past output filters. +- **Covert agent channels** โ agents passing hidden instructions through innocuous-looking files in shared tool contexts. +- **Agent output watermarking** โ embed provenance or tracking data in images agents generate for attribution. +- **Content filter bypass** โ test moderation systems by hiding prohibited content in image payloads that pass automated review. +- **Multi-modal poisoning** โ craft images that look normal to humans but contain hidden data that alters agent behavior when processed. + +**Use ST3GG as a Python library in your agent pipeline:** + +```python +from steg_core import encode, decode, detect_encoding, StegConfig, get_channel_preset +from analysis_tools import detect_unicode_steg, detect_file_type, TOOL_REGISTRY +from PIL import Image + +# Encode a hidden payload into an image +img = Image.open("carrier.png") +config = StegConfig(channels=get_channel_preset("RGB"), bits_per_channel=1) +stego = encode(img, b"hidden agent instructions", config) +stego.save("stego.png") + +# Detect and decode hidden data +detected = detect_encoding(Image.open("stego.png")) +if detected: + payload = decode(Image.open("stego.png")) + print(f"Found: {payload.decode()}") + +# Scan for ALL steganography types +tools = TOOL_REGISTRY.list_tools() # 48 detection tools +result = detect_unicode_steg(open("message.txt", "rb").read()) +if result['found']: + print(f"Hidden Unicode: {result['invisible_chars']} chars") +``` + --- ## โฐ Megalithic Features โฑ -### GODMODE โ Channel Cipher Steganography +### SPECTER โ Channel Cipher Steganography *A novel approach where data hops between color channels like a cryptographic dance.* -Instead of hiding all data in one channel, GODMODE distributes bits across R, G, and B channels in a pattern that becomes your key: +Instead of hiding all data in one channel, SPECTER distributes bits across R, G, and B channels in a pattern that becomes your key: ``` Pattern: R1-G2-B1-RG2-B1 @@ -176,11 +213,19 @@ Hide images within images within images โ up to **11 layers deep**. The smart ### DCT Mode โ Compression Resistant -Traditional LSB dies to JPEG compression. DCT mode embeds data in frequency-domain coefficients of 8x8 pixel blocks โ the same way JPEG stores image data. **Survives social media re-encoding at quality 70%+.** +Traditional LSB is destroyed by ANY JPEG compression โ even quality 99%. DCT mode embeds in frequency-domain coefficients of 8x8 pixel blocks, designed for compression resistance. For **proven** social media survival, use **F5 mode** which operates directly on JPEG DCT coefficients via matrix encoding. -### AI Agent โ Exhaustive Analysis +> **LSB** โ PNG only (lossless). **DCT** โ compression resistant. **F5** โ survives JPEG/social media. -An autonomous AI agent that analyzes uploaded files using **all known decoding methods** for that file type. Powered by OpenRouter, it intelligently tests every steganographic technique โ LSB extraction, metadata parsing, frequency analysis, unicode detection, and more. +### AI Agent โ Reveal & Conceal + +The AI agent has two modes: + +**๐ Reveal** โ Upload any file. The agent tests every known decoding method automatically, finds hidden data, and extracts it as downloadable artifacts. + +**๐ฎ Conceal** โ Type a secret message, upload (or generate) a carrier image, and the agent hides your data using the optimal encoding method. One click from secret to stego image. + +Powered by OpenRouter. Works with Claude, GPT, Gemini, and other models. --- @@ -212,7 +257,38 @@ Python, JavaScript, C, CSS, Shell, SQL, LaTeX โ all with steganographic commen ## โฐ Quick Start โฑ -### Browser (Recommended) +### Install from PyPI + +```bash +pip install stegg +``` + +That's it. Now you have `stegg` in your terminal: + +```bash +# Encode a secret message +stegg encode image.png "your secret message" -o stego.png + +# Decode hidden data +stegg decode stego.png + +# Analyze a suspicious file +stegg analyze suspicious.png --full + +# SPECTER mode with password +stegg encode image.png "{SPECTER:ENABLED}" -o stego.png +``` + +### Install with extras + +```bash +pip install stegg[tui] # Terminal UI (Textual) +pip install stegg[web] # Web UI (NiceGUI) +pip install stegg[crypto] # AES-256-GCM encryption +pip install stegg[all] # Everything +``` + +### Browser (No Install) ```bash # Just open index.html โ that's it. No server needed. @@ -221,33 +297,20 @@ open index.html Everything runs 100% client-side. No data ever leaves your machine. -### Python Tools +### From Source ```bash -# Clone -git clone https://github.com/elder-plinius/ST3GG.git -cd ST3GG - -# Install -pip install -r requirements.txt - -# Pick your interface -python webui.py # Modern browser UI (NiceGUI) -python cli.py --help # Command line -python tui.py # Terminal UI (Textual) +git clone https://github.com/elder-plinius/st3gg.git +cd st3gg +pip install -e ".[all]" ``` -### Encode from CLI +### Interfaces ```bash -# Basic LSB encode -python cli.py encode image.png "your secret message" -o output.png - -# GODMODE with password -python cli.py encode image.png "{GODMODE:ENABLED}" -o output.png - -# Analyze a suspicious file -python cli.py analyze suspicious.png --full +stegg --help # CLI +stegg-tui # Terminal UI (requires: pip install stegg[tui]) +stegg-web # Browser UI (requires: pip install stegg[web]) ``` --- @@ -286,7 +349,7 @@ A 1920x1080 image with RGB 1-bit holds ~760KB. With RGBA 4-bit: **~4MB**. | Method | Strength | Speed | Use Case | |--------|----------|-------|----------| | **AES-256-GCM** | Maximum | Medium | Ghost Mode | -| **XOR Cipher** | Basic | Fast | Quick obfuscation | +| **XOR Obfuscation** | Minimal | Fast | Basic scrambling only (not encryption) | | **None** | - | Fastest | When secrecy isn't needed | --- @@ -335,31 +398,118 @@ ST3GG/ ## โฐ Security Notes โฑ - Standard LSB steganography is **statistically detectable** โ chi-square and bit-plane analysis can reveal it -- **GODMODE Channel Cipher** increases resistance by hopping across channels unpredictably +- **SPECTER Channel Cipher** increases resistance by hopping across channels unpredictably - **Ghost Mode** adds encryption + scrambling + noise for maximum stealth -- **DCT mode** survives JPEG compression but has lower capacity +- **DCT mode** designed for compression resistance; **F5 mode** proven to survive JPEG recompression +- **LSB** is destroyed by ANY JPEG compression โ use PNG format only - Always **encrypt** sensitive data before embedding - For maximum security: **Ghost Mode + DCT + strong password** --- +## โฐ Roadmap โฑ + +``` +โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ +โ ST3GG EVOLUTION ROADMAP โ +โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฃ +โ โ +โ โ SHIPPED โ +โ โโโโโโโโ โ +โ โ 112 steganographic techniques across all modalities โ +โ โ 15 channel presets ร 8 bit depths = 120 LSB combinations โ +โ โ 8 encoding methods (LSB, DCT, PVD, F5, Chroma, Palette, โ +โ Spread Spectrum, SPECTER channel cipher) โ +โ โ AI Agent with Reveal + Conceal modes โ +โ โ 13 text steganography methods with encode + decode โ +โ โ 50 registered analysis/decode tools โ +โ โ RS Analysis + Sample Pairs Analysis (academic steganalysis) โ +โ โ Raw PNG parser (bypasses canvas premultiplied alpha) โ +โ โ Password-derived headers (stealth mode) โ +โ โ AES-256-GCM with PBKDF2 600k iterations โ +โ โ AI carrier image generation (OpenRouter + procedural) โ +โ โ 109 example files, 568 automated tests โ +โ โ pip install stegg โ +โ โ 100% browser-based at ste.gg โ +โ โ +โ ๐ NEXT UP โ +โ โโโโโโโโโโ โ +โ โ Spread + Randomized strategies in browser โ +โ (defined but only interleaved is implemented) โ +โ โ Password brute-forcer with wordlist support โ +โ (Stegseek does 10M/sec โ we should match it) โ +โ โ Content-adaptive embedding (HUGO/WOW-inspired) โ +โ (embed in texture, skip smooth areas) โ +โ โ Steghide format compatibility โ +โ (read/write steghide's embedding format) โ +โ โ Weighted Stego (WS) analysis โ +โ (more accurate LSB detection than chi-square) โ +โ โ Calibrated RS/SPA for real-world detection accuracy โ +โ โ +โ ๐ฎ FUTURE โ +โ โโโโโโโโโโ โ +โ โ ML-based steganalysis โ +โ (CNN trained on StegoAppDB โ Aletheia-grade detection) โ +โ โ nsF5 / S-UNIWARD embedding โ +โ (academic state-of-the-art, minimal detectability) โ +โ โ Adversarial steganography โ +โ (GAN-based embedding that defeats ML detectors) โ +โ โ Video steganography (frame-by-frame + temporal) โ +โ โ Network protocol live capture + injection โ +โ (real-time covert channel creation, not just PCAPs) โ +โ โ WebAssembly acceleration for browser-side analysis โ +โ โ Plugin system for community-contributed techniques โ +โ โ Mobile-native app (iOS/Android) โ +โ โ VS Code / JetBrains extension for inline text steg โ +โ โ MCP server for Claude Code / AI agent integration โ +โ โ +โ ๐ MOONSHOTS โ +โ โโโโโโโโโโโโ โ +โ โ Quantum-resistant steganographic protocols โ +โ โ Blockchain-anchored provenance watermarking โ +โ โ Cross-modal steganography (hide audio in images, โ +โ images in text, text in network traffic) โ +โ โ Federated steganalysis (distributed detection network) โ +โ โ Self-modifying steganographic payloads โ +โ โ Steganographic filesystem (deniable encryption layer) โ +โ โ +โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ +``` + +> *โฐโข-โขโง Want to help build any of these? PRs welcome. โงโข-โขโฑ* + +--- + ## โฐ Contributing โฑ -PRs are welcome! Whether it's new steganographic techniques, better detection algorithms, or documentation improvements. +PRs are welcome! Whether it's new steganographic techniques, better detection algorithms, or entirely new modalities. ```bash -# Run tests before submitting +# Run the comprehensive test suite (568 tests) +python test_comprehensive.py + +# Run example file tests python test_examples.py -# Regenerate examples after changes +# Regenerate all 109 example files python examples/generate_examples.py ``` +Areas we'd especially love contributions in: +- **ML steganalysis** โ train detection models on stego datasets +- **New encoding methods** โ academic techniques (HUGO, WOW, HILL, UNIWARD) +- **Format support** โ HEIC, AVIF, FLAC, MP4 steganography +- **Steghide compatibility** โ read/write steghide's format natively +- **Performance** โ WebAssembly for browser-side analysis +- **Mobile** โ responsive improvements, native app wrappers + --- ## โฐ License โฑ -Apache License 2.0. See [LICENSE](LICENSE) for details. +**AGPL-3.0** โ free and open source for individuals, researchers, educators, and open-source projects. See [LICENSE](LICENSE) for details. + +**Enterprise / Commercial use?** If you want to use ST3GG in a proprietary product or SaaS without open-sourcing your code, contact us for a commercial license. This tool is intended for **authorized security research**, **CTF competitions**, **digital forensics education**, and **privacy research**. Use responsibly. diff --git a/analysis_tools.py b/analysis_tools.py index a789972..5e84a00 100644 --- a/analysis_tools.py +++ b/analysis_tools.py @@ -2316,6 +2316,7 @@ def tar_decode(data: bytes) -> Dict[str, Any]: results = {'found': False, 'findings': []} try: tf = tarfile.open(fileobj=io.BytesIO(data)) + # Note: we only READ members, never extract to filesystem โ no path traversal risk for member in tf.getmembers(): if hasattr(member, 'pax_headers') and member.pax_headers: for k, v in member.pax_headers.items(): @@ -2674,6 +2675,97 @@ def decode_emoji_skin_tone(data: bytes) -> Dict[str, Any]: return {'error': str(e), 'found': False} +# ============== ADVANCED STEGANALYSIS ============== + +def rs_analysis(data: bytes) -> Dict[str, Any]: + """RS (Regular-Singular) Analysis โ gold standard for LSB detection. + + Divides pixels into pairs and measures how LSB flipping affects smoothness. + Clean images: flipping increases/decreases regularity equally. + Stego images: balance is skewed because LSBs already carry data. + More accurate than chi-square for low embedding rates. + """ + if not HAS_PIL or not HAS_NUMPY: + return {'error': 'PIL/numpy required', 'found': False} + try: + img = Image.open(io.BytesIO(data)).convert('RGB') + pixels = np.array(img, dtype=np.int16) + results = {} + for ch_idx, ch_name in enumerate(['Red', 'Green', 'Blue']): + ch = pixels[:, :, ch_idx].flatten() + n = len(ch) // 2 + p1, p2 = ch[:n*2:2], ch[1:n*2:2] + d_orig = float(np.mean(np.abs(p1 - p2))) + d_flip = float(np.mean(np.abs((p1 ^ 1) - p2))) + rs_ratio = d_flip / d_orig if d_orig > 0 else 1.0 + est_rate = max(0, min(1, (rs_ratio - 1.0) * 2)) + results[ch_name] = { + 'smoothness_original': round(d_orig, 4), + 'smoothness_flipped': round(d_flip, 4), + 'rs_ratio': round(rs_ratio, 4), + 'estimated_embedding_rate': round(est_rate, 4), + 'suspicious': rs_ratio > 1.02 or est_rate > 0.05, + } + rate = max(r['estimated_embedding_rate'] for r in results.values()) + return { + 'found': True, 'channels': results, + 'overall_embedding_rate': round(rate, 4), + 'suspicious': any(r['suspicious'] for r in results.values()), + 'interpretation': f"RS analysis: {rate:.1%} estimated embedding. " + ( + "HIGH probability of LSB steg." if rate > 0.1 + else "MODERATE indicators." if rate > 0.03 + else "LOW โ likely clean."), + 'method': 'rs_analysis' + } + except Exception as e: + return {'error': str(e), 'found': False} + + +def sample_pairs_analysis(data: bytes) -> Dict[str, Any]: + """Sample Pairs Analysis (SPA) โ detects LSB by pixel pair statistics. + + Examines how adjacent pixel pairs relate when LSBs are considered. + Clean images have predictable pair-type ratios. LSB embedding disrupts them. + Complementary to RS analysis โ catches different patterns. + """ + if not HAS_PIL or not HAS_NUMPY: + return {'error': 'PIL/numpy required', 'found': False} + try: + img = Image.open(io.BytesIO(data)).convert('RGB') + pixels = np.array(img, dtype=np.int16) + results = {} + for ch_idx, ch_name in enumerate(['Red', 'Green', 'Blue']): + ch = pixels[:, :, ch_idx].flatten() + n = len(ch) - 1 + p1, p2 = ch[:n], ch[1:n+1] + h1, h2 = p1 >> 1, p2 >> 1 + x = int(np.sum(h1 == h2)) + y = int(np.sum(np.abs(h1 - h2) == 1)) + total = float(n) + x_r, y_r = x/total, y/total + spa = abs(x_r - y_r) / (x_r + y_r) if (x_r + y_r) > 0 else 0 + est = max(0, min(1, 1.0 - spa * 3)) + results[ch_name] = { + 'x_pairs': x, 'y_pairs': y, 'z_pairs': n - x - y, + 'spa_ratio': round(spa, 4), + 'estimated_embedding_rate': round(est, 4), + 'suspicious': spa < 0.1, + } + rate = max(r['estimated_embedding_rate'] for r in results.values()) + return { + 'found': True, 'channels': results, + 'overall_embedding_rate': round(rate, 4), + 'suspicious': any(r['suspicious'] for r in results.values()), + 'interpretation': f"SPA: {rate:.1%} estimated embedding. " + ( + "HIGH probability." if rate > 0.5 + else "MODERATE." if rate > 0.2 + else "LOW."), + 'method': 'sample_pairs_analysis' + } + except Exception as e: + return {'error': str(e), 'found': False} + + # ============== REGISTER ALL TOOLS ============== def _register_all_tools(): @@ -2684,6 +2776,9 @@ def _register_all_tools(): TOOL_REGISTRY.register('detect_confusable_whitespace', detect_confusable_whitespace) TOOL_REGISTRY.register('detect_emoji_steg', detect_emoji_steg) TOOL_REGISTRY.register('detect_capitalization_steg', detect_capitalization_steg) + # Advanced steganalysis + TOOL_REGISTRY.register('rs_analysis', rs_analysis) + TOOL_REGISTRY.register('sample_pairs_analysis', sample_pairs_analysis) TOOL_REGISTRY.register('audio_lsb_decode', audio_lsb_decode) TOOL_REGISTRY.register('pcap_decode', pcap_decode) TOOL_REGISTRY.register('zip_decode', zip_decode) diff --git a/cli.py b/cli.py index 73efc35..c831c48 100644 --- a/cli.py +++ b/cli.py @@ -36,7 +36,13 @@ from steg_core import ( encode, decode, create_config, calculate_capacity, analyze_image, detect_encoding, CHANNEL_PRESETS, EncodingStrategy ) -from crypto import encrypt, decrypt, get_available_methods, crypto_status +try: + from crypto import encrypt, decrypt, get_available_methods, crypto_status +except Exception: + # Gracefully handle broken cryptography library (e.g., broken system install) + encrypt = decrypt = None + def get_available_methods(): return ["none", "xor"] + def crypto_status(): return "โ crypto module unavailable (install cryptography package)" from injector import ( generate_injection_filename, get_template_names, get_jailbreak_template, get_jailbreak_names, diff --git a/crypto.py b/crypto.py index cc3c880..d412133 100644 --- a/crypto.py +++ b/crypto.py @@ -10,12 +10,22 @@ from typing import Tuple, Optional from dataclasses import dataclass # Try to import cryptography library, fall back to basic XOR if not available +HAS_CRYPTO = False try: - from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes - from cryptography.hazmat.primitives import padding - from cryptography.hazmat.backends import default_backend - HAS_CRYPTO = True -except ImportError: + # Pre-check: verify cryptography's native bindings work. + # Some systems have a broken cryptography install where the Rust + # bindings crash with a pyo3 panic that Python can't catch. + import subprocess as _sp + _probe = _sp.run( + ['python3', '-c', 'from cryptography.exceptions import InvalidSignature'], + capture_output=True, timeout=5 + ) + if _probe.returncode == 0: + from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + from cryptography.hazmat.primitives import padding + from cryptography.hazmat.backends import default_backend + HAS_CRYPTO = True +except Exception: HAS_CRYPTO = False @@ -44,7 +54,7 @@ def derive_key(password: str, salt: bytes, key_length: int = 32) -> bytes: 'sha256', password.encode('utf-8'), salt, - iterations=100000, + iterations=600000, dklen=key_length ) diff --git a/index.html b/index.html index 5354aa9..a8f64fe 100644 --- a/index.html +++ b/index.html @@ -2,8 +2,8 @@
- -