mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-04-23 19:16:06 +02:00
668ce16dc7
Gate messages now propagate via the Infonet hashchain as encrypted blobs — every node syncs them through normal chain sync while only Gate members with MLS keys can decrypt. Added mesh reputation system, peer push workers, voluntary Wormhole opt-in for node participation, fork recovery, killwormhole scripts, obfuscated terminology, and hardened the self-updater to protect encryption keys and chain state during updates. New features: Shodan search, train tracking, Sentinel Hub imagery, 8 new intelligence layers, CCTV expansion to 11,000+ cameras across 6 countries, Mesh Terminal CLI, prediction markets, desktop-shell scaffold, and comprehensive mesh test suite (215 frontend + backend tests passing). Community contributors: @wa1id, @AlborzNazari, @adust09, @Xpirix, @imqdcr, @csysp, @suranyami, @chr0n1x, @johan-martensson, @singularfailure, @smithbh, @OrfeoTerkuci, @deuza, @tm-const, @Elhard1, @ttulttul
234 lines
7.3 KiB
Python
234 lines
7.3 KiB
Python
import logging
from urllib.parse import quote

import cloudscraper
import requests
import reverse_geocoder as rg
from bs4 import BeautifulSoup
from cachetools import cached, TTLCache
|
|
|
|
# Module-level logger named after this module; used by every scraper below.
logger = logging.getLogger(__name__)
|
|
|
|
# Cache the top feeds for 5 minutes so we don't hammer Broadcastify
radio_cache = TTLCache(maxsize=1, ttl=300)


@cached(radio_cache)
def get_top_broadcastify_feeds():
    """
    Scrape the Broadcastify "top feeds" page into a list of feed dictionaries.

    Every returned dict carries the keys ``id``, ``listeners``, ``location``,
    ``name``, ``category`` and ``stream_url``. An empty list is returned when
    the page cannot be fetched (non-200 or request error) or when the feeds
    table is missing.

    The result is memoised in ``radio_cache`` (300 second TTL), and that
    includes the empty list produced on failure.
    """
    logger.info("Scraping Broadcastify Top Feeds (Cache Miss)")

    # Browser-like headers sent with the page request.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }

    try:
        res = requests.get(
            "https://www.broadcastify.com/listen/top",
            headers=request_headers,
            timeout=10,
        )
        if res.status_code != 200:
            logger.error(f"Broadcastify Scrape Failed: HTTP {res.status_code}")
            return []

        page = BeautifulSoup(res.text, "html.parser")
        feeds_table = page.find("table", {"class": "btable"})
        if not feeds_table:
            logger.error("Could not find feeds table on Broadcastify.")
            return []

        feeds = []
        # The first <tr> is the header row, so iteration starts at the second.
        for table_row in feeds_table.find_all("tr")[1:]:
            cells = table_row.find_all("td")
            if len(cells) < 5:
                continue

            # Cell 0 holds the listener count, possibly with thousands separators.
            raw_count = cells[0].text.strip().replace(",", "")
            listeners = int(raw_count) if raw_count.isdigit() else 0

            # Cell 2 holds the feed name wrapped in a link to the feed page.
            anchor = cells[2].find("a")
            if not anchor:
                continue

            href = anchor.get("href", "")
            if "/listen/feed/" not in href:
                continue

            # The feed id is the last path segment of the feed link.
            feed_id = href.split("/")[-1]
            if not feed_id:
                continue

            feeds.append(
                {
                    "id": feed_id,
                    "listeners": listeners,
                    "location": cells[1].text.strip(),
                    "name": cells[2].text.strip(),
                    "category": cells[3].text.strip(),
                    "stream_url": f"https://broadcastify.cdnstream1.com/{feed_id}",
                }
            )

        logger.info(f"Successfully scraped {len(feeds)} top feeds from Broadcastify.")
        return feeds

    except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
        logger.error(f"Broadcastify Scrape Exception: {e}")
        return []
|
|
|
|
|
|
# Cache OpenMHZ systems mapping so we don't have to fetch all 450+ every time
openmhz_systems_cache = TTLCache(maxsize=1, ttl=3600)


@cached(openmhz_systems_cache)
def get_openmhz_systems():
    """Fetch the full directory of OpenMHZ systems.

    Returns:
        The list stored under the ``"systems"`` key of the JSON response from
        ``https://api.openmhz.com/systems``. An empty list is returned when the
        request fails, the status is not 200, or the payload does not have the
        expected shape.

    The result is memoised in ``openmhz_systems_cache`` (3600 second TTL), so
    callers share the same list object and must not mutate it. A failure
    result (empty list) is cached as well.
    """
    logger.info("Scraping OpenMHZ Systems (Cache Miss)")

    # The scraper is a requests session; the ``with`` block releases its
    # pooled connections once the single request is done.
    with cloudscraper.create_scraper(
        browser={"browser": "chrome", "platform": "windows", "desktop": True}
    ) as scraper:
        try:
            res = scraper.get("https://api.openmhz.com/systems", timeout=15)
            if res.status_code != 200:
                # Log the failure instead of silently returning nothing, in
                # line with the Broadcastify scraper.
                logger.error(f"OpenMHZ Systems Scrape Failed: HTTP {res.status_code}")
                return []
            data = res.json()
        except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
            logger.error(f"OpenMHZ Systems Scrape Exception: {e}")
            return []

    if not isinstance(data, dict):
        return []

    # Guard against a null / non-list "systems" value so callers can always
    # iterate the result.
    systems = data.get("systems", [])
    return systems if isinstance(systems, list) else []
|
|
|
|
|
|
# Cache specific city calls briefly (15-30s) to limit our polling rate
openmhz_calls_cache = TTLCache(maxsize=100, ttl=20)


@cached(openmhz_calls_cache)
def get_recent_openmhz_calls(sys_name: str):
    """Fetch the recent calls (audio burst .m4a entries) for one OpenMHZ system.

    Args:
        sys_name: OpenMHZ system short name (e.g. ``'wmata'``). It is
            percent-encoded before being placed in the request path.

    Returns:
        The list stored under the ``"calls"`` key of the JSON response, or an
        empty list when the request fails, the status is not 200, or the
        payload does not have the expected shape.

    Results are memoised per ``sys_name`` in ``openmhz_calls_cache``
    (20 second TTL); a failure result (empty list) is cached as well.
    """
    logger.info(f"Fetching OpenMHZ calls for {sys_name} (Cache Miss)")

    # Encode the name as a single path segment so characters such as "/",
    # "?", "#" or ".." cannot redirect the request to another API path.
    safe_name = quote(str(sys_name), safe="")
    url = f"https://api.openmhz.com/{safe_name}/calls"

    # The scraper is a requests session; the ``with`` block releases its
    # pooled connections once the single request is done.
    with cloudscraper.create_scraper(
        browser={"browser": "chrome", "platform": "windows", "desktop": True}
    ) as scraper:
        try:
            res = scraper.get(url, timeout=15)
            if res.status_code != 200:
                # Log the failure instead of silently returning nothing, in
                # line with the Broadcastify scraper.
                logger.error(f"OpenMHZ Calls Scrape Failed ({sys_name}): HTTP {res.status_code}")
                return []
            data = res.json()
        except (requests.RequestException, ConnectionError, TimeoutError, ValueError, KeyError) as e:
            logger.error(f"OpenMHZ Calls Scrape Exception ({sys_name}): {e}")
            return []

    if not isinstance(data, dict):
        return []

    # Guard against a null / non-list "calls" value so callers can always
    # iterate the result.
    calls = data.get("calls", [])
    return calls if isinstance(calls, list) else []
|
|
|
|
|
|
# Full U.S. state name -> two-letter postal abbreviation.
# The District of Columbia is listed under both of its common spellings.
US_STATES = {
    # A - D
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE",
    # F - L
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    "Kansas": "KS", "Kentucky": "KY", "Louisiana": "LA",
    # M
    "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI",
    "Minnesota": "MN", "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    # N
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ",
    "New Mexico": "NM", "New York": "NY", "North Carolina": "NC",
    "North Dakota": "ND",
    # O - T
    "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA",
    "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX",
    # U - W
    "Utah": "UT", "Vermont": "VT", "Virginia": "VA", "Washington": "WA",
    "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
    # Federal district (two spellings, same code)
    "Washington, D.C.": "DC",
    "District of Columbia": "DC",
}
|
|
|
|
import math
|
|
|
|
|
|
def haversine_distance(lat1, lon1, lat2, lon2, *, radius=3958.8):
    """Return the great-circle distance between two points via the haversine formula.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 3958.8 (Earth radius in miles).

    Returns:
        The distance along the sphere's surface, as a float.
    """
    half_d_lat = math.radians(lat2 - lat1) / 2
    half_d_lon = math.radians(lon2 - lon1) / 2
    sin_lat = math.sin(half_d_lat)
    sin_lon = math.sin(half_d_lon)

    a = sin_lat * sin_lat + math.cos(math.radians(lat1)) * math.cos(
        math.radians(lat2)
    ) * sin_lon * sin_lon

    # Rounding can push `a` a hair outside [0, 1] (e.g. for near-antipodal
    # points), which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c
|
|
|
|
|
|
def find_nearest_openmhz_systems_list(lat: float, lng: float, limit: int = 5):
    """
    Find the OpenMHZ systems nearest to a point, ordered strictly by distance.

    Args:
        lat: Latitude of the reference point, in degrees.
        lng: Longitude of the reference point, in degrees.
        limit: Maximum number of systems to return.

    Returns:
        Up to ``limit`` system dicts, nearest first. Each is a shallow copy of
        the directory entry with an added ``"distance_miles"`` key. Systems
        without usable ``lat``/``lng`` values are ignored. Returns an empty
        list when the directory is empty or unavailable.
    """
    systems = get_openmhz_systems()
    if not systems:
        return []

    ranked = []
    for system in systems:
        if not isinstance(system, dict):
            continue

        s_lat = system.get("lat")
        s_lng = system.get("lng")
        if s_lat is None or s_lng is None:
            continue

        # One malformed coordinate must not abort the whole lookup.
        try:
            s_lat = float(s_lat)
            s_lng = float(s_lng)
        except (TypeError, ValueError):
            continue
        if not (math.isfinite(s_lat) and math.isfinite(s_lng)):
            continue

        # get_openmhz_systems() hands out its cached objects, so annotate a
        # copy rather than writing the distance into the shared dict.
        ranked.append(
            {**system, "distance_miles": haversine_distance(lat, lng, s_lat, s_lng)}
        )

    # Sort strictly by distance
    ranked.sort(key=lambda entry: entry["distance_miles"])
    return ranked[:limit]
|
|
|
|
|
|
def find_nearest_openmhz_system(lat: float, lng: float):
    """
    Return the single OpenMHZ system closest to (lat, lng).

    Delegates to find_nearest_openmhz_systems_list with ``limit=1`` and yields
    None when that lookup comes back empty.
    """
    candidates = find_nearest_openmhz_systems_list(lat, lng, limit=1)
    return candidates[0] if candidates else None
|