# Mirror of https://github.com/BigBodyCobain/Shadowbroker.git
# Synced 2026-04-25 12:06:08 +02:00 (68 lines, 2.4 KiB, Python)
import requests
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
|
|
def scrape_broadcastify_top():
    """Scrape the Broadcastify "top feeds" page into a list of feed records.

    Fetches https://www.broadcastify.com/listen/top with a browser-like
    User-Agent, locates the ``<table class="btable">`` element and turns each
    data row into a dict.

    Returns:
        A list of dicts with the keys ``id``, ``listeners``, ``location``,
        ``name`` and ``stream_url``. An empty list is returned when the
        response status is not 200, when the table cannot be found, or when
        any exception occurs while fetching or parsing (the error is printed
        and never propagated to the caller).
    """
    print("Scraping Broadcastify Top Feeds...")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }

    try:
        # The top 50 feeds page provides a wealth of listening data
        res = requests.get("https://www.broadcastify.com/listen/top", headers=headers, timeout=10)
        if res.status_code != 200:
            print(f"Failed HTTP {res.status_code}")
            return []

        soup = BeautifulSoup(res.text, 'html.parser')

        # The table of feeds is in a standard class
        table = soup.find('table', {'class': 'btable'})
        if not table:
            print("Could not find feeds table.")
            return []

        feeds = []
        rows = table.find_all('tr')[1:]  # Skip header

        for row in rows:
            cols = row.find_all('td')
            if len(cols) < 5:
                # Not a regular feed row (too few cells).
                continue

            # Cells read below: 0 = listener count, 1 = location,
            # 2 = feed name (carrying the link to the feed page).
            # NOTE(review): these indices assume the page's current column
            # order - confirm against the live markup if values look shifted.
            listeners_str = cols[0].text.strip().replace(',', '')
            # isdecimal() only accepts characters that int() can parse, so an
            # odd cell value degrades to 0 instead of aborting the whole scrape.
            listeners = int(listeners_str) if listeners_str.isdecimal() else 0

            link_tag = cols[2].find('a')
            if not link_tag:
                continue

            # Take the path segment right after "/listen/feed/" so a trailing
            # slash, query string, fragment or extra sub-path cannot corrupt
            # (or blank out) the feed id.
            href = link_tag.get('href') or ''
            feed_id = href.partition('/listen/feed/')[2]
            for delimiter in '/?#':
                feed_id = feed_id.partition(delimiter)[0]

            if not feed_id:
                continue

            location = cols[1].text.strip()
            name = cols[2].text.strip()

            feeds.append({
                "id": feed_id,
                "listeners": listeners,
                "location": location,
                "name": name,
                "stream_url": f"https://broadcastify.cdnstream1.com/{feed_id}",
            })

        print(f"Successfully scraped {len(feeds)} top feeds.")
        return feeds

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and hand back an
        # empty result instead of crashing the caller.
        print(f"Scrape error: {e}")
        return []
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: scrape once and pretty-print the first three feeds.
    preview = scrape_broadcastify_top()[:3]
    print(json.dumps(preview, indent=2))
|