remove server side clustering, increase frequency, cleanup tf

This commit is contained in:
Will Freeman
2025-01-04 13:38:22 -07:00
parent c5a84a6db8
commit 884ac11200
5 changed files with 7 additions and 70 deletions
+2 -55
View File
@@ -37,6 +37,7 @@ WHITELISTED_TAGS = [
"manufacturer",
"direction",
"brand",
"camera:direction",
]
def get_all_nodes():
@@ -54,50 +55,6 @@ def get_all_nodes():
response.raise_for_status()
return response.json()["elements"]
def get_clusters(nodes: list[Any]):
    """Group OSM nodes into geographic clusters and return their centroids.

    Elements whose ``"type"`` is ``"node"`` are clustered with DBSCAN using
    the haversine metric and a 50-mile neighbourhood radius. ``min_samples=1``
    is used, so a lone node forms its own cluster.

    Args:
        nodes: Overpass-style elements. Each element must have a ``"type"``
            key; elements of type ``"node"`` must also have ``"lat"``,
            ``"lon"`` (degrees) and ``"id"``.

    Returns:
        ``{"clusters": [...]}`` where each entry is a dict with ``"lat"`` and
        ``"lon"`` (arithmetic mean of the member coordinates, in degrees) and
        ``"id"`` (the id of the first member node in input order). Entries
        are ordered by ascending cluster label. The list is empty when
        ``nodes`` contains no ``"node"`` elements.
    """
    # NOTE: these messages are kept for log continuity; no request is made
    # here -- the caller passes the already-fetched nodes in.
    print("Requesting data from Overpass API.")
    print("Data received. Parsing nodes...")

    # Keep only real nodes; ways/relations carry no lat/lon of their own here.
    node_elements = [element for element in nodes if element["type"] == "node"]

    # DBSCAN cannot fit an empty array, so short-circuit instead of crashing.
    if not node_elements:
        print("Clustering complete.")
        return {"clusters": []}

    coordinates = np.array(
        [[element["lat"], element["lon"]] for element in node_elements]
    )
    node_ids = [element["id"] for element in node_elements]

    # Define the clustering radius (50 miles) as an angle on the unit sphere,
    # which is what the haversine metric expects for ``eps``.
    radius_miles = 50
    radius_km = radius_miles * 1.60934  # 1 mile = 1.60934 km
    radius_in_radians = radius_km / 6371.0  # Earth's radius in km

    # Haversine distances are computed on radians, hence np.radians().
    db = DBSCAN(
        eps=radius_in_radians,
        min_samples=1,
        algorithm="ball_tree",
        metric="haversine",
    ).fit(np.radians(coordinates))
    labels = db.labels_

    # One pass gives every distinct label together with the index of its
    # first occurrence, replacing a per-cluster list conversion + linear scan.
    unique_labels, first_indices = np.unique(labels, return_index=True)

    clusters = []
    for label, first_index in zip(unique_labels, first_indices):
        centroid = coordinates[labels == label].mean(axis=0)
        clusters.append(
            {
                "lat": float(centroid[0]),
                "lon": float(centroid[1]),
                "id": node_ids[int(first_index)],
            }
        )

    print("Clustering complete.")
    return {"clusters": clusters}
def segment_regions(nodes: Any, tile_size_degrees: int) -> dict[Any]:
print("Segmenting regions...")
tile_dict = defaultdict(list)
@@ -118,22 +75,12 @@ def segment_regions(nodes: Any, tile_size_degrees: int) -> dict[Any]:
def lambda_handler(event, context):
nodes = get_all_nodes()
alpr_clusters = get_clusters(nodes)
regions_dict = segment_regions(nodes=nodes, tile_size_degrees=TILE_SIZE_DEGREES)
print("Uploading data to S3...")
s3 = boto3.client("s3")
bucket = "deflock-clusters"
bucket_new = "cdn.deflock.me"
key = "alpr_clusters.json"
s3.put_object(
Bucket=bucket,
Key=key,
Body=json.dumps(alpr_clusters),
ContentType="application/json",
)
# TODO: handle outdated index files when their referenced files are deleted
epoch = int(time.time())
@@ -174,7 +121,7 @@ def lambda_handler(event, context):
return {
"statusCode": 200,
"body": "Successfully clustered.",
"body": "Successfully cached OSM nodes",
}