remove server-side clustering, increase update frequency, clean up tf

Will Freeman
2025-01-04 13:38:22 -07:00
parent c5a84a6db8
commit 884ac11200
5 changed files with 7 additions and 70 deletions

View File

@@ -37,6 +37,7 @@ WHITELISTED_TAGS = [
"manufacturer",
"direction",
"brand",
"camera:direction",
]
def get_all_nodes():
@@ -54,50 +55,6 @@ def get_all_nodes():
     response.raise_for_status()
     return response.json()["elements"]
 
-def get_clusters(nodes: list[Any]):
-    # Request data from Overpass API
-    print("Requesting data from Overpass API.")
-    print("Data received. Parsing nodes...")
-
-    # Parse nodes and extract lat/lon for clustering
-    coordinates = []
-    node_ids = []
-    for element in nodes:
-        if element["type"] == "node":
-            coordinates.append([element["lat"], element["lon"]])
-            node_ids.append(element["id"])
-
-    # Convert coordinates to NumPy array for DBSCAN
-    coordinates = np.array(coordinates)
-
-    # Define the clustering radius (50 miles in meters)
-    radius_miles = 50
-    radius_km = radius_miles * 1.60934  # 1 mile = 1.60934 km
-    radius_in_radians = radius_km / 6371.0  # Earth's radius in km
-
-    # Perform DBSCAN clustering
-    db = DBSCAN(
-        eps=radius_in_radians, min_samples=1, algorithm="ball_tree", metric="haversine"
-    ).fit(np.radians(coordinates))
-    labels = db.labels_
-
-    # Prepare clusters and calculate centroids
-    clusters = {}
-    for label in set(labels):
-        cluster_points = coordinates[labels == label]
-        centroid = np.mean(cluster_points, axis=0)
-        first_node_id = node_ids[labels.tolist().index(label)]
-
-        # Store in clusters dict with centroid and first node ID
-        clusters[label] = {"lat": centroid[0], "lon": centroid[1], "id": first_node_id}
-
-    output = {"clusters": list(clusters.values())}
-    print("Clustering complete.")
-    return output
-
 
 def segment_regions(nodes: Any, tile_size_degrees: int) -> dict[Any]:
     print("Segmenting regions...")
     tile_dict = defaultdict(list)
@@ -118,22 +75,12 @@ def segment_regions(nodes: Any, tile_size_degrees: int) -> dict[Any]:
 def lambda_handler(event, context):
     nodes = get_all_nodes()
-    alpr_clusters = get_clusters(nodes)
     regions_dict = segment_regions(nodes=nodes, tile_size_degrees=TILE_SIZE_DEGREES)
 
     print("Uploading data to S3...")
     s3 = boto3.client("s3")
-    bucket = "deflock-clusters"
     bucket_new = "cdn.deflock.me"
-    key = "alpr_clusters.json"
-
-    s3.put_object(
-        Bucket=bucket,
-        Key=key,
-        Body=json.dumps(alpr_clusters),
-        ContentType="application/json",
-    )
 
     # TODO: handle outdated index files when their referenced files are deleted
     epoch = int(time.time())
@@ -174,7 +121,7 @@ def lambda_handler(event, context):
     return {
         "statusCode": 200,
-        "body": "Successfully clustered.",
+        "body": "Successfully cached OSM nodes",
     }
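
Editor's note on the deleted clustering step: scikit-learn's haversine metric expects (lat, lon) pairs in radians and returns great-circle distances as angles in radians, which is why the removed code divided the radius in kilometers by the Earth's mean radius (6371 km) and fit on np.radians(coordinates). A self-contained sketch of that conversion with the same 50-mile radius (the helper name and sample points are illustrative, not from the codebase):

import numpy as np
from sklearn.cluster import DBSCAN

EARTH_RADIUS_KM = 6371.0

def cluster_latlon(coords_deg, radius_miles=50.0):
    """Label (lat, lon) points given in degrees with DBSCAN cluster IDs."""
    # miles -> km -> radians: an angle in radians times the Earth's radius is a distance in km
    eps_radians = (radius_miles * 1.60934) / EARTH_RADIUS_KM
    db = DBSCAN(eps=eps_radians, min_samples=1,
                algorithm="ball_tree", metric="haversine")
    return db.fit(np.radians(coords_deg)).labels_

pts = np.array([[40.0, -105.0], [40.3, -105.2], [34.0, -118.0]])
print(cluster_latlon(pts))  # [0 0 1]: the first two points lie within 50 miles of each other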

View File

@@ -12,7 +12,6 @@ module "alpr_counts" {
module "alpr_clusters" {
module_name = "alpr_clusters"
source = "./modules/alpr_clusters"
deflock_stats_bucket = var.deflock_stats_bucket
deflock_cdn_bucket = var.deflock_cdn_bucket
rate = "rate(1 day)"
rate = "rate(1 hour)"
}
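
The rate values are EventBridge schedule expressions, so this one-line change moves the refresh cadence from daily to hourly. For reference, the same schedule set directly through the EventBridge API might look like the sketch below; the rule name is hypothetical, and in practice the rule is managed by the Terraform module rather than by application code:

import boto3

events = boto3.client("events")

# Hypothetical equivalent of the Terraform change: reschedule the trigger rule.
events.put_rule(
    Name="alpr_clusters_schedule",      # illustrative name, not from the repo
    ScheduleExpression="rate(1 hour)",  # previously rate(1 day)
)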

View File

@@ -22,7 +22,7 @@ resource "aws_iam_role_policy_attachment" "lambda_basic_execution" {
resource "aws_iam_policy" "lambda_s3_write_policy" {
name = "${var.module_name}_lambda_s3_write_policy"
description = "Policy for Lambda to write to S3 bucket ${var.deflock_stats_bucket}"
description = "Policy for Lambda to write to S3 bucket ${var.deflock_cdn_bucket}"
policy = jsonencode({
Version = "2012-10-17"
@@ -34,8 +34,7 @@ resource "aws_iam_policy" "lambda_s3_write_policy" {
         ]
         Effect = "Allow"
         Resource = [
-          "arn:aws:s3:::${var.deflock_cdn_bucket}/*",
-          "arn:aws:s3:::${var.deflock_stats_bucket}/*"
+          "arn:aws:s3:::${var.deflock_cdn_bucket}/*"
         ]
       }
     ]
@@ -57,6 +56,7 @@ resource "aws_lambda_function" "overpass_lambda" {
   environment {
     variables = {
       UPDATE_RATE_MINS = var.rate
+      OUTPUT_BUCKET    = var.deflock_cdn_bucket
     }
   }
 }
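
The new OUTPUT_BUCKET variable exposes the destination bucket to the function at runtime, though the handler above still hardcodes bucket_new = "cdn.deflock.me". A minimal sketch of reading it from the environment instead, with the hardcoded name kept only as a fallback (the fallback is my assumption, not something this commit implements):

import os

import boto3

# Assumed refactor: take the bucket from the environment the Terraform module
# now injects, falling back to the name currently hardcoded in the handler.
OUTPUT_BUCKET = os.environ.get("OUTPUT_BUCKET", "cdn.deflock.me")

s3 = boto3.client("s3")

def upload_json(key, body):
    """Upload a JSON document to the configured CDN bucket."""
    s3.put_object(
        Bucket=OUTPUT_BUCKET,
        Key=key,
        Body=body,
        ContentType="application/json",
    )

Note also that UPDATE_RATE_MINS receives the schedule expression string ("rate(1 hour)"), not a number of minutes, so anything reading that variable should parse it accordingly.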

View File

@@ -2,15 +2,6 @@ variable "module_name" {
description = "Name of the module"
}
variable "output_filename" {
description = "Filename for the ALPR clusters JSON file"
default = "alpr_clusters.json"
}
variable "deflock_stats_bucket" {
description = "S3 bucket for the ALPR clusters JSON file"
}
variable "deflock_cdn_bucket" {
description = "S3 bucket for the CDN"
}

View File

@@ -89,7 +89,7 @@
title="How Long Will It Take?"
>
<p>
We pull data from OpenStreetMap <i>daily</i>, so it may take up to 24 hours for your changes to appear on this site. Rest assured that your changes will be reflected here soon. As we continue to scale, we hope to reduce this delay.
We pull data from OpenStreetMap <i>hourly</i>, so it may take up to an hour for your changes to appear on this site. Rest assured that your changes will be reflected here soon. As we continue to scale, we hope to reduce this delay.
</p>
</v-alert>
</v-stepper-vertical-item>