remove server-side clustering, increase update frequency, clean up tf

Will Freeman
2025-01-04 13:38:22 -07:00
parent c5a84a6db8
commit 884ac11200
5 changed files with 7 additions and 70 deletions

View File

@@ -37,6 +37,7 @@ WHITELISTED_TAGS = [
"manufacturer",
"direction",
"brand",
"camera:direction",
]
def get_all_nodes():
@@ -54,50 +55,6 @@ def get_all_nodes():
     response.raise_for_status()
     return response.json()["elements"]
 
-def get_clusters(nodes: list[Any]):
-    # Request data from Overpass API
-    print("Requesting data from Overpass API.")
-    print("Data received. Parsing nodes...")
-
-    # Parse nodes and extract lat/lon for clustering
-    coordinates = []
-    node_ids = []
-    for element in nodes:
-        if element["type"] == "node":
-            coordinates.append([element["lat"], element["lon"]])
-            node_ids.append(element["id"])
-
-    # Convert coordinates to NumPy array for DBSCAN
-    coordinates = np.array(coordinates)
-
-    # Define the clustering radius (50 miles in meters)
-    radius_miles = 50
-    radius_km = radius_miles * 1.60934  # 1 mile = 1.60934 km
-    radius_in_radians = radius_km / 6371.0  # Earth's radius in km
-
-    # Perform DBSCAN clustering
-    db = DBSCAN(
-        eps=radius_in_radians, min_samples=1, algorithm="ball_tree", metric="haversine"
-    ).fit(np.radians(coordinates))
-    labels = db.labels_
-
-    # Prepare clusters and calculate centroids
-    clusters = {}
-    for label in set(labels):
-        cluster_points = coordinates[labels == label]
-        centroid = np.mean(cluster_points, axis=0)
-        first_node_id = node_ids[labels.tolist().index(label)]
-
-        # Store in clusters dict with centroid and first node ID
-        clusters[label] = {"lat": centroid[0], "lon": centroid[1], "id": first_node_id}
-
-    output = {"clusters": list(clusters.values())}
-    print("Clustering complete.")
-    return output
-
 
 def segment_regions(nodes: Any, tile_size_degrees: int) -> dict[Any]:
     print("Segmenting regions...")
     tile_dict = defaultdict(list)
@@ -118,22 +75,12 @@ def segment_regions(nodes: Any, tile_size_degrees: int) -> dict[Any]:
 def lambda_handler(event, context):
     nodes = get_all_nodes()
-    alpr_clusters = get_clusters(nodes)
     regions_dict = segment_regions(nodes=nodes, tile_size_degrees=TILE_SIZE_DEGREES)
 
     print("Uploading data to S3...")
     s3 = boto3.client("s3")
-    bucket = "deflock-clusters"
     bucket_new = "cdn.deflock.me"
-    key = "alpr_clusters.json"
-
-    s3.put_object(
-        Bucket=bucket,
-        Key=key,
-        Body=json.dumps(alpr_clusters),
-        ContentType="application/json",
-    )
 
     # TODO: handle outdated index files when their referenced files are deleted
     epoch = int(time.time())
@@ -174,7 +121,7 @@ def lambda_handler(event, context):
     return {
         "statusCode": 200,
-        "body": "Successfully clustered.",
+        "body": "Successfully cached OSM nodes",
     }
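
Editor's note on the deleted clustering step: scikit-learn's haversine metric expects (lat, lon) pairs in radians and returns great-circle distances as angles in radians, which is why the removed code divided the radius in kilometers by the Earth's mean radius (6371 km) and fit on np.radians(coordinates). A self-contained sketch of that conversion with the same 50-mile radius (the helper name and sample points are illustrative, not from the codebase):

import numpy as np
from sklearn.cluster import DBSCAN

EARTH_RADIUS_KM = 6371.0

def cluster_latlon(coords_deg, radius_miles=50.0):
    """Label (lat, lon) points given in degrees with DBSCAN cluster IDs."""
    # miles -> km -> radians: an angle in radians times the Earth's radius is a distance in km
    eps_radians = (radius_miles * 1.60934) / EARTH_RADIUS_KM
    db = DBSCAN(eps=eps_radians, min_samples=1,
                algorithm="ball_tree", metric="haversine")
    return db.fit(np.radians(coords_deg)).labels_

pts = np.array([[40.0, -105.0], [40.3, -105.2], [34.0, -118.0]])
print(cluster_latlon(pts))  # [0 0 1]: the first two points lie within 50 miles of each other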

View File

@@ -12,7 +12,6 @@ module "alpr_counts" {
module "alpr_clusters" {
module_name = "alpr_clusters"
source = "./modules/alpr_clusters"
deflock_stats_bucket = var.deflock_stats_bucket
deflock_cdn_bucket = var.deflock_cdn_bucket
rate = "rate(1 day)"
rate = "rate(1 hour)"
}
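
The rate values are EventBridge schedule expressions, so this one-line change moves the refresh cadence from daily to hourly. For reference, the same schedule set directly through the EventBridge API might look like the sketch below; the rule name is hypothetical, and in practice the rule is managed by the Terraform module rather than by application code:

import boto3

events = boto3.client("events")

# Hypothetical equivalent of the Terraform change: reschedule the trigger rule.
events.put_rule(
    Name="alpr_clusters_schedule",      # illustrative name, not from the repo
    ScheduleExpression="rate(1 hour)",  # previously rate(1 day)
)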

View File

@@ -22,7 +22,7 @@ resource "aws_iam_role_policy_attachment" "lambda_basic_execution" {
resource "aws_iam_policy" "lambda_s3_write_policy" {
name = "${var.module_name}_lambda_s3_write_policy"
description = "Policy for Lambda to write to S3 bucket ${var.deflock_stats_bucket}"
description = "Policy for Lambda to write to S3 bucket ${var.deflock_cdn_bucket}"
policy = jsonencode({
Version = "2012-10-17"
@@ -34,8 +34,7 @@ resource "aws_iam_policy" "lambda_s3_write_policy" {
         ]
         Effect = "Allow"
         Resource = [
-          "arn:aws:s3:::${var.deflock_cdn_bucket}/*",
-          "arn:aws:s3:::${var.deflock_stats_bucket}/*"
+          "arn:aws:s3:::${var.deflock_cdn_bucket}/*"
         ]
       }
     ]
@@ -57,6 +56,7 @@ resource "aws_lambda_function" "overpass_lambda" {
   environment {
     variables = {
       UPDATE_RATE_MINS = var.rate
+      OUTPUT_BUCKET    = var.deflock_cdn_bucket
     }
   }
 }
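
The new OUTPUT_BUCKET variable exposes the destination bucket to the function at runtime, though the handler above still hardcodes bucket_new = "cdn.deflock.me". A minimal sketch of reading it from the environment instead, with the hardcoded name kept only as a fallback (the fallback is my assumption, not something this commit implements):

import os

import boto3

# Assumed refactor: take the bucket from the environment the Terraform module
# now injects, falling back to the name currently hardcoded in the handler.
OUTPUT_BUCKET = os.environ.get("OUTPUT_BUCKET", "cdn.deflock.me")

s3 = boto3.client("s3")

def upload_json(key, body):
    """Upload a JSON document to the configured CDN bucket."""
    s3.put_object(
        Bucket=OUTPUT_BUCKET,
        Key=key,
        Body=body,
        ContentType="application/json",
    )

Note also that UPDATE_RATE_MINS receives the schedule expression string ("rate(1 hour)"), not a number of minutes, so anything reading that variable should parse it accordingly.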

View File

@@ -2,15 +2,6 @@ variable "module_name" {
description = "Name of the module"
}
variable "output_filename" {
description = "Filename for the ALPR clusters JSON file"
default = "alpr_clusters.json"
}
variable "deflock_stats_bucket" {
description = "S3 bucket for the ALPR clusters JSON file"
}
variable "deflock_cdn_bucket" {
description = "S3 bucket for the CDN"
}

View File

@@ -89,7 +89,7 @@
title="How Long Will It Take?"
>
<p>
We pull data from OpenStreetMap <i>daily</i>, so it may take up to 24 hours for your changes to appear on this site. Rest assured that your changes will be reflected here soon. As we continue to scale, we hope to reduce this delay.
We pull data from OpenStreetMap <i>hourly</i>, so it may take up to an hour for your changes to appear on this site. Rest assured that your changes will be reflected here soon. As we continue to scale, we hope to reduce this delay.
</p>
</v-alert>
</v-stepper-vertical-item>