refactor terraform modules, add clustering lambda (#4)

This commit is contained in:
Will Freeman
2024-11-25 17:57:15 -06:00
committed by GitHub
parent cf4b93092f
commit a13b48af5b
16 changed files with 293 additions and 1057 deletions

View File

@@ -0,0 +1,31 @@
#!/bin/bash
# Build, push, and deploy the alpr_clusters Lambda container image to ECR.
# Prerequisites: AWS credentials (assumed role) and a running Docker daemon.
#
# -e: abort on any command failure
# -u: treat unset variables as errors
# -o pipefail: a failing command anywhere in a pipeline fails the pipeline
#   (without this, a failed `aws ecr get-login-password` would be masked
#   by a "successful" docker login reading empty input)
set -euo pipefail

ECR_REPO_URL=912821578123.dkr.ecr.us-east-1.amazonaws.com/alpr_clusters-lambda
AWS_REGION=us-east-1

# check if AWS role is assumed before doing any work
if ! aws sts get-caller-identity &> /dev/null; then
echo "Error: AWS role is not assumed. Please assume the necessary role and try again."
exit 1
fi

# build from the src/ directory next to this script, not the caller's CWD
cd "$(dirname "$0")/src"

# build Docker image
docker build -t alpr_clusters .
# tag docker image with ECR repo
docker tag alpr_clusters:latest "$ECR_REPO_URL:latest"
# login to ECR
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$ECR_REPO_URL"
# push Docker image to ECR
docker push "$ECR_REPO_URL:latest"
# update lambda function to run the freshly pushed image
export AWS_PAGER=""
aws lambda update-function-code --function-name alpr_clusters --image-uri "$ECR_REPO_URL:latest"
echo "Deployed!"

View File

@@ -0,0 +1,78 @@
import boto3
import requests
import json
from sklearn.cluster import DBSCAN
import numpy as np
def get_clusters():
    """Fetch ALPR camera nodes from OpenStreetMap and cluster them.

    Queries the Overpass API for every node tagged as an ALPR surveillance
    device, then groups nearby nodes with DBSCAN (haversine metric) so that
    cameras within the clustering radius collapse into a single centroid.

    Returns:
        dict: ``{"clusters": [{"lat": float, "lon": float, "id": int}, ...]}``
        where ``lat``/``lon`` is the cluster centroid and ``id`` is the OSM
        node id of the first node assigned to that cluster.

    Raises:
        requests.HTTPError: if the Overpass API responds with an error status.
        requests.Timeout: if the Overpass API does not respond in time.
    """
    # Overpass QL query for all ALPR surveillance nodes
    query = """
    [out:json];
    node["man_made"="surveillance"]["surveillance:type"="ALPR"];
    out body;
    """
    # Request data from Overpass API. Use HTTPS, and a timeout so a slow
    # Overpass instance can't hang the Lambda until its own hard timeout.
    print("Requesting data from Overpass API.")
    url = "https://overpass-api.de/api/interpreter"
    response = requests.get(
        url,
        params={'data': query},
        headers={'User-Agent': 'DeFlock/1.0'},
        timeout=60,
    )
    # Surface HTTP errors directly instead of failing later on .json()
    response.raise_for_status()
    data = response.json()
    print("Data received. Parsing nodes...")

    # Parse nodes and extract lat/lon for clustering
    coordinates = []
    node_ids = []
    for element in data.get('elements', []):
        if element['type'] == 'node':
            coordinates.append([element['lat'], element['lon']])
            node_ids.append(element['id'])

    # No nodes at all: DBSCAN raises on an empty array, so short-circuit.
    if not coordinates:
        print("No ALPR nodes returned; nothing to cluster.")
        return {"clusters": []}

    # Convert coordinates to NumPy array for DBSCAN
    coordinates = np.array(coordinates)

    # Clustering radius: nodes within 50 miles merge into one cluster.
    # The haversine metric works in radians, so convert miles -> km -> radians.
    radius_miles = 50
    radius_km = radius_miles * 1.60934  # 1 mile = 1.60934 km
    radius_in_radians = radius_km / 6371.0  # Earth's mean radius in km

    # Perform DBSCAN clustering (inputs must also be in radians)
    db = DBSCAN(eps=radius_in_radians, min_samples=1, algorithm='ball_tree',
                metric='haversine').fit(np.radians(coordinates))
    labels = db.labels_

    # Prepare clusters and calculate centroids.
    # Hoist the list conversion out of the loop: the original re-ran
    # labels.tolist() once per cluster, which is quadratic overall.
    label_list = labels.tolist()
    clusters = {}
    for label in set(label_list):
        cluster_points = coordinates[labels == label]
        centroid = np.mean(cluster_points, axis=0)
        # Use the id of the first node assigned to this cluster as its id
        first_node_id = node_ids[label_list.index(label)]
        clusters[label] = {
            "lat": centroid[0],
            "lon": centroid[1],
            "id": first_node_id,
        }

    print("Clustering complete.")
    return {"clusters": list(clusters.values())}
def lambda_handler(event, context):
    """AWS Lambda entry point: compute ALPR clusters and publish them to S3.

    Fetches and clusters the ALPR nodes via ``get_clusters`` and writes the
    result as a JSON document to the ``deflock-clusters`` bucket, where the
    frontend reads precomputed clusters instead of raw nodes.

    Returns:
        dict: an HTTP-style response with ``statusCode`` and ``body``.
    """
    cluster_payload = get_clusters()

    # Upload the serialized clusters; content type lets clients parse directly.
    s3_client = boto3.client('s3')
    s3_client.put_object(
        Bucket='deflock-clusters',
        Key='alpr_clusters.json',
        Body=json.dumps(cluster_payload),
        ContentType='application/json',
    )

    return {
        'statusCode': 200,
        'body': 'Successfully fetched ALPR counts.',
    }

View File

@@ -0,0 +1,4 @@
boto3
requests
scikit-learn
numpy