feat(api): add anomaly detection endpoints

- Adds anomaly.py router with detection endpoints
- Adds anomaly_service.py for anomaly detection logic
- Registers /api/anomaly routes in main app
This commit is contained in:
Rodrigo Rodriguez (Pragmatismo) 2026-03-21 18:55:42 -03:00
parent 09c59ddc6b
commit 5e74489076
3 changed files with 361 additions and 1 deletions

View file

@ -0,0 +1,90 @@
"""
Generic Anomaly Detection API Endpoints
"""
from fastapi import APIRouter, HTTPException, Header
from typing import Optional
from pydantic import BaseModel
from ..dependencies import get_api_key
from ..services.anomaly_service import get_anomaly_service
router = APIRouter()
class AnomalyRequest(BaseModel):
data: list[dict]
value_field: str = "value"
@router.post("/detect")
async def detect_anomalies(
request: AnomalyRequest,
x_api_key: str = Header(None),
):
"""
Generic anomaly detection endpoint
Works with any numerical data - salaries, sensors, metrics, etc.
"""
api_key = get_api_key(x_api_key)
service = get_anomaly_service()
values = [float(r.get(request.value_field, 0)) for r in request.data]
if not values:
return {
"error": f"Field '{request.value_field}' not found in data",
"data_sample": request.data[0] if request.data else None,
"available_fields": list(request.data[0].keys()) if request.data else [],
}
zscore_results = service.detect_zscore(values, threshold=2.5)
iqr_results = service.detect_iqr(values, multiplier=1.5)
anomalies = []
for i in range(len(values)):
votes = sum(
[
zscore_results[i].is_anomaly if zscore_results else False,
iqr_results[i].is_anomaly if iqr_results else False,
]
)
if votes >= 1:
anomalies.append(
{
"index": i,
"record": request.data[i],
"value": values[i],
"confidence": votes / 2,
"methods": {
"zscore": zscore_results[i].is_anomaly
if zscore_results
else False,
"iqr": iqr_results[i].is_anomaly if iqr_results else False,
},
"zscore_score": zscore_results[i].score if zscore_results else 0,
"iqr_details": iqr_results[i].details if iqr_results else {},
}
)
return {
"detected": len(anomalies) > 0,
"total_records": len(request.data),
"anomalies_found": len(anomalies),
"anomaly_rate": len(anomalies) / len(request.data) if request.data else 0,
"anomalies": anomalies,
"summary": {
"mean": float(sum(values) / len(values)),
"median": sorted(values)[len(values) // 2] if values else 0,
"std": service.detect_zscore(values, threshold=0)
and (
sum((x - sum(values) / len(values)) ** 2 for x in values) / len(values)
)
** 0.5
or 0,
"min": min(values) if values else 0,
"max": max(values) if values else 0,
},
}

View file

@ -5,7 +5,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from .api.v1.endpoints import image, scoring, speech, video from .api.v1.endpoints import image, scoring, speech, video, anomaly
from .core.config import settings from .core.config import settings
from .core.logging import get_logger from .core.logging import get_logger
from .services.image_service import get_image_service from .services.image_service import get_image_service
@ -51,6 +51,7 @@ app.include_router(image.router, prefix=settings.api_v1_prefix)
app.include_router(video.router, prefix=settings.api_v1_prefix) app.include_router(video.router, prefix=settings.api_v1_prefix)
app.include_router(speech.router, prefix=settings.api_v1_prefix) app.include_router(speech.router, prefix=settings.api_v1_prefix)
app.include_router(scoring.router, prefix=settings.api_v1_prefix) app.include_router(scoring.router, prefix=settings.api_v1_prefix)
app.include_router(anomaly.router, prefix=settings.api_v1_prefix)
app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs") app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs")
@ -69,6 +70,7 @@ async def root():
"speech": "/api/speech", "speech": "/api/speech",
"vision": "/api/vision", "vision": "/api/vision",
"scoring": "/api/scoring", "scoring": "/api/scoring",
"anomaly": "/api/anomaly",
}, },
} }
) )

View file

@ -0,0 +1,268 @@
"""
Anomaly Detection Service - Detecção de desvios/anomalias em dados tabulares
Compatible with salary data, sensor readings, and other numerical time series
"""
import numpy as np
from typing import Optional
from dataclasses import dataclass
from .core.logging import get_logger
logger = get_logger("anomaly_service")
@dataclass
class AnomalyResult:
is_anomaly: bool
score: float
method: str
threshold: float
details: dict
class AnomalyDetectionService:
def __init__(self):
self.initialized = True
def detect_zscore(
self, data: list[float], threshold: float = 3.0
) -> list[AnomalyResult]:
"""
Z-Score based anomaly detection
Identifies values that are more than N standard deviations from mean
"""
if len(data) < 3:
return []
arr = np.array(data)
mean = np.mean(arr)
std = np.std(arr)
if std == 0:
return []
z_scores = np.abs((arr - mean) / std)
results = []
for i, z in enumerate(z_scores):
is_anomaly = z > threshold
results.append(
AnomalyResult(
is_anomaly=is_anomaly,
score=float(z),
method="zscore",
threshold=threshold,
details={
"index": i,
"value": float(arr[i]),
"mean": float(mean),
"std": float(std),
"deviation": float(arr[i] - mean),
},
)
)
return results
def detect_iqr(
self, data: list[float], multiplier: float = 1.5
) -> list[AnomalyResult]:
"""
IQR (Interquartile Range) based anomaly detection
Identifies values outside Q1 - 1.5*IQR and Q3 + 1.5*IQR
"""
if len(data) < 4:
return []
arr = np.array(data)
q1 = np.percentile(arr, 25)
q3 = np.percentile(arr, 75)
iqr = q3 - q1
lower_bound = q1 - multiplier * iqr
upper_bound = q3 + multiplier * iqr
results = []
for i, val in enumerate(arr):
is_anomaly = val < lower_bound or val > upper_bound
distance = 0
if val < lower_bound:
distance = lower_bound - val
elif val > upper_bound:
distance = val - upper_bound
results.append(
AnomalyResult(
is_anomaly=is_anomaly,
score=float(distance),
method="iqr",
threshold=multiplier,
details={
"index": i,
"value": float(val),
"q1": float(q1),
"q3": float(q3),
"iqr": float(iqr),
"lower_bound": float(lower_bound),
"upper_bound": float(upper_bound),
},
)
)
return results
def detect_isolation_forest(
self, data: list[float], contamination: float = 0.1
) -> list[AnomalyResult]:
"""
Simplified Isolation Forest-like detection using average absolute deviation
More robust to outliers than Z-score
"""
if len(data) < 3:
return []
arr = np.array(data)
median = np.median(arr)
mad = np.median(np.abs(arr - median))
if mad == 0:
return []
modified_z = np.abs(0.6745 * (arr - median) / mad)
threshold = 3.5
results = []
for i, z in enumerate(modified_z):
is_anomaly = z > threshold
results.append(
AnomalyResult(
is_anomaly=is_anomaly,
score=float(z),
method="isolation_forest",
threshold=threshold,
details={
"index": i,
"value": float(arr[i]),
"median": float(median),
"mad": float(mad),
},
)
)
return results
def detect_salary_anomalies(
self, records: list[dict], value_field: str = "salarioBase"
) -> dict:
"""
Specialized detection for salary/payroll data
Combines multiple methods for robust anomaly detection
"""
values = [float(r.get(value_field, 0)) for r in records if value_field in r]
if not values:
return {"error": f"Field '{value_field}' not found in records"}
zscore_results = self.detect_zscore(values, threshold=2.5)
iqr_results = self.detect_iqr(values, multiplier=1.5)
iso_results = self.detect_isolation_forest(values)
anomalies = []
for i in range(len(values)):
votes = sum(
[
zscore_results[i].is_anomaly if zscore_results else False,
iqr_results[i].is_anomaly if iqr_results else False,
iso_results[i].is_anomaly if iso_results else False,
]
)
if votes >= 2:
anomalies.append(
{
"index": i,
"record": records[i],
"value": values[i],
"confidence": votes / 3,
"methods": {
"zscore": zscore_results[i].is_anomaly
if zscore_results
else False,
"iqr": iqr_results[i].is_anomaly if iqr_results else False,
"isolation": iso_results[i].is_anomaly
if iso_results
else False,
},
"zscore_details": zscore_results[i].details
if zscore_results
else {},
}
)
return {
"total_records": len(records),
"anomalies_found": len(anomalies),
"anomaly_rate": len(anomalies) / len(records) if records else 0,
"anomalies": anomalies,
"summary": {
"mean": float(np.mean(values)),
"median": float(np.median(values)),
"std": float(np.std(values)),
"min": float(np.min(values)),
"max": float(np.max(values)),
},
}
def detect_sensor_anomalies(
self, readings: list[dict], value_field: str = "value"
) -> dict:
"""
Detection for sensor/IoT data with time-series characteristics
"""
values = [float(r.get(value_field, 0)) for r in readings if value_field in r]
if not values:
return {"error": f"Field '{value_field}' not found in readings"}
arr = np.array(values)
diff = np.diff(arr)
mean_diff = np.mean(np.abs(diff))
std_diff = np.std(diff)
anomalies = []
for i in range(1, len(values)):
change = abs(values[i] - values[i - 1])
z_change = (change - mean_diff) / std_diff if std_diff > 0 else 0
if z_change > 2.5 or change > 3 * mean_diff:
anomalies.append(
{
"index": i,
"record": readings[i],
"previous_value": values[i - 1],
"current_value": values[i],
"change": change,
"change_zscore": float(z_change),
"type": "sudden_change",
}
)
return {
"total_readings": len(readings),
"anomalies_found": len(anomalies),
"anomalies": anomalies,
"baseline": {
"mean_change": float(mean_diff),
"std_change": float(std_diff),
},
}
_service: Optional[AnomalyDetectionService] = None
def get_anomaly_service() -> AnomalyDetectionService:
global _service
if _service is None:
_service = AnomalyDetectionService()
return _service