feat(api): add anomaly detection endpoints
- Adds anomaly.py router with detection endpoints - Adds anomaly_service.py for anomaly detection logic - Registers /api/anomaly routes in main app
This commit is contained in:
parent
09c59ddc6b
commit
5e74489076
3 changed files with 361 additions and 1 deletions
90
src/api/v1/endpoints/anomaly.py
Normal file
90
src/api/v1/endpoints/anomaly.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
"""
|
||||||
|
Generic Anomaly Detection API Endpoints
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException, Header
|
||||||
|
from typing import Optional
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from ..dependencies import get_api_key
|
||||||
|
from ..services.anomaly_service import get_anomaly_service
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
class AnomalyRequest(BaseModel):
|
||||||
|
data: list[dict]
|
||||||
|
value_field: str = "value"
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/detect")
|
||||||
|
async def detect_anomalies(
|
||||||
|
request: AnomalyRequest,
|
||||||
|
x_api_key: str = Header(None),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Generic anomaly detection endpoint
|
||||||
|
Works with any numerical data - salaries, sensors, metrics, etc.
|
||||||
|
"""
|
||||||
|
api_key = get_api_key(x_api_key)
|
||||||
|
|
||||||
|
service = get_anomaly_service()
|
||||||
|
|
||||||
|
values = [float(r.get(request.value_field, 0)) for r in request.data]
|
||||||
|
|
||||||
|
if not values:
|
||||||
|
return {
|
||||||
|
"error": f"Field '{request.value_field}' not found in data",
|
||||||
|
"data_sample": request.data[0] if request.data else None,
|
||||||
|
"available_fields": list(request.data[0].keys()) if request.data else [],
|
||||||
|
}
|
||||||
|
|
||||||
|
zscore_results = service.detect_zscore(values, threshold=2.5)
|
||||||
|
iqr_results = service.detect_iqr(values, multiplier=1.5)
|
||||||
|
|
||||||
|
anomalies = []
|
||||||
|
for i in range(len(values)):
|
||||||
|
votes = sum(
|
||||||
|
[
|
||||||
|
zscore_results[i].is_anomaly if zscore_results else False,
|
||||||
|
iqr_results[i].is_anomaly if iqr_results else False,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
if votes >= 1:
|
||||||
|
anomalies.append(
|
||||||
|
{
|
||||||
|
"index": i,
|
||||||
|
"record": request.data[i],
|
||||||
|
"value": values[i],
|
||||||
|
"confidence": votes / 2,
|
||||||
|
"methods": {
|
||||||
|
"zscore": zscore_results[i].is_anomaly
|
||||||
|
if zscore_results
|
||||||
|
else False,
|
||||||
|
"iqr": iqr_results[i].is_anomaly if iqr_results else False,
|
||||||
|
},
|
||||||
|
"zscore_score": zscore_results[i].score if zscore_results else 0,
|
||||||
|
"iqr_details": iqr_results[i].details if iqr_results else {},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"detected": len(anomalies) > 0,
|
||||||
|
"total_records": len(request.data),
|
||||||
|
"anomalies_found": len(anomalies),
|
||||||
|
"anomaly_rate": len(anomalies) / len(request.data) if request.data else 0,
|
||||||
|
"anomalies": anomalies,
|
||||||
|
"summary": {
|
||||||
|
"mean": float(sum(values) / len(values)),
|
||||||
|
"median": sorted(values)[len(values) // 2] if values else 0,
|
||||||
|
"std": service.detect_zscore(values, threshold=0)
|
||||||
|
and (
|
||||||
|
sum((x - sum(values) / len(values)) ** 2 for x in values) / len(values)
|
||||||
|
)
|
||||||
|
** 0.5
|
||||||
|
or 0,
|
||||||
|
"min": min(values) if values else 0,
|
||||||
|
"max": max(values) if values else 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
@ -5,7 +5,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
from .api.v1.endpoints import image, scoring, speech, video
|
from .api.v1.endpoints import image, scoring, speech, video, anomaly
|
||||||
from .core.config import settings
|
from .core.config import settings
|
||||||
from .core.logging import get_logger
|
from .core.logging import get_logger
|
||||||
from .services.image_service import get_image_service
|
from .services.image_service import get_image_service
|
||||||
|
|
@ -51,6 +51,7 @@ app.include_router(image.router, prefix=settings.api_v1_prefix)
|
||||||
app.include_router(video.router, prefix=settings.api_v1_prefix)
|
app.include_router(video.router, prefix=settings.api_v1_prefix)
|
||||||
app.include_router(speech.router, prefix=settings.api_v1_prefix)
|
app.include_router(speech.router, prefix=settings.api_v1_prefix)
|
||||||
app.include_router(scoring.router, prefix=settings.api_v1_prefix)
|
app.include_router(scoring.router, prefix=settings.api_v1_prefix)
|
||||||
|
app.include_router(anomaly.router, prefix=settings.api_v1_prefix)
|
||||||
|
|
||||||
app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs")
|
app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs")
|
||||||
|
|
||||||
|
|
@ -69,6 +70,7 @@ async def root():
|
||||||
"speech": "/api/speech",
|
"speech": "/api/speech",
|
||||||
"vision": "/api/vision",
|
"vision": "/api/vision",
|
||||||
"scoring": "/api/scoring",
|
"scoring": "/api/scoring",
|
||||||
|
"anomaly": "/api/anomaly",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
||||||
268
src/services/anomaly_service.py
Normal file
268
src/services/anomaly_service.py
Normal file
|
|
@ -0,0 +1,268 @@
|
||||||
|
"""
|
||||||
|
Anomaly Detection Service - Detecção de desvios/anomalias em dados tabulares
|
||||||
|
Compatible with salary data, sensor readings, and other numerical time series
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from typing import Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from .core.logging import get_logger
|
||||||
|
|
||||||
|
logger = get_logger("anomaly_service")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class AnomalyResult:
|
||||||
|
is_anomaly: bool
|
||||||
|
score: float
|
||||||
|
method: str
|
||||||
|
threshold: float
|
||||||
|
details: dict
|
||||||
|
|
||||||
|
|
||||||
|
class AnomalyDetectionService:
|
||||||
|
def __init__(self):
|
||||||
|
self.initialized = True
|
||||||
|
|
||||||
|
def detect_zscore(
|
||||||
|
self, data: list[float], threshold: float = 3.0
|
||||||
|
) -> list[AnomalyResult]:
|
||||||
|
"""
|
||||||
|
Z-Score based anomaly detection
|
||||||
|
Identifies values that are more than N standard deviations from mean
|
||||||
|
"""
|
||||||
|
if len(data) < 3:
|
||||||
|
return []
|
||||||
|
|
||||||
|
arr = np.array(data)
|
||||||
|
mean = np.mean(arr)
|
||||||
|
std = np.std(arr)
|
||||||
|
|
||||||
|
if std == 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
z_scores = np.abs((arr - mean) / std)
|
||||||
|
results = []
|
||||||
|
|
||||||
|
for i, z in enumerate(z_scores):
|
||||||
|
is_anomaly = z > threshold
|
||||||
|
results.append(
|
||||||
|
AnomalyResult(
|
||||||
|
is_anomaly=is_anomaly,
|
||||||
|
score=float(z),
|
||||||
|
method="zscore",
|
||||||
|
threshold=threshold,
|
||||||
|
details={
|
||||||
|
"index": i,
|
||||||
|
"value": float(arr[i]),
|
||||||
|
"mean": float(mean),
|
||||||
|
"std": float(std),
|
||||||
|
"deviation": float(arr[i] - mean),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def detect_iqr(
|
||||||
|
self, data: list[float], multiplier: float = 1.5
|
||||||
|
) -> list[AnomalyResult]:
|
||||||
|
"""
|
||||||
|
IQR (Interquartile Range) based anomaly detection
|
||||||
|
Identifies values outside Q1 - 1.5*IQR and Q3 + 1.5*IQR
|
||||||
|
"""
|
||||||
|
if len(data) < 4:
|
||||||
|
return []
|
||||||
|
|
||||||
|
arr = np.array(data)
|
||||||
|
q1 = np.percentile(arr, 25)
|
||||||
|
q3 = np.percentile(arr, 75)
|
||||||
|
iqr = q3 - q1
|
||||||
|
|
||||||
|
lower_bound = q1 - multiplier * iqr
|
||||||
|
upper_bound = q3 + multiplier * iqr
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for i, val in enumerate(arr):
|
||||||
|
is_anomaly = val < lower_bound or val > upper_bound
|
||||||
|
distance = 0
|
||||||
|
if val < lower_bound:
|
||||||
|
distance = lower_bound - val
|
||||||
|
elif val > upper_bound:
|
||||||
|
distance = val - upper_bound
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
AnomalyResult(
|
||||||
|
is_anomaly=is_anomaly,
|
||||||
|
score=float(distance),
|
||||||
|
method="iqr",
|
||||||
|
threshold=multiplier,
|
||||||
|
details={
|
||||||
|
"index": i,
|
||||||
|
"value": float(val),
|
||||||
|
"q1": float(q1),
|
||||||
|
"q3": float(q3),
|
||||||
|
"iqr": float(iqr),
|
||||||
|
"lower_bound": float(lower_bound),
|
||||||
|
"upper_bound": float(upper_bound),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def detect_isolation_forest(
|
||||||
|
self, data: list[float], contamination: float = 0.1
|
||||||
|
) -> list[AnomalyResult]:
|
||||||
|
"""
|
||||||
|
Simplified Isolation Forest-like detection using average absolute deviation
|
||||||
|
More robust to outliers than Z-score
|
||||||
|
"""
|
||||||
|
if len(data) < 3:
|
||||||
|
return []
|
||||||
|
|
||||||
|
arr = np.array(data)
|
||||||
|
median = np.median(arr)
|
||||||
|
mad = np.median(np.abs(arr - median))
|
||||||
|
|
||||||
|
if mad == 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
modified_z = np.abs(0.6745 * (arr - median) / mad)
|
||||||
|
threshold = 3.5
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for i, z in enumerate(modified_z):
|
||||||
|
is_anomaly = z > threshold
|
||||||
|
results.append(
|
||||||
|
AnomalyResult(
|
||||||
|
is_anomaly=is_anomaly,
|
||||||
|
score=float(z),
|
||||||
|
method="isolation_forest",
|
||||||
|
threshold=threshold,
|
||||||
|
details={
|
||||||
|
"index": i,
|
||||||
|
"value": float(arr[i]),
|
||||||
|
"median": float(median),
|
||||||
|
"mad": float(mad),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def detect_salary_anomalies(
|
||||||
|
self, records: list[dict], value_field: str = "salarioBase"
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Specialized detection for salary/payroll data
|
||||||
|
Combines multiple methods for robust anomaly detection
|
||||||
|
"""
|
||||||
|
values = [float(r.get(value_field, 0)) for r in records if value_field in r]
|
||||||
|
|
||||||
|
if not values:
|
||||||
|
return {"error": f"Field '{value_field}' not found in records"}
|
||||||
|
|
||||||
|
zscore_results = self.detect_zscore(values, threshold=2.5)
|
||||||
|
iqr_results = self.detect_iqr(values, multiplier=1.5)
|
||||||
|
iso_results = self.detect_isolation_forest(values)
|
||||||
|
|
||||||
|
anomalies = []
|
||||||
|
for i in range(len(values)):
|
||||||
|
votes = sum(
|
||||||
|
[
|
||||||
|
zscore_results[i].is_anomaly if zscore_results else False,
|
||||||
|
iqr_results[i].is_anomaly if iqr_results else False,
|
||||||
|
iso_results[i].is_anomaly if iso_results else False,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
if votes >= 2:
|
||||||
|
anomalies.append(
|
||||||
|
{
|
||||||
|
"index": i,
|
||||||
|
"record": records[i],
|
||||||
|
"value": values[i],
|
||||||
|
"confidence": votes / 3,
|
||||||
|
"methods": {
|
||||||
|
"zscore": zscore_results[i].is_anomaly
|
||||||
|
if zscore_results
|
||||||
|
else False,
|
||||||
|
"iqr": iqr_results[i].is_anomaly if iqr_results else False,
|
||||||
|
"isolation": iso_results[i].is_anomaly
|
||||||
|
if iso_results
|
||||||
|
else False,
|
||||||
|
},
|
||||||
|
"zscore_details": zscore_results[i].details
|
||||||
|
if zscore_results
|
||||||
|
else {},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_records": len(records),
|
||||||
|
"anomalies_found": len(anomalies),
|
||||||
|
"anomaly_rate": len(anomalies) / len(records) if records else 0,
|
||||||
|
"anomalies": anomalies,
|
||||||
|
"summary": {
|
||||||
|
"mean": float(np.mean(values)),
|
||||||
|
"median": float(np.median(values)),
|
||||||
|
"std": float(np.std(values)),
|
||||||
|
"min": float(np.min(values)),
|
||||||
|
"max": float(np.max(values)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def detect_sensor_anomalies(
|
||||||
|
self, readings: list[dict], value_field: str = "value"
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Detection for sensor/IoT data with time-series characteristics
|
||||||
|
"""
|
||||||
|
values = [float(r.get(value_field, 0)) for r in readings if value_field in r]
|
||||||
|
|
||||||
|
if not values:
|
||||||
|
return {"error": f"Field '{value_field}' not found in readings"}
|
||||||
|
|
||||||
|
arr = np.array(values)
|
||||||
|
diff = np.diff(arr)
|
||||||
|
mean_diff = np.mean(np.abs(diff))
|
||||||
|
std_diff = np.std(diff)
|
||||||
|
|
||||||
|
anomalies = []
|
||||||
|
for i in range(1, len(values)):
|
||||||
|
change = abs(values[i] - values[i - 1])
|
||||||
|
z_change = (change - mean_diff) / std_diff if std_diff > 0 else 0
|
||||||
|
|
||||||
|
if z_change > 2.5 or change > 3 * mean_diff:
|
||||||
|
anomalies.append(
|
||||||
|
{
|
||||||
|
"index": i,
|
||||||
|
"record": readings[i],
|
||||||
|
"previous_value": values[i - 1],
|
||||||
|
"current_value": values[i],
|
||||||
|
"change": change,
|
||||||
|
"change_zscore": float(z_change),
|
||||||
|
"type": "sudden_change",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_readings": len(readings),
|
||||||
|
"anomalies_found": len(anomalies),
|
||||||
|
"anomalies": anomalies,
|
||||||
|
"baseline": {
|
||||||
|
"mean_change": float(mean_diff),
|
||||||
|
"std_change": float(std_diff),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_service: Optional[AnomalyDetectionService] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_anomaly_service() -> AnomalyDetectionService:
|
||||||
|
global _service
|
||||||
|
if _service is None:
|
||||||
|
_service = AnomalyDetectionService()
|
||||||
|
return _service
|
||||||
Loading…
Add table
Reference in a new issue