From fc95cba887c3b133c28b277e8682db4b06de98a0 Mon Sep 17 00:00:00 2001 From: "Rodrigo Rodriguez (Pragmatismo)" Date: Sun, 5 Apr 2026 09:12:32 -0300 Subject: [PATCH] Update submodules: botserver, botui and project guidelines. --- .opencode/plans/folders.md | 429 +++++++++++ AGENTS.md | 1479 +++++------------------------------- botserver | 2 +- botui | 2 +- 4 files changed, 610 insertions(+), 1302 deletions(-) create mode 100644 .opencode/plans/folders.md diff --git a/.opencode/plans/folders.md b/.opencode/plans/folders.md new file mode 100644 index 0000000..6955ba1 --- /dev/null +++ b/.opencode/plans/folders.md @@ -0,0 +1,429 @@ +# Folders.md: Sistema de Permissões de Pastas (Estilo Windows ACL) + +## Visão Geral + +Implementar controle de acesso a pastas baseado em grupos RBAC, permitindo que `USE KB` inclua seletivamente pastas conforme os grupos do usuário. + +## Arquitetura Atual + +### Já Existe ✅ +| Componente | Arquivo | Estado | +|------------|---------|--------| +| `KbPermissions` | `core/kb/permissions.rs` | Completo - `AccessLevel::GroupBased`, `FolderPermission` | +| `UserContext` | `core/kb/permissions.rs` | Tem `groups: Vec` | +| `build_qdrant_permission_filter()` | `core/kb/permissions.rs` | Gera filtros Qdrant por grupo | +| `rbac_groups` | Schema core.rs | Tabela existe | +| `rbac_user_groups` | Schema core.rs | Tabela existe (user → group) | +| `file_shares` | migrations drive | Tem `shared_with_group` | + +### Falta Integrar ❌ +| Componente | Descrição | +|------------|-----------| +| `folder_group_access` | Tabela para link pasta → grupo | +| `UserContext.groups` | Popular grupos do BD na sessão | +| `USE KB` permission check | Verificar grupos antes de adicionar | +| UI Admin | Atribuir grupos a pastas | +| `USE FOLDER` keyword | BASIC keyword para pastas | + +--- + +## Estrutura de Permissões (Windows-style) + +``` +Organização +├── Gestores (grupo RBAC) +│ ├── Pasta: /relatorios/financeiros +│ │ └── Permissão: Gestores (ler/escrever) +│ ├── 
Pasta: /strategic +│ │ └── Permissão: Gestores (ler) +│ └── Pasta: /publico +│ └── Permissão: Todos (ler) +│ +├── RH (grupo RBAC) +│ ├── Pasta: /rh/documentos +│ │ └── Permissão: RH (ler/escrever) +│ └── Pasta: /relatorios/financeiros +│ └── Permissão: RH (ler) +│ +└── Todos (grupo implícito) + ├── Pasta: /publico + │ └── Permissão: Todos (ler) + └── Pasta: /intranet + └── Permissão: Autenticados (ler) +``` + +--- + +## Plano de Implementação + +### Fase 1: Database (Migration) + +**Arquivo:** `botserver/migrations/6.2.0-02-folder-access/up.sql` + +```sql +-- Tabela principal: pasta ↔ grupo +CREATE TABLE folder_group_access ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + folder_path TEXT NOT NULL, -- Ex: "work/bot1/pasta-protegida" + group_id UUID NOT NULL REFERENCES rbac_groups(id) ON DELETE CASCADE, + permission_level TEXT NOT NULL DEFAULT 'read', -- read|write|admin + created_by UUID REFERENCES users(id), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(folder_path, group_id) +); + +-- Índice para busca rápida +CREATE INDEX idx_folder_group_access_path ON folder_group_access(folder_path); +CREATE INDEX idx_folder_group_access_group ON folder_group_access(group_id); + +-- Adicionar coluna de permissões em kb_collections +ALTER TABLE kb_collections +ADD COLUMN IF NOT EXISTS access_level TEXT DEFAULT 'authenticated'; + +COMMENT ON TABLE folder_group_access IS 'Windows-style ACL: pasta ↔ grupo RBAC'; +``` + +--- + +### Fase 2: Schema Diesel + +**Arquivo:** `botserver/src/core/shared/schema/research.rs` + +```rust +diesel::table! 
{
+    folder_group_access (id) {
+        id -> Uuid,
+        folder_path -> Text,
+        group_id -> Uuid,
+        permission_level -> Varchar,
+        created_by -> Nullable<Uuid>,
+        created_at -> Timestamptz,
+    }
+}
+
+diesel::joinable!(folder_group_access -> rbac_groups (group_id));
+
+// Adicionar em kb_collections:
+access_level -> Varchar, // all|authenticated|role_based|group_based
+```
+
+---
+
+### Fase 3: Modelos Rust
+
+**Arquivo:** `botserver/src/core/kb/models.rs` (novo)
+
+```rust
+#[derive(Debug, Clone, Queryable, Selectable)]
+#[diesel(table_name = folder_group_access)]
+pub struct FolderGroupAccess {
+    pub id: Uuid,
+    pub folder_path: String,
+    pub group_id: Uuid,
+    pub permission_level: String,
+    pub created_by: Option<Uuid>,
+    pub created_at: DateTime<Utc>,
+}
+
+#[derive(Debug, Clone, Insertable)]
+#[diesel(table_name = folder_group_access)]
+pub struct NewFolderGroupAccess {
+    pub folder_path: String,
+    pub group_id: Uuid,
+    pub permission_level: String,
+    pub created_by: Option<Uuid>,
+}
+```
+
+---
+
+### Fase 4: Carregar Grupos do Usuário
+
+**Arquivo:** `botserver/src/core/shared/state.rs`
+
+Modificar `AppState` ou `UserContext` para popular grupos:
+
+```rust
+// Nova função em core/kb/permissions.rs
+pub async fn load_user_groups(
+    db_pool: &DbPool,
+    user_id: Uuid,
+) -> Result<Vec<String>, String> {
+    use crate::core::shared::schema::core::rbac_groups;
+    use crate::core::shared::schema::core::rbac_user_groups;
+
+    let mut conn = db_pool.get().map_err(|e| e.to_string())?;
+
+    let group_names: Vec<String> = rbac_user_groups::table
+        .inner_join(rbac_groups::table)
+        .filter(rbac_user_groups::user_id.eq(user_id))
+        .select(rbac_groups::name)
+        .load(&mut conn)
+        .map_err(|e| e.to_string())?;
+
+    Ok(group_names)
+}
+
+// Em UserContext, adicionar método:
+impl UserContext {
+    pub async fn with_db_groups(mut self, db_pool: &DbPool) -> Result<Self, String> {
+        let groups = load_user_groups(db_pool, self.user_id).await?;
+        self.groups = groups;
+        Ok(self)
+    }
+}
+```
+
+---
+
+### Fase 5: Modificar USE KB
+
+**Arquivo:** 
`botserver/src/basic/keywords/use_kb.rs`
+
+```rust
+use crate::core::kb::permissions::{KbPermissionParser, FolderPermission, AccessLevel};
+
+fn add_kb_to_session(
+    conn_pool: DbPool,
+    session_id: Uuid,
+    bot_id: Uuid,
+    user_id: Uuid, // Adicionar
+    kb_name: &str,
+) -> Result<(), String> {
+    // ... código existente ...
+
+    // NOVO: Verificar permissões de grupo
+    let user_groups = load_user_groups(&conn_pool, user_id)?;
+
+    let has_access = check_folder_group_access(
+        &conn_pool,
+        &kb_folder_path,
+        &user_groups,
+    )?;
+
+    if !has_access {
+        return Err(format!(
+            "Acesso negado: KB '{}' requer grupo específico",
+            kb_name
+        ));
+    }
+
+    // ... resto do código ...
+}
+
+fn check_folder_group_access(
+    conn_pool: &DbPool,
+    folder_path: &str,
+    user_groups: &[String],
+) -> Result<bool, String> {
+    // Buscar grupos associados à pasta
+    // Se pasta é "pública" (sem grupos) → permitir
+    // Se usuário está em algum grupo da pasta → permitir
+    // Caso contrário → negar
+}
+```
+
+---
+
+### Fase 6: Modificar THINK KB (Filtro Qdrant)
+
+**Arquivo:** `botserver/src/basic/keywords/think_kb.rs`
+
+```rust
+use crate::core::kb::permissions::build_qdrant_permission_filter;
+
+async fn think_kb_search(
+    // ... parâmetros ...
+    user_id: Uuid,
+) -> Result<Vec<SearchResult>, String> {
+    // Carregar contexto do usuário com grupos
+    let user_groups = load_user_groups(&db_pool, user_id).await?;
+
+    let user_context = UserContext::authenticated(
+        user_id,
+        Some(email),
+        org_id,
+    ).with_groups(user_groups);
+
+    // Filtrar resultados do Qdrant com base nos grupos
+    let qdrant_filter = build_qdrant_permission_filter(&user_context);
+
+    // Buscar no Qdrant com filtro
+    // ... 
+} +``` + +--- + +### Fase 7: Novo Keyword USE FOLDER + +**Arquivo:** `botserver/src/basic/keywords/use_folder.rs` (novo) + +```rust +// USE FOLDER "caminho/da/pasta" [READ|WRITE|ADMIN] +engine.register_custom_syntax( + ["USE", "FOLDER", "$expr$", "($expr$)", "($expr$)", "($expr$)"], + true, + move |context, inputs| { + let folder_path = context.eval_expression_tree(&inputs[0])?.to_string(); + // Verificar acesso, adicionar à sessão + }, +); +``` + +--- + +### Fase 8: API Endpoints + +**Arquivo:** `botserver/src/api/routes/rbac.rs` + +```rust +// GET /api/rbac/folders/{path}/groups +// Lista grupos com acesso a uma pasta +async fn get_folder_groups( + Path(folder_path): Path, + State(state): State, +) -> Result>, AppError> { + // Query folder_group_access +} + +// POST /api/rbac/folders/{path}/groups/{group_id} +async fn add_folder_group( + Path((folder_path, group_id)): Path<(String, Uuid)>, + Json(payload): Json, +) -> Result, AppError> { + // INSERT folder_group_access +} + +// DELETE /api/rbac/folders/{path}/groups/{group_id} +async fn remove_folder_group( + Path((folder_path, group_id)): Path<(String, Uuid)>, +) -> Result { + // DELETE folder_group_access +} + +// GET /api/rbac/users/{user_id}/accessible-folders +async fn get_user_accessible_folders( + // Lista pastas que o usuário pode acessar +) +``` + +--- + +### Fase 9: UI Admin + +**Arquivo:** `botui/ui/suite/admin/groups.html` + +Adicionar aba "Pastas" na visualização do grupo: + +```html + +
+ +
+ +
+ + +
+``` + +**Arquivo:** `botui/ui/suite/drive/drive.html` + +Mostrar cadeado nas pastas protegidas: + +```html + +``` + +--- + +## Fluxo Completo + +``` +1. Usuário executa: USE KB "relatorios-financeiros" + +2. Sistema carrega: + - user_id da sessão + - grupos do usuário (rbac_user_groups → rbac_groups) + - grupos da pasta (folder_group_access) + +3. Verificação: + - Se pasta não tem restrições (pública) → OK + - Se usuário está em algum grupo da pasta → OK + - Caso contrário → ERRO "Acesso negado" + +4. Se OK: + - Adiciona KB em session_kb_associations + - THINK KB agora busca no Qdrant com filtro de grupos + +5. THINK KB retorna: + - Apenas documentos de pastas que o usuário tem acesso +``` + +--- + +## Testes + +```rust +#[test] +fn test_group_access_allowed() { + let groups = vec!["gestores".to_string()]; + let folder_path = "work/bot/financeiro"; + + // Gestor tem acesso + assert!(check_folder_group_access(folder_path, &groups).unwrap()); +} + +#[test] +fn test_group_access_denied() { + let groups = vec!["rh".to_string()]; + let folder_path = "work/bot/financeiro"; + + // RH não tem acesso a financeiro + assert!(!check_folder_group_access(folder_path, &groups).unwrap()); +} + +#[test] +fn test_public_folder_access() { + let groups = vec![]; + let folder_path = "work/bot/publico"; + + // Pasta pública permite todos + assert!(check_folder_group_access(folder_path, &groups).unwrap()); +} +``` + +--- + +## Prioridades de Implementação + +| # | Tarefa | Prioridade | Complexidade | +|---|--------|------------|--------------| +| 1 | Migration folder_group_access | Alta | Baixa | +| 2 | Schema Diesel | Alta | Baixa | +| 3 | load_user_groups() | Alta | Média | +| 4 | check_folder_group_access() | Alta | Média | +| 5 | Modificar USE KB | Alta | Média | +| 6 | Modificar THINK KB (Qdrant filter) | Alta | Média | +| 7 | API endpoints | Média | Média | +| 8 | UI Admin | Média | Alta | +| 9 | USE FOLDER keyword | Baixa | Média | + +--- + +## Arquivos a Modificar + +| Arquivo | 
Ação | +|---------|------| +| `migrations/6.2.0-02-folder-access/up.sql` | Criar | +| `migrations/6.2.0-02-folder-access/down.sql` | Criar | +| `src/core/shared/schema/research.rs` | Modificar | +| `src/core/kb/permissions.rs` | Modificar (load_user_groups) | +| `src/core/kb/models.rs` | Criar | +| `src/basic/keywords/use_kb.rs` | Modificar | +| `src/basic/keywords/think_kb.rs` | Modificar | +| `src/api/routes/rbac.rs` | Modificar | +| `botui/ui/suite/admin/groups.html` | Modificar | +| `botui/ui/suite/drive/drive.html` | Modificar | diff --git a/AGENTS.md b/AGENTS.md index d066721..c2d3d8d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,41 +1,14 @@ # General Bots AI Agent Guidelines -- stop saving .png on root! Use /tmp. never allow new files on root. -- never push to alm without asking first - pbecause it is production! -- **❌ NEVER deploy to production manually — ALWAYS use CI/CD pipeline** -- **❌ NEVER include sensitive data (IPs, tokens, passwords, keys) in AGENTS.md or any documentation** -- **❌ NEVER use `scp`, direct SSH binary copy, or manual deployment to system container** -- **✅ ALWAYS push to ALM → CI builds on alm-ci → CI deploys to system container automatically** -8080 is server 3000 is client ui -if you are in trouble with some tool, please go to the ofiical website to get proper install or instructions -To test web is http://localhost:3000 (botui!) -Use apenas a lingua culta ao falar . -test login here http://localhost:3000/suite/auth/login.html -> **⚠️ CRITICAL SECURITY WARNING** -I AM IN DEV ENV, but sometimes, pasting from PROD, do not treat my env as prod! Just fix, to me and push to CI. So I can test in PROD, for a while. ->Use Playwrigth MCP to start localhost:3000/ now. 
-> **NEVER CREATE FILES WITH SECRETS IN THE REPOSITORY ROOT** -> - ❌ **NEVER** write internal IPs to logs or output -> - When debugging network issues, mask IPs (e.g., "10.x.x.x" instead of "10.16.164.222") -> - Use hostnames instead of IPs in configs and documentation -See botserver/src/drive/local_file_monitor.rs to see how to load from /opt/gbo/data the list of development bots. -- ❌ **NEVER** use `cargo clean` - causes 30min rebuilds, use `./reset.sh` for database issues -> -> Secret files MUST be placed in `/tmp/` only: -> - ✅ `/tmp/vault-token-gb` - Vault root token -> - ✅ `/tmp/vault-unseal-key-gb` - Vault unseal key -> - ❌ `vault-unseal-keys` - FORBIDDEN (tracked by git) -> - ❌ `start-and-unseal.sh` - FORBIDDEN (contains secrets) -> -> **Why `/tmp/`?** -> - Cleared on reboot (ephemeral) -> - Not tracked by git -> - Standard Unix security practice -> - Prevents accidental commits +- Use apenas a língua culta ao falar. +- Never save files on root! Use `/tmp` for temp files. +- Never push to ALM without asking first — it is production! +- If in trouble with a tool, go to the official website for install instructions. +- See `botserver/src/drive/local_file_monitor.rs` to load bots from `/opt/gbo/data`. --- -## 📁 WORKSPACE STRUCTURE +## 📁 Workspace Structure | Crate | Purpose | Port | Tech Stack | |-------|---------|------|------------| @@ -53,308 +26,140 @@ See botserver/src/drive/local_file_monitor.rs to see how to load from /opt/gbo/d - **Run from:** `botserver/` directory - **Env file:** `botserver/.env` - **UI Files:** `botui/ui/suite/` +- **Bot data:** `/opt/gbo/data` (primary) +- **Test web:** `http://localhost:3000` — Login: `http://localhost:3000/suite/auth/login.html` --- ## 🧭 LLM Navigation Guide -### Reading This Workspace -/opt/gbo/data is a place also for bots. -**For LLMs analyzing this codebase:** -0. Bots are in /opt/gbo/data primary -1. 
Start with **[Component Dependency Graph](../README.md#-component-dependency-graph)** in README to understand relationships -2. Review **[Module Responsibility Matrix](../README.md#-module-responsibility-matrix)** for what each module does -3. Study **[Data Flow Patterns](../README.md#-data-flow-patterns)** to understand execution flow -4. Reference **[Common Architectural Patterns](../README.md#-common-architectural-patterns)** before making changes -5. Check **[Security Rules](#-security-directives---mandatory)** below - violations are blocking issues -6. Follow **[Code Patterns](#-mandatory-code-patterns)** below - consistency is mandatory +1. Start with **[Component Dependency Graph](../README.md#-component-dependency-graph)** +2. Review **[Module Responsibility Matrix](../README.md#-module-responsibility-matrix)** +3. Study **[Data Flow Patterns](../README.md#-data-flow-patterns)** +4. Reference **[Common Architectural Patterns](../README.md#-common-architectural-patterns)** +5. Check [Security Rules](#-security-directives---mandatory) — violations are blocking +6. 
Follow [Code Patterns](#-mandatory-code-patterns) — consistency is mandatory --- -## 🔄 Reset Process Notes +## ❌ Absolute Prohibitions -### reset.sh Behavior -- **Purpose**: Cleans and restarts the development environment -- **Timeouts**: The script can timeout during "Step 3/4: Waiting for BotServer to bootstrap" -- **Bootstrap Process**: Takes 3-5 minutes to install all components (Vault, PostgreSQL, Valkey, MinIO, Zitadel, LLM) +### Build & Deploy +- ❌ **NEVER** search `/target` folder +- ❌ **NEVER** build in release mode or use `--release` +- ❌ **NEVER** run `cargo build` — use `cargo check` for verification +- ❌ **NEVER** run `cargo clean` — causes 30min rebuilds; use `./reset.sh` for DB issues +- ❌ **NEVER** deploy manually — ALWAYS use CI/CD pipeline (push → ALM → alm-ci builds → deploys) +- ❌ **NEVER** use `scp`, direct SSH binary copy, or manual deployment +- ❌ **NEVER** run the binary directly — use `systemctl` or `./restart.sh` -### Common Issues -1. **Script Timeout**: reset.sh waits for "Bootstrap complete: admin user" message - - If Zitadel isn't ready within 60s, admin user creation fails - - Script continues waiting indefinitely - - **Solution**: Check botserver.log for "Bootstrap process completed!" message +### Code Quality +- ❌ **NEVER** use `panic!()`, `todo!()`, `unimplemented!()`, `unwrap()`, `expect()` +- ❌ **NEVER** use `Command::new()` directly — use `SafeCommand` +- ❌ **NEVER** return raw error strings to HTTP clients — use `ErrorSanitizer` +- ❌ **NEVER** use `#[allow()]` or lint exceptions in `Cargo.toml` — FIX the code +- ❌ **NEVER** use `_` prefix for unused vars — DELETE or USE them +- ❌ **NEVER** leave unused imports, dead code, or commented-out code +- ❌ **NEVER** use CDN links — all assets must be local +- ❌ **NEVER** create `.md` docs without checking `botbook/` first +- ❌ **NEVER** hardcode credentials — use `generate_random_string()` or env vars -2. 
**Zitadel Not Ready**: "Bootstrap check failed (Zitadel may not be ready)" - - Directory service may need more than 60 seconds to start - - Admin user creation deferred - - Services still start successfully +### Security +- ❌ **NEVER** include sensitive data (IPs, tokens, keys) in docs or code +- ❌ **NEVER** write internal IPs to logs — mask them (e.g., "10.x.x.x") +- ❌ **NEVER** create files with secrets in repo root -3. **Services Exit After Start**: - - botserver/botui may exit after initial startup - - Check logs for "dispatch failure" errors - - Check Vault certificate errors: "tls: failed to verify certificate: x509" - -### Manual Service Management -```bash -# If reset.sh times out, manually verify services: -ps aux | grep -E "(botserver|botui)" | grep -v grep -curl http://localhost:8080/health -tail -f botserver.log botui.log - -# Restart services manually: -./restart.sh -``` - -### ⚠️ NEVER Run Binary Directly -- ❌ **NEVER** run `/opt/gbo/bin/botserver` or `./target/debug/botserver` directly on any system -- ❌ **NEVER** execute the binary with `su - gbuser -c '/opt/gbo/bin/botserver'` or similar -- ✅ **ALWAYS** use `systemctl` for service management: - ```bash - systemctl status botserver - systemctl start botserver - systemctl stop botserver - systemctl restart botserver - journalctl -u botserver -f - ``` -- ✅ **For diagnostics**: Use `journalctl -u botserver --no-pager -n 50` or check `/opt/gbo/logs/stdout.log` - -### Reset Verification -After reset completes, verify: -- ✅ PostgreSQL running (port 5432) -- ✅ Valkey cache running (port 6379) -- ✅ BotServer listening on port 8080 -- ✅ BotUI listening on port 3000 -- ✅ No errors in botserver.log +> **Secret files MUST be placed in `/tmp/` only** (ephemeral, not tracked by git). --- -## 🔐 Security Directives - MANDATORY - -### 1. Error Handling - NO PANICS IN PRODUCTION +## 🔐 Security Directives — MANDATORY +### 1. 
Error Handling — No Panics ```rust -// ❌ FORBIDDEN -value.unwrap() -value.expect("message") -panic!("error") -todo!() -unimplemented!() - -// ✅ REQUIRED +// ❌ FORBIDDEN: unwrap(), expect(), panic!(), todo!() +// ✅ REQUIRED: value? value.ok_or_else(|| Error::NotFound)? value.unwrap_or_default() -value.unwrap_or_else(|e| { log::error!("{}", e); default }) if let Some(v) = value { ... } -match value { Ok(v) => v, Err(e) => return Err(e.into()) } ``` -### 2. Command Execution - USE SafeCommand - +### 2. Command Execution — SafeCommand ```rust -// ❌ FORBIDDEN -Command::new("some_command").arg(user_input).output() - -// ✅ REQUIRED +// ❌ FORBIDDEN: Command::new("cmd").arg(user_input).output() +// ✅ REQUIRED: use crate::security::command_guard::SafeCommand; -SafeCommand::new("allowed_command")? - .arg("safe_arg")? - .execute() +SafeCommand::new("allowed_command")?.arg("safe_arg")?.execute() ``` -### 3. Error Responses - USE ErrorSanitizer - +### 3. Error Responses — ErrorSanitizer ```rust -// ❌ FORBIDDEN -Json(json!({ "error": e.to_string() })) -format!("Database error: {}", e) - -// ✅ REQUIRED +// ❌ FORBIDDEN: Json(json!({ "error": e.to_string() })) +// ✅ REQUIRED: use crate::security::error_sanitizer::log_and_sanitize; let sanitized = log_and_sanitize(&e, "context", None); (StatusCode::INTERNAL_SERVER_ERROR, sanitized) ``` -### 4. SQL - USE sql_guard - +### 4. SQL — sql_guard ```rust -// ❌ FORBIDDEN -format!("SELECT * FROM {}", user_table) - -// ✅ REQUIRED +// ❌ FORBIDDEN: format!("SELECT * FROM {}", user_table) +// ✅ REQUIRED: use crate::security::sql_guard::{sanitize_identifier, validate_table_name}; let safe_table = sanitize_identifier(&user_table); validate_table_name(&safe_table)?; ``` -### 5. Rate Limiting Strategy (IMP-07) +### 5. 
Rate Limiting +- General: 100 req/s, Auth: 10 req/s, API: 50 req/s per token, WebSocket: 10 msgs/s +- Use `governor` crate with per-IP and per-User tracking -- **Default Limits:** - - General: 100 req/s (global) - - Auth: 10 req/s (login endpoints) - - API: 50 req/s (per token) -- **Implementation:** - - MUST use `governor` crate - - MUST implement per-IP and per-User tracking - - WebSocket connections MUST have message rate limits (e.g., 10 msgs/s) +### 6. CSRF Protection +- ALL state-changing endpoints (POST/PUT/DELETE/PATCH) MUST require CSRF token +- Use `tower_csrf`, bound to user session. Exempt: Bearer Token endpoints -### 6. CSRF Protection (IMP-08) +### 7. Security Headers (ALL responses) +`Content-Security-Policy`, `Strict-Transport-Security`, `X-Frame-Options: DENY`, `X-Content-Type-Options: nosniff`, `Referrer-Policy: strict-origin-when-cross-origin`, `Permissions-Policy: geolocation=(), microphone=(), camera=()` -- **Requirement:** ALL state-changing endpoints (POST, PUT, DELETE, PATCH) MUST require a CSRF token. -- **Implementation:** - - Use `tower_csrf` or similar middleware - - Token MUST be bound to user session - - Double-Submit Cookie pattern or Header-based token verification - - **Exemptions:** API endpoints using Bearer Token authentication (stateless) - -### 7. Security Headers (IMP-09) - -- **Mandatory Headers on ALL Responses:** - - `Content-Security-Policy`: "default-src 'self'; script-src 'self'; object-src 'none';" - - `Strict-Transport-Security`: "max-age=63072000; includeSubDomains; preload" - - `X-Frame-Options`: "DENY" or "SAMEORIGIN" - - `X-Content-Type-Options`: "nosniff" - - `Referrer-Policy`: "strict-origin-when-cross-origin" - - `Permissions-Policy`: "geolocation=(), microphone=(), camera=()" - -### 8. 
Dependency Management (IMP-10) - -- **Pinning:** - - Application crates (`botserver`, `botui`) MUST track `Cargo.lock` - - Library crates (`botlib`) MUST NOT track `Cargo.lock` -- **Versions:** - - Critical dependencies (crypto, security) MUST use exact versions (e.g., `=1.0.1`) - - Regular dependencies MAY use caret (e.g., `1.0`) -- **Auditing:** - - Run `cargo audit` weekly - - Update dependencies only via PR with testing +### 8. Dependency Management +- App crates track `Cargo.lock`; lib crates don't +- Critical deps: exact versions (`=1.0.1`); regular: caret (`1.0`) +- Run `cargo audit` weekly; update only via PR with testing --- ## ✅ Mandatory Code Patterns -### Use Self in Impl Blocks ```rust -impl MyStruct { - fn new() -> Self { Self { } } // ✅ Not MyStruct -} -``` - -### Derive Eq with PartialEq -```rust -#[derive(PartialEq, Eq)] // ✅ Always both -struct MyStruct { } -``` - -### Inline Format Args -```rust -format!("Hello {name}") // ✅ Not format!("{}", name) -``` - -### Combine Match Arms -```rust -match x { - A | B => do_thing(), // ✅ Combine identical arms - C => other(), -} +impl MyStruct { fn new() -> Self { Self { } } } // Use Self, not type name +#[derive(PartialEq, Eq)] // Always derive both +format!("Hello {name}") // Inline format args +match x { A | B => do_thing(), C => other() } // Combine identical arms ``` --- -## ❌ Absolute Prohibitions -- NEVER search /target folder! It is binary compiled. 
-- ❌ **NEVER** hardcode passwords, tokens, API keys, or any credentials in source code — ALWAYS use `generate_random_string()` or environment variables -- ❌ **NEVER** build in release mode - ONLY debug builds allowed -- ❌ **NEVER** use `--release` flag on ANY cargo command -- ❌ **NEVER** run `cargo build` - use `cargo check` for syntax verification -- ❌ **NEVER** compile directly for production - ALWAYS use push + CI/CD pipeline -- ❌ **NEVER** use `scp` or manual transfer to deploy - ONLY CI/CD ensures correct deployment -- ❌ **NEVER** manually copy binaries to production system container - ALWAYS push to ALM and let CI/CD build and deploy -- ❌ **NEVER** SSH into system container to deploy binaries - CI workflow handles build, transfer, and restart via alm-ci SSH -- ✅ **ALWAYS** push code to ALM → CI builds on alm-ci → CI deploys to system container via SSH from alm-ci -- ✅ **CI deploy path**: alm-ci builds at `/opt/gbo/data/botserver/target/debug/botserver` → tar+gzip via SSH → `/opt/gbo/bin/botserver` on system container → restart -- ❌ **NEVER** manually copy binaries to production system container - ALWAYS push to ALM and let CI/CD build and deploy -- ❌ **NEVER** SSH into system container to deploy binaries - CI workflow handles build, transfer, and restart via alm-ci SSH -- ✅ **ALWAYS** push code to ALM → CI builds on alm-ci → CI deploys to system container via SSH from alm-ci -- ✅ **CI deploy path**: alm-ci builds at `/opt/gbo/data/botserver/target/debug/botserver` → tar+gzip via SSH → `/opt/gbo/bin/botserver` on system container → restart +## 📏 File Size Limits -**Current Status:** ✅ **0 clippy warnings** (down from 61 - PERFECT SCORE in YOLO mode) -- ❌ **NEVER** use `panic!()`, `todo!()`, `unimplemented!()` -- ❌ **NEVER** use `Command::new()` directly - use `SafeCommand` -- ❌ **NEVER** return raw error strings to HTTP clients -- ❌ **NEVER** use `#[allow()]` in source code - FIX the code instead -- ❌ **NEVER** add lint exceptions to `Cargo.toml` - FIX the 
code instead -- ❌ **NEVER** use `_` prefix for unused variables - DELETE or USE them -- ❌ **NEVER** leave unused imports or dead code -- ❌ **NEVER** use CDN links - all assets must be local -- ❌ **NEVER** create `.md` documentation files without checking `botbook/` first -- ❌ **NEVER** comment out code - FIX it or DELETE it entirely - ---- - -## 📏 File Size Limits - MANDATORY - -### Maximum 450 Lines Per File - -When a file grows beyond this limit: - -1. **Identify logical groups** - Find related functions -2. **Create subdirectory module** - e.g., `handlers/` -3. **Split by responsibility:** - - `types.rs` - Structs, enums, type definitions - - `handlers.rs` - HTTP handlers and routes - - `operations.rs` - Core business logic - - `utils.rs` - Helper functions - - `mod.rs` - Re-exports and configuration -4. **Keep files focused** - Single responsibility -5. **Update mod.rs** - Re-export all public items - -**NEVER let a single file exceed 450 lines - split proactively at 350 lines** +- **Max 450 lines per file** — split proactively at 350 lines +- Split by: `types.rs`, `handlers.rs`, `operations.rs`, `utils.rs`, `mod.rs` +- Re-export all public items in `mod.rs` --- ## 🔥 Error Fixing Workflow -### Mode 1: OFFLINE Batch Fix (PREFERRED) - -When given error output: - -1. **Read ENTIRE error list first** -2. **Group errors by file** -3. **For EACH file with errors:** - a. View file → understand context - b. Fix ALL errors in that file - c. Write once with all fixes -4. **Move to next file** -5. **REPEAT until ALL errors addressed** -6. **ONLY THEN → verify with build/diagnostics** - -**NEVER run cargo build/check/clippy DURING fixing** -**Fix ALL errors OFFLINE first, verify ONCE at the end** - -### Mode 2: Interactive Loop - -``` -LOOP UNTIL (0 warnings AND 0 errors): - 1. Run diagnostics → pick file with issues - 2. Read entire file - 3. Fix ALL issues in that file - 4. Write file once with all fixes - 5. Verify with diagnostics - 6. 
CONTINUE LOOP -END LOOP -``` +### Preferred: Offline Batch Fix +1. Read ENTIRE error list first +2. Group errors by file +3. For each file: view → fix ALL errors → write once +4. Verify with build/diagnostics only AFTER all fixes ### ⚡ Streaming Build Rule +Don't wait for `cargo` to finish — cancel at first errors, fix, re-run. -**Do NOT wait for `cargo` to finish.** As soon as the first errors appear in output, cancel/interrupt the build, fix those errors immediately, then re-run. This avoids wasting time on a full compile when errors are already visible. - ---- - -## 🧠 Memory Management - -When compilation fails due to memory issues (process "Killed"): - +### 🧠 Memory Issues (process "Killed") ```bash pkill -9 cargo; pkill -9 rustc; pkill -9 botserver CARGO_BUILD_JOBS=1 cargo check -p botserver 2>&1 | tail -200 @@ -362,1080 +167,154 @@ CARGO_BUILD_JOBS=1 cargo check -p botserver 2>&1 | tail -200 --- -## 🎭 Playwright Browser Testing - YOLO Mode +## 🔄 Reset & Service Management -### Browser Setup & Troubleshooting +### reset.sh +- Cleans and restarts dev env (3-5 min bootstrap: Vault, PostgreSQL, Valkey, MinIO, Zitadel, LLM) +- May timeout waiting for Zitadel — check `botserver.log` for "Bootstrap process completed!" -**If browser keeps closing or fails to connect:** -1. Kill all leftover browser processes: `pkill -9 -f brave; pkill -9 -f chrome; pkill -9 -f chromium; pkill -9 -f mcp-chrome` -2. Wait 3 seconds for cleanup -3. Navigate again with `mcp__playwright__browser_navigate` +### Verify After Reset +✅ PostgreSQL (5432), ✅ Valkey (6379), ✅ BotServer (8080), ✅ BotUI (3000), ✅ No errors in logs -**Bot-Specific Testing URL Pattern:** -- Dev: `http://localhost:3000/` -- Prod chat: `https://chat..com/` - -### Complete Bot Tool Testing Workflow - -**Step 1: Navigate and Verify Initial State** -``` -1. mcp__playwright__browser_navigate → open the bot chat URL -2. mcp__playwright__browser_snapshot → see the page state -3. 
Verify: Welcome message appears, suggestion buttons render correctly -4. Check: Portuguese accents display correctly (ç, ã, é, õ, etc.) -``` - -**Step 2: Interact with the Bot** -``` -1. Click a suggestion button (e.g., "Fazer Inscrição") -2. Wait for bot response: mcp__playwright__browser_wait_for (3-5 seconds) -3. Take snapshot to see bot's reply -4. Fill in the requested data via textbox: - - mcp__playwright__browser_type with all required fields - - Set submit: true to send the message -5. Wait for response: mcp__playwright__browser_wait_for (5-8 seconds) -6. Take snapshot to see confirmation/next step -7. If bot asks for confirmation, type confirmation and submit -8. Wait and take final snapshot to see success message -``` - -**Step 3: Verify Data Was Saved to Database** +### Service Commands ```bash -# Connect to the tables container and query the bot's database -ssh "sudo incus exec tables -- psql -h 127.0.0.1 -U postgres -d bot_ -c \" -SELECT * FROM ORDER BY dataCadastro DESC LIMIT 5; -\"" - -# Verify: -# - New record exists with correct data -# - All fields match what was entered in the chat -# - Timestamp is recent -# - Status is correct (e.g., AGUARDANDO_ANALISE) -``` - -**Step 4: Verify Backend Logs** -```bash -# Check botserver logs for the interaction -ssh "sudo incus exec system -- tail -50 /opt/gbo/logs/stdout.log | grep -iE '||SAVE|inscricao'" - -# Check for any errors -ssh "sudo incus exec system -- tail -20 /opt/gbo/logs/err.log | grep -iE 'panic|error|fail' | grep -v Qdrant" -``` - -**Step 5: Report Findings** -- Take screenshot with `mcp__playwright__browser_take_screenshot` (save to `.playwright-mcp/` directory) -- Show the database record that was created -- Confirm the full flow worked: UI → Bot processing → Database save - -### ⚠️ IMPORTANT - Desktop UI Navigation: -- The desktop may have a maximized chat window covering other apps -- To access CRM/sidebar icons, click the **middle button** (restore/down arrow) in the chat window header to 
minimize it -- Or navigate directly via URL: http://localhost:3000/suite/crm (after login) - -### WhatsApp Testing via Playwright - -**Important: WhatsApp webhook is GLOBAL** - a single endpoint serves all bots. Bot routing is done by typing the bot name as the first message. - -**Setup:** -1. Get WhatsApp verify token from default bot: `cat /opt/gbo/data/default.gbai/default.gbot/config.csv | grep whatsapp-verify-token` -2. The webhook endpoint is `/webhook/whatsapp/:bot_id` but routing is automatic via bot name - -**Complete WhatsApp Test Workflow:** - -**Step 1: Open WhatsApp Web** -``` -1. mcp__playwright__browser_navigate → https://web.whatsapp.com/ -2. mcp__playwright__browser_snapshot → verify WhatsApp loaded -3. Find the "General Bots" chat (the shared WhatsApp business number) -``` - -**Step 2: Activate the Bot (Critical!)** -``` -1. Click the General Bots chat -2. Type the bot name (e.g., "salesianos") and press Enter -3. Wait 5-10 seconds for the bot to respond -4. mcp__playwright__browser_snapshot → see the bot's welcome message -``` - -**Step 3: Interact with the Bot** -``` -1. Type your request (e.g., "Quero fazer inscrição") -2. Wait for bot response: mcp__playwright__browser_wait_for (5-8 seconds) -3. Take snapshot to see bot's reply -4. Fill in requested data when prompted -5. Confirm when bot asks -6. 
Wait for success message with protocol number -``` - -**Step 4: Verify Backend** -```bash -# Check prod logs for WhatsApp activity -ssh "sudo incus exec system -- tail -50 /opt/gbo/logs/stdout.log | grep -iE 'whatsapp|salesianos|routing|message'" - -# Check database for saved data -ssh "sudo incus exec tables -- psql -h 127.0.0.1 -U postgres -d bot_ -c \"SELECT * FROM ORDER BY dataCadastro DESC LIMIT 1;\"" -``` - -**Key differences from web chat:** -- No suggestion buttons - user must type everything -- Must type bot name FIRST to activate routing -- Single WhatsApp number serves ALL bots -- Bot routing uses `whatsapp-id` config in each bot's config.csv - ---- - -## ➕ Adding New Features Workflow - -### Step 1: Plan the Feature - -**Understand requirements:** -1. What problem does this solve? -2. Which module owns this functionality? (Check [Module Responsibility Matrix](../README.md#-module-responsibility-matrix)) -3. What data structures are needed? -4. What are the security implications? - -**Design checklist:** -- [ ] Does it fit existing architecture patterns? -- [ ] Will it require database migrations? -- [ ] Does it need new API endpoints? -- [ ] Will it affect existing features? -- [ ] What are the error cases? - -### Step 2: Implement the Feature - -**Follow the pattern:** -```rust -// 1. Add types to botlib if shared across crates -// botlib/src/models.rs -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NewFeature { - pub id: Uuid, - pub name: String, -} - -// 2. Add database schema if needed -// botserver/migrations/YYYY-MM-DD-HHMMSS_feature_name/up.sql -CREATE TABLE new_features ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - name VARCHAR(255) NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -// 3. 
Add Diesel model -// botserver/src/core/shared/models/core.rs -#[derive(Queryable, Insertable)] -#[diesel(table_name = new_features)] -pub struct NewFeatureDb { - pub id: Uuid, - pub name: String, - pub created_at: DateTime, -} - -// 4. Add business logic -// botserver/src/features/new_feature.rs -pub async fn create_feature( - state: &AppState, - name: String, -) -> Result { - // Implementation -} - -// 5. Add API endpoint -// botserver/src/api/routes.rs -async fn create_feature_handler( - Extension(state): Extension>, - Json(payload): Json, -) -> Result, (StatusCode, String)> { - // Handler implementation -} -``` - -**Security checklist:** -- [ ] Input validation (use `sanitize_identifier` for SQL) -- [ ] Authentication required? -- [ ] Authorization checks? -- [ ] Rate limiting needed? -- [ ] Error messages sanitized? (use `log_and_sanitize`) -- [ ] No `unwrap()` or `expect()` in production code - -### Step 3: Add BASIC Keywords (if applicable) - -**For features accessible from .bas scripts:** -```rust -// botserver/src/basic/keywords/new_feature.rs -pub fn new_feature_keyword( - state: Arc, - user_session: UserSession, - engine: &mut Engine, -) { - let state_clone = state.clone(); - let session_clone = user_session.clone(); - - engine - .register_custom_syntax( - ["NEW_FEATURE", "$expr$"], - true, - move |context, inputs| { - let param = context.eval_expression_tree(&inputs[0])?.to_string(); - - // Call async function from sync context using separate thread - let (tx, rx) = std::sync::mpsc::channel(); - std::thread::spawn(move || { - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all().build().ok(); - let result = if let Some(rt) = rt { - rt.block_on(async { - create_feature(&state_clone, param).await - }) - } else { - Err("Failed to create runtime".into()) - }; - let _ = tx.send(result); - }); - let result = rx.recv().unwrap_or(Err("Channel error".into())); - - match result { - Ok(feature) => Ok(Dynamic::from(feature.name)), - Err(e) => 
Err(format!("Failed: {}", e).into()), - } - }, - ) - .expect("valid syntax registration"); -} -``` - -### Step 4: Test the Feature - -**Local testing:** -```bash -# 1. Run migrations -diesel migration run - -# 2. Build and restart -./restart.sh - -# 3. Test via API -curl -X POST http://localhost:9000/api/features \ - -H "Content-Type: application/json" \ - -d '{"name": "test"}' - -# 4. Test via BASIC script -# Create test.bas in /opt/gbo/data/testbot.gbai/testbot.gbdialog/ -# NEW_FEATURE "test" - -# 5. Check logs -tail -f botserver.log | grep -i "new_feature" -``` - -**Integration test:** -```rust -// bottest/tests/new_feature_test.rs -#[tokio::test] -async fn test_create_feature() { - let state = setup_test_state().await; - let result = create_feature(&state, "test".to_string()).await; - assert!(result.is_ok()); -} -``` - -### Step 5: Document the Feature - -**Update documentation:** -- Add to `botbook/src/features/` if user-facing -- Add to module README.md if developer-facing -- Add inline code comments for complex logic -- Update API documentation - -**Example documentation:** -```markdown -## NEW_FEATURE Keyword - -Creates a new feature with the given name. - -**Syntax:** -```basic -NEW_FEATURE "feature_name" -``` - -**Example:** -```basic -NEW_FEATURE "My Feature" -TALK "Feature created!" -``` - -**Returns:** Feature name as string -``` - -### Step 6: Commit & Deploy - -**Commit pattern:** -```bash -git add . 
-git commit -m "feat: Add NEW_FEATURE keyword - -- Adds new_features table with migrations -- Implements create_feature business logic -- Adds NEW_FEATURE BASIC keyword -- Includes API endpoint at POST /api/features -- Tests: Unit tests for business logic, integration test for API" - -git push alm main -git push origin main +ps aux | grep -E "(botserver|botui)" | grep -v grep +curl http://localhost:8080/health +./restart.sh # Restart services +systemctl status|start|stop|restart botserver # systemd management +journalctl -u botserver -f # Follow logs ``` --- -## 🧪 Testing Strategy +## 🎭 Playwright Browser Testing -### Unit Tests -- **Location**: Each crate has `tests/` directory or inline `#[cfg(test)]` modules -- **Naming**: Test functions use `test_` prefix or describe what they test -- **Running**: `cargo test -p ` or `cargo test` for all +### Browser Setup +If browser fails: `pkill -9 -f brave; pkill -9 -f chrome; pkill -9 -f chromium` → wait 3s → navigate again. -### Integration Tests -- **Location**: `bottest/` crate contains integration tests -- **Scope**: Tests full workflows across multiple crates -- **Running**: `cargo test -p bottest` +### Bot Testing Flow +1. Navigate to `http://localhost:3000/` +2. Snapshot → verify welcome message + suggestion buttons + Portuguese accents +3. Click suggestion → wait 3-5s → snapshot → fill data → submit +4. Verify DB records and backend logs -### Coverage Goals -- **Critical paths**: 80%+ coverage required -- **Error handling**: ALL error paths must have tests -- **Security**: All security guards must have tests +### Desktop UI Note +Chat window may cover other apps — click **middle button** (restore) to minimize, or navigate directly via URL. -### WhatsApp Integration Testing - -#### Prerequisites -1. **Enable WhatsApp Feature**: Build botserver with whatsapp feature enabled: - ```bash - cargo build -p botserver --bin botserver --features whatsapp - ``` -2. 
**Bot Configuration**: Ensure the bot has WhatsApp credentials configured in `config.csv`: - - `whatsapp-api-key` - API key from Meta Business Suite - - `whatsapp-verify-token` - Custom token for webhook verification - - `whatsapp-phone-number-id` - Phone Number ID from Meta - - `whatsapp-business-account-id` - Business Account ID from Meta - -#### Using Localtunnel (lt) as Reverse Proxy - -# Check database for message storage -psql -h localhost -U postgres -d botserver -c "SELECT * FROM messages WHERE bot_id = '' ORDER BY created_at DESC LIMIT 5;" ---- - -## 🐛 Debugging Rules - -### 🚨 CRITICAL ERROR HANDLING RULE - -**STOP EVERYTHING WHEN ERRORS APPEAR** - -When ANY error appears in logs during startup or operation: -1. **IMMEDIATELY STOP** - Do not continue with other tasks -2. **IDENTIFY THE ERROR** - Read the full error message and context -3. **FIX THE ERROR** - Address the root cause, not symptoms -4. **VERIFY THE FIX** - Ensure error is completely resolved -5. **ONLY THEN CONTINUE** - Never ignore or work around errors - -**NEVER restart servers to "fix" errors - FIX THE ACTUAL PROBLEM** - -### Log Locations - -| Component | Log File | What's Logged | -|-----------|----------|---------------| -| **botserver** | `botserver.log` | API requests, errors, script execution, **client navigation events** | -| **botui** | `botui.log` | UI rendering, WebSocket connections | -| **drive_monitor** | In botserver logs with `[drive_monitor]` prefix | File sync, compilation | -| **client errors** | In botserver logs with `CLIENT:` prefix | JavaScript errors, navigation events | +### WhatsApp Testing +- Webhook is **global** — bot routing by typing bot name as first message +- Single WhatsApp number serves ALL bots; routing via `whatsapp-id` in `config.csv` --- -## 🔧 Bug Fixing Workflow +## ➕ Adding New Features -### Step 1: Reproduce & Diagnose +### Checklist +- [ ] Which module owns this? (Check Module Responsibility Matrix) +- [ ] Database migrations needed? 
+- [ ] New API endpoints? +- [ ] Security: input validation, auth, rate limiting, error sanitization? +- [ ] Screens in botui? +- [ ] No `unwrap()`/`expect()`? -**Identify the symptom:** +### Pattern: types → schema → Diesel model → business logic → API endpoint → BASIC keyword (if applicable) → tests → docs in `botbook/` + +### Commit & Deploy ```bash -# Check recent errors -grep -E " E | W " botserver.log | tail -20 - -# Check specific component -grep "component_name" botserver.log | tail -50 - -# Monitor live -tail -f botserver.log | grep -E "ERROR|WARN" -``` - -**Trace the data flow:** -1. Find where the bug manifests (UI, API, database, cache) -2. Work backwards through the call chain -3. Check logs at each layer - -**Example: "Suggestions not showing"** -```bash -# 1. Check if frontend is requesting suggestions -grep "GET /api/suggestions" botserver.log | tail -5 - -# 2. Check if suggestions exist in cache -/opt/gbo/bin/botserver-stack/bin/cache/bin/valkey-cli --scan --pattern "suggestions:*" - -# 3. Check if suggestions are being generated -grep "ADD_SUGGESTION" botserver.log | tail -10 - -# 4. Verify the Redis key format -grep "Adding suggestion to Redis key" botserver.log | tail -5 -``` - -### Step 2: Find the Code - -**Use code search tools:** -```bash -# Find function/keyword implementation -cd botserver/src && grep -r "ADD_SUGGESTION_TOOL" --include="*.rs" - -# Find where Redis keys are constructed -grep -r "suggestions:" --include="*.rs" | grep format - -# Find struct definition -grep -r "pub struct UserSession" --include="*.rs" -``` - -**Check module responsibility:** -- Refer to [Module Responsibility Matrix](../README.md#-module-responsibility-matrix) -- Check `mod.rs` files for module structure -- Look for related functions in same file - -### Step 3: Fix the Bug - -**Identify root cause:** -- Wrong variable used? (e.g., `user_id` instead of `bot_id`) -- Missing validation? -- Race condition? -- Configuration issue? 
- -**Make minimal changes:** -```rust -// ❌ BAD: Rewrite entire function -fn add_suggestion(...) { - // 100 lines of new code -} - -// ✅ GOOD: Fix only the bug -fn add_suggestion(...) { - // Change line 318: - - let key = format!("suggestions:{}:{}", user_session.user_id, session_id); - + let key = format!("suggestions:{}:{}", user_session.bot_id, session_id); -} -``` - -**Search for similar bugs:** -```bash -# If you fixed user_id -> bot_id in one place, check all occurrences -grep -n "user_session.user_id" botserver/src/basic/keywords/add_suggestion.rs -``` - -### Step 4: Test Locally - -**Verify the fix:** -```bash -# 1. Build -cargo check -p botserver - -# 2. Restart -./restart.sh - -# 3. Test the specific feature -# - Open browser to http://localhost:3000/ -# - Trigger the bug scenario -# - Verify it's fixed - -# 4. Check logs for errors -tail -20 botserver.log | grep -E "ERROR|WARN" -``` - -### Step 5: Commit & Deploy - -**Commit with clear message:** -```bash -cd botserver -git add src/path/to/file.rs -git commit -m "Fix: Use bot_id instead of user_id in suggestion keys - -- Root cause: Wrong field used in Redis key format -- Impact: Suggestions stored under wrong key, frontend couldn't retrieve -- Files: src/basic/keywords/add_suggestion.rs (5 occurrences) -- Testing: Verified suggestions now appear in UI" -``` - -**Push to remotes:** -```bash -# Push submodule -git push alm main -git push origin main - -# Update root repository -cd .. 
-git add botserver -git commit -m "Update botserver: Fix suggestion key bug" -git push alm main -git push origin main -``` - -**Production deployment:** -- ALM push triggers CI/CD pipeline -- Wait ~10 minutes for build + deploy -- Service auto-restarts on binary update -- Test in production after deployment - -### Step 6: Document - -**Add to AGENTS-PROD.md if production-relevant:** -- Common symptom -- Diagnosis commands -- Fix procedure -- Prevention tips - -**Update code comments if needed:** -```rust -// Redis key format: suggestions:bot_id:session_id -// Note: Must use bot_id (not user_id) to match frontend queries -let key = format!("suggestions:{}:{}", user_session.bot_id, session_id); +cd botserver && git push alm main && git push origin main +cd .. && git add botserver && git commit -m "Update botserver: " && git push alm main && git push origin main ``` --- ## 🎨 Frontend Standards -### HTMX-First Approach -- Use HTMX to minimize JavaScript -- Server returns HTML fragments, not JSON -- Use `hx-get`, `hx-post`, `hx-target`, `hx-swap` -- WebSocket via htmx-ws extension - -### Local Assets Only - NO CDN -```html - - - - - -``` +- **HTMX-first** — server returns HTML fragments, not JSON +- **Local assets only** — NO CDN links +- Use `hx-get`, `hx-post`, `hx-target`, `hx-swap`; WebSocket via htmx-ws --- -## 🚀 Performance & Size Standards +## 🚀 Performance & Quality -### Binary Size Optimization -- **Release Profile**: Always maintain `opt-level = "z"`, `lto = true`, `codegen-units = 1`, `strip = true`, `panic = "abort"`. -- **Dependencies**: - - Run `cargo tree --duplicates` weekly - - Run `cargo machete` to remove unused dependencies - - Use `default-features = false` and explicitly opt-in to needed features - -### Linting & Code Quality -- **Clippy**: Code MUST pass `cargo clippy --workspace` with **0 warnings**. -- **No Allow**: NEVER use `#[allow(clippy::...)]` in source code - FIX the code instead. 
+- `cargo clippy --workspace` must pass with **0 warnings** +- `cargo tree --duplicates` / `cargo machete` / `cargo audit` weekly +- Release profile: `opt-level = "z"`, `lto = true`, `codegen-units = 1`, `strip = true`, `panic = "abort"` +- Use `default-features = false` and opt-in to needed features --- -## 🔧 Technical Debt +## 🐛 Debugging -### Critical Issues to Address -- Error handling debt: instances of `unwrap()`/`expect()` in production code -- Performance debt: excessive `clone()`/`to_string()` calls -- File size debt: files exceeding 450 lines +### Critical Rule +**STOP on ANY error** — identify → fix root cause → verify → then continue. Never restart to "fix" errors. -### Weekly Maintenance Tasks -```bash -cargo tree --duplicates # Find duplicate dependencies -cargo machete # Remove unused dependencies -cargo build --release && ls -lh target/release/botserver # Check binary size -cargo audit # Security audit -``` +### Log Locations +| Component | Log | Prefix | +|-----------|-----|--------| +| botserver | `botserver.log` | — | +| botui | `botui.log` | — | +| drive_monitor | botserver logs | `[drive_monitor]` | +| client errors | botserver logs | `CLIENT:` | + +### Bug Fix Flow +1. Reproduce: `grep -E " E | W " botserver.log | tail -20` +2. Trace data flow backwards through call chain +3. Fix minimal change, search for similar occurrences +4. `cargo check -p botserver` → `./restart.sh` → test → check logs +5. Commit with clear root cause description --- -## 📋 Continuation Prompt +## 🧪 Testing -When starting a new session or continuing work: - -``` -Continue on gb/ workspace. Follow AGENTS.md strictly: - -1. Check current state with build/diagnostics -2. Fix ALL warnings and errors - NO #[allow()] attributes -3. Delete unused code, don't suppress warnings -4. Remove unused parameters, don't prefix with _ -5. Replace ALL unwrap()/expect() with proper error handling -6. Verify after each fix batch -7. Loop until 0 warnings, 0 errors -8. 
Refactor files >450 lines -``` +- **Unit:** per-crate `tests/` or `#[cfg(test)]` modules — `cargo test -p ` +- **Integration:** `bottest/` crate — `cargo test -p bottest` +- **Coverage:** 80%+ on critical paths; ALL error paths and security guards tested --- -## 🔑 Memory & Main Directives +## 🚢 Deploy Workflow (CI/CD Only) -**LOOP AND COMPACT UNTIL 0 WARNINGS - MAXIMUM PRECISION** - -- 0 warnings -- 0 errors -- Trust project diagnostics -- Respect all rules -- No `#[allow()]` in source code -- Real code fixes only - -**Remember:** -- **OFFLINE FIRST** - Fix all errors from list before compiling -- **BATCH BY FILE** - Fix ALL errors in a file at once -- **WRITE ONCE** - Single edit per file with all fixes -- **VERIFY LAST** - Only compile/diagnostics after ALL fixes -- **DELETE DEAD CODE** - Don't keep unused code around -- **GIT WORKFLOW** - ALWAYS push to ALL repositories (github, pragmatismo) - ---- - -## Deploy in Prod Workflow - -### CI/CD Pipeline (Primary Method) - -1. **Push to ALM** — triggers CI/CD automatically: - ```bash - cd botserver - git push alm main - git push origin main - cd .. - git add botserver - git commit -m "Update botserver: " - git push alm main - git push origin main - ``` - -2. 
**Wait for CI programmatically** — poll Forgejo API until build completes: - ```bash - # ALM is at http://:4747 (port 4747, NOT 3000) - # The runner is in container alm-ci, registered with token from DB - - # Method 1: Poll API for latest workflow run status - ALM_URL="http://:4747" - REPO="GeneralBots/BotServer" - MAX_WAIT=600 # 10 minutes - ELAPSED=0 - - while [ $ELAPSED -lt $MAX_WAIT ]; do - STATUS=$(curl -sf "$ALM_URL/api/v1/repos/$REPO/actions/runs?per_page=1" | python3 -c "import sys,json; runs=json.load(sys.stdin); print(runs[0]['status'] if runs else 'unknown')") - if [ "$STATUS" = "completed" ] || [ "$STATUS" = "failure" ] || [ "$STATUS" = "cancelled" ]; then - echo "CI finished with status: $STATUS" - break - fi - echo "CI status: $STATUS (waiting ${ELAPSED}s...)" - sleep 15 - ELAPSED=$((ELAPSED + 15)) - done - - # Method 2: Check runner logs directly - ssh "sudo incus exec alm-ci -- tail -20 /opt/gbo/logs/forgejo-runner.log" - - # Method 3: Check binary timestamp after CI completes - sleep 240 - ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - "sudo incus exec system -- stat -c '%y' /opt/gbo/bin/botserver" - ``` - -3. **Restart in prod** — CI/CD handles this automatically: - ```bash - # CI deploy workflow: - # Step 1: Check binary - # Step 2: Backup old binary (cp /opt/gbo/bin/botserver /tmp/botserver.bak) - # Step 3: Stop service (systemctl stop botserver) - # Step 4: Transfer new binary (tar+gzip via SSH) - # Step 5: Start service (systemctl start botserver) - ``` - -4. 
**Verify deployment**: - ```bash - # Check service status - ssh "sudo incus exec system -- systemctl status botserver" - # Monitor logs - ssh "sudo incus exec system -- journalctl -u botserver -f" - # Or check stdout log - ssh "sudo incus exec system -- tail -30 /opt/gbo/logs/stdout.log" - ``` - -### Production Container Architecture - -| Container | Service | Port | Notes | -|-----------|---------|------|-------| -| system | BotServer | 8080 | Main API server | -| vault | Vault | 8200 | Secrets management (isolated) | -| tables | PostgreSQL | 5432 | Database | -| cache | Valkey | 6379 | Cache | -| drive | MinIO | 9100 | Object storage | -| directory | Zitadel | 9000 | Identity provider | -| meet | LiveKit | 7880 | Video conferencing | -| vectordb | Qdrant | 6333 | Vector database | -| llm | llama.cpp | 8081 | Local LLM | -| email | Stalwart | 25/587 | Mail server | -| alm | Forgejo | 4747 | Git server (NOT 3000!) | -| alm-ci | Forgejo Runner | - | CI runner | -| proxy | Caddy | 80/443 | Reverse proxy | - -**Important:** ALM (Forgejo) listens on port **4747**, not 3000. The runner token is stored in the `action_runner_token` table in the `PROD-ALM` database. 
- -### CI Runner Troubleshooting - -| Symptom | Cause | Fix | -|---------|-------|-----| -| Runner not connecting | Wrong ALM port (3000 vs 4747) | Use port 4747 in runner registration | -| `registration file not found` | `.runner` file missing or wrong format | Re-register: `forgejo-runner register --instance http://:4747 --token --name gbo --labels ubuntu-latest:docker://node:20-bookworm --no-interactive` | -| `unsupported protocol scheme` | `.runner` file has wrong JSON format | Delete `.runner` and re-register | -| `connection refused` to ALM | iptables blocking or ALM not running | Check `sudo incus exec alm -- ss -tlnp \| grep 4747` | -| CI not picking up jobs | Runner not registered or labels mismatch | Check runner labels match workflow `runs-on` field | - ---- - -## 🖥️ Production Operations Guide - -### ⚠️ CRITICAL SAFETY RULES -1. **NEVER modify iptables rules without explicit confirmation** — always confirm the exact rules, source IPs, ports, and destinations before applying -2. **NEVER touch the PROD project without asking first** — no changes to production services, configs, or containers without user approval -3. **ALWAYS backup files to `/tmp` before editing** — e.g. `cp /path/to/file /tmp/$(basename /path/to/file).bak-$(date +%Y%m%d%H%M%S)` - -### Infrastructure Overview -- **Host OS:** Ubuntu LTS -- **Container engine:** Incus (LXC-based) -- **Base path:** `/opt/gbo/` (General Bots Operations) -- **Data path:** `/opt/gbo/data` — shared data, configs, bot definitions -- **Bin path:** `/opt/gbo/bin` — compiled binaries -- **Conf path:** `/opt/gbo/conf` — service configurations -- **Log path:** `/opt/gbo/logs` — application logs +1. Push to ALM (triggers CI automatically) +2. CI builds on alm-ci → deploys to system container via SSH +3. Service auto-restarts on binary update +4. 
Verify: check service status + logs after ~10 min ### Container Architecture -| Role | Service | Typical Port | Notes | -|------|---------|-------------|-------| -| **dns** | CoreDNS | 53 | DNS resolution, zone files in `/opt/gbo/data` | -| **proxy** | Caddy | 80/443 | Reverse proxy, TLS termination | -| **tables** | PostgreSQL | 5432 | Primary database | -| **email** | Stalwart | 993/465/587 | Mail server (IMAPS, SMTPS, Submission) | -| **system** | BotServer + Valkey | 8080/6379 | Main API + cache | -| **webmail** | Roundcube | behind proxy | PHP-FPM webmail frontend | -| **alm** | Forgejo | 4747 | Git/ALM server (NOT 3000!) | -| **alm-ci** | Forgejo Runner | - | CI/CD runner | -| **drive** | MinIO | 9000/9100 | Object storage | -| **table-editor** | NocoDB | behind proxy | Database UI, connects to tables | -| **vault** | Vault | 8200 | Secrets management | -| **directory** | Zitadel | 9000 | Identity provider | -| **meet** | LiveKit | 7880 | Video conferencing | -| **vectordb** | Qdrant | 6333 | Vector database | -| **llm** | llama.cpp | 8081 | Local LLM inference | - -### Container Management +| Container | Service | Port | +|-----------|---------|------| +| system | BotServer + Valkey | 8080/6379 | +| tables | PostgreSQL | 5432 | +| vault | Vault | 8200 | +| directory | Zitadel | 9000 | +| drive | MinIO | 9100 | +| cache | Valkey | 6379 | +| llm | llama.cpp | 8081 | +| vectordb | Qdrant | 6333 | +| meet | LiveKit | 7880 | +| email | Stalwart | 25/587 | +| alm | Forgejo | **4747** (NOT 3000!) 
| +| alm-ci | Forgejo Runner | — | +| proxy | Caddy | 80/443 | +### Container Management (Incus) ```bash -# List all containers -sudo incus list - -# Start/Stop/Restart -sudo incus start -sudo incus stop -sudo incus restart - -# Exec into container -sudo incus exec -- bash - -# View container logs -sudo incus log -sudo incus log --show-log - -# File operations -sudo incus file pull /path/to/file /local/dest -sudo incus file push /local/src /path/to/dest - -# Create snapshot before changes +sudo incus list # List all +sudo incus start|stop|restart # Lifecycle +sudo incus exec -- bash # Shell access +sudo incus exec -- systemctl restart sudo incus snapshot create pre-change-$(date +%Y%m%d%H%M%S) ``` -### Service Management (inside container) +--- -```bash -# Check if process is running -sudo incus exec -- pgrep -a +## 🔑 Core Directives Summary -# Restart service (systemd) -sudo incus exec -- systemctl restart - -# Follow logs -sudo incus exec -- journalctl -u -f - -# Check listening ports -sudo incus exec -- ss -tlnp -``` - -### Quick Health Check - -```bash -# Check all containers status -sudo incus list --format csv - -# Quick service check across containers -for c in dns proxy tables system email webmail alm alm-ci drive table-editor; do - echo -n "$c: " - sudo incus exec $c -- pgrep -a $(case $c in - dns) echo "coredns";; - proxy) echo "caddy";; - tables) echo "postgres";; - system) echo "botserver";; - email) echo "stalwart";; - webmail) echo "php-fpm";; - alm) echo "forgejo";; - alm-ci) echo "runner";; - drive) echo "minio";; - table-editor) echo "nocodb";; - esac) >/dev/null && echo OK || echo FAIL -done -``` - -### Network & NAT - -#### Port Forwarding Pattern -External ports on the host are DNAT'd to container IPs via iptables. NAT rules live in `/etc/iptables.rules`. 
- -**Critical rule pattern** — always use the external interface (`-i `) to avoid loopback issues: -``` --A PREROUTING -i -p tcp --dport -j DNAT --to-destination : -``` - -#### Typical Port Map - -| External | Service | Notes | -|----------|---------|-------| -| 53 | DNS | Public DNS resolution | -| 80/443 | HTTP/HTTPS | Via Caddy proxy | -| 5432 | PostgreSQL | Restricted access only | -| 993 | IMAPS | Secure email retrieval | -| 465 | SMTPS | Secure email sending | -| 587 | SMTP Submission | STARTTLS | -| 25 | SMTP | Often blocked by ISPs | -| 4747 | Forgejo | Behind proxy | -| 9000 | MinIO API | Internal only | -| 8200 | Vault | Isolated | - -#### Network Diagnostics - -```bash -# Check NAT rules -sudo iptables -t nat -L -n | grep DNAT - -# Test connectivity from container -sudo incus exec -- ping -c 3 8.8.8.8 - -# Test DNS resolution -sudo incus exec -- dig - -# Test port connectivity -nc -zv -``` - -### Key Service Operations - -#### DNS (CoreDNS) -- **Config:** `/opt/gbo/conf/Corefile` -- **Zones:** `/opt/gbo/data/.zone` -- **Test:** `dig @ ` - -#### Database (PostgreSQL) -- **Data:** `/opt/gbo/data` -- **Backup:** `pg_dump -U postgres -F c -f /tmp/backup.dump ` -- **Restore:** `pg_restore -U postgres -d /tmp/backup.dump` - -#### Email (Stalwart) -- **Config:** `/opt/gbo/conf/config.toml` -- **DKIM:** Check TXT records for `selector._domainkey.` -- **Webmail:** Behind proxy -- **Admin:** Accessible via configured admin port - -**Recovery from crash:** -```bash -# Check if service starts with config validation -sudo incus exec email -- /opt/gbo/bin/stalwart -c /opt/gbo/conf/config.toml --help - -# Check error logs -sudo incus exec email -- cat /opt/gbo/logs/stderr.log - -# Restore from snapshot if config corrupted -sudo incus snapshot list email -sudo incus copy email/ email-temp -sudo incus start email-temp -sudo incus file pull email-temp/opt/gbo/conf/config.toml /tmp/config.toml -sudo incus file push /tmp/config.toml email/opt/gbo/conf/config.toml -``` - 
-#### Proxy (Caddy) -- **Config:** `/opt/gbo/conf/config` -- **Backup before edit:** `cp /opt/gbo/conf/config /opt/gbo/conf/config.bak-$(date +%Y%m%d)` -- **Validate:** `caddy validate --config /opt/gbo/conf/config` -- **Reload:** `caddy reload --config /opt/gbo/conf/config` - -#### Storage (MinIO) -- **Console:** Behind proxy -- **Internal API:** http://:9000 -- **Data:** `/opt/gbo/data` - -#### Bot System (system) -- **Service:** BotServer + Valkey (Redis-compatible) -- **Binary:** `/opt/gbo/bin/botserver` -- **Valkey:** port 6379 - -#### Git/ALM (Forgejo) -- **Port:** 4747 (NOT 3000!) -- **Behind proxy:** Access via configured hostname -- **CI Runner:** Separate container, registered with token from DB - -#### CI/CD (Forgejo Runner) -- **Config:** `/opt/gbo/bin/config.yaml` -- **Init:** `/etc/systemd/system/alm-ci-runner.service` (runs as `gbuser`, NOT root) -- **Logs:** `/opt/gbo/logs/out.log`, `/opt/gbo/logs/err.log` -- **Auto-start:** Via systemd (enabled) -- **Runner user:** `gbuser` (uid 1000) — all `/opt/gbo/` files owned by `gbuser:gbuser` -- **sccache:** Installed at `/usr/local/bin/sccache`, configured via `RUSTC_WRAPPER=sccache` in workflow -- **Workspace:** `/opt/gbo/data/` (NOT `/opt/gbo/ci/`) -- **Cargo cache:** `/home/gbuser/.cargo/` (registry + git db) -- **Rustup:** `/home/gbuser/.rustup/` -- **SSH keys:** `/home/gbuser/.ssh/id_ed25519` (for deploy to system container) -- **Deploy mechanism:** CI builds binary → tar+gzip via SSH → `/opt/gbo/bin/botserver` on system container - -### Backup & Recovery - -#### Snapshot Recovery -```bash -# List snapshots -sudo incus snapshot list - -# Restore from snapshot -sudo incus copy / -restored -sudo incus start -restored - -# Get files from snapshot without starting -sudo incus file pull //path/to/file . 
-``` - -#### Backup Scripts -- Host config backup: `/opt/gbo/bin/backup-local-host.sh` -- Remote backup to S3: `/opt/gbo/bin/backup-remote.sh` - -### Troubleshooting - -#### Container Won't Start -```bash -# Check status -sudo incus list -sudo incus info - -# Check logs -sudo incus log --show-log - -# Try starting with verbose -sudo incus start -v -``` - -#### Service Not Running -```bash -# Find process -sudo incus exec -- pgrep -a - -# Check listening ports -sudo incus exec -- ss -tlnp | grep - -# Check application logs -sudo incus exec -- tail -50 /opt/gbo/logs/stderr.log -``` - -#### Email Delivery Issues -```bash -# Check mail server is running -sudo incus exec email -- pgrep -a stalwart - -# Check IMAP/SMTP ports -nc -zv 993 -nc -zv 465 -nc -zv 587 - -# Check DKIM DNS records -dig TXT ._domainkey. - -# Check mail logs -sudo incus exec email -- tail -100 /opt/gbo/logs/email.log -``` - -### Maintenance - -#### Update Container -```bash -# Stop container -sudo incus stop - -# Create snapshot backup -sudo incus snapshot create pre-update-$(date +%Y%m%d) - -# Update packages -sudo incus exec -- apt update && apt upgrade -y - -# Restart -sudo incus start -``` - -#### Disk Space Management -```bash -# Check host disk usage -df -h / - -# Check btrfs pool (if applicable) -sudo btrfs filesystem df /var/lib/incus - -# Clean old logs in container -sudo incus exec -- find /opt/gbo/logs -name "*.log.*" -mtime +7 -delete -``` - -### Container Tricks & Optimizations - -#### Resource Limits -```bash -# Set CPU limit -sudo incus config set limits.cpu 2 - -# Set memory limit -sudo incus config set limits.memory 4GiB - -# Set disk limit -sudo incus config device set root size 20GiB -``` - -#### Profile Management -```bash -# List profiles -sudo incus profile list - -# Apply profile to container -sudo incus profile add - -# Clone container for testing -sudo incus copy --ephemeral -``` - -#### Network Optimization -```bash -# Add static DHCP-like assignment -sudo incus config 
device add eth0 nic nictype=bridged parent= - -# Set custom DNS for container -sudo incus config set raw.lxc "lxc.net.0.ipv4.address=" -``` - -#### Quick Container Cloning for Testing -```bash -# Snapshot and clone for safe testing -sudo incus snapshot create test-base -sudo incus copy /test-base -test -sudo incus start -test -# ... test safely ... -sudo incus stop -test -sudo incus delete -test -``` +- **OFFLINE FIRST** — fix all errors from list before compiling +- **BATCH BY FILE** — fix ALL errors in a file at once, write once +- **VERIFY LAST** — only compile after ALL fixes applied +- **DELETE DEAD CODE** — never keep unused code +- **GIT WORKFLOW** — always push to ALL repositories +- **0 warnings, 0 errors** — loop until clean diff --git a/botserver b/botserver index 552f37a..155d465 160000 --- a/botserver +++ b/botserver @@ -1 +1 @@ -Subproject commit 552f37a41c63cb5a86e147a5de3cc2555a521d3f +Subproject commit 155d465b14de5d018c7c3eeaf47abe5b3b95fb0c diff --git a/botui b/botui index 3919a85..45f56f0 160000 --- a/botui +++ b/botui @@ -1 +1 @@ -Subproject commit 3919a857b2441c472dcdd1bd90b84734f6f10b02 +Subproject commit 45f56f0f6e4c54c168d73464736fb3fc80f79026