diff --git a/.cargo/config.toml b/.cargo/config.toml
deleted file mode 100644
index a625243b..00000000
--- a/.cargo/config.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[build]
-jobs = 6
-
-[target.x86_64-unknown-linux-gnu]
-linker = "clang"
-rustflags = [
-    "-C", "link-arg=-fuse-ld=mold"
-]
diff --git a/AGENTS.md b/AGENTS.md
index f347f0e8..7f62fe65 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -17,7 +17,7 @@ I AM IN DEV ENV, but sometimes, pasting from PROD, do not treat my env as prod!
 > - ❌ **NEVER** write internal IPs to logs or output
 > - When debugging network issues, mask IPs (e.g., "10.x.x.x" instead of "10.16.164.222")
 > - Use hostnames instead of IPs in configs and documentation
-See botserver/src/drive/local_file_monitor.rs to see how to load from /opt/gbo/data the list of development bots.
+See botserver/src/drive/local_file_monitor.rs to see how bots are loaded from MinIO drive buckets (`.gbai` format).
 - ❌ **NEVER** use `cargo clean` - causes 30min rebuilds, use `./reset.sh` for database issues
 
 >
@@ -72,8 +72,8 @@ User Message (WebSocket)
 │
 ▼
 ┌─────────────────────────────────┐
-│  2. start.bas Execution         │  /opt/gbo/data/{bot}.gbai/...
-│     - Runs ONCE per session     │  {bot}.gbdialog/start.bas
+│  2. start.bas Execution         │  MinIO: {bot}.gbai/...
+│     - Runs ONCE per session     │  {bot}.gbdialog/start.bas
 │     - ADD_SUGGESTION calls      │  Adds button suggestions
 │     - Sets Redis flag           │  prevents re-run
 └──────────────┬──────────────────┘
diff --git a/Cargo.lock b/Cargo.lock
index 4d328263..1ac7b0db 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -671,6 +671,7 @@ dependencies = [
  "mimalloc",
  "mockito",
  "num-format",
+ "ole",
  "once_cell",
  "ooxmlsdk",
  "pdf-extract",
@@ -3902,6 +3903,12 @@ dependencies = [
  "asn1-rs 0.7.1",
 ]
 
+[[package]]
+name = "ole"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f0add6eeb62fdaf7afd332d52763f27d365cfd047f95e232671532c1efb3a66"
+
 [[package]]
 name = "once_cell"
 version = "1.21.4"
diff --git a/botserver/Cargo.toml b/botserver/Cargo.toml
index 2a231845..cedd6294 100644
--- a/botserver/Cargo.toml
+++ b/botserver/Cargo.toml
@@ -49,7 +49,7 @@ tickets = ["automation", "drive", "cache"]
 billing = ["automation", "drive", "cache"]
 
 # Document Processing (lightweight - KB extraction without heavy OOXML SDKs)
-kb-extraction = ["drive", "dep:calamine"]
+kb-extraction = ["drive", "dep:calamine", "dep:ole"]
 
 # Documents (full editing UI - opt-in, adds ~4min compile time from ooxmlsdk)
 docs = ["automation", "drive", "cache", "dep:docx-rs", "dep:ooxmlsdk", "kb-extraction"]
@@ -169,6 +169,7 @@ mailparse = { workspace = true, optional = true }
 docx-rs = { workspace = true, optional = true }
 ooxmlsdk = { workspace = true, optional = true, features = ["parts"] }
 calamine = { workspace = true, optional = true }
+ole = { version = "0.1", optional = true }
 rust_xlsxwriter = { workspace = true, optional = true }
 umya-spreadsheet = { workspace = true, optional = true }
 
diff --git a/botserver/src/core/kb/document_processor/mod.rs b/botserver/src/core/kb/document_processor/mod.rs
index 59af752d..d60db03a 100644
--- a/botserver/src/core/kb/document_processor/mod.rs
+++ b/botserver/src/core/kb/document_processor/mod.rs
@@ -7,7 +7,6 @@ pub use types::{ChunkMetadata, DocumentFormat, DocumentMetadata, TextChunk};
 use anyhow::Result;
 use log::{debug, info, warn};
 use std::collections::HashMap;
-use std::io::Cursor;
 use std::path::Path;
 use tokio::io::AsyncReadExt;
 
diff --git a/botserver/src/core/kb/document_processor/types.rs b/botserver/src/core/kb/document_processor/types.rs
index 35dde261..27c419e5 100644
--- a/botserver/src/core/kb/document_processor/types.rs
+++ b/botserver/src/core/kb/document_processor/types.rs
@@ -4,7 +4,9 @@ use serde::{Deserialize, Serialize};
 pub enum DocumentFormat {
     PDF,
     DOCX,
+    DOC,
     XLSX,
+    XLS,
     PPTX,
     TXT,
     MD,
@@ -21,7 +23,9 @@ impl DocumentFormat {
         match ext.as_str() {
             "pdf" => Some(Self::PDF),
             "docx" => Some(Self::DOCX),
+            "doc" => Some(Self::DOC),
             "xlsx" => Some(Self::XLSX),
+            "xls" => Some(Self::XLS),
             "pptx" => Some(Self::PPTX),
             "txt" => Some(Self::TXT),
             "md" | "markdown" => Some(Self::MD),
@@ -38,7 +42,7 @@ impl DocumentFormat {
         match self {
             Self::PDF => 500 * 1024 * 1024,
             Self::PPTX => 200 * 1024 * 1024,
-            Self::DOCX | Self::XLSX | Self::TXT | Self::JSON | Self::XML => 100 * 1024 * 1024,
+            Self::DOCX | Self::DOC | Self::XLSX | Self::XLS | Self::TXT | Self::JSON | Self::XML => 100 * 1024 * 1024,
             Self::HTML | Self::RTF => 50 * 1024 * 1024,
             Self::MD => 10 * 1024 * 1024,
             Self::CSV => 1024 * 1024 * 1024,
diff --git a/botserver/src/core/kb/embedding_generator.rs b/botserver/src/core/kb/embedding_generator.rs
index ac5b6056..6358e3ef 100644
--- a/botserver/src/core/kb/embedding_generator.rs
+++ b/botserver/src/core/kb/embedding_generator.rs
@@ -37,20 +37,24 @@ pub struct EmbeddingConfig {
 }
 
 impl Default for EmbeddingConfig {
+    /// Returns the built-in default settings: empty URL, no API key, batch size 2.
+    ///
+    /// NOTE(review): 384 dimensions is the BGE-Small size, but the default model
+    /// named below is not a BGE-Small model - confirm the two are meant to agree.
     fn default() -> Self {
         Self {
             embedding_url: "".to_string(),
             embedding_model: "BAAI/bge-multilingual-gemma2".to_string(),
             embedding_key: None,
-            dimensions: 2048,
+            dimensions: 384, // Default to BGE-Small dimensions, will be overridden by config
             batch_size: 2, // Reduced from 16 to prevent llama-server crash
             timeout_seconds: 60,
             max_concurrent_requests: 1,
             connect_timeout_seconds: 10,
         }
     }
 }
 
 impl EmbeddingConfig {
     pub fn from_env() -> Self {
         Self::default()
diff --git a/botserver/src/core/package_manager/installer.rs b/botserver/src/core/package_manager/installer.rs
index d4c4e644..93a2102c 100644
--- a/botserver/src/core/package_manager/installer.rs
+++ b/botserver/src/core/package_manager/installer.rs
@@ -385,7 +385,7 @@ impl PackageManager {
 
                     "https://huggingface.co/CompendiumLabs/bge-small-en-v1.5-gguf/resolve/main/bge-small-en-v1.5-f32.gguf".to_string(),
                 ],
-                exec_cmd: "nohup {{BIN_PATH}}/build/bin/llama-server --port 8081 --ssl-key-file {{CONF_PATH}}/system/certificates/llm/server.key --ssl-cert-file {{CONF_PATH}}/system/certificates/llm/server.crt -m {{DATA_PATH}}/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf --ubatch-size 512 > {{LOGS_PATH}}/llm.log 2>&1 & nohup {{BIN_PATH}}/build/bin/llama-server --port 8082 --ssl-key-file {{CONF_PATH}}/system/certificates/embedding/server.key --ssl-cert-file {{CONF_PATH}}/system/certificates/embedding/server.crt -m {{DATA_PATH}}/bge-small-en-v1.5-f32.gguf --embedding --ubatch-size 512 > {{LOGS_PATH}}/embedding.log 2>&1 &".to_string(),
+                exec_cmd: "nohup {{BIN_PATH}}/build/bin/llama-server --port 8081 --ssl-key-file {{CONF_PATH}}/system/certificates/llm/server.key --ssl-cert-file {{CONF_PATH}}/system/certificates/llm/server.crt -m {{DATA_PATH}}/DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf --ubatch-size 512 > {{LOGS_PATH}}/llm.log 2>&1 & nohup {{BIN_PATH}}/build/bin/llama-server --port 8082 --ssl-key-file {{CONF_PATH}}/system/certificates/embedding/server.key --ssl-cert-file {{CONF_PATH}}/system/certificates/embedding/server.crt -m {{DATA_PATH}}/bge-small-en-v1.5-f32.gguf --embeddings --pooling mean --n-gpu-layers 0 --ctx-size 512 --ubatch-size 512 > {{LOGS_PATH}}/embedding.log 2>&1 &".to_string(),
                 check_cmd: "curl -f -k --connect-timeout 2 -m 5 https://localhost:8081/health >/dev/null 2>&1 && curl -f -k --connect-timeout 2 -m 5 https://localhost:8082/health >/dev/null 2>&1".to_string(),
             container: None,
             },
@@ -1703,9 +1703,10 @@ VAULT_CACERT={}
                 ("openai_key".to_string(), "none".to_string()),
                 ("anthropic_key".to_string(), "none".to_string()),
                 ("ollama_url".to_string(), "".to_string()),
-                ("embedding_url".to_string(), "http://localhost:8082/v1/embeddings".to_string()),
-                ("embedding_model".to_string(), "bge-small-en-v1.5-f32.gguf".to_string()),
-                ("embedding_port".to_string(), "8082".to_string()),
+ ("embedding_url".to_string(), "http://localhost:8082/v1/embeddings".to_string()),
+ ("embedding_model".to_string(), "bge-small-en-v1.5-f32.gguf".to_string()),
+ ("embedding_port".to_string(), "8082".to_string()),
+ ("embedding_dimensions".to_string(), "384".to_string()),
             ],
         ),
             (
diff --git a/botserver/src/drive/drive_files.rs b/botserver/src/drive/drive_files.rs
index 405ba5f2..f726eb8d 100644
--- a/botserver/src/drive/drive_files.rs
+++ b/botserver/src/drive/drive_files.rs
@@ -98,13 +98,14 @@ impl DriveFileRepository {
                 drive_files::created_at.eq(now),
                 drive_files::updated_at.eq(now),
             ))
-            .on_conflict((drive_files::bot_id, drive_files::file_path))
-            .do_update()
-            .set((
-                drive_files::etag.eq(etag_clone),
-                drive_files::last_modified.eq(last_modified_clone),
-                drive_files::updated_at.eq(now),
-            ))
+        .on_conflict((drive_files::bot_id, drive_files::file_path))
+        .do_update()
+        .set((
+            drive_files::file_type.eq(file_type),
+            drive_files::etag.eq(etag_clone),
+            drive_files::last_modified.eq(last_modified_clone),
+            drive_files::updated_at.eq(now),
+        ))
             .execute(&mut conn)
             .map_err(|e| e.to_string())?;
 
diff --git a/botserver/src/drive/drive_monitor/types.rs b/botserver/src/drive/drive_monitor/types.rs
index 3f5ba482..68c1defe 100644
--- a/botserver/src/drive/drive_monitor/types.rs
+++ b/botserver/src/drive/drive_monitor/types.rs
@@ -84,21 +84,23 @@ impl DriveMonitor {
 
             if file_type == "bas" {
                 self.sync_bas_to_work(bot_name, &obj.key).await;
+            } else if file_type != "kb" && file_type != "config" {
+                let _ = self.file_repo.mark_indexed(self.bot_id, &full_key);
             }
         } else {
             log::trace!("{} unchanged, skipping upsert", full_key);
         }
 
-            if needs_reindex && file_type == "kb" {
-                    #[cfg(any(feature = "research", feature = "llm"))]
-                    {
-                        self.index_kb_file(bot_name, &full_key, &obj.key).await;
-                    }
-                }
+        if needs_reindex && file_type == "kb" {
+            #[cfg(any(feature = "research", feature = "llm"))]
+            {
+                self.index_kb_file(bot_name, &full_key, &obj.key).await;
+            }
+        }
 
-                if file_type == "config" && needs_reindex {
-                    self.sync_bot_config(bot_name, &obj.key).await;
-                }
+        if file_type == "config" && needs_reindex {
+            self.sync_bot_config(bot_name, &obj.key).await;
+        }
                     }
 
         self.handle_deleted_files(bot_name, &current_keys);
diff --git a/botserver/src/llm/local.rs b/botserver/src/llm/local.rs
index 9aa1ba73..5151bb71 100644
--- a/botserver/src/llm/local.rs
+++ b/botserver/src/llm/local.rs
@@ -7,12 +7,19 @@ use crate::core::shared::state::AppState;
 use diesel::prelude::*;
 use log::{error, info, trace, warn};
 use reqwest;
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 use tokio;
 
+static LLAMA_SERVERS_STARTED: AtomicBool = AtomicBool::new(false);
+
 pub async fn ensure_llama_servers_running(
     app_state: Arc<AppState>,
 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
+    if LLAMA_SERVERS_STARTED.swap(true, Ordering::SeqCst) {
+        info!("ensure_llama_servers_running already called, skipping duplicate invocation");
+        return Ok(());
+    }
     trace!("ensure_llama_servers_running ENTER");
     let start_mem = MemoryStats::current();
     trace!(
@@ -91,32 +98,29 @@ let llm_url = if llm_url.is_empty() && llm_server_enabled {
     llm_url
 };
 
-let llm_model = if llm_model.is_empty() {
-    info!("No LLM model configured, using default: DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf");
-    "DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string()
-} else {
-    llm_model
-};
+    // Use config values, fallback to safe defaults for local development
+    let llm_model = if llm_model.is_empty() {
+        info!("No LLM model configured, using default: DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf");
+        "DeepSeek-R1-Distill-Qwen-1.5B-Q3_K_M.gguf".to_string()
+    } else {
+        llm_model
+    };
 
-let embedding_model = if embedding_model.is_empty() {
-    info!("No embedding model configured, using default: bge-small-en-v1.5-f32.gguf");
-    "bge-small-en-v1.5-f32.gguf".to_string()
-} else {
-    embedding_model
-};
+    let embedding_model = if embedding_model.is_empty() {
+        info!("No embedding model configured, using default: bge-small-en-v1.5-f32.gguf");
+        "bge-small-en-v1.5-f32.gguf".to_string()
+    } else {
+        embedding_model
+    };
 
-let embedding_url = if embedding_url.is_empty() {
-    let default_port = "8082";
-    let url = format!("http://localhost:{default_port}/v1/embeddings");
-    info!("No embedding-url configured, using default: {url}");
-    let config_manager = ConfigManager::new(app_state.conn.clone());
-    if let Err(e) = config_manager.set_config(&default_bot_id, "embedding-url", &url) {
-        warn!("Failed to persist default embedding-url: {e}");
-    }
-    url
-} else {
-    embedding_url
-};
+    let embedding_url = if embedding_url.is_empty() {
+        let default_port = "8082";
+        let url = format!("http://localhost:{default_port}/v1/embeddings");
+        info!("No embedding-url configured, using default: {url}");
+        url
+    } else {
+        embedding_url
+    };
 
     // For llama-server startup, use path relative to botserver root
     // The models are in <stack_path>/data/llm/ and the llama-server runs from botserver root
@@ -136,38 +140,6 @@ let embedding_url = if embedding_url.is_empty() {
     info!("  LLM Model: {llm_model}");
     info!("  Embedding Model: {embedding_model}");
     info!("  LLM Server Path: {llm_server_path}");
-    info!("Restarting any existing llama-server processes...");
-    trace!("About to pkill llama-server...");
-    let before_pkill = MemoryStats::current();
-    trace!(
-        "[LLM_LOCAL] Before pkill, RSS={}",
-        MemoryStats::format_bytes(before_pkill.rss_bytes)
-    );
-
-    let pkill_result = SafeCommand::new("sh")
-        .and_then(|c| c.arg("-c"))
-        .and_then(|c| c.trusted_shell_script_arg("pkill llama-server -9; true"));
-
-    match pkill_result {
-        Ok(cmd) => {
-            if let Err(e) = cmd.execute() {
-                error!("Failed to execute pkill for llama-server: {e}");
-            } else {
-                tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
-                info!("Existing llama-server processes terminated (if any)");
-            }
-        }
-        Err(e) => error!("Failed to build pkill command: {e}"),
-    }
-    trace!("pkill done");
-
-    let after_pkill = MemoryStats::current();
-    trace!(
-        "[LLM_LOCAL] After pkill, RSS={} (delta={})",
-        MemoryStats::format_bytes(after_pkill.rss_bytes),
-        MemoryStats::format_bytes(after_pkill.rss_bytes.saturating_sub(before_pkill.rss_bytes))
-    );
-
     let llm_running = if llm_url.starts_with("https://") {
         info!("Using external HTTPS LLM server, skipping local startup");
         true
@@ -188,6 +160,23 @@ let embedding_url = if embedding_url.is_empty() {
         }
         return Ok(());
     }
+    // NOTE(review): pkill stops every llama-server, yet the LLM server is only started below when `llm_running` is false - confirm a running one is not lost.
+    info!("Killing existing llama-server processes to restart with correct args...");
+    let pkill_result = SafeCommand::new("sh")
+        .and_then(|c| c.arg("-c"))
+        .and_then(|c| c.trusted_shell_script_arg("pkill llama-server -9; true"));
+
+    match pkill_result {
+        Ok(cmd) => {
+            if let Err(e) = cmd.execute() {
+                error!("Failed to execute pkill for llama-server: {e}");
+            } else {
+                tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
+                info!("Existing llama-server processes terminated");
+            }
+        }
+        Err(e) => error!("Failed to build pkill command: {e}"),
+    }
     let mut tasks = vec![];
     if !llm_running && !llm_model.is_empty() {
         info!("Starting LLM server...");
@@ -474,6 +463,12 @@ pub fn start_llm_server(
         format!("{}/llama-server", llama_cpp_path)
     };
 
+    // Get ubatch-size from config, default to 512 if not set
+    let ubatch_size = config_manager
+        .get_config(&default_bot_id, "llm-server-ubatch-size", Some("512"))
+        .unwrap_or_else(|_| "512".to_string());
+    let ubatch_size = if ubatch_size.is_empty() { "512".to_string() } else { ubatch_size };
+
     let mut args_vec = vec![
         "-m", &model_path,
         "--host", "0.0.0.0",
@@ -482,7 +477,7 @@ pub fn start_llm_server(
         "--temp", "0.6",
         "--repeat-penalty", "1.2",
         "--n-gpu-layers", &gpu_layers,
-        "--ubatch-size", "2048",
+        "--ubatch-size", &ubatch_size,
     ];
 
     if !reasoning_format.is_empty() {
@@ -578,7 +573,7 @@ pub async fn start_embedding_server(
     };
 
     let mut args_vec = vec![
-        "-m", &model_path,
+        "-m", &full_model_path,
         "--host", "0.0.0.0",
         "--port", port,
         "--embeddings",
@@ -636,5 +631,22 @@ pub async fn start_embedding_server(
 }
 
+/// Returns the port of `url` (e.g. `"8082"` for `http://localhost:8082/v1/embeddings`).
+///
+/// Falls back to `"8081"` when the URL carries no explicit numeric port, so a
+/// port-less URL such as `http://localhost/v1` never yields an empty string.
 fn extract_port(url: &str) -> &str {
-    url.rsplit(':').next().unwrap_or("8081")
+    const DEFAULT_PORT: &str = "8081";
+
+    // Skip the scheme so its ':' is not mistaken for the port separator.
+    let rest = url.split_once("://").map_or(url, |(_, rest)| rest);
+    // Keep only `host[:port]`; path, query and fragment come after it.
+    let authority = rest.split(['/', '?', '#']).next().unwrap_or(rest);
+    // A bare value without ':' (e.g. "8082") is still accepted if it is numeric.
+    let candidate = authority.rsplit_once(':').map_or(authority, |(_, port)| port);
+
+    if !candidate.is_empty() && candidate.bytes().all(|b| b.is_ascii_digit()) {
+        candidate
+    } else {
+        DEFAULT_PORT
+    }
 }
diff --git a/bottemplates/ai-search.gbai/ai-search.gbdialog/start.bas b/bottemplates/ai-search.gbai/ai-search.gbdialog/start.bas
index 5700ed88..626a5a5a 100644
--- a/bottemplates/ai-search.gbai/ai-search.gbdialog/start.bas
+++ b/bottemplates/ai-search.gbai/ai-search.gbdialog/start.bas
@@ -1,9 +1,11 @@
 ADD TOOL "qr"
+USE KB "sheetlib"
 
 CLEAR SUGGESTIONS
 ADD SUGGESTION "scan" AS "Scan a QR Code"
 ADD SUGGESTION "find" AS "Find a procedure"
 ADD SUGGESTION "help" AS "How to search documents"
+ADD SUGGESTION "test kb" AS "Test KB injection"
 
 BEGIN TALK
 General Bots AI Search
@@ -13,11 +15,13 @@ Comprehensive Document Search with AI summaries and EDM integration.
 **Options:**
 • Scan a QR Code - Send a photo to scan
 • Find a Procedure - Ask about any process
+• Test KB - Verify sheetlib knowledge base injection
 
 **Examples:**
 - How to send a fax?
 - How to clean the machine?
 - How to find a contact?
+- What is in the sheetlib KB?
 END TALK
 
 BEGIN SYSTEM PROMPT
diff --git a/bottemplates/ai-search.gbai/ai-search.gbkb/docs/sheetlib.md b/bottemplates/ai-search.gbai/ai-search.gbkb/docs/sheetlib.md
new file mode 100644
index 00000000..e7c39321
--- /dev/null
+++ b/bottemplates/ai-search.gbai/ai-search.gbkb/docs/sheetlib.md
@@ -0,0 +1,35 @@
+# SheetLib Knowledge Base
+
+## Overview
+SheetLib is a spreadsheet processing library for General Bots.
+
+## Features
+- Create and edit spreadsheets
+- Import/export Excel files (XLSX, XLS)
+- Formula calculations
+- Cell formatting
+- Multiple sheets support
+
+## Usage Examples
+
+### Create a Spreadsheet
+```
+CREATE SHEET "Sales Report"
+```
+
+### Add Data
+```
+SET CELL "A1" = "Product"
+SET CELL "B1" = "Price"
+SET CELL "A2" = "Widget"
+SET CELL "B2" = 99.99
+```
+
+### Export
+```
+EXPORT SHEET TO "report.xlsx"
+```
+
+## Testing KB Injection
+If you can read this, the KB injection is working correctly!
+The sheetlib knowledge base has been successfully loaded.