From b392d508c34f97486bec4cde419c04546f9c55ff Mon Sep 17 00:00:00 2001
From: Rodrigo Rodriguez <rodriguez@pragmatismo.com.br>
Date: Thu, 30 Apr 2026 22:21:46 -0300
Subject: [PATCH] Fix HTML streaming: accumulate chunks and render only on
 is_complete

- botui/chat-messages.js: While streaming, HTML chunks are now accumulated
  without being rendered; only a loading indicator is shown. When
  is_complete=true, the full HTML is rendered at once. Text/markdown
  continues to stream normally.
- botserver/mod.rs: Remove the unused html_buffer variable and its
  flush block
- drive_monitor/monitor.rs: Change CHECK_INTERVAL_SECS from 1 to 2
- CI workflow: Deploy binaries from target/fast/ instead of target/debug/
  and target/release/
- DEV-DEPENDENCIES.sh, botbook: Drop the sccache setup and document
  native Cargo incremental builds with mold for persistent runners
---
 .forgejo/workflows/botserver.yaml             | 10 +--
 DEV-DEPENDENCIES.sh                           | 29 -------
 .../src/02-architecture-packages/building.md  | 28 +++++--
 botbook/src/12-ecosystem-reference/ci-cd.md   |  4 +-
 botserver/src/core/bot/mod.rs                 | 27 +------
 botserver/src/drive/drive_monitor/monitor.rs  |  2 +-
 botserver/src/main.rs                         |  2 +-
 botui/ui/suite/chat/chat-messages.js          | 75 ++++++++++---------
 8 files changed, 71 insertions(+), 106 deletions(-)

diff --git a/.forgejo/workflows/botserver.yaml b/.forgejo/workflows/botserver.yaml
index 3b25a3dd..9aa46eea 100644
--- a/.forgejo/workflows/botserver.yaml
+++ b/.forgejo/workflows/botserver.yaml
@@ -40,11 +40,11 @@ steps:
         run: |
           echo "=== Deploying to Stage ==="
           scp -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \
-            /opt/gbo/work/generalbots/target/debug/botserver \
-            gbuser@system:/opt/gbo/bin/botserver-new
-          scp -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \
-            /opt/gbo/work/generalbots/target/release/botui \
-            gbuser@system:/opt/gbo/bin/botui-new
+            /opt/gbo/work/generalbots/target/fast/botserver \
+            gbuser@system:/opt/gbo/bin/botserver-new
+          scp -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \
+            /opt/gbo/work/generalbots/target/fast/botui \
+            gbuser@system:/opt/gbo/bin/botui-new
           ssh -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \
             gbuser@system \
             "sudo systemctl stop botserver || true && \
diff --git a/DEV-DEPENDENCIES.sh b/DEV-DEPENDENCIES.sh
index 5cc5249e..57bd0816 100755
--- a/DEV-DEPENDENCIES.sh
+++ b/DEV-DEPENDENCIES.sh
@@ -25,33 +25,6 @@ install_rust() {
   fi
 }
 
-install_sccache() {
-  WANT_VER="0.14.0"
-  CURRENT_VER=""
-  if command -v sccache &> /dev/null; then
-    CURRENT_VER=$(sccache --version 2>/dev/null | grep -oP '[\d.]+' | head -1)
-  fi
-  if [ "$CURRENT_VER" = "$WANT_VER" ]; then
-    echo "sccache $WANT_VER already installed"
-  else
-    echo "Upgrading sccache from ${CURRENT_VER:-none} to $WANT_VER..."
-    rm -f /usr/local/bin/sccache /usr/local/bin/sccache-dist
-    ARCH=$(uname -m)
-    curl -L "https://github.com/mozilla/sccache/releases/download/v${WANT_VER}/sccache-v${WANT_VER}-${ARCH}-unknown-linux-musl.tar.gz" -o /tmp/sccache.tar.gz
-    tar -xzf /tmp/sccache.tar.gz -C /tmp
-    cp "/tmp/sccache-v${WANT_VER}-${ARCH}-unknown-linux-musl/sccache" /usr/local/bin/sccache.real
-    chmod +x /usr/local/bin/sccache.real
-    rm -rf /tmp/sccache*
-  fi
-  # Install wrapper that unsets CARGO_INCREMENTAL before calling sccache.real
-  cat > /usr/local/bin/sccache << 'EOF'
-#!/bin/bash
-unset CARGO_INCREMENTAL
-exec /usr/local/bin/sccache.real "$@"
-EOF
-  chmod +x /usr/local/bin/sccache
-  sccache --version
-}
 
 install_mold() {
   if command -v mold &> /dev/null; then
@@ -92,7 +65,6 @@ CARGOCONF
 }
 
 install_rust
-install_sccache
 install_mold
 install_cargo_tools
 setup_cargo_config
@@ -101,7 +73,6 @@ echo ""
 echo "✅ Dev environment ready:"
 echo "   Rust:       $(rustc --version)"
 echo "   Linker:     clang + lld + mold"
-echo "   Cache:      sccache"
 echo "   Audit:      cargo-audit, cargo-machete, cargo-tree"
 echo "📦 .cargo/config.toml configured"
 echo "⚡ Build: cargo build -p botserver --bin botserver"
diff --git a/botbook/src/02-architecture-packages/building.md b/botbook/src/02-architecture-packages/building.md
index a4bdd199..98af6d31 100644
--- a/botbook/src/02-architecture-packages/building.md
+++ b/botbook/src/02-architecture-packages/building.md
@@ -79,6 +79,7 @@ sudo apt update
 sudo apt install -y \
     clang \
     lld \
+    mold \
     build-essential \
     pkg-config \
     libssl-dev \
@@ -111,7 +112,7 @@ mkdir -p ~/.cargo
 cat >> ~/.cargo/config.toml << EOF
 [target.x86_64-unknown-linux-gnu]
 linker = "clang"
-rustflags = ["-C", "link-arg=-fuse-ld=lld"]
+rustflags = ["-C", "link-arg=-fuse-ld=mold"] # or "lld"
 EOF
 ```
 
@@ -123,6 +124,7 @@ EOF
 sudo dnf install -y \
     clang \
     lld \
+    mold \
     gcc \
     gcc-c++ \
     make \
@@ -156,7 +158,7 @@ mkdir -p ~/.cargo
 cat >> ~/.cargo/config.toml << EOF
 [target.x86_64-unknown-linux-gnu]
 linker = "clang"
-rustflags = ["-C", "link-arg=-fuse-ld=lld"]
+rustflags = ["-C", "link-arg=-fuse-ld=mold"] # or "lld"
 EOF
 ```
 
@@ -192,9 +194,9 @@ git submodule update --init --recursive
 
 ## Build Cache with sccache
 
-sccache caches compilation artifacts for faster rebuilds.
+sccache (Shared Compilation Cache) caches compilation artifacts to accelerate rebuilds across different environments.
 
-Install and configure:
+### Installation & Configuration
 
 ```bash
 cargo install sccache
@@ -204,14 +206,26 @@ compiler = "sccache"' >> ~/.cargo/config.toml
 export RUSTC_WRAPPER=sccache
 ```
 
-Verify cache hits:
+### Ephemeral vs. Persistent Environments
 
+The decision to use `sccache` depends heavily on your build environment:
+
+#### 1. When to use sccache (Ephemeral/Clean CI)
+Use `sccache` in environments that start with a **clean disk** for every build (e.g., standard GitHub Actions, cloud-based CI). Since the `target/` directory is lost between runs, `sccache` allows you to recover compiled artifacts from a remote bucket (S3/GCS) or global persistent cache, saving hours of dependency recompilation.
+
+#### 2. When to avoid sccache (Persistent/Self-hosted)
+If you are using a **self-hosted runner** with a **persistent `target/` directory** (like the standard General Bots production setup), Cargo's native incremental logic is usually faster than `sccache`.
+- **Reason**: Cargo's native incremental compilation only checks file timestamps and metadata. `sccache` must calculate cryptographic hashes of every source file, which adds overhead.
+- **Recommendation**: For persistent runners, rely on `CARGO_INCREMENTAL=1` and a fast linker like `mold` or `lld`.
+
+### Monitoring and Maintenance
+
+Verify cache hits:
 ```bash
 sccache --show-stats
 ```
 
 Clear cache if needed:
-
 ```bash
 sccache --zero-stats
 ```
@@ -494,7 +508,7 @@ $env:PQ_LIB_DIR="C:\Program Files\PostgreSQL\15\lib"
 
 ### Out of Memory During Build
 
-Use sccache to cache compilations:
+Use sccache to cache compilations (helps avoid recompilation in ephemeral CI, but does not reduce peak memory of a single crate):
 
 ```bash
 cargo install sccache
diff --git a/botbook/src/12-ecosystem-reference/ci-cd.md b/botbook/src/12-ecosystem-reference/ci-cd.md
index d1943755..d7b3a3d5 100644
--- a/botbook/src/12-ecosystem-reference/ci-cd.md
+++ b/botbook/src/12-ecosystem-reference/ci-cd.md
@@ -143,7 +143,7 @@ sudo incus exec alm-ci -- tail -f /opt/gbo/logs/forgejo-runner.log | grep -E "Cl
 | Rust compilation (incremental) | 30-60 seconds |
 | First build (dependencies) | Downloads ~200 crates |
 | Deploy step | ~5 seconds |
-| Total CI time | 2-6 minutes depending on cache |
+| Total CI time | 2-4 minutes (incremental with mold) |
 
 ---
 
@@ -173,7 +173,7 @@ curl -sf https://<system-domain>/api/health && echo "OK" || echo "FAILED"
 - **User:** gbuser (uid 1000)
 - **Workspace:** /opt/gbo/data/
 - **SSH deploy key:** /home/gbuser/.ssh/id_ed25519
-- **sccache:** /usr/local/bin/sccache (via RUSTC_WRAPPER=sccache)
+- **Cache:** Native Cargo incremental (target/ persistence)
 - **Cargo cache:** /home/gbuser/.cargo/
 - **Rustup:** /home/gbuser/.rustup/
 
diff --git a/botserver/src/core/bot/mod.rs b/botserver/src/core/bot/mod.rs
index a7d02028..ca22d27b 100644
--- a/botserver/src/core/bot/mod.rs
+++ b/botserver/src/core/bot/mod.rs
@@ -1054,8 +1054,7 @@ let system_prompt = if !message.active_switchers.is_empty() {
         let mut in_analysis = false;
         let mut tool_call_buffer = String::new(); // Accumulate potential tool call JSON chunks
         let mut accumulating_tool_call = false; // Track if we're currently accumulating a tool call
-        let mut html_buffer = String::new(); // Buffer for HTML content
-        let handler = llm_models::get_handler(&model);
+        let handler = llm_models::get_handler(&model);
 
         trace!("Using model handler for {}", model);
         info!("llm_start: Starting LLM streaming for session {}", session.id);
@@ -1438,29 +1437,7 @@ if !in_analysis {
         #[cfg(not(feature = "chat"))]
         let switchers: Vec<Switcher> = Vec::new();
 
-        // Flush any remaining HTML buffer before sending final response
-        if !html_buffer.is_empty() {
-            trace!("Flushing remaining {} chars in HTML buffer", html_buffer.len());
-            let final_chunk = BotResponse {
-                bot_id: message.bot_id.clone(),
-                user_id: message.user_id.clone(),
-                session_id: message.session_id.clone(),
-                channel: message.channel.clone(),
-                content: html_buffer.clone(),
-                message_type: MessageType::BOT_RESPONSE,
-                stream_token: None,
-            is_complete: false,
-            suggestions: Vec::new(),
-            switchers: Vec::new(),
-            context_name: None,
-            context_length: 0,
-            context_max_length: 0,
-        };
-        let _ = response_tx.send(final_chunk).await;
-            html_buffer.clear();
-        }
-
-        // Content was already sent as streaming chunks.
+        // Content was already sent as streaming chunks.
         // Sending full_response again would duplicate it (especially for WhatsApp which accumulates buffer).
         // The final response is just a signal that streaming is complete - it should not contain content.
         let final_content = String::new();
diff --git a/botserver/src/drive/drive_monitor/monitor.rs b/botserver/src/drive/drive_monitor/monitor.rs
index bca34d34..0134b7b5 100644
--- a/botserver/src/drive/drive_monitor/monitor.rs
+++ b/botserver/src/drive/drive_monitor/monitor.rs
@@ -4,7 +4,7 @@ use std::time::Duration;
 use super::types::DriveMonitor;
 
 /// Intervalo de verificação do DriveMonitor e DriveCompiler (em segundos)
-pub const CHECK_INTERVAL_SECS: u64 = 1;
+pub const CHECK_INTERVAL_SECS: u64 = 2;
 
 impl DriveMonitor {
     pub fn calculate_backoff(&self) -> Duration {
diff --git a/botserver/src/main.rs b/botserver/src/main.rs
index 5ac91baa..6c1afc79 100644
--- a/botserver/src/main.rs
+++ b/botserver/src/main.rs
@@ -256,7 +256,7 @@ rustls=off,rustls_pemfile=off,tokio_rustls=off,\
         Ok(existing) if !existing.is_empty() => format!("{},{}", existing, noise_filters),
         _ => format!("info,{}", noise_filters),
     };
-// Test mold+sccache build
+// Test mold+incremental build
 
     std::env::set_var("RUST_LOG", &rust_log);
 
diff --git a/botui/ui/suite/chat/chat-messages.js b/botui/ui/suite/chat/chat-messages.js
index 0da8fbb9..fabee638 100644
--- a/botui/ui/suite/chat/chat-messages.js
+++ b/botui/ui/suite/chat/chat-messages.js
@@ -92,49 +92,52 @@ function isTagBalanced(html) {
 }
 
 function updateStreaming(content) {
-var el = document.getElementById(ChatState.streamingMessageId);
-if (!el) return;
+  var el = document.getElementById(ChatState.streamingMessageId);
+  if (!el) return;
 
-var msgContent = el.querySelector(".message-content");
-var cleanContent = stripMarkdownBlocks(content);
-var isHtml = /<\/?[a-zA-Z][^>]*>|<!--|-->/i.test(cleanContent);
+  var msgContent = el.querySelector(".message-content");
+  var cleanContent = stripMarkdownBlocks(content);
+  var isHtml = /<\/?[a-zA-Z][^>]*>|<!--|-->/i.test(cleanContent);
 
-if (isHtml) {
-    if (isTagBalanced(cleanContent) || (Date.now() - ChatState.lastRenderTime > 2000)) {
-        msgContent.innerHTML = renderMentionInMessage(cleanContent); // Don't escape HTML
-        ChatState.lastRenderTime = Date.now();
-        if (!ChatState.isUserScrolling) scrollToBottom(true);
+  if (isHtml) {
+    if (!el.querySelector(".streaming-loading")) {
+      var loader = document.createElement("div");
+      loader.className = "streaming-loading";
+      loader.innerHTML = '<span class="loading-dots">...</span>';
+      msgContent.appendChild(loader);
     }
-}
- else {
-var parsed = typeof marked !== "undefined" && marked.parse
-? marked.parse(cleanContent)
-: escapeHtml(cleanContent);
-parsed = renderMentionInMessage(parsed);
-msgContent.innerHTML = parsed;
-if (!ChatState.isUserScrolling) scrollToBottom(true);
-}
+  } else {
+    var parsed = typeof marked !== "undefined" && marked.parse
+      ? marked.parse(cleanContent)
+      : escapeHtml(cleanContent);
+    parsed = renderMentionInMessage(parsed);
+    msgContent.innerHTML = parsed;
+    if (!ChatState.isUserScrolling) scrollToBottom(true);
+  }
 }
 
 function finalizeStreaming() {
-var el = document.getElementById(ChatState.streamingMessageId);
-if (el) {
-var cleanContent = stripMarkdownBlocks(ChatState.currentStreamingContent);
-var hasHtmlTags = /<\/?[a-zA-Z][^>]*>|<!--|-->/i.test(cleanContent);
-var parsed = hasHtmlTags
-    ? cleanContent // Don't escape HTML
-    : (typeof marked !== "undefined" && marked.parse
+  var el = document.getElementById(ChatState.streamingMessageId);
+  if (el) {
+    var cleanContent = stripMarkdownBlocks(ChatState.currentStreamingContent);
+    var hasHtmlTags = /<\/?[a-zA-Z][^>]*>|<!--|-->/i.test(cleanContent);
+    var parsed;
+    if (hasHtmlTags) {
+      parsed = cleanContent;
+    } else {
+      parsed = typeof marked !== "undefined" && marked.parse
         ? marked.parse(cleanContent)
-        : escapeHtml(cleanContent));
-parsed = renderMentionInMessage(parsed);
-el.querySelector(".message-content").innerHTML = parsed;
-el.removeAttribute("id");
-setupMentionClickHandlers(el);
-if (!ChatState.isUserScrolling) scrollToBottom(true);
-}
-ChatState.streamingMessageId = null;
-ChatState.currentStreamingContent = "";
-ChatState.streamingBuffer = "";
+        : escapeHtml(cleanContent);
+    }
+    parsed = renderMentionInMessage(parsed);
+    el.querySelector(".message-content").innerHTML = parsed;
+    el.removeAttribute("id");
+    setupMentionClickHandlers(el);
+    if (!ChatState.isUserScrolling) scrollToBottom(true);
+  }
+  ChatState.streamingMessageId = null;
+  ChatState.currentStreamingContent = "";
+  ChatState.streamingBuffer = "";
 }
 
 function processMessage(data) {