From b392d508c34f97486bec4cde419c04546f9c55ff Mon Sep 17 00:00:00 2001 From: Rodrigo Rodriguez Date: Thu, 30 Apr 2026 22:21:46 -0300 Subject: [PATCH] Fix HTML streaming: accumulate chunks and render only on is_complete - botui/chat-messages.js: HTML chunks now accumulated without rendering, only showing loading indicator. When is_complete=true, full HTML rendered at once. Text/markdown continues streaming normally. - botserver/mod.rs: Remove unused html_buffer variable - drive_monitor/monitor.rs: Change CHECK_INTERVAL_SECS from 1 to 2 - CI workflow: Fix paths to use target/fast/ instead of target/debug/ and target/release/ --- .forgejo/workflows/botserver.yaml | 10 +-- DEV-DEPENDENCIES.sh | 29 ------- .../src/02-architecture-packages/building.md | 28 +++++-- botbook/src/12-ecosystem-reference/ci-cd.md | 4 +- botserver/src/core/bot/mod.rs | 27 +------ botserver/src/drive/drive_monitor/monitor.rs | 2 +- botserver/src/main.rs | 2 +- botui/ui/suite/chat/chat-messages.js | 75 ++++++++++--------- 8 files changed, 71 insertions(+), 106 deletions(-) diff --git a/.forgejo/workflows/botserver.yaml b/.forgejo/workflows/botserver.yaml index 3b25a3dd..9aa46eea 100644 --- a/.forgejo/workflows/botserver.yaml +++ b/.forgejo/workflows/botserver.yaml @@ -40,11 +40,11 @@ steps: run: | echo "=== Deploying to Stage ===" scp -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \ - /opt/gbo/work/generalbots/target/debug/botserver \ - gbuser@system:/opt/gbo/bin/botserver-new - scp -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \ - /opt/gbo/work/generalbots/target/release/botui \ - gbuser@system:/opt/gbo/bin/botui-new + /opt/gbo/work/generalbots/target/fast/botserver \ + gbuser@system:/opt/gbo/bin/botserver-new + scp -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \ + /opt/gbo/work/generalbots/target/fast/botui \ + gbuser@system:/opt/gbo/bin/botui-new ssh -i /home/gbuser/.ssh/id_ed25519 -o StrictHostKeyChecking=no \ gbuser@system \ "sudo systemctl stop botserver || true && \ diff --git a/DEV-DEPENDENCIES.sh b/DEV-DEPENDENCIES.sh index 5cc5249e..57bd0816 100755 --- a/DEV-DEPENDENCIES.sh +++ b/DEV-DEPENDENCIES.sh @@ -25,33 +25,6 @@ install_rust() { fi } -install_sccache() { - WANT_VER="0.14.0" - CURRENT_VER="" - if command -v sccache &> /dev/null; then - CURRENT_VER=$(sccache --version 2>/dev/null | grep -oP '[\d.]+' | head -1) - fi - if [ "$CURRENT_VER" = "$WANT_VER" ]; then - echo "sccache $WANT_VER already installed" - else - echo "Upgrading sccache from ${CURRENT_VER:-none} to $WANT_VER..." - rm -f /usr/local/bin/sccache /usr/local/bin/sccache-dist - ARCH=$(uname -m) - curl -L "https://github.com/mozilla/sccache/releases/download/v${WANT_VER}/sccache-v${WANT_VER}-${ARCH}-unknown-linux-musl.tar.gz" -o /tmp/sccache.tar.gz - tar -xzf /tmp/sccache.tar.gz -C /tmp - cp "/tmp/sccache-v${WANT_VER}-${ARCH}-unknown-linux-musl/sccache" /usr/local/bin/sccache.real - chmod +x /usr/local/bin/sccache.real - rm -rf /tmp/sccache* - fi - # Install wrapper that unsets CARGO_INCREMENTAL before calling sccache.real - cat > /usr/local/bin/sccache << 'EOF' -#!/bin/bash -unset CARGO_INCREMENTAL -exec /usr/local/bin/sccache.real "$@" -EOF - chmod +x /usr/local/bin/sccache - sccache --version -} install_mold() { if command -v mold &> /dev/null; then @@ -92,7 +65,6 @@ CARGOCONF } install_rust -install_sccache install_mold install_cargo_tools setup_cargo_config @@ -101,7 +73,6 @@ echo "" echo "✅ Dev environment ready:" echo " Rust: $(rustc --version)" echo " Linker: clang + lld + mold" -echo " Cache: sccache" echo " Audit: cargo-audit, cargo-machete, cargo-tree" echo "📦 .cargo/config.toml configured" echo "⚡ Build: cargo build -p botserver --bin botserver" diff --git a/botbook/src/02-architecture-packages/building.md b/botbook/src/02-architecture-packages/building.md index a4bdd199..98af6d31 100644 --- a/botbook/src/02-architecture-packages/building.md +++ b/botbook/src/02-architecture-packages/building.md @@ -79,6 +79,7 @@ sudo apt update sudo apt install -y \ clang \ lld \ + mold \ build-essential \ pkg-config \ libssl-dev \ @@ -111,7 +112,7 @@ mkdir -p ~/.cargo cat >> ~/.cargo/config.toml << EOF [target.x86_64-unknown-linux-gnu] linker = "clang" -rustflags = ["-C", "link-arg=-fuse-ld=lld"] +rustflags = ["-C", "link-arg=-fuse-ld=mold"] # or "lld" EOF ``` @@ -123,6 +124,7 @@ EOF sudo dnf install -y \ clang \ lld \ + mold \ gcc \ gcc-c++ \ make \ @@ -156,7 +158,7 @@ mkdir -p ~/.cargo cat >> ~/.cargo/config.toml << EOF [target.x86_64-unknown-linux-gnu] linker = "clang" -rustflags = ["-C", "link-arg=-fuse-ld=lld"] +rustflags = ["-C", "link-arg=-fuse-ld=mold"] # or "lld" EOF ``` @@ -192,9 +194,9 @@ git submodule update --init --recursive ## Build Cache with sccache -sccache caches compilation artifacts for faster rebuilds. +sccache (Shared Compilation Cache) caches compilation artifacts to accelerate rebuilds across different environments. -Install and configure: +### Installation & Configuration ```bash cargo install sccache @@ -204,14 +206,26 @@ compiler = "sccache"' >> ~/.cargo/config.toml export RUSTC_WRAPPER=sccache ``` -Verify cache hits: +### Ephemeral vs. Persistent Environments +The decision to use `sccache` depends heavily on your build environment: + +#### 1. When to use sccache (Ephemeral/Clean CI) +Use `sccache` in environments that start with a **clean disk** for every build (e.g., standard GitHub Actions, cloud-based CI). Since the `target/` directory is lost between runs, `sccache` allows you to recover compiled artifacts from a remote bucket (S3/GCS) or global persistent cache, saving hours of dependency recompilation. + +#### 2. When to avoid sccache (Persistent/Self-hosted) +If you are using a **self-hosted runner** with a **persistent `target/` directory** (like the standard General Bots production setup), Cargo's native incremental logic is usually faster than `sccache`. +- **Reason**: Cargo native incrementalism only checks file timestamps and metadata. `sccache` must calculate cryptographic hashes of every source file, which adds overhead. +- **Recommendation**: For persistent runners, rely on `CARGO_INCREMENTAL=1` and a fast linker like `mold` or `lld`. + +### Monitoring and Maintenance + +Verify cache hits: ```bash sccache --show-stats ``` Clear cache if needed: - ```bash sccache --zero-stats ``` @@ -494,7 +508,7 @@ $env:PQ_LIB_DIR="C:\Program Files\PostgreSQL\15\lib" ### Out of Memory During Build -Use sccache to cache compilations: +Use sccache to cache compilations (helps avoid recompilation in ephemeral CI, but does not reduce peak memory of a single crate): ```bash cargo install sccache diff --git a/botbook/src/12-ecosystem-reference/ci-cd.md b/botbook/src/12-ecosystem-reference/ci-cd.md index d1943755..d7b3a3d5 100644 --- a/botbook/src/12-ecosystem-reference/ci-cd.md +++ b/botbook/src/12-ecosystem-reference/ci-cd.md @@ -143,7 +143,7 @@ sudo incus exec alm-ci -- tail -f /opt/gbo/logs/forgejo-runner.log | grep -E "Cl | Rust compilation (incremental) | 30-60 seconds | | First build (dependencies) | Downloads ~200 crates | | Deploy step | ~5 seconds | -| Total CI time | 2-6 minutes depending on cache | +| Total CI time | 2-4 minutes (incremental with mold) | --- @@ -173,7 +173,7 @@ curl -sf https:///api/health && echo "OK" || echo "FAILED" - **User:** gbuser (uid 1000) - **Workspace:** /opt/gbo/data/ - **SSH deploy key:** /home/gbuser/.ssh/id_ed25519 -- **sccache:** /usr/local/bin/sccache (via RUSTC_WRAPPER=sccache) +- **Cache:** Native Cargo incremental (target/ persistence) - **Cargo cache:** /home/gbuser/.cargo/ - **Rustup:** /home/gbuser/.rustup/ diff --git a/botserver/src/core/bot/mod.rs b/botserver/src/core/bot/mod.rs index a7d02028..ca22d27b 100644 --- a/botserver/src/core/bot/mod.rs +++ b/botserver/src/core/bot/mod.rs @@ -1054,8 +1054,7 @@ let system_prompt = if !message.active_switchers.is_empty() { let mut in_analysis = false; let mut tool_call_buffer = String::new(); // Accumulate potential tool call JSON chunks let mut accumulating_tool_call = false; // Track if we're currently accumulating a tool call - let mut html_buffer = String::new(); // Buffer for HTML content - let handler = llm_models::get_handler(&model); + let handler = llm_models::get_handler(&model); trace!("Using model handler for {}", model); info!("llm_start: Starting LLM streaming for session {}", session.id); @@ -1438,29 +1437,7 @@ if !in_analysis { #[cfg(not(feature = "chat"))] let switchers: Vec = Vec::new(); - // Flush any remaining HTML buffer before sending final response - if !html_buffer.is_empty() { - trace!("Flushing remaining {} chars in HTML buffer", html_buffer.len()); - let final_chunk = BotResponse { - bot_id: message.bot_id.clone(), - user_id: message.user_id.clone(), - session_id: message.session_id.clone(), - channel: message.channel.clone(), - content: html_buffer.clone(), - message_type: MessageType::BOT_RESPONSE, - stream_token: None, - is_complete: false, - suggestions: Vec::new(), - switchers: Vec::new(), - context_name: None, - context_length: 0, - context_max_length: 0, - }; - let _ = response_tx.send(final_chunk).await; - html_buffer.clear(); - } - - // Content was already sent as streaming chunks. + // Content was already sent as streaming chunks. // Sending full_response again would duplicate it (especially for WhatsApp which accumulates buffer). // The final response is just a signal that streaming is complete - it should not contain content. let final_content = String::new(); diff --git a/botserver/src/drive/drive_monitor/monitor.rs b/botserver/src/drive/drive_monitor/monitor.rs index bca34d34..0134b7b5 100644 --- a/botserver/src/drive/drive_monitor/monitor.rs +++ b/botserver/src/drive/drive_monitor/monitor.rs @@ -4,7 +4,7 @@ use std::time::Duration; use super::types::DriveMonitor; /// Intervalo de verificação do DriveMonitor e DriveCompiler (em segundos) -pub const CHECK_INTERVAL_SECS: u64 = 1; +pub const CHECK_INTERVAL_SECS: u64 = 2; impl DriveMonitor { pub fn calculate_backoff(&self) -> Duration { diff --git a/botserver/src/main.rs b/botserver/src/main.rs index 5ac91baa..6c1afc79 100644 --- a/botserver/src/main.rs +++ b/botserver/src/main.rs @@ -256,7 +256,7 @@ rustls=off,rustls_pemfile=off,tokio_rustls=off,\ Ok(existing) if !existing.is_empty() => format!("{},{}", existing, noise_filters), _ => format!("info,{}", noise_filters), }; -// Test mold+sccache build +// Test mold+incremental build std::env::set_var("RUST_LOG", &rust_log); diff --git a/botui/ui/suite/chat/chat-messages.js b/botui/ui/suite/chat/chat-messages.js index 0da8fbb9..fabee638 100644 --- a/botui/ui/suite/chat/chat-messages.js +++ b/botui/ui/suite/chat/chat-messages.js @@ -92,49 +92,52 @@ function isTagBalanced(html) { } function updateStreaming(content) { -var el = document.getElementById(ChatState.streamingMessageId); -if (!el) return; + var el = document.getElementById(ChatState.streamingMessageId); + if (!el) return; -var msgContent = el.querySelector(".message-content"); -var cleanContent = stripMarkdownBlocks(content); -var isHtml = /<\/?[a-zA-Z][^>]*>|/i.test(cleanContent); + var msgContent = el.querySelector(".message-content"); + var cleanContent = stripMarkdownBlocks(content); + var isHtml = /<\/?[a-zA-Z][^>]*>|/i.test(cleanContent); -if (isHtml) { - if (isTagBalanced(cleanContent) || (Date.now() - ChatState.lastRenderTime > 2000)) { - msgContent.innerHTML = renderMentionInMessage(cleanContent); // Don't escape HTML - ChatState.lastRenderTime = Date.now(); - if (!ChatState.isUserScrolling) scrollToBottom(true); + if (isHtml) { + if (!el.querySelector(".streaming-loading")) { + var loader = document.createElement("div"); + loader.className = "streaming-loading"; + loader.innerHTML = '...'; + msgContent.appendChild(loader); } -} - else { -var parsed = typeof marked !== "undefined" && marked.parse -? marked.parse(cleanContent) -: escapeHtml(cleanContent); -parsed = renderMentionInMessage(parsed); -msgContent.innerHTML = parsed; -if (!ChatState.isUserScrolling) scrollToBottom(true); -} + } else { + var parsed = typeof marked !== "undefined" && marked.parse + ? marked.parse(cleanContent) + : escapeHtml(cleanContent); + parsed = renderMentionInMessage(parsed); + msgContent.innerHTML = parsed; + if (!ChatState.isUserScrolling) scrollToBottom(true); + } } function finalizeStreaming() { -var el = document.getElementById(ChatState.streamingMessageId); -if (el) { -var cleanContent = stripMarkdownBlocks(ChatState.currentStreamingContent); -var hasHtmlTags = /<\/?[a-zA-Z][^>]*>|/i.test(cleanContent); -var parsed = hasHtmlTags - ? cleanContent // Don't escape HTML - : (typeof marked !== "undefined" && marked.parse + var el = document.getElementById(ChatState.streamingMessageId); + if (el) { + var cleanContent = stripMarkdownBlocks(ChatState.currentStreamingContent); + var hasHtmlTags = /<\/?[a-zA-Z][^>]*>|/i.test(cleanContent); + var parsed; + if (hasHtmlTags) { + parsed = cleanContent; + } else { + parsed = typeof marked !== "undefined" && marked.parse ? marked.parse(cleanContent) - : escapeHtml(cleanContent)); -parsed = renderMentionInMessage(parsed); -el.querySelector(".message-content").innerHTML = parsed; -el.removeAttribute("id"); -setupMentionClickHandlers(el); -if (!ChatState.isUserScrolling) scrollToBottom(true); -} -ChatState.streamingMessageId = null; -ChatState.currentStreamingContent = ""; -ChatState.streamingBuffer = ""; + : escapeHtml(cleanContent); + } + parsed = renderMentionInMessage(parsed); + el.querySelector(".message-content").innerHTML = parsed; + el.removeAttribute("id"); + setupMentionClickHandlers(el); + if (!ChatState.isUserScrolling) scrollToBottom(true); + } + ChatState.streamingMessageId = null; + ChatState.currentStreamingContent = ""; + ChatState.streamingBuffer = ""; } function processMessage(data) {