new(basic.gblib): SEND FILE pdf as temporary images.

This commit is contained in:
me@rodrigorodriguez.com 2024-10-02 18:39:25 -03:00
parent d161a23005
commit 4fb7eae1fc
5 changed files with 120 additions and 72 deletions

View file

@ -37,7 +37,7 @@ import urlJoin from 'url-join';
import { GBServer } from '../../../src/app.js'; import { GBServer } from '../../../src/app.js';
import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js'; import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js';
import { SecService } from '../../security.gbapp/services/SecService.js'; import { SecService } from '../../security.gbapp/services/SecService.js';
import {Jimp} from 'jimp'; import { Jimp } from 'jimp';
import jsQR from 'jsqr'; import jsQR from 'jsqr';
import { SystemKeywords } from './SystemKeywords.js'; import { SystemKeywords } from './SystemKeywords.js';
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js'; import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
@ -254,28 +254,28 @@ export class DialogKeywords {
* *
* @example EXIT * @example EXIT
*/ */
public async exit({}) {} public async exit({ }) { }
/** /**
* Get active tasks. * Get active tasks.
* *
* @example list = ACTIVE TASKS * @example list = ACTIVE TASKS
*/ */
public async getActiveTasks({ pid }) {} public async getActiveTasks({ pid }) { }
/** /**
* Creates a new deal. * Creates a new deal.
* *
* @example CREATE DEAL dealname,contato,empresa,amount * @example CREATE DEAL dealname,contato,empresa,amount
*/ */
public async createDeal({ pid, dealName, contact, company, amount }) {} public async createDeal({ pid, dealName, contact, company, amount }) { }
/** /**
* Finds contacts in XRM. * Finds contacts in XRM.
* *
* @example list = FIND CONTACT "Sandra" * @example list = FIND CONTACT "Sandra"
*/ */
public async fndContact({ pid, name }) {} public async fndContact({ pid, name }) { }
public getContentLocaleWithCulture(contentLocale) { public getContentLocaleWithCulture(contentLocale) {
switch (contentLocale) { switch (contentLocale) {
@ -936,7 +936,7 @@ export class DialogKeywords {
* @example MENU * @example MENU
* *
*/ */
public async showMenu({}) { public async showMenu({ }) {
// https://github.com/GeneralBots/BotServer/issues/237 // https://github.com/GeneralBots/BotServer/issues/237
// return await beginDialog('/menu'); // return await beginDialog('/menu');
} }
@ -1215,20 +1215,20 @@ export class DialogKeywords {
const handle = WebAutomationServices.cyrb53({ pid, str: min.botId + answer.filename }); const handle = WebAutomationServices.cyrb53({ pid, str: min.botId + answer.filename });
GBServer.globals.files[handle] = answer; GBServer.globals.files[handle] = answer;
// Load the image with Jimp // Load the image with Jimp
const image = await Jimp.read(answer.data); const image = await Jimp.read(answer.data);
// Get the image data // Get the image data
const imageData = { const imageData = {
data: new Uint8ClampedArray(image.bitmap.data), data: new Uint8ClampedArray(image.bitmap.data),
width: image.bitmap.width, width: image.bitmap.width,
height: image.bitmap.height, height: image.bitmap.height,
}; };
// Use jsQR to decode the QR code // Use jsQR to decode the QR code
const decodedQR = jsQR(imageData.data, imageData.width, imageData.height); const decodedQR = jsQR(imageData.data, imageData.width, imageData.height);
result = decodedQR.data; result = decodedQR.data;
} else if (kind === 'zipcode') { } else if (kind === 'zipcode') {
const extractEntity = (text: string) => { const extractEntity = (text: string) => {
@ -1455,6 +1455,36 @@ export class DialogKeywords {
let nameOnly; let nameOnly;
const gbaiName = GBUtil.getGBAIPath(min.botId); const gbaiName = GBUtil.getGBAIPath(min.botId);
if (filename.endsWith('.pdf')) {
const gbdriveName = GBUtil.getGBAIPath(min.botId, 'gbdrive');
const pdf = path.join(GBConfigService.get('STORAGE_LIBRARY'), gbdriveName, filename);
const pngs = await GBUtil.pdfPageAsImage(min, pdf, undefined);
await CollectionUtil.asyncForEach(pngs, async png => {
// Prepare a cache to be referenced by Bot Framework.
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(png.localName));
const contentType = mime.lookup(url);
const reply = { type: ActivityTypes.Message, text: caption };
reply['attachments'] = [];
reply['attachments'].push({
name: nameOnly,
contentType: contentType,
contentUrl: url
});
if (channel === 'omnichannel' || !user) {
await min.conversationalService.sendFile(min, null, mobile, url, caption);
} else {
await min.conversationalService['sendOnConversation'](min, user, reply);
}
});
}
// Web automation. // Web automation.
if (element) { if (element) {
@ -1489,32 +1519,37 @@ export class DialogKeywords {
// .gbdrive direct sending. // .gbdrive direct sending.
else { else {
const ext = path.extname(filename);
const gbaiName = GBUtil.getGBAIPath(min.botId);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const fileUrl = urlJoin('/', gbaiName, `${min.botId}.gbdrive`, filename);
GBLogEx.info(min, `Direct send from .gbdrive: ${fileUrl} to ${mobile}.`);
const sys = new SystemKeywords(); if (GBConfigService.get('STORAGE_NAME')) {
const pathOnly = fileUrl.substring(0, fileUrl.lastIndexOf('/')); const ext = path.extname(filename);
const fileOnly = fileUrl.substring(fileUrl.lastIndexOf('/') + 1); const gbaiName = GBUtil.getGBAIPath(min.botId);
let template = await sys.internalGetDocument(client, baseUrl, pathOnly, fileOnly); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const fileUrl = urlJoin('/', gbaiName, `${min.botId}.gbdrive`, filename);
GBLogEx.info(min, `Direct send from .gbdrive: ${fileUrl} to ${mobile}.`);
const driveUrl = template['@microsoft.graph.downloadUrl']; const sys = new SystemKeywords();
const res = await fetch(driveUrl);
let buf: any = Buffer.from(await res.arrayBuffer());
let localName1 = path.join(
'work',
gbaiName,
'cache',
`${fileOnly.replace(/\s/gi, '')}-${GBAdminService.getNumberIdentifier()}.${ext}`
);
await fs.writeFile(localName1, buf, { encoding: null });
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName1)); const pathOnly = fileUrl.substring(0, fileUrl.lastIndexOf('/'));
const fileOnly = fileUrl.substring(fileUrl.lastIndexOf('/') + 1);
let template = await sys.internalGetDocument(client, baseUrl, pathOnly, fileOnly);
const driveUrl = template['@microsoft.graph.downloadUrl'];
const res = await fetch(driveUrl);
let buf: any = Buffer.from(await res.arrayBuffer());
let localName1 = path.join(
'work',
gbaiName,
'cache',
`${fileOnly.replace(/\s/gi, '')}-${GBAdminService.getNumberIdentifier()}.${ext}`
);
await fs.writeFile(localName1, buf, { encoding: null });
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName1));
}
} }
if (!url) { if (!url) {

View file

@ -775,14 +775,14 @@ export class KeywordsExpressions {
// Handles the GET http version. // Handles the GET http version.
else { else {
const value = $2.replace(/\`/gi, ''); return `
if (value.endsWith('.pdf') && !value.startsWith('https')) { if (${$2}.endsWith('.pdf') && !${$2}.startsWith('https')) {
return `${$1} = await sys.getPdf({pid: pid, file: ${$2}});`; ${$1} = await sys.getPdf({pid: pid, file: ${$2}});
} else { } else {
return `
let __${$1} = null let __${$1} = null
await retry( await retry(
async (bail) => { async (bail) => {
await ensureTokens(); await ensureTokens();
@ -790,11 +790,10 @@ export class KeywordsExpressions {
},{ retries: 5}); },{ retries: 5});
${$1} = __${$1} ${$1} = __${$1}
__${$1} = null __${$1} = null
}
`; `;
} }
}
} }
]; ];

View file

@ -952,7 +952,7 @@ export class SystemKeywords {
GBLogEx.info(min, `GET '${addressOrHeaders}' in '${file}'.`); GBLogEx.info(min, `GET '${addressOrHeaders}' in '${file}'.`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min); let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId; const botId = min.instance.botId;
('');
const packagePath = GBUtil.getGBAIPath(botId, 'gbdata'); const packagePath = GBUtil.getGBAIPath(botId, 'gbdata');
let document = await this.internalGetDocument(client, baseUrl, packagePath, file); let document = await this.internalGetDocument(client, baseUrl, packagePath, file);

View file

@ -149,8 +149,8 @@ export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
const localName = path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file); const localName = path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file);
if (localName) { if (localName) {
const { url } = await ChatServices.pdfPageAsImage(this.min, localName, source.page); const pngs = await GBUtil.pdfPageAsImage(this.min, localName, source.page);
text = `![alt text](${url}) text = `![alt text](${pngs[0].url})
${text}`; ${text}`;
found = true; found = true;
source.file = localName; source.file = localName;
@ -167,30 +167,7 @@ export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
} }
export class ChatServices { export class ChatServices {
/**
 * Renders a single page of a PDF file to a PNG image written to the bot's
 * cache folder.
 *
 * @param min Bot instance; `min.botId` is used to build the cache path and the public URL.
 * @param filename Path of the PDF file to convert.
 * @param pageNumber Page requested from the converter (passed as the only entry of `pagesToProcess`).
 * @returns `{ localName, url, data }` for the first rendered page. When the converter
 * yields no pages the function falls off the end and resolves to `undefined`.
 */
public static async pdfPageAsImage(min, filename, pageNumber) {
// Converts the PDF to PNG.
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
const pngPages: PngPageOutput[] = await pdfToPng(filename, {
disableFontFace: true,
useSystemFonts: true,
viewportScale: 2.0,
pagesToProcess: [pageNumber],
// Non-strict mode is requested explicitly for the page list above.
strictPagesToProcess: false,
verbosityLevel: 0
});
// Prepare an image on cache and return the GBFILE information.
// Only the first converted page is used; any further pages are ignored.
if (pngPages.length > 0) {
const buffer = pngPages[0].content;
const gbaiName = GBUtil.getGBAIPath(min.botId, null);
// A random readable identifier is used for the cached file name.
const localName = path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
// Public URL built from the server address, the bot id and the cached file's base name.
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName));
await fs.writeFile(localName, buffer, { encoding: null });
return { localName: localName, url: url, data: buffer };
}
// NOTE(review): no explicit return here, so callers destructuring the result
// will throw when no page was produced.
}
private static async getRelevantContext( private static async getRelevantContext(
vectorStore: HNSWLib, vectorStore: HNSWLib,

View file

@ -42,8 +42,12 @@ import { VerbosityLevel, getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
VerbosityLevel.ERRORS = 0; VerbosityLevel.ERRORS = 0;
VerbosityLevel.WARNINGS = 0; VerbosityLevel.WARNINGS = 0;
VerbosityLevel.INFOS = 0; VerbosityLevel.INFOS = 0;
import { Page } from 'puppeteer';
import urljoin from 'url-join'; import urljoin from 'url-join';
import { GBAdminService } from '../packages/admin.gbapp/services/GBAdminService.js';
import { GBLogEx } from '../packages/core.gbapp/services/GBLogEx.js';
import { PngPageOutput, pdfToPng } from 'pdf-to-png-converter';
import urlJoin from 'url-join';
import { GBServer } from './app.js';
export class GBUtil { export class GBUtil {
public static repeat(chr, count) { public static repeat(chr, count) {
@ -244,4 +248,37 @@ export class GBUtil {
return urljoin(gbai, packageName ? packageName : `${botId}.${packageType}`); return urljoin(gbai, packageName ? packageName : `${botId}.${packageType}`);
} }
} }
/**
 * Renders pages of a PDF file to PNG images written to the bot's cache folder.
 *
 * @param min Bot instance; `min.botId` is used to build the cache path and the public URL.
 * @param filename Path of the PDF file to convert.
 * @param pageNumber Page to render. When `undefined` or `null`, no page filter is
 * passed to the converter (this is what the log line reports as 'all').
 * @returns An array with one `{ localName, url, data }` entry per rendered page,
 * or `null` when the converter produced no pages.
 */
public static async pdfPageAsImage(min, filename, pageNumber) {
  // Treat null exactly like undefined so the page filter agrees with the
  // `pageNumber ?? 'all'` wording used in the log message below.
  const singlePage = pageNumber != null;

  // Converts the PDF to PNG.
  GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber ?? 'all'}...`);

  const options = {
    disableFontFace: true,
    useSystemFonts: true,
    viewportScale: 2.0,
    pagesToProcess: singlePage ? [pageNumber] : undefined,
    strictPagesToProcess: false,
    verbosityLevel: 0
  };

  const pngPages: PngPageOutput[] = await pdfToPng(filename, options);

  // The package path depends only on the bot, so resolve it once for all pages.
  const gbaiName = GBUtil.getGBAIPath(min.botId, null);
  const generatedFiles: { localName: string; url: string; data: PngPageOutput['content'] }[] = [];

  for (const pngPage of pngPages) {
    const buffer = pngPage.content;

    // Each page gets its own randomly named file in the bot's cache folder.
    const localName = path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
    const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName));

    await fs.writeFile(localName, buffer, { encoding: null });

    generatedFiles.push({ localName: localName, url: url, data: buffer });
  }

  // Kept as-is for existing callers: an empty result is reported as null, not [].
  return generatedFiles.length > 0 ? generatedFiles : null;
}
} }