new(basic.gblib): SEND FILE pdf as temporary images.

This commit is contained in:
me@rodrigorodriguez.com 2024-10-02 18:39:25 -03:00
parent d161a23005
commit 4fb7eae1fc
5 changed files with 120 additions and 72 deletions

View file

@ -37,7 +37,7 @@ import urlJoin from 'url-join';
import { GBServer } from '../../../src/app.js';
import { GBDeployer } from '../../core.gbapp/services/GBDeployer.js';
import { SecService } from '../../security.gbapp/services/SecService.js';
import {Jimp} from 'jimp';
import { Jimp } from 'jimp';
import jsQR from 'jsqr';
import { SystemKeywords } from './SystemKeywords.js';
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService.js';
@ -254,28 +254,28 @@ export class DialogKeywords {
*
* @example EXIT
*/
public async exit({}) {}
public async exit({ }) { }
/**
* Get active tasks.
*
* @example list = ACTIVE TASKS
*/
public async getActiveTasks({ pid }) {}
public async getActiveTasks({ pid }) { }
/**
* Creates a new deal.
*
* @example CREATE DEAL dealname,contato,empresa,amount
*/
public async createDeal({ pid, dealName, contact, company, amount }) {}
public async createDeal({ pid, dealName, contact, company, amount }) { }
/**
* Finds contacts in XRM.
*
* @example list = FIND CONTACT "Sandra"
*/
public async fndContact({ pid, name }) {}
public async fndContact({ pid, name }) { }
public getContentLocaleWithCulture(contentLocale) {
switch (contentLocale) {
@ -936,7 +936,7 @@ export class DialogKeywords {
* @example MENU
*
*/
public async showMenu({}) {
public async showMenu({ }) {
// https://github.com/GeneralBots/BotServer/issues/237
// return await beginDialog('/menu');
}
@ -1455,6 +1455,36 @@ export class DialogKeywords {
let nameOnly;
const gbaiName = GBUtil.getGBAIPath(min.botId);
if (filename.endsWith('.pdf')) {
const gbdriveName = GBUtil.getGBAIPath(min.botId, 'gbdrive');
const pdf = path.join(GBConfigService.get('STORAGE_LIBRARY'), gbdriveName, filename);
const pngs = await GBUtil.pdfPageAsImage(min, pdf, undefined);
await CollectionUtil.asyncForEach(pngs, async png => {
// Prepare a cache to be referenced by Bot Framework.
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(png.localName));
const contentType = mime.lookup(url);
const reply = { type: ActivityTypes.Message, text: caption };
reply['attachments'] = [];
reply['attachments'].push({
name: nameOnly,
contentType: contentType,
contentUrl: url
});
if (channel === 'omnichannel' || !user) {
await min.conversationalService.sendFile(min, null, mobile, url, caption);
} else {
await min.conversationalService['sendOnConversation'](min, user, reply);
}
});
}
// Web automation.
if (element) {
@ -1489,6 +1519,10 @@ export class DialogKeywords {
// .gbdrive direct sending.
else {
if (GBConfigService.get('STORAGE_NAME')) {
const ext = path.extname(filename);
const gbaiName = GBUtil.getGBAIPath(min.botId);
@ -1516,6 +1550,7 @@ export class DialogKeywords {
url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName1));
}
}
if (!url) {
const ext = path.extname(filename.localName);

View file

@ -775,12 +775,12 @@ export class KeywordsExpressions {
// Handles the GET http version.
else {
const value = $2.replace(/\`/gi, '');
if (value.endsWith('.pdf') && !value.startsWith('https')) {
return `${$1} = await sys.getPdf({pid: pid, file: ${$2}});`;
} else {
return `
if (${$2}.endsWith('.pdf') && !${$2}.startsWith('https')) {
${$1} = await sys.getPdf({pid: pid, file: ${$2}});
} else {
let __${$1} = null
await retry(
@ -791,9 +791,8 @@ export class KeywordsExpressions {
${$1} = __${$1}
__${$1} = null
`;
}
`;
}
}
];

View file

@ -952,7 +952,7 @@ export class SystemKeywords {
GBLogEx.info(min, `GET '${addressOrHeaders}' in '${file}'.`);
let { baseUrl, client } = await GBDeployer.internalGetDriveClient(min);
const botId = min.instance.botId;
('');
const packagePath = GBUtil.getGBAIPath(botId, 'gbdata');
let document = await this.internalGetDocument(client, baseUrl, packagePath, file);

View file

@ -149,8 +149,8 @@ export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
const localName = path.join(process.env.PWD, 'work', gbaiName, 'docs', source.file);
if (localName) {
const { url } = await ChatServices.pdfPageAsImage(this.min, localName, source.page);
text = `![alt text](${url})
const pngs = await GBUtil.pdfPageAsImage(this.min, localName, source.page);
text = `![alt text](${pngs[0].url})
${text}`;
found = true;
source.file = localName;
@ -167,30 +167,7 @@ export class GBLLMOutputParser extends BaseLLMOutputParser<ExpectedOutput> {
}
export class ChatServices {
public static async pdfPageAsImage(min, filename, pageNumber) {
// Converts the PDF to PNG.
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber}...`);
const pngPages: PngPageOutput[] = await pdfToPng(filename, {
disableFontFace: true,
useSystemFonts: true,
viewportScale: 2.0,
pagesToProcess: [pageNumber],
strictPagesToProcess: false,
verbosityLevel: 0
});
// Prepare an image on cache and return the GBFILE information.
if (pngPages.length > 0) {
const buffer = pngPages[0].content;
const gbaiName = GBUtil.getGBAIPath(min.botId, null);
const localName = path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName));
await fs.writeFile(localName, buffer, { encoding: null });
return { localName: localName, url: url, data: buffer };
}
}
private static async getRelevantContext(
vectorStore: HNSWLib,

View file

@ -42,8 +42,12 @@ import { VerbosityLevel, getDocument } from 'pdfjs-dist/legacy/build/pdf.mjs';
VerbosityLevel.ERRORS = 0;
VerbosityLevel.WARNINGS = 0;
VerbosityLevel.INFOS = 0;
import { Page } from 'puppeteer';
import urljoin from 'url-join';
import { GBAdminService } from '../packages/admin.gbapp/services/GBAdminService.js';
import { GBLogEx } from '../packages/core.gbapp/services/GBLogEx.js';
import { PngPageOutput, pdfToPng } from 'pdf-to-png-converter';
import urlJoin from 'url-join';
import { GBServer } from './app.js';
export class GBUtil {
public static repeat(chr, count) {
@ -244,4 +248,37 @@ export class GBUtil {
return urljoin(gbai, packageName ? packageName : `${botId}.${packageType}`);
}
}
public static async pdfPageAsImage(min, filename, pageNumber) {
// Converts the PDF to PNG.
GBLogEx.info(min, `Converting ${filename}, page: ${pageNumber ?? 'all'}...`);
const options = {
disableFontFace: true,
useSystemFonts: true,
viewportScale: 2.0,
pagesToProcess: pageNumber !== undefined ? [pageNumber] : undefined,
strictPagesToProcess: false,
verbosityLevel: 0
};
const pngPages: PngPageOutput[] = await pdfToPng(filename, options);
const generatedFiles = [];
for (const pngPage of pngPages) {
const buffer = pngPage.content;
const gbaiName = GBUtil.getGBAIPath(min.botId, null);
const localName = path.join('work', gbaiName, 'cache', `img${GBAdminService.getRndReadableIdentifier()}.png`);
const url = urlJoin(GBServer.globals.publicAddress, min.botId, 'cache', path.basename(localName));
await fs.writeFile(localName, buffer, { encoding: null });
generatedFiles.push({ localName: localName, url: url, data: buffer });
}
return generatedFiles.length > 0 ? generatedFiles : null;
}
}