// src/services/aiService.server.ts
/**
* @file This file contains all server-side functions that directly interact with the Google AI (Gemini) API.
* It is intended to be used only by the backend (e.g., server.ts) and should never be imported into client-side code.
* The `.server.ts` naming convention helps enforce this separation.
*/
import { GoogleGenAI, type GenerateContentResponse, type Content, type Tool } from '@google/genai';
import fsPromises from 'node:fs/promises';
import type { Logger } from 'pino';
import { z } from 'zod';
import { pRateLimit } from 'p-ratelimit';
import type {
FlyerItem,
MasterGroceryItem,
ExtractedFlyerItem,
UserProfile,
ExtractedCoreData,
FlyerInsert,
Flyer,
} from '../types';
import { FlyerProcessingError } from './processingErrors';
import * as db from './db/index.db';
import { flyerQueue } from './queueService.server';
import type { Job } from 'bullmq';
import { createFlyerAndItems } from './db/flyer.db';
import { getBaseUrl } from '../utils/serverUtils';
import { generateFlyerIcon, processAndSaveImage } from '../utils/imageProcessor';
import { AdminRepository } from './db/admin.db';
import path from 'path';
import { ValidationError } from './db/errors.db';
import { AiFlyerDataSchema, ExtractedFlyerItemSchema } from '../types/ai'; // Import consolidated schemas
interface FlyerProcessPayload extends Partial<ExtractedCoreData> {
checksum?: string;
originalFileName?: string;
extractedData?: Partial<ExtractedCoreData>;
data?: FlyerProcessPayload; // For nested data structures
}
// Helper to safely extract an error message from unknown `catch` values.
const errMsg = (e: unknown) => {
if (e instanceof Error) return e.message;
if (typeof e === 'object' && e !== null && 'message' in e)
return String((e as { message: unknown }).message);
return String(e || 'An unknown error occurred.');
};
/**
* Defines the contract for a file system utility. This interface allows for
* dependency injection, making the AIService testable without hitting the real file system.
*/
interface IFileSystem {
readFile(path: string): Promise<Buffer>;
}
/**
* Defines the contract for an AI model client. This allows for dependency injection,
* making the AIService testable without making real API calls to Google.
*/
interface IAiClient {
generateContent(request: {
contents: Content[];
tools?: Tool[];
useLiteModels?: boolean;
}): Promise<GenerateContentResponse>;
}
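// Illustrative sketch (not part of the runtime code): how these interfaces let unit tests inject
// fakes instead of the real file system and Google AI client. `testLogger` and the mocked values
// below are placeholders for illustration only.
//
// const mockFs: IFileSystem = { readFile: async () => Buffer.from('fake-image-bytes') };
// const mockAi: IAiClient = { generateContent: async () => ({ text: '{"items": []}' }) as unknown as GenerateContentResponse };
// const service = new AIService(testLogger, mockAi, mockFs); // passing a mock client also disables the rate limiter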
/**
* Defines the shape of a single flyer item as returned by the AI.
* This type is intentionally loose to accommodate potential null/undefined values
* from the AI before they are cleaned and normalized.
*/
export type RawFlyerItem = z.infer<typeof ExtractedFlyerItemSchema>;
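/**
 * Thrown when an uploaded flyer's checksum matches a flyer that already exists in the database.
 * Carries the existing flyer's ID so callers can report or redirect to the duplicate.
 */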
export class DuplicateFlyerError extends FlyerProcessingError {
constructor(
message: string,
public flyerId: number,
) {
super(message, 'DUPLICATE_FLYER', message);
}
}
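/**
 * Encapsulates all server-side interactions with the Google AI (Gemini) API: flyer and receipt
 * extraction, targeted image-area rescans, recipe suggestions, and enqueueing of flyer jobs.
 * The AI client and file system are injectable, and the rate limiter is bypassed when a mock
 * client is provided, which keeps the class testable.
 */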
export class AIService {
private aiClient: IAiClient;
private fs: IFileSystem;
private rateLimiter: <T>(fn: () => Promise<T>) => Promise<T>;
private logger: Logger;
// OPTIMIZED: Flyer Image Processing (Vision + Long Output)
// PRIORITIES:
// 1. Output Limit: Must be 65k+ (Gemini 2.5/3.0) to avoid cutting off data.
// 2. Intelligence: 'Pro' models handle messy layouts better.
// 3. Quota Management: 'Preview' and 'Exp' models are added as fallbacks to tap into separate rate limits.
private readonly models = [
// --- TIER A: The Happy Path (Fast & Stable) ---
'gemini-2.5-flash', // Primary workhorse. 65k output.
'gemini-2.5-flash-lite', // Cost-saver. 65k output.
// --- TIER B: The Heavy Lifters (Complex Layouts) ---
'gemini-2.5-pro', // High IQ for messy flyers. 65k output.
// --- TIER C: Separate Quota Buckets (Previews) ---
'gemini-3-flash-preview', // Newer/Faster. Separate 'Preview' quota. 65k output.
'gemini-3-pro-preview', // High IQ. Separate 'Preview' quota. 65k output.
// --- TIER D: Experimental Buckets (High Capacity) ---
'gemini-exp-1206', // Excellent reasoning. Separate 'Experimental' quota. 65k output.
// --- TIER E: Last Resorts (Lower Capacity/Local) ---
'gemma-3-27b-it', // Open model fallback.
'gemini-2.0-flash-exp', // Exp fallback. WARNING: 8k output limit. Good for small flyers only.
];
// OPTIMIZED: Simple Text Tasks (Recipes, Shopping Lists, Summaries)
// PRIORITIES:
// 1. Cost/Speed: These tasks are simple.
// 2. Output Limit: The 8k limit of Gemini 2.0 is perfectly fine here.
private readonly models_lite = [
// --- Best Value (Smart + Cheap) ---
'gemini-2.5-flash-lite', // Current generation efficiency king.
// --- The "Recycled" Gemini 2.0 Models (Perfect for Text) ---
'gemini-2.0-flash-lite-001', // Extremely cheap, very capable for text.
'gemini-2.0-flash-001', // Smarter than Lite, good for complex recipes.
// --- Open Models (Good for simple categorization) ---
'gemma-3-12b-it', // Solid reasoning for an open model.
'gemma-3-4b-it', // Very fast.
// --- Quota Fallbacks (Experimental/Preview) ---
'gemini-2.0-flash-exp', // Use this separate quota bucket if others are exhausted.
// --- Edge/Nano Models (Simple string manipulation only) ---
'gemma-3n-e4b-it', // Corrected name from JSON
'gemma-3n-e2b-it', // Corrected name from JSON
];
// Helper to return valid mock data for tests
private getMockFlyerData() {
return {
store_name: 'Mock Store from AIService',
valid_from: '2025-01-01',
valid_to: '2025-01-07',
store_address: '123 Mock St',
items: [
{
item: 'Mocked Integration Item',
price_display: '$1.99',
price_in_cents: 199,
quantity: 'each',
category_name: 'Mock Category',
master_item_id: null,
},
],
};
}
constructor(logger: Logger, aiClient?: IAiClient, fs?: IFileSystem) {
this.logger = logger;
this.logger.info('---------------- [AIService] Constructor Start ----------------');
// Use mock AI in test, staging, and development environments (no real API calls, no GEMINI_API_KEY needed)
const isTestEnvironment =
process.env.NODE_ENV === 'test' ||
process.env.NODE_ENV === 'staging' ||
process.env.NODE_ENV === 'development' ||
!!process.env.VITEST_POOL_ID;
if (aiClient) {
this.logger.info(
'[AIService Constructor] Using provided mock AI client. This indicates a UNIT TEST environment.',
);
this.aiClient = aiClient;
} else if (isTestEnvironment) {
this.logger.info(
'[AIService Constructor] Test environment detected. Using internal mock for AI client to prevent real API calls in INTEGRATION TESTS.',
);
this.aiClient = {
generateContent: async (request) => {
this.logger.info(
{ useLiteModels: request.useLiteModels },
'[AIService] Mock generateContent called in test environment.',
);
const mockData = this.getMockFlyerData();
return {
text: JSON.stringify(mockData),
} as unknown as GenerateContentResponse;
},
};
} else {
this.logger.info(
'[AIService Constructor] No mock client provided and not a test environment. Initializing Google GenAI client for PRODUCTION.',
);
const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
this.logger.error('[AIService] GEMINI_API_KEY is required in non-test environments.');
throw new Error('GEMINI_API_KEY environment variable not set for server-side AI calls.');
}
const genAI = new GoogleGenAI({ apiKey });
// We create a shim/adapter that matches the old structure but uses the new SDK call pattern.
// This preserves the dependency injection pattern used throughout the class.
this.aiClient = {
generateContent: async (request) => {
if (!request.contents || request.contents.length === 0) {
this.logger.error(
{ request },
'[AIService Adapter] generateContent called with no content, which is invalid.',
);
throw new Error('AIService.generateContent requires at least one content element.');
}
const { useLiteModels, ...apiReq } = request;
const models = useLiteModels ? this.models_lite : this.models;
return this._generateWithFallback(genAI, apiReq, models);
},
};
}
this.fs = fs || fsPromises;
if (aiClient) {
this.logger.warn(
'[AIService Constructor] Mock client detected. Rate limiter is DISABLED for testing.',
);
this.rateLimiter = <T>(fn: () => Promise<T>) => fn(); // Pass-through function
} else {
const requestsPerMinute = parseInt(process.env.GEMINI_RPM || '5', 10);
this.logger.info(
`[AIService Constructor] Initializing production rate limiter to ${requestsPerMinute} RPM.`,
);
this.rateLimiter = pRateLimit({
interval: 60 * 1000,
rate: requestsPerMinute,
concurrency: requestsPerMinute,
});
}
this.logger.info('---------------- [AIService] Constructor End ----------------');
}
private async _generateWithFallback(
genAI: GoogleGenAI,
request: { contents: Content[]; tools?: Tool[] },
models: string[] = this.models,
): Promise<GenerateContentResponse> {
let lastError: Error | null = null;
for (const modelName of models) {
try {
this.logger.info(
`[AIService Adapter] Attempting to generate content with model: ${modelName}`,
);
const result = await genAI.models.generateContent({ model: modelName, ...request });
// If the call succeeds, return the result immediately.
return result;
} catch (error: unknown) {
// Robust error message extraction to handle various error shapes (Error objects, JSON responses, etc.)
let errorMsg = '';
if (error instanceof Error) {
lastError = error;
errorMsg = error.message;
} else {
try {
if (typeof error === 'object' && error !== null && 'message' in error) {
errorMsg = String((error as { message: unknown }).message);
} else {
errorMsg = JSON.stringify(error);
}
} catch {
errorMsg = String(error);
}
lastError = new Error(errorMsg);
}
const lowerErrorMsg = errorMsg.toLowerCase();
// Check for specific error messages indicating quota issues or model unavailability.
if (
lowerErrorMsg.includes('quota') ||
lowerErrorMsg.includes('429') || // HTTP 429 Too Many Requests
lowerErrorMsg.includes('503') || // HTTP 503 Service Unavailable
lowerErrorMsg.includes('resource_exhausted') ||
lowerErrorMsg.includes('overloaded') || // Covers "model is overloaded"
lowerErrorMsg.includes('unavailable') || // Covers "Service Unavailable"
lowerErrorMsg.includes('not found') // Also retry if model is not found (e.g., regional availability or API version issue)
) {
this.logger.warn(
`[AIService Adapter] Model '${modelName}' failed due to quota/rate limit/overload. Trying next model. Error: ${errorMsg}`,
);
continue; // Try the next model in the list.
} else {
// For other errors (e.g., invalid input, safety settings), fail immediately.
this.logger.error(
{ error: lastError },
`[AIService Adapter] Model '${modelName}' failed with a non-retriable error.`,
);
throw lastError;
}
}
}
// If all models in the list have failed, throw the last error encountered.
this.logger.error(
{ lastError },
'[AIService Adapter] All AI models failed. Throwing last known error.',
);
throw lastError || new Error('All AI models failed to generate content.');
}
private async serverFileToGenerativePart(path: string, mimeType: string) {
const fileData = await this.fs.readFile(path);
return {
inlineData: {
data: fileData.toString('base64'),
mimeType,
},
};
}
/**
* Constructs the detailed prompt for the AI to extract flyer data.
* @param masterItems A list of known grocery items to aid in matching.
* @param submitterIp The IP address of the user who submitted the flyer.
* @param userProfileAddress The profile address of the user.
* @returns A formatted string to be used as the AI prompt.
*/
private _buildFlyerExtractionPrompt(
masterItems: MasterGroceryItem[],
submitterIp?: string,
userProfileAddress?: string,
): string {
let locationHint = '';
if (userProfileAddress) {
locationHint = `The user who uploaded this flyer has a profile address of "${userProfileAddress}". Use this as a strong hint for the store's location.`;
} else if (submitterIp) {
locationHint = `The user uploaded this flyer from an IP address that suggests a location. Use this as a general hint for the store's region.`;
}
// Optimization: Instead of sending the whole masterItems object, send only the necessary fields.
// This significantly reduces the number of tokens used in the prompt.
const simplifiedMasterList = masterItems.map((item) => ({
id: item.master_grocery_item_id,
name: item.name,
}));
return `
# TASK
Analyze the provided flyer image(s) and extract key information into a single, valid JSON object.
# RULES
1. Extract the following top-level details for the flyer:
- "store_name": The name of the grocery store (e.g., "Walmart", "No Frills").
- "valid_from": The start date of the sale in YYYY-MM-DD format. Use null if not present.
- "valid_to": The end date of the sale in YYYY-MM-DD format. Use null if not present.
- "store_address": The physical address of the store. Use null if not present. ${locationHint}
2. Extract each individual sale item into an "items" array. For each item, provide:
- "item": The name of the product (e.g., "Coca-Cola Classic").
- "price_display": The exact sale price as a string (e.g., "$2.99", "2 for $5.00"). If no price is visible, use an empty string "".
- "price_in_cents": The primary numeric price in cents. For "$2.99", use 299. For "2 for $5.00", use 500. If no price is visible, you MUST use null.
- "quantity": A string describing the quantity or weight (e.g., "12x355mL", "500g", "each"). If no quantity is visible, use an empty string "".
- "master_item_id": Find the best matching item from the MASTER LIST provided below and return its "id". If no good match is found, you MUST use null.
- "category_name": The most appropriate category (e.g., "Beverages", "Meat & Seafood"). If unsure, use "Other/Miscellaneous".
3. Your entire output MUST be a single JSON object. Do not include any other text, explanations, or markdown formatting like \`\`\`json.
# EXAMPLES
- For an item "Red Seedless Grapes" on sale for "$1.99 /lb" that matches master item ID 45:
{ "item": "Red Seedless Grapes", "price_display": "$1.99 /lb", "price_in_cents": 199, "quantity": "/lb", "master_item_id": 45, "category_name": "Produce" }
- For an item "PC Cola 2L" on sale "3 for $5.00" that has no master item match:
{ "item": "PC Cola 2L", "price_display": "3 for $5.00", "price_in_cents": 500, "quantity": "2L", "master_item_id": null, "category_name": "Beverages" }
- For an item "Store-made Muffins" with no price listed:
{ "item": "Store-made Muffins", "price_display": "", "price_in_cents": null, "quantity": "6 pack", "master_item_id": 123, "category_name": "Bakery" }
# MASTER LIST
${JSON.stringify(simplifiedMasterList)}
# JSON OUTPUT
`;
}
/**
* Safely parses a JSON object from a string, typically from an AI response.
* @param responseText The raw text response from the AI.
* @returns The parsed JSON object, or null if parsing fails.
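* @example
* // Illustrative only; the input strings are made-up AI responses:
* // this._parseJsonFromAiResponse<{ items: unknown[] }>('```json\n{ "items": [] }\n```', logger); // => { items: [] }
* // this._parseJsonFromAiResponse<{ items: unknown[] }>('Here you go: { "items": [] }', logger);  // => { items: [] }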
*/
private _parseJsonFromAiResponse<T>(responseText: string | undefined, logger: Logger): T | null {
// --- START EXTENSIVE DEBUG LOGGING ---
logger.debug(
{
responseText_type: typeof responseText,
responseText_length: responseText?.length,
responseText_preview: responseText?.substring(0, 200),
},
'[_parseJsonFromAiResponse] Starting JSON parsing.',
);
if (!responseText) {
logger.warn(
'[_parseJsonFromAiResponse] Response text is empty or undefined. Aborting parsing.',
);
return null;
}
// Find the start of the JSON, which can be inside a markdown block
const markdownRegex = /```(json)?\s*([\s\S]*?)\s*```/;
const markdownMatch = responseText.match(markdownRegex);
let jsonString;
if (markdownMatch && markdownMatch[2] !== undefined) {
logger.debug(
{ capturedLength: markdownMatch[2].length },
'[_parseJsonFromAiResponse] Found JSON content within markdown code block.',
);
jsonString = markdownMatch[2].trim();
} else {
logger.debug(
'[_parseJsonFromAiResponse] No markdown code block found. Using raw response text.',
);
jsonString = responseText;
}
// Find the first '{' or '[' and the last '}' or ']' to isolate the JSON object.
const firstBrace = jsonString.indexOf('{');
const firstBracket = jsonString.indexOf('[');
logger.debug(
{ firstBrace, firstBracket },
'[_parseJsonFromAiResponse] Searching for start of JSON.',
);
// Determine the starting point of the JSON content
const startIndex =
firstBrace === -1 || (firstBracket !== -1 && firstBracket < firstBrace)
? firstBracket
: firstBrace;
if (startIndex === -1) {
logger.error(
{ responseText },
"[_parseJsonFromAiResponse] Could not find starting '{' or '[' in response.",
);
return null;
}
// Find the last '}' or ']' so that any trailing text the AI appends after the JSON is ignored.
const lastBrace = jsonString.lastIndexOf('}');
const lastBracket = jsonString.lastIndexOf(']');
const endIndex = Math.max(lastBrace, lastBracket);
if (endIndex === -1) {
logger.error(
{ responseText },
"[_parseJsonFromAiResponse] Could not find ending '}' or ']' in response.",
);
return null;
}
const jsonSlice = jsonString.substring(startIndex, endIndex + 1);
logger.debug(
{ sliceLength: jsonSlice.length },
'[_parseJsonFromAiResponse] Extracted JSON slice for parsing.',
);
try {
const parsed = JSON.parse(jsonSlice) as T;
logger.info('[_parseJsonFromAiResponse] Successfully parsed JSON from AI response.');
return parsed;
} catch (e) {
logger.error(
{ jsonSlice, error: e, errorMessage: (e as Error).message, stack: (e as Error).stack },
'[_parseJsonFromAiResponse] Failed to parse JSON slice.',
);
return null;
}
}
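/**
 * Extracts purchased line items (description and total price in cents) from a receipt image.
 * @param imagePath Path to the receipt image on the server.
 * @param imageMimeType MIME type of the receipt image.
 * @returns An array of { raw_item_description, price_paid_cents } objects; throws if the AI response cannot be parsed.
 */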
async extractItemsFromReceiptImage(
imagePath: string,
imageMimeType: string,
logger: Logger = this.logger,
): Promise<{ raw_item_description: string; price_paid_cents: number }[] | null> {
const prompt = `
Analyze the provided receipt image. Extract all purchased line items.
For each item, identify its description and total price.
Return the data as a valid JSON array of objects. Each object should have two keys:
1. "raw_item_description": a string containing the item's name as written on the receipt.
2. "price_paid_cents": an integer representing the total price for that line item in cents (do not include currency symbols).
Example format:
[
{ "raw_item_description": "ORGANIC BANANAS", "price_paid_cents": 129 },
{ "raw_item_description": "AVOCADO", "price_paid_cents": 299 }
]
Only output the JSON array. Do not include any other text, explanations, or markdown formatting.
`;
const imagePart = await this.serverFileToGenerativePart(imagePath, imageMimeType);
logger.info('[extractItemsFromReceiptImage] Entering method.');
try {
logger.debug('[extractItemsFromReceiptImage] PRE-RATE-LIMITER: Preparing to call AI.');
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() =>
this.aiClient.generateContent({
contents: [{ parts: [{ text: prompt }, imagePart] }],
}),
);
logger.debug(
'[extractItemsFromReceiptImage] POST-RATE-LIMITER: AI call successful, parsing response.',
);
// The SDK response is a structured object; we only need its text part.
const text = result.text;
logger.debug(
{ rawText: text?.substring(0, 100) },
'[extractItemsFromReceiptImage] Raw text from AI.',
);
const parsedJson = this._parseJsonFromAiResponse<
{ raw_item_description: string; price_paid_cents: number }[]
>(text, logger);
if (!parsedJson) {
logger.error(
{ responseText: text },
'[extractItemsFromReceiptImage] Failed to parse valid JSON from response.',
);
throw new Error('AI response did not contain a valid JSON array.');
}
logger.info('[extractItemsFromReceiptImage] Successfully extracted items. Exiting method.');
return parsedJson;
} catch (apiError) {
logger.error(
{ err: apiError },
'[extractItemsFromReceiptImage] An error occurred during the process.',
);
throw apiError;
}
}
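/**
 * Extracts store details and sale items from one or more flyer images in a single AI call.
 * Returned items are the raw parsed values; normalization is handled downstream by the FlyerDataTransformer.
 * @param imagePaths The flyer image files (path and MIME type) to analyze.
 * @param masterItems Known grocery items supplied to the prompt for master-item matching.
 * @param submitterIp Optional IP address used as a general location hint.
 * @param userProfileAddress Optional profile address used as a stronger location hint.
 * @returns The parsed flyer data: store_name, valid_from, valid_to, store_address, and items.
 */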
async extractCoreDataFromFlyerImage(
imagePaths: { path: string; mimetype: string }[],
masterItems: MasterGroceryItem[],
submitterIp?: string,
userProfileAddress?: string,
logger: Logger = this.logger,
): Promise<
{
store_name: string | null;
valid_from: string | null;
valid_to: string | null;
store_address: string | null;
items: z.infer<typeof ExtractedFlyerItemSchema>[];
} & z.infer<typeof AiFlyerDataSchema>
> {
logger.info(
`[extractCoreDataFromFlyerImage] Entering method with ${imagePaths.length} image(s).`,
);
const prompt = this._buildFlyerExtractionPrompt(masterItems, submitterIp, userProfileAddress);
const imageParts = await Promise.all(
imagePaths.map((file) => this.serverFileToGenerativePart(file.path, file.mimetype)),
);
const totalImageSize = imageParts.reduce((acc, part) => acc + part.inlineData.data.length, 0);
logger.info(
`[aiService.server] Total base64 image data size for Gemini: ${(totalImageSize / (1024 * 1024)).toFixed(2)} MB`,
);
try {
logger.debug(
`[extractCoreDataFromFlyerImage] PRE-RATE-LIMITER: Preparing to call Gemini API.`,
);
const geminiCallStartTime = process.hrtime.bigint();
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() => {
logger.debug(
'[extractCoreDataFromFlyerImage] INSIDE-RATE-LIMITER: Executing generateContent call.',
);
return this.aiClient.generateContent({
contents: [{ parts: [{ text: prompt }, ...imageParts] }],
});
});
logger.debug('[extractCoreDataFromFlyerImage] POST-RATE-LIMITER: AI call completed.');
const geminiCallEndTime = process.hrtime.bigint();
const durationMs = Number(geminiCallEndTime - geminiCallStartTime) / 1_000_000;
logger.info(
`[aiService.server] Gemini API call for flyer processing completed in ${durationMs.toFixed(2)} ms.`,
);
const text = result.text;
logger.debug(
`[aiService.server] Raw Gemini response text (first 500 chars): ${text?.substring(0, 500)}`,
);
const extractedData = this._parseJsonFromAiResponse<z.infer<typeof AiFlyerDataSchema>>(
text,
logger,
);
if (!extractedData) {
logger.error(
{ responseText: text },
'[extractCoreDataFromFlyerImage] AI response did not contain a valid JSON object after parsing.',
);
throw new Error('AI response did not contain a valid JSON object.');
}
// The FlyerDataTransformer is now responsible for all normalization.
// We return the raw items as parsed from the AI response.
if (!Array.isArray(extractedData.items)) {
extractedData.items = [];
}
logger.info(
`[extractCoreDataFromFlyerImage] Successfully processed flyer data for store: ${extractedData.store_name}. Exiting method.`,
);
return extractedData;
} catch (apiError) {
logger.error({ err: apiError }, '[extractCoreDataFromFlyerImage] The entire process failed.');
throw apiError;
}
}
/**
* SERVER-SIDE FUNCTION
* Extracts a specific piece of text from a cropped area of an image.
* @param imagePath The path to the original image file on the server.
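* @param imageMimeType The MIME type of the image file (e.g., "image/png").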
* @param cropArea The coordinates and dimensions { x, y, width, height } to crop.
* @param extractionType The type of data to extract, which determines the AI prompt.
* @returns A promise that resolves to the extracted text.
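* @example
* // Illustrative call; the path and crop values are placeholders, not real data:
* // const { text } = await aiService.extractTextFromImageArea('/uploads/flyer-1.png', 'image/png', { x: 10, y: 20, width: 300, height: 80 }, 'store_name');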
*/
async extractTextFromImageArea(
imagePath: string,
imageMimeType: string,
cropArea: { x: number; y: number; width: number; height: number },
extractionType: 'store_name' | 'dates' | 'item_details',
logger: Logger = this.logger,
): Promise<{ text: string | undefined }> {
logger.info(
`[extractTextFromImageArea] Entering method for extraction type: ${extractionType}.`,
);
// 1. Define prompts based on the extraction type
const prompts = {
store_name: 'What is the store name in this image? Respond with only the name.',
dates:
'What are the sale dates in this image? Respond with the date range as text (e.g., "Jan 1 - Jan 7").',
item_details:
'Extract the item name, price, and quantity from this image. Respond with the text as seen.',
};
const prompt = prompts[extractionType] || 'Extract the text from this image.';
// 2. Crop the image using sharp
logger.debug('[extractTextFromImageArea] Cropping image with sharp.');
const sharp = (await import('sharp')).default;
const croppedImageBuffer = await sharp(imagePath)
.extract({
left: Math.round(cropArea.x),
top: Math.round(cropArea.y),
width: Math.round(cropArea.width),
height: Math.round(cropArea.height),
})
.toBuffer();
// 3. Convert cropped buffer to GenerativePart
const imagePart = {
inlineData: {
data: croppedImageBuffer.toString('base64'),
mimeType: imageMimeType,
},
};
// 4. Call the AI model
try {
logger.debug(`[extractTextFromImageArea] PRE-RATE-LIMITER: Preparing to call AI.`);
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() => {
logger.debug(`[extractTextFromImageArea] INSIDE-RATE-LIMITER: Executing generateContent.`);
return this.aiClient.generateContent({
contents: [{ parts: [{ text: prompt }, imagePart] }],
});
});
logger.debug('[extractTextFromImageArea] POST-RATE-LIMITER: AI call completed.');
const text = result.text?.trim();
logger.info(
`[extractTextFromImageArea] Gemini rescan completed. Extracted text: "${text}". Exiting method.`,
);
return { text };
} catch (apiError) {
logger.error(
{ err: apiError },
`[extractTextFromImageArea] An error occurred for type ${extractionType}.`,
);
throw apiError;
}
}
/**
* Generates a simple recipe suggestion based on a list of ingredients.
* Uses the 'lite' models for faster/cheaper generation.
* @param ingredients List of available ingredients.
* @param logger Logger instance.
* @returns The recipe suggestion text.
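* @example
* // Illustrative usage; the ingredient list is a placeholder:
* // const recipe = await aiService.generateRecipeSuggestion(['eggs', 'spinach', 'feta'], logger);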
*/
async generateRecipeSuggestion(
ingredients: string[],
logger: Logger = this.logger,
): Promise<string | null> {
const prompt = `Suggest a simple recipe using these ingredients: ${ingredients.join(', ')}. Keep it brief.`;
try {
const result = await this.rateLimiter(() =>
this.aiClient.generateContent({
contents: [{ parts: [{ text: prompt }] }],
useLiteModels: true,
}),
);
return result.text || null;
} catch (error) {
logger.error({ err: error }, 'Failed to generate recipe suggestion');
return null;
}
}
/**
* SERVER-SIDE FUNCTION
* Uses Google Maps grounding to find nearby stores and plan a shopping trip.
* @param items The items from the flyer.
* @param store The store associated with the flyer.
* @param userLocation The user's current geographic coordinates.
* @returns A text response with trip planning advice and a list of map sources.
*/
async planTripWithMaps(
items: FlyerItem[],
store: { name: string } | undefined,
userLocation: GeolocationCoordinates,
logger: Logger = this.logger,
): Promise<{ text: string; sources: { uri: string; title: string }[] }> {
// This feature is disabled; throw so the calling route can respond with 501 Not Implemented.
logger.warn('[AIService] planTripWithMaps called, but feature is disabled. Throwing error.');
throw new Error("The 'planTripWithMaps' feature is currently disabled due to API costs.");
/* const topItems = items.slice(0, 5).map(i => i.item).join(', ');
const storeName = store?.name || 'the grocery store';
try {
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() => this.aiClient.generateContent({
contents: [{ parts: [{ text: `My current location is latitude ${userLocation.latitude}, longitude ${userLocation.longitude}.
I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route.
Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`}]}],
tools: [{ "googleSearch": {} }],
}));
// In a real implementation, you would render the map URLs from the sources.
// The new SDK provides the search queries used, not a direct list of web attributions.
// We will transform these queries into searchable links to fulfill the contract of the function.
const searchQueries = result.candidates?.[0]?.groundingMetadata?.webSearchQueries || [];
const sources = searchQueries.map((query: string) => ({
uri: `https://www.google.com/search?q=${encodeURIComponent(query)}`,
title: query
}));
return { text: result.text ?? '', sources };
} catch (apiError) {
logger.error({ err: apiError }, "Google GenAI API call failed in planTripWithMaps");
throw apiError;
}
*/
}
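/**
 * Validates an uploaded flyer (duplicate checksum check, base URL sanity check) and enqueues it
 * on the BullMQ flyer queue for asynchronous processing, propagating request context per ADR-051.
 * @returns The created BullMQ job.
 * @throws DuplicateFlyerError if a flyer with the same checksum has already been processed.
 */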
async enqueueFlyerProcessing(
file: Express.Multer.File,
checksum: string,
userProfile: UserProfile | undefined,
submitterIp: string,
logger: Logger,
baseUrlOverride?: string,
): Promise<Job> {
// 1. Check for duplicate flyer
const existingFlyer = await db.flyerRepo.findFlyerByChecksum(checksum, logger);
if (existingFlyer) {
// Throw a specific error for the route to handle
throw new DuplicateFlyerError(
'This flyer has already been processed.',
existingFlyer.flyer_id,
);
}
// 2. Construct user address string
let userProfileAddress: string | undefined = undefined;
if (userProfile?.address) {
userProfileAddress = [
userProfile.address.address_line_1,
userProfile.address.address_line_2,
userProfile.address.city,
userProfile.address.province_state,
userProfile.address.postal_code,
userProfile.address.country,
]
.filter(Boolean)
.join(', ');
}
const baseUrl = baseUrlOverride || getBaseUrl(logger);
// --- START DEBUGGING ---
console.error(`[DEBUG] aiService.enqueueFlyerProcessing resolved baseUrl: "${baseUrl}"`);
// Add a fail-fast check to ensure the baseUrl is a valid URL before enqueuing.
// This will make the test fail at the upload step if the URL is the problem,
// which is easier to debug than a worker failure.
if (!baseUrl || !baseUrl.startsWith('http')) {
const errorMessage = `[aiService] FATAL: The generated baseUrl is not a valid absolute URL. Value: "${baseUrl}". This will cause the flyer processing worker to fail. Check the FRONTEND_URL environment variable.`;
logger.error(errorMessage);
// Throw a standard error that the calling route can handle.
throw new Error(errorMessage);
}
logger.info({ baseUrl }, '[aiService] Enqueuing job with valid baseUrl.');
// --- END DEBUGGING ---
// 3. Add job to the queue with context propagation (ADR-051)
const bindings = logger.bindings?.() || {};
const job = await flyerQueue.add('process-flyer', {
filePath: file.path,
originalFileName: file.originalname,
checksum: checksum,
userId: userProfile?.user.user_id,
submitterIp: submitterIp,
userProfileAddress: userProfileAddress,
baseUrl: baseUrl,
meta: {
requestId: bindings.request_id as string | undefined,
userId: userProfile?.user.user_id,
origin: 'api',
},
});
logger.info(`Enqueued flyer for processing. File: ${file.originalname}, Job ID: ${job.id}`);
return job;
}
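/**
 * Normalizes the loosely-typed legacy upload body: parses stringified JSON, unwraps a nested
 * "data" property when present, and returns both the merged payload (used for checksum lookup)
 * and the extracted flyer data.
 */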
private _parseLegacyPayload(
body: unknown,
logger: Logger,
): { parsed: FlyerProcessPayload; extractedData: Partial<ExtractedCoreData> | null | undefined } {
logger.debug({ body, type: typeof body }, '[AIService] Starting _parseLegacyPayload');
let parsed: FlyerProcessPayload = {};
try {
parsed = typeof body === 'string' ? JSON.parse(body) : body || {};
} catch (e) {
logger.warn(
{ error: errMsg(e) },
'[AIService] Failed to parse top-level request body string.',
);
return { parsed: {}, extractedData: {} };
}
logger.debug({ parsed }, '[AIService] Parsed top-level body');
// If the real payload is nested inside a 'data' property (which could be a string),
// we parse it out but keep the original `parsed` object for top-level properties like checksum.
let potentialPayload: FlyerProcessPayload = parsed;
if (parsed.data) {
if (typeof parsed.data === 'string') {
try {
potentialPayload = JSON.parse(parsed.data);
} catch (e) {
logger.warn(
{ error: errMsg(e) },
'[AIService] Failed to parse nested "data" property string.',
);
}
} else if (typeof parsed.data === 'object') {
potentialPayload = parsed.data;
}
}
logger.debug(
{ potentialPayload },
'[AIService] Potential payload after checking "data" property',
);
// The extracted data is either in an `extractedData` key or is the payload itself.
const extractedData = potentialPayload.extractedData ?? potentialPayload;
logger.debug({ extractedData: !!extractedData }, '[AIService] Extracted data object');
// Merge for checksum lookup: properties in the outer `parsed` object (like a top-level checksum)
// take precedence over any same-named properties inside `potentialPayload`.
const finalParsed = { ...potentialPayload, ...parsed };
logger.debug({ finalParsed }, '[AIService] Final parsed object for checksum lookup');
return { parsed: finalParsed, extractedData };
}
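/**
 * Handles the legacy (synchronous) flyer upload path: validates the checksum, rejects duplicates,
 * processes the uploaded image and its icon, and persists the flyer with its items in a single
 * database transaction.
 * @returns The newly created flyer record.
 */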
async processLegacyFlyerUpload(
file: Express.Multer.File,
body: unknown,
userProfile: UserProfile | undefined,
logger: Logger,
): Promise<Flyer> {
logger.debug({ body, file }, '[AIService] Starting processLegacyFlyerUpload');
const { parsed, extractedData: initialExtractedData } = this._parseLegacyPayload(body, logger);
let extractedData = initialExtractedData;
const checksum = parsed.checksum ?? parsed?.data?.checksum ?? '';
logger.debug({ checksum, parsed }, '[AIService] Extracted checksum from legacy payload');
if (!checksum) {
throw new ValidationError([], 'Checksum is required.');
}
const existingFlyer = await db.flyerRepo.findFlyerByChecksum(checksum, logger);
if (existingFlyer) {
throw new DuplicateFlyerError(
'This flyer has already been processed.',
existingFlyer.flyer_id,
);
}
const originalFileName =
parsed.originalFileName ?? parsed?.data?.originalFileName ?? file.originalname;
if (!extractedData || typeof extractedData !== 'object') {
logger.warn({ bodyData: parsed }, 'Missing extractedData in legacy payload.');
extractedData = {};
}
const rawItems = extractedData.items ?? [];
const itemsArray = Array.isArray(rawItems)
? rawItems
: typeof rawItems === 'string'
? JSON.parse(rawItems)
: [];
const itemsForDb = itemsArray.map((item: Partial<ExtractedFlyerItem>) => ({
...item,
// Ensure empty or nullish price_display is stored as NULL to satisfy database constraints.
price_display: item.price_display || null,
master_item_id: item.master_item_id === null ? undefined : item.master_item_id,
quantity: item.quantity ?? 1,
view_count: 0,
click_count: 0,
updated_at: new Date().toISOString(),
}));
const storeName =
extractedData.store_name && String(extractedData.store_name).trim().length > 0
? String(extractedData.store_name)
: 'Unknown Store (auto)';
if (storeName.startsWith('Unknown')) {
logger.warn('extractedData.store_name missing; using fallback store name.');
}
// Process the uploaded image to strip metadata and optimize it.
const flyerImageDir = path.dirname(file.path);
const processedImageFileName = await processAndSaveImage(
file.path,
flyerImageDir,
originalFileName,
logger,
);
const processedImagePath = path.join(flyerImageDir, processedImageFileName);
// Generate the icon from the newly processed (and cleaned) image.
const iconsDir = path.join(flyerImageDir, 'icons');
const iconFileName = await generateFlyerIcon(processedImagePath, iconsDir, logger);
const baseUrl = getBaseUrl(logger);
const iconUrl = `${baseUrl}/flyer-images/icons/${iconFileName}`;
const imageUrl = `${baseUrl}/flyer-images/${processedImageFileName}`;
logger.debug({ imageUrl, iconUrl }, 'Constructed URLs for legacy upload');
const flyerData: FlyerInsert = {
file_name: originalFileName,
image_url: imageUrl,
icon_url: iconUrl,
checksum: checksum,
store_name: storeName,
valid_from: extractedData.valid_from ?? null,
valid_to: extractedData.valid_to ?? null,
store_address: extractedData.store_address ?? null,
item_count: 0,
status: 'needs_review',
uploaded_by: userProfile?.user.user_id,
};
return db
.withTransaction(async (client) => {
const { flyer, items } = await createFlyerAndItems(flyerData, itemsForDb, logger, client);
logger.info(
`Successfully processed legacy flyer: ${flyer.file_name} (ID: ${flyer.flyer_id}) with ${items.length} items.`,
);
const transactionalAdminRepo = new AdminRepository(client);
await transactionalAdminRepo.logActivity(
{
userId: userProfile?.user.user_id,
action: 'flyer_processed',
displayText: `Processed a new flyer for ${flyerData.store_name}.`,
details: { flyerId: flyer.flyer_id, storeName: flyerData.store_name },
},
logger,
);
return flyer;
})
.catch((error) => {
logger.error({ err: error, checksum }, 'Legacy flyer upload database transaction failed.');
throw error;
});
}
}
// Export a singleton instance of the service for use throughout the application.
import { createScopedLogger } from './logger.server';
export const aiService = new AIService(createScopedLogger('ai-service'));