// src/services/flyerAiProcessor.server.ts
import { z } from 'zod';
import type { Logger } from 'pino';
import type { AIService } from './aiService.server';
import type { PersonalizationRepository } from './db/personalization.db';
import { AiDataValidationError } from './processingErrors';
import type { FlyerJobData } from '../types/job-data';
import { AiFlyerDataSchema } from '../types/ai'; // Import consolidated schemas and helper
/** The AI flyer payload shape after it has passed Zod validation. */
export type ValidatedAiDataType = z.infer<typeof AiFlyerDataSchema>;

/** Result of an extraction run: the validated data plus a manual-review flag. */
export interface AiProcessorResult {
  /** The structurally validated flyer data returned by the AI. */
  data: ValidatedAiDataType;
  /** True when semantic quality checks flagged the extraction for human review. */
  needsReview: boolean;
}

/**
 * Type definition for the extractAndValidateData method signature.
 * Used for dependency injection in tests.
 */
export type ExtractAndValidateDataFn = (
  imagePaths: { path: string; mimetype: string }[],
  jobData: FlyerJobData,
  logger: Logger,
) => Promise<AiProcessorResult>;
/**
 * Encapsulates the logic for interacting with the AI service to extract
 * structured data from flyer images and validate the response.
 *
 * Pages are processed in batches (to respect AI payload/token limits), the
 * per-batch results are merged, and the merged payload is validated both
 * structurally (Zod) and semantically (quality checks → needsReview flag).
 */
export class FlyerAiProcessor {
  /** Test-only override for extractAndValidateData; null means "use the real implementation". */
  private extractFn: ExtractAndValidateDataFn | null = null;

  // Unique ID for this instance (for debugging multiple instance issues).
  private readonly instanceId = Math.random().toString(36).substring(7);

  constructor(
    private ai: AIService,
    private personalizationRepo: PersonalizationRepository,
  ) {}

  /**
   * Allows replacing the extractAndValidateData implementation at runtime.
   * This is primarily used for testing to inject mock implementations.
   * @internal
   */
  _setExtractAndValidateData(fn: ExtractAndValidateDataFn | null): void {
    // TODO(review): leftover debug instrumentation — remove or downgrade to
    // logger.debug before release.
    console.error(
      `[DEBUG] FlyerAiProcessor[${this.instanceId}]._setExtractAndValidateData called, ${fn ? 'replacing' : 'resetting'} extract function`,
    );
    this.extractFn = fn;
  }

  /**
   * Validates the raw data from the AI against the Zod schema, then performs
   * semantic quality checks to flag low-quality extractions for manual review.
   *
   * @param extractedData - Raw (untrusted) payload returned by the AI service.
   * @param logger - Request-scoped logger.
   * @returns The validated data and whether it needs manual review.
   * @throws AiDataValidationError when the payload fails structural validation.
   */
  private _validateAiData(extractedData: unknown, logger: Logger): AiProcessorResult {
    const validationResult = AiFlyerDataSchema.safeParse(extractedData);
    if (!validationResult.success) {
      const errors = validationResult.error.flatten();
      logger.error({ errors, rawData: extractedData }, 'AI response failed validation.');
      throw new AiDataValidationError(
        'AI response validation failed. The returned data structure is incorrect.',
        errors,
        extractedData,
      );
    }

    // --- Data Quality Checks ---
    // After structural validation, perform semantic quality checks to flag
    // low-quality extractions for manual review.
    const { store_name, items, valid_from, valid_to } = validationResult.data;
    const qualityIssues: string[] = [];

    // 1. Check for a store name.
    if (!store_name || store_name.trim() === '') {
      qualityIssues.push('Missing store name');
    }

    // 2. Check that items were extracted.
    if (!items || items.length === 0) {
      qualityIssues.push('No items were extracted');
    } else {
      // 3. If items exist, check their quality (e.g., missing prices).
      // The threshold is configurable via an environment variable, defaulting
      // to 0.5 (50%).
      // BUG FIX: parseFloat of a malformed env var yields NaN, and
      // `ratio < NaN` is always false — which silently disabled this check.
      // Fall back to the default when the parsed value is not a finite number.
      const parsedThreshold = parseFloat(process.env.AI_PRICE_QUALITY_THRESHOLD || '0.5');
      const priceQualityThreshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.5;
      const itemsWithPrice = items.filter(
        (item) => item.price_in_cents != null && item.price_in_cents > 0,
      ).length;
      const priceQualityRatio = itemsWithPrice / items.length;
      if (priceQualityRatio < priceQualityThreshold) {
        // If the ratio of items with a valid price is below the threshold, flag for review.
        qualityIssues.push(
          `Low price quality (${(priceQualityRatio * 100).toFixed(0)}% of items have a price)`,
        );
      }
    }

    // 4. Check for flyer validity dates.
    if (!valid_from && !valid_to) {
      qualityIssues.push('Missing both valid_from and valid_to dates');
    }

    const needsReview = qualityIssues.length > 0;
    if (needsReview) {
      logger.warn(
        { rawData: extractedData, qualityIssues },
        `AI response has quality issues. Flagging for review. Issues: ${qualityIssues.join(', ')}`,
      );
    }
    logger.info(
      `AI extracted ${validationResult.data.items.length} items. Needs Review: ${needsReview}`,
    );
    return { data: validationResult.data, needsReview };
  }

  /**
   * Calls the AI service to extract structured data from the flyer images and
   * validates the response.
   *
   * @param imagePaths - Page images (local path + MIME type), in page order.
   * @param jobData - Job context (submitter IP, optional user address).
   * @param logger - Request-scoped logger.
   * @returns The merged, validated extraction result.
   * @throws AiDataValidationError when the merged payload fails validation.
   */
  public async extractAndValidateData(
    imagePaths: { path: string; mimetype: string }[],
    jobData: FlyerJobData,
    logger: Logger,
  ): Promise<AiProcessorResult> {
    // TODO(review): leftover debug instrumentation — remove or downgrade to
    // logger.debug before release.
    console.error(
      `[WORKER DEBUG] FlyerAiProcessor[${this.instanceId}]: extractAndValidateData called with ${imagePaths.length} images, extractFn=${this.extractFn ? 'SET' : 'null'}`,
    );

    // If a mock function is injected (for testing), use it instead of the real implementation.
    if (this.extractFn) {
      console.error(
        `[WORKER DEBUG] FlyerAiProcessor[${this.instanceId}]: Using injected extractFn mock`,
      );
      return this.extractFn(imagePaths, jobData, logger);
    }

    logger.info(`Starting AI data extraction for ${imagePaths.length} pages.`);
    const { submitterIp, userProfileAddress } = jobData;
    const { items: masterItems } = await this.personalizationRepo.getAllMasterItems(logger);
    logger.debug(`Retrieved ${masterItems.length} master items for AI matching.`);

    // BATCHING LOGIC: Process images in chunks to avoid hitting AI payload/token limits.
    const BATCH_SIZE = 4;
    const batches = [];
    for (let i = 0; i < imagePaths.length; i += BATCH_SIZE) {
      batches.push(imagePaths.slice(i, i + BATCH_SIZE));
    }

    // Initialize container for merged data.
    const mergedData: ValidatedAiDataType = {
      store_name: null,
      valid_from: null,
      valid_to: null,
      store_address: null,
      items: [],
    };

    logger.info(
      `Processing ${imagePaths.length} pages in ${batches.length} batches (Batch Size: ${BATCH_SIZE}).`,
    );

    for (const [index, batch] of batches.entries()) {
      logger.info(`Processing batch ${index + 1}/${batches.length} (${batch.length} pages)...`);
      // The AI service handles rate limiting internally (e.g., max 5 RPM).
      // Processing these sequentially ensures we respect that limit.
      const batchResult = await this.ai.extractCoreDataFromFlyerImage(
        batch,
        masterItems,
        submitterIp,
        userProfileAddress,
        logger,
      );

      // MERGE LOGIC:
      // 1. Metadata (Store Name, Dates): Prioritize the first batch (usually the cover page).
      //    If subsequent batches have data and the current is null, fill it in.
      if (index === 0) {
        mergedData.store_name = batchResult.store_name;
        mergedData.valid_from = batchResult.valid_from;
        mergedData.valid_to = batchResult.valid_to;
        mergedData.store_address = batchResult.store_address;
      } else {
        // BUG FIX: the two middle conditions read `Iif` — an Istanbul coverage
        // artifact fused into the keyword; restored to plain `if`.
        if (!mergedData.store_name && batchResult.store_name)
          mergedData.store_name = batchResult.store_name;
        if (!mergedData.valid_from && batchResult.valid_from)
          mergedData.valid_from = batchResult.valid_from;
        if (!mergedData.valid_to && batchResult.valid_to)
          mergedData.valid_to = batchResult.valid_to;
        if (!mergedData.store_address && batchResult.store_address)
          mergedData.store_address = batchResult.store_address;
      }

      // 2. Items: Append all found items to the master list.
      mergedData.items.push(...(batchResult.items || []));
    }

    logger.info(`Batch processing complete. Total items extracted: ${mergedData.items.length}`);
    console.error(
      `[WORKER DEBUG] FlyerAiProcessor: Merged AI Data:`,
      JSON.stringify(mergedData, null, 2),
    );

    // Validate the final merged dataset.
    return this._validateAiData(mergedData, logger);
  }
}