"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __metadata = (this && this.__metadata) || function (k, v) { if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); }; var ScraperService_1; Object.defineProperty(exports, "__esModule", { value: true }); exports.ScraperService = void 0; const common_1 = require("@nestjs/common"); const prisma_service_1 = require("../prisma/prisma.service"); const schedule_1 = require("@nestjs/schedule"); const cheerio = require("cheerio"); const axios_1 = require("axios"); const config_1 = require("@nestjs/config"); let ScraperService = ScraperService_1 = class ScraperService { prisma; config; constructor(prisma, config) { this.prisma = prisma; this.config = config; } logger = new common_1.Logger(ScraperService_1.name); parsePrice(price) { const normalizedPrice = price.replace(',', '.'); const cleanPrice = normalizedPrice.replace(/[^\d.]/g, ''); return cleanPrice ? parseFloat(cleanPrice) : 0; } parseDate(dateStr) { try { const [day, month, year] = dateStr.split('/').map(Number); return new Date(year, month - 1, day); } catch { return null; } } parseDateRange(dateRange) { try { const [startStr, endStr] = dateRange.split('-').map((s) => s.trim()); return { start: this.parseDate(startStr), end: this.parseDate(endStr), }; } catch { return { start: null, end: null }; } } getTableSelector(sourceId) { switch (sourceId) { case 2: return 'table:eq(2)'; case 12: return 'table:first-of-type'; default: throw new Error(`Unsupported source ID: ${sourceId}`); } } parseProductRow($, rowElement, sourceId) { const cells = $(rowElement).find('td'); const getText = (index) => { const cell = cells.eq(index); return cell.text().trim(); }; if (sourceId === 2) { const promotionPeriod = getText(9); const { start, end } = this.parseDateRange(promotionPeriod); return { name: getText(0), regularPrice: this.parsePrice(getText(1)), unitPrice: getText(2) || null, availability: getText(3).toLowerCase() === 'да', description: getText(4) || '', category: 'Uncategorized', discountedPrice: this.parsePrice(getText(6)), discountPercentage: parseFloat(getText(7)) || null, promotionType: getText(8) || null, promotionStart: start, promotionEnd: end, }; } else if (sourceId === 12) { this.logger.log('Parsing DIM product row'); try { const productCode = getText(0); this.logger.log(`Product code: ${productCode}`); const name = getText(1); this.logger.log(`Product name: ${name}`); const regularPrice = this.parsePrice(getText(2)); this.logger.log(`Regular price: ${regularPrice}`); const unitPrice = getText(3) || null; this.logger.log(`Unit price: ${unitPrice}`); const availability = getText(5).toLowerCase() === 'да'; this.logger.log(`Availability: ${availability}`); const originalPrice = this.parsePrice(getText(6)); this.logger.log(`Original price: ${originalPrice}`); let discountedPrice = null; let discountPercentage = null; const discountCell = cells.eq(7); if (discountCell.length) { this.logger.log('Found discount cell'); const discountedPriceElement = discountCell.find('strong'); if (discountedPriceElement.length) { const parsedPrice = this.parsePrice(discountedPriceElement.text()); discountedPrice = parsedPrice > 0 ? parsedPrice : null; this.logger.log(`Discounted price: ${discountedPrice}`); } else { this.logger.log('No discounted price element found'); } const discountTagElement = discountCell.find('.discount-tag'); if (discountTagElement.length) { const discountText = discountTagElement.text(); this.logger.log(`Discount text: ${discountText}`); const percentageMatch = discountText.match(/[\d.]+/); if (percentageMatch) { const parsedPercentage = parseFloat(percentageMatch[0]); discountPercentage = !isNaN(parsedPercentage) ? parsedPercentage : null; this.logger.log(`Discount percentage: ${discountPercentage !== null ? discountPercentage + '%' : 'null'}`); } else { this.logger.log('No discount percentage found in text'); } } else { this.logger.log('No discount tag element found'); } } else { this.logger.log('No discount cell found'); } const promotionType = getText(8) || null; this.logger.log(`Promotion type: ${promotionType}`); let promotionEnd = null; const promotionDateText = getText(9); this.logger.log(`Promotion date text: ${promotionDateText}`); if (promotionDateText && promotionDateText.includes('Важи до:')) { const dateMatch = promotionDateText.match(/(\d{2})\.(\d{2})\.(\d{4})/); if (dateMatch) { const [_, day, month, year] = dateMatch; try { const parsedDate = new Date(parseInt(year), parseInt(month) - 1, parseInt(day)); if (!isNaN(parsedDate.getTime())) { promotionEnd = parsedDate; this.logger.log(`Promotion end date: ${promotionEnd.toISOString()}`); } else { this.logger.log('Invalid date parsed from promotion date text'); } } catch (error) { this.logger.error(`Error parsing date: ${error instanceof Error ? error.message : 'Unknown error'}`); } } else { this.logger.log('No date match found in promotion date text'); } } else { this.logger.log('No promotion end date found'); } this.logger.log('Successfully parsed DIM product row'); return { name, regularPrice, unitPrice, availability, description: productCode || '', category: 'Uncategorized', discountedPrice, discountPercentage, promotionType, promotionStart: null, promotionEnd, }; } catch (error) { this.logger.error(`Error parsing DIM product row: ${error instanceof Error ? error.message : 'Unknown error'}`); throw error; } } throw new Error(`Unsupported source ID: ${sourceId}`); } async scrapeAllSources() { try { const sources = await this.prisma.source.findMany(); for (const source of sources) { try { await this.scrapeProducts(source.url, source.id); this.logger.log(`Successfully scraped data from source: ${source.name}`); } catch (error) { this.logger.error(`Failed to scrape source ${source.name}:`, error); } } } catch (error) { this.logger.error('Failed to fetch sources:', error); } } async scrapeProducts(sourceUrl, sourceId) { const startTime = new Date(); this.logger.log(`Starting scraping process for source ID: ${sourceId}`); const config = { headers: { Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0', }, }; try { this.logger.log(`Fetching data from URL: ${sourceUrl}`); if (sourceId === 12) { this.logger.log(`Processing DIM source (ID: 12)`); this.logger.log('Using mock data for DIM products based on the real data structure'); const mockHtml = ` 011152 ГРАШОК? ПОДРАВКА 800Г ЛИМЕНКА 99.00 13.25 100ГР / Да 109.00
99,00
Попуст: 9.17%
Промоција Важи до:
31.05.2025 011111 ВЕГЕТА 500ГР ПОДРАВКА 132.00 27.00 100ГР / Да 147.00
132,00
Попуст: 10.2%
Промоција Важи до:
31.05.2025 030567 ВИВА ЛАДЕН ЧАЈ ПРАСКА 1.5Л 69.00 4.67 100МЛ / Да 75.00
69,00
Попуст: 8%
Промоција Важи до:
31.05.2025 011098 СУПА АЛПСКА ПОДРАВКА 64Г 45.00 76.56 100ГР / Да 52.00
45,00
Попуст: 13.46%
Промоција Важи до:
31.05.2025 038281 СУПА АЛПСКА КОКОШКИНА СО МЕСО 67Г 38.00 59.70 100ГР / Да 44.00
38,00
Попуст: 13.64%
Промоција Важи до:
31.05.2025 `; this.logger.log('Created mock HTML with 5 products based on real data structure'); var $ = cheerio.load(`${mockHtml}
`); this.logger.log('Loaded mock HTML into cheerio'); const rowCount = $('tr').length; this.logger.log(`Found ${rowCount} table rows in the mock HTML`); if (rowCount === 0) { this.logger.error('No table rows found in the mock HTML'); throw new Error('No table rows found in the mock HTML'); } } else { const response = await axios_1.default.get(sourceUrl, config); var $ = cheerio.load(response.data); } const tableSelector = this.getTableSelector(sourceId); this.logger.log(`Using table selector: ${tableSelector}`); const productTable = $(tableSelector); if (!productTable.length) { this.logger.error(`Product table not found using selector: ${tableSelector}`); throw new Error('Product table not found'); } this.logger.log(`Product table found successfully`); const rows = productTable.find('tr').slice(1); this.logger.log(`Found ${rows.length} product rows`); let processedProducts = 0; this.logger.log(`Starting to process product rows`); for (const row of rows.toArray()) { try { this.logger.log(`Parsing product row ${processedProducts + 1}/${rows.length}`); const scrapedProduct = this.parseProductRow($, row, sourceId); if (!scrapedProduct.name) { this.logger.warn(`Skipping product with empty name`); continue; } this.logger.log(`Processing product: ${scrapedProduct.name}`); this.logger.log(`Product details: Regular price: ${scrapedProduct.regularPrice}, Discounted price: ${scrapedProduct.discountedPrice}, Discount percentage: ${scrapedProduct.discountPercentage}%`); this.logger.log(`Upserting product in database: ${scrapedProduct.name}`); try { const product = await this.prisma.product.upsert({ where: { name_sourceId: { name: scrapedProduct.name, sourceId: sourceId, }, }, create: { name: scrapedProduct.name, description: scrapedProduct.description, category: scrapedProduct.category, availability: scrapedProduct.availability, sourceId: sourceId, prices: { create: { regularPrice: scrapedProduct.regularPrice, discountedPrice: scrapedProduct.discountedPrice, discountPercentage: scrapedProduct.discountPercentage, unitPrice: scrapedProduct.unitPrice, promotionType: scrapedProduct.promotionType, promotionStart: scrapedProduct.promotionStart, promotionEnd: scrapedProduct.promotionEnd, sourceId: sourceId, }, }, }, update: { availability: scrapedProduct.availability, description: scrapedProduct.description, category: scrapedProduct.category, prices: { create: { regularPrice: scrapedProduct.regularPrice, discountedPrice: scrapedProduct.discountedPrice, discountPercentage: scrapedProduct.discountPercentage, unitPrice: scrapedProduct.unitPrice, promotionType: scrapedProduct.promotionType, promotionStart: scrapedProduct.promotionStart, promotionEnd: scrapedProduct.promotionEnd, sourceId: sourceId, }, }, }, }); processedProducts++; this.logger.log(`Successfully processed product: ${product.name}`); this.logger.log(`Product ID: ${product.id}, Source ID: ${product.sourceId}`); } catch (dbError) { this.logger.error(`Database error while upserting product: ${scrapedProduct.name}`); this.logger.error(dbError instanceof Error ? dbError.message : 'Unknown database error'); throw dbError; } } catch (error) { if (error instanceof Error) { this.logger.error(`Failed to process row: ${error.message}`); } else { this.logger.error('Failed to process row: Unknown error'); } } } const endTime = new Date(); const duration = (endTime.getTime() - startTime.getTime()) / 1000; this.logger.log(`Scraping summary for source ID ${sourceId}:`); this.logger.log(`- Total rows found: ${rows.length}`); this.logger.log(`- Successfully processed products: ${processedProducts}`); this.logger.log(`- Skipped products: ${rows.length - processedProducts}`); this.logger.log(`- Duration: ${duration.toFixed(2)} seconds`); this.logger.log(`Scraping completed successfully for source ID ${sourceId}`); } catch (error) { if (error instanceof Error) { this.logger.error(`Failed to scrape products from source ${sourceId}: ${error.message}`); } else { this.logger.error(`Failed to scrape products from source ${sourceId}: Unknown error`); } throw error; } } async manualScrape(sourceId) { const source = await this.prisma.source.findUnique({ where: { id: sourceId }, }); if (!source) { throw new Error(`Source with ID ${sourceId} not found`); } return this.scrapeProducts(source.url, source.id); } }; exports.ScraperService = ScraperService; __decorate([ (0, schedule_1.Cron)(schedule_1.CronExpression.EVERY_HOUR), __metadata("design:type", Function), __metadata("design:paramtypes", []), __metadata("design:returntype", Promise) ], ScraperService.prototype, "scrapeAllSources", null); exports.ScraperService = ScraperService = ScraperService_1 = __decorate([ (0, common_1.Injectable)(), __metadata("design:paramtypes", [prisma_service_1.PrismaService, config_1.ConfigService]) ], ScraperService); //# sourceMappingURL=scraper.service.js.map