"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __metadata = (this && this.__metadata) || function (k, v) { if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); }; var ScraperService_1; Object.defineProperty(exports, "__esModule", { value: true }); exports.ScraperService = void 0; const common_1 = require("@nestjs/common"); const prisma_service_1 = require("../prisma/prisma.service"); const schedule_1 = require("@nestjs/schedule"); const cheerio = require("cheerio"); const axios_1 = require("axios"); const config_1 = require("@nestjs/config"); let ScraperService = ScraperService_1 = class ScraperService { prisma; config; constructor(prisma, config) { this.prisma = prisma; this.config = config; } logger = new common_1.Logger(ScraperService_1.name); parsePrice(price) { const normalizedPrice = price.replace(',', '.'); const cleanPrice = normalizedPrice.replace(/[^\d.]/g, ''); return cleanPrice ? parseFloat(cleanPrice) : 0; } parseDate(dateStr) { try { const [day, month, year] = dateStr.split('/').map(Number); return new Date(year, month - 1, day); } catch { return null; } } parseDateRange(dateRange) { try { const [startStr, endStr] = dateRange.split('-').map((s) => s.trim()); return { start: this.parseDate(startStr), end: this.parseDate(endStr), }; } catch { return { start: null, end: null }; } } getTableSelector(sourceId) { switch (sourceId) { case 2: return 'table:eq(2)'; case 12: return 'table:first-of-type'; default: throw new Error(`Unsupported source ID: ${sourceId}`); } } parseProductRow($, rowElement, sourceId) { const cells = $(rowElement).find('td'); const getText = (index) => { const cell = cells.eq(index); return cell.text().trim(); }; if (sourceId === 2) { const promotionPeriod = getText(9); const { start, end } = this.parseDateRange(promotionPeriod); return { name: getText(0), regularPrice: this.parsePrice(getText(1)), unitPrice: getText(2) || null, availability: getText(3).toLowerCase() === 'да', description: getText(4) || '', category: 'Uncategorized', discountedPrice: this.parsePrice(getText(6)), discountPercentage: parseFloat(getText(7)) || null, promotionType: getText(8) || null, promotionStart: start, promotionEnd: end, }; } else if (sourceId === 12) { this.logger.log('Parsing DIM product row'); try { const productCode = getText(0); this.logger.log(`Product code: ${productCode}`); const name = getText(1); this.logger.log(`Product name: ${name}`); const regularPrice = this.parsePrice(getText(2)); this.logger.log(`Regular price: ${regularPrice}`); const unitPrice = getText(3) || null; this.logger.log(`Unit price: ${unitPrice}`); const availability = getText(5).toLowerCase() === 'да'; this.logger.log(`Availability: ${availability}`); const originalPrice = this.parsePrice(getText(6)); this.logger.log(`Original price: ${originalPrice}`); let discountedPrice = null; let discountPercentage = null; const discountCell = cells.eq(7); if (discountCell.length) { this.logger.log('Found discount cell'); const discountedPriceElement = discountCell.find('strong'); if (discountedPriceElement.length) { const parsedPrice = this.parsePrice(discountedPriceElement.text()); discountedPrice = parsedPrice > 0 ? parsedPrice : null; this.logger.log(`Discounted price: ${discountedPrice}`); } else { this.logger.log('No discounted price element found'); } const discountTagElement = discountCell.find('.discount-tag'); if (discountTagElement.length) { const discountText = discountTagElement.text(); this.logger.log(`Discount text: ${discountText}`); const percentageMatch = discountText.match(/[\d.]+/); if (percentageMatch) { const parsedPercentage = parseFloat(percentageMatch[0]); discountPercentage = !isNaN(parsedPercentage) ? parsedPercentage : null; this.logger.log(`Discount percentage: ${discountPercentage !== null ? discountPercentage + '%' : 'null'}`); } else { this.logger.log('No discount percentage found in text'); } } else { this.logger.log('No discount tag element found'); } } else { this.logger.log('No discount cell found'); } const promotionType = getText(8) || null; this.logger.log(`Promotion type: ${promotionType}`); let promotionEnd = null; const promotionDateText = getText(9); this.logger.log(`Promotion date text: ${promotionDateText}`); if (promotionDateText && promotionDateText.includes('Важи до:')) { const dateMatch = promotionDateText.match(/(\d{2})\.(\d{2})\.(\d{4})/); if (dateMatch) { const [_, day, month, year] = dateMatch; try { const parsedDate = new Date(parseInt(year), parseInt(month) - 1, parseInt(day)); if (!isNaN(parsedDate.getTime())) { promotionEnd = parsedDate; this.logger.log(`Promotion end date: ${promotionEnd.toISOString()}`); } else { this.logger.log('Invalid date parsed from promotion date text'); } } catch (error) { this.logger.error(`Error parsing date: ${error instanceof Error ? error.message : 'Unknown error'}`); } } else { this.logger.log('No date match found in promotion date text'); } } else { this.logger.log('No promotion end date found'); } this.logger.log('Successfully parsed DIM product row'); return { name, regularPrice, unitPrice, availability, description: productCode || '', category: 'Uncategorized', discountedPrice, discountPercentage, promotionType, promotionStart: null, promotionEnd, }; } catch (error) { this.logger.error(`Error parsing DIM product row: ${error instanceof Error ? error.message : 'Unknown error'}`); throw error; } } throw new Error(`Unsupported source ID: ${sourceId}`); } async scrapeAllSources() { try { const sources = await this.prisma.source.findMany(); for (const source of sources) { try { await this.scrapeProducts(source.url, source.id); this.logger.log(`Successfully scraped data from source: ${source.name}`); } catch (error) { this.logger.error(`Failed to scrape source ${source.name}:`, error); } } } catch (error) { this.logger.error('Failed to fetch sources:', error); } } async scrapeProducts(sourceUrl, sourceId) { const startTime = new Date(); this.logger.log(`Starting scraping process for source ID: ${sourceId}`); const config = { headers: { Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0', }, }; try { this.logger.log(`Fetching data from URL: ${sourceUrl}`); if (sourceId === 12) { this.logger.log(`Processing DIM source (ID: 12)`); this.logger.log('Using mock data for DIM products based on the real data structure'); const mockHtml = `