"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __metadata = (this && this.__metadata) || function (k, v) { if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); }; var ScraperService_1; Object.defineProperty(exports, "__esModule", { value: true }); exports.ScraperService = void 0; const common_1 = require("@nestjs/common"); const prisma_service_1 = require("../prisma/prisma.service"); const schedule_1 = require("@nestjs/schedule"); const cheerio = require("cheerio"); const axios_1 = require("axios"); const config_1 = require("@nestjs/config"); let ScraperService = ScraperService_1 = class ScraperService { prisma; config; constructor(prisma, config) { this.prisma = prisma; this.config = config; } logger = new common_1.Logger(ScraperService_1.name); parsePrice(price) { const cleanPrice = price.replace(/[^\d.]/g, ''); return cleanPrice ? parseFloat(cleanPrice) : 0; } parseDate(dateStr) { try { const [day, month, year] = dateStr.split('/').map(Number); return new Date(year, month - 1, day); } catch { return null; } } parseDateRange(dateRange) { try { const [startStr, endStr] = dateRange.split('-').map((s) => s.trim()); return { start: this.parseDate(startStr), end: this.parseDate(endStr), }; } catch { return { start: null, end: null }; } } getTableSelector(sourceId) { switch (sourceId) { case 2: return 'table:eq(2)'; case 12: return '#product-table'; default: throw new Error(`Unsupported source ID: ${sourceId}`); } } calculateDiscountPercentage(regularPrice, discountedPrice) { if (!discountedPrice || !regularPrice) return null; return Math.round(((regularPrice - discountedPrice) / regularPrice) * 100); } parseProductRow($, rowElement, sourceId) { const cells = $(rowElement).find('td'); const getText = (index) => { const cell = cells.eq(index); return cell.text().trim(); }; if (sourceId === 2) { const promotionPeriod = getText(9); const { start, end } = this.parseDateRange(promotionPeriod); return { name: getText(0), regularPrice: this.parsePrice(getText(1)), unitPrice: getText(2) || null, availability: getText(3).toLowerCase() === 'да', description: getText(4) || '', category: 'Uncategorized', discountedPrice: this.parsePrice(getText(6)), discountPercentage: parseFloat(getText(7)) || null, promotionType: getText(8) || null, promotionStart: start, promotionEnd: end, }; } else if (sourceId === 12) { const name = getText(1); const regularPrice = this.parsePrice(getText(3)); const description = getText(2) || ''; const discountedPrice = this.parsePrice(getText(4)) || null; return { name, regularPrice, unitPrice: null, availability: true, description, category: 'Uncategorized', discountedPrice, discountPercentage: this.calculateDiscountPercentage(regularPrice, discountedPrice), promotionType: null, promotionStart: null, promotionEnd: null, }; } throw new Error(`Unsupported source ID: ${sourceId}`); } async scrapeAllSources() { try { const sources = await this.prisma.source.findMany(); for (const source of sources) { try { await this.scrapeProducts(source.url, source.id); this.logger.log(`Successfully scraped data from source: ${source.name}`); } catch (error) { this.logger.error(`Failed to scrape source ${source.name}:`, error); continue; } } } catch (error) { this.logger.error('Failed to fetch sources:', error); } } async scrapeProducts(sourceUrl, sourceId) { const config = { headers: { Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0', }, }; try { this.logger.log(`Fetching data from URL: ${sourceUrl}`); const response = await axios_1.default.get(sourceUrl, config); const $ = cheerio.load(response.data); const productTable = $(this.getTableSelector(sourceId)); if (!productTable.length) { throw new Error('Product table not found'); } const rows = productTable.find('tr').slice(1); this.logger.log(`Found ${rows.length} product rows`); let processedProducts = 0; for (const row of rows.toArray()) { try { const scrapedProduct = this.parseProductRow($, row, sourceId); if (!scrapedProduct.name) continue; this.logger.log(`Processing product: ${scrapedProduct.name}`); const product = await this.prisma.product.upsert({ where: { name_sourceId: { name: scrapedProduct.name, sourceId: sourceId, }, }, create: { name: scrapedProduct.name, description: scrapedProduct.description, category: scrapedProduct.category, availability: scrapedProduct.availability, sourceId: sourceId, prices: { create: { regularPrice: scrapedProduct.regularPrice, discountedPrice: scrapedProduct.discountedPrice, discountPercentage: scrapedProduct.discountPercentage, unitPrice: scrapedProduct.unitPrice, promotionType: scrapedProduct.promotionType, promotionStart: scrapedProduct.promotionStart, promotionEnd: scrapedProduct.promotionEnd, sourceId: sourceId, }, }, }, update: { availability: scrapedProduct.availability, description: scrapedProduct.description, category: scrapedProduct.category, prices: { create: { regularPrice: scrapedProduct.regularPrice, discountedPrice: scrapedProduct.discountedPrice, discountPercentage: scrapedProduct.discountPercentage, unitPrice: scrapedProduct.unitPrice, promotionType: scrapedProduct.promotionType, promotionStart: scrapedProduct.promotionStart, promotionEnd: scrapedProduct.promotionEnd, sourceId: sourceId, }, }, }, }); processedProducts++; this.logger.log(`Successfully processed product: ${product.name}`); } catch (error) { if (error instanceof Error) { this.logger.error(`Failed to process row: ${error.message}`); } else { this.logger.error('Failed to process row: Unknown error'); } } } this.logger.log(`Successfully processed ${processedProducts} products`); } catch (error) { if (error instanceof Error) { this.logger.error(`Failed to scrape products from source ${sourceId}: ${error.message}`); } else { this.logger.error(`Failed to scrape products from source ${sourceId}: Unknown error`); } throw error; } } async manualScrape(sourceId) { const source = await this.prisma.source.findUnique({ where: { id: sourceId }, }); if (!source) { throw new Error(`Source with ID ${sourceId} not found`); } return this.scrapeProducts(source.url, source.id); } }; exports.ScraperService = ScraperService; __decorate([ (0, schedule_1.Cron)(schedule_1.CronExpression.EVERY_HOUR), __metadata("design:type", Function), __metadata("design:paramtypes", []), __metadata("design:returntype", Promise) ], ScraperService.prototype, "scrapeAllSources", null); exports.ScraperService = ScraperService = ScraperService_1 = __decorate([ (0, common_1.Injectable)(), __metadata("design:paramtypes", [prisma_service_1.PrismaService, config_1.ConfigService]) ], ScraperService); //# sourceMappingURL=scraper.service.js.map