// sporedimk/price-compare-api/dist/scraper/scraper.service.js
// 2025-05-06 10:01:03 +02:00

// 245 lines
// 10 KiB
// JavaScript

"use strict";
// Compiler-emitted decorator helper. Reuses an existing `this.__decorate` when one is
// present; otherwise defines the implementation below.
//
// decorators: array of decorator functions (falsy entries are skipped in the fallback loop).
// target:     object the decorators are applied to.
// key, desc:  optional property key and descriptor; the number of arguments actually
//             passed (`c`) selects how each decorator is invoked.
// Returns the final value of `r` (see below).
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
// c: argument count. r: starts as `target` when fewer than 3 arguments were passed;
// otherwise the descriptor — looked up from target/key when `desc` is exactly null, else `desc` itself.
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
// Delegate to Reflect.decorate when the runtime provides it.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
// Fallback: apply decorators from last to first. Call shape is d(r) for c < 3,
// d(target, key, r) for c > 3, and d(target, key) otherwise; a falsy return keeps the previous r.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
// With more than 3 arguments and a truthy r, define r as the property descriptor on target; r is returned either way.
return c > 3 && r && Object.defineProperty(target, key, r), r;
};
// Compiler-emitted metadata helper. Reuses an existing `this.__metadata` when present;
// otherwise forwards to Reflect.metadata if the runtime exposes it, and returns
// undefined when it does not.
var __metadata = (this && this.__metadata) || function (k, v) {
    const hasMetadataApi = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    if (!hasMetadataApi) {
        return undefined;
    }
    return Reflect.metadata(k, v);
};
// Alias that is assigned together with the class binding further down, so code
// inside the class body (e.g. the logger field) can refer to the class itself.
var ScraperService_1;
// Flag the exports object as coming from a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder value; the export is assigned the real class after the class definition.
exports.ScraperService = void 0;
const common_1 = require("@nestjs/common");
const prisma_service_1 = require("../prisma/prisma.service");
const schedule_1 = require("@nestjs/schedule");
const cheerio = require("cheerio");
const axios_1 = require("axios");
const config_1 = require("@nestjs/config");
/**
 * Scrapes product/price tables from the configured sources and stores the
 * results through Prisma. Each successful row upserts the product (keyed by
 * name + source id) and appends a new price record.
 */
let ScraperService = ScraperService_1 = class ScraperService {
    prisma;
    config;
    /**
     * @param prisma Prisma client wrapper; this class uses `source.findMany`,
     *   `source.findUnique` and `product.upsert` on it.
     * @param config Injected configuration service (stored, not read by this class).
     */
    constructor(prisma, config) {
        this.prisma = prisma;
        this.config = config;
    }
    logger = new common_1.Logger(ScraperService_1.name);
    /**
     * Extracts a numeric price from free-form cell text.
     *
     * Handles both "1,299.50" and "1.299,50" styles: when both separators are
     * present the last one is the decimal separator; a lone comma followed by
     * one or two digits is a decimal comma; repeated dots are thousands
     * separators. A single dot is always treated as a decimal point.
     *
     * @param {string} price Raw cell text, e.g. "89,90 ден.".
     * @returns {number} The parsed price, or 0 when no number can be extracted.
     */
    parsePrice(price) {
        if (typeof price !== 'string') {
            return typeof price === 'number' && Number.isFinite(price) ? price : 0;
        }
        // Keep digits and both separators, then drop separators that merely trail
        // the number (such as the abbreviation dot in "120 ден.").
        const digits = price.replace(/[^\d.,]/g, '').replace(/[.,]+$/, '');
        if (!digits) {
            return 0;
        }
        const lastDot = digits.lastIndexOf('.');
        const lastComma = digits.lastIndexOf(',');
        let normalized = digits;
        if (lastDot !== -1 && lastComma !== -1) {
            // Both separators present: whichever comes last marks the decimals.
            const decimalIndex = Math.max(lastDot, lastComma);
            const integerPart = digits.slice(0, decimalIndex).replace(/[.,]/g, '');
            normalized = `${integerPart}.${digits.slice(decimalIndex + 1)}`;
        }
        else if (lastComma !== -1) {
            const isSingleComma = digits.indexOf(',') === lastComma;
            const fractionLength = digits.length - lastComma - 1;
            normalized = isSingleComma && fractionLength <= 2
                ? digits.replace(',', '.')
                : digits.replace(/,/g, '');
        }
        else if (lastDot !== digits.indexOf('.')) {
            // More than one dot cannot be a decimal point; treat them as grouping.
            normalized = digits.replace(/\./g, '');
        }
        const value = parseFloat(normalized);
        return Number.isFinite(value) ? value : 0;
    }
    /**
     * Parses a `day/month/year` string into a local-time Date.
     *
     * @param {string} dateStr Date text such as "15/03/2024".
     * @returns {Date|null} The date, or null when the input is not a string,
     *   does not have exactly three integer parts, or does not denote a real
     *   calendar day (e.g. "31/02/2024", or a year the Date constructor would
     *   remap such as a two-digit year).
     */
    parseDate(dateStr) {
        if (typeof dateStr !== 'string') {
            return null;
        }
        const parts = dateStr.split('/').map((part) => Number(part.trim()));
        if (parts.length !== 3 || !parts.every((part) => Number.isInteger(part))) {
            return null;
        }
        const [day, month, year] = parts;
        const date = new Date(year, month - 1, day);
        // `new Date` silently rolls over out-of-range parts and maps years 0-99
        // to 1900-1999; a round-trip comparison rejects both (and Invalid Date).
        const isExactDay = date.getFullYear() === year
            && date.getMonth() === month - 1
            && date.getDate() === day;
        return isExactDay ? date : null;
    }
    /**
     * Parses a "start - end" range of `day/month/year` dates. Hyphen, en dash
     * and em dash are accepted as the range separator.
     *
     * @param {string} dateRange Range text such as "01/03/2024 - 15/03/2024".
     * @returns {{start: Date|null, end: Date|null}} Each side is null when it is
     *   missing or cannot be parsed.
     */
    parseDateRange(dateRange) {
        if (typeof dateRange !== 'string') {
            return { start: null, end: null };
        }
        const [startStr, endStr] = dateRange.split(/[-–—]/).map((s) => s.trim());
        return {
            start: this.parseDate(startStr),
            end: this.parseDate(endStr),
        };
    }
    /**
     * Returns the selector locating the product table for a source.
     *
     * @param {number} sourceId Source identifier (2 and 12 are supported).
     * @returns {string} Selector string.
     * @throws {Error} When the source id has no known table layout.
     */
    getTableSelector(sourceId) {
        switch (sourceId) {
            case 2:
                return 'table:eq(2)';
            case 12:
                return '#product-table';
            default:
                throw new Error(`Unsupported source ID: ${sourceId}`);
        }
    }
    /**
     * Computes the discount as a whole-number percentage of the regular price.
     *
     * @param {number} regularPrice
     * @param {number|null} discountedPrice
     * @returns {number|null} Rounded percentage, or null when either price is
     *   missing or zero.
     */
    calculateDiscountPercentage(regularPrice, discountedPrice) {
        if (!discountedPrice || !regularPrice)
            return null;
        return Math.round(((regularPrice - discountedPrice) / regularPrice) * 100);
    }
    /**
     * Converts one table row into a plain product record, using the column
     * layout of the given source.
     *
     * @param $ Cheerio-style query function for the loaded document.
     * @param rowElement The `tr` element to read.
     * @param {number} sourceId Source identifier (2 or 12).
     * @returns {object} Record with name, prices, availability, description,
     *   category and promotion fields.
     * @throws {Error} When the source id is not supported.
     */
    parseProductRow($, rowElement, sourceId) {
        const cells = $(rowElement).find('td');
        const getText = (index) => cells.eq(index).text().trim();
        if (sourceId === 2) {
            const regularPrice = this.parsePrice(getText(1));
            // A missing/zero discounted price is stored as null, matching source 12,
            // rather than as a literal price of 0.
            const discountedPrice = this.parsePrice(getText(6)) || null;
            const { start, end } = this.parseDateRange(getText(9));
            return {
                name: getText(0),
                regularPrice,
                unitPrice: getText(2) || null,
                availability: getText(3).toLowerCase() === 'да',
                description: getText(4) || '',
                category: 'Uncategorized',
                discountedPrice,
                // Prefer the percentage shown in the table; derive it from the two
                // prices when that column is empty or not numeric.
                discountPercentage: parseFloat(getText(7))
                    || this.calculateDiscountPercentage(regularPrice, discountedPrice),
                promotionType: getText(8) || null,
                promotionStart: start,
                promotionEnd: end,
            };
        }
        if (sourceId === 12) {
            const regularPrice = this.parsePrice(getText(3));
            const discountedPrice = this.parsePrice(getText(4)) || null;
            return {
                name: getText(1),
                regularPrice,
                unitPrice: null,
                availability: true,
                description: getText(2) || '',
                category: 'Uncategorized',
                discountedPrice,
                discountPercentage: this.calculateDiscountPercentage(regularPrice, discountedPrice),
                promotionType: null,
                promotionStart: null,
                promotionEnd: null,
            };
        }
        throw new Error(`Unsupported source ID: ${sourceId}`);
    }
    /**
     * Scrapes every source returned by `prisma.source.findMany()`, one after
     * another. A failure in one source is logged and does not stop the others;
     * a failure to list the sources is logged and swallowed.
     *
     * @returns {Promise<void>}
     */
    async scrapeAllSources() {
        try {
            const sources = await this.prisma.source.findMany();
            for (const source of sources) {
                try {
                    await this.scrapeProducts(source.url, source.id);
                    this.logger.log(`Successfully scraped data from source: ${source.name}`);
                }
                catch (error) {
                    this.logger.error(`Failed to scrape source ${source.name}:`, error);
                }
            }
        }
        catch (error) {
            this.logger.error('Failed to fetch sources:', error);
        }
    }
    /**
     * Downloads a source page, parses its product table and upserts each row.
     * Rows that fail to parse or save are logged and skipped; rows without a
     * name are skipped silently.
     *
     * @param {string} sourceUrl Page to fetch.
     * @param {number} sourceId Source identifier selecting the table layout.
     * @returns {Promise<void>}
     * @throws Rethrows fetch errors, unsupported source ids and a missing
     *   product table after logging them.
     */
    async scrapeProducts(sourceUrl, sourceId) {
        const requestConfig = {
            headers: {
                Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0',
            },
            // Without a timeout a stalled server would keep this request (and the
            // scrape run awaiting it) pending indefinitely.
            timeout: 30000,
        };
        try {
            this.logger.log(`Fetching data from URL: ${sourceUrl}`);
            const response = await axios_1.default.get(sourceUrl, requestConfig);
            const $ = cheerio.load(response.data);
            const productTable = $(this.getTableSelector(sourceId));
            if (!productTable.length) {
                throw new Error('Product table not found');
            }
            // The first row is skipped (treated as the header row).
            const rows = productTable.find('tr').slice(1);
            this.logger.log(`Found ${rows.length} product rows`);
            let processedProducts = 0;
            for (const row of rows.toArray()) {
                try {
                    const scrapedProduct = this.parseProductRow($, row, sourceId);
                    if (!scrapedProduct.name)
                        continue;
                    this.logger.log(`Processing product: ${scrapedProduct.name}`);
                    // The same price record is attached whether the product is created
                    // or already exists, so build it once.
                    const priceRecord = {
                        regularPrice: scrapedProduct.regularPrice,
                        discountedPrice: scrapedProduct.discountedPrice,
                        discountPercentage: scrapedProduct.discountPercentage,
                        unitPrice: scrapedProduct.unitPrice,
                        promotionType: scrapedProduct.promotionType,
                        promotionStart: scrapedProduct.promotionStart,
                        promotionEnd: scrapedProduct.promotionEnd,
                        sourceId: sourceId,
                    };
                    const product = await this.prisma.product.upsert({
                        where: {
                            name_sourceId: {
                                name: scrapedProduct.name,
                                sourceId: sourceId,
                            },
                        },
                        create: {
                            name: scrapedProduct.name,
                            description: scrapedProduct.description,
                            category: scrapedProduct.category,
                            availability: scrapedProduct.availability,
                            sourceId: sourceId,
                            prices: { create: priceRecord },
                        },
                        update: {
                            availability: scrapedProduct.availability,
                            description: scrapedProduct.description,
                            category: scrapedProduct.category,
                            prices: { create: priceRecord },
                        },
                    });
                    processedProducts++;
                    this.logger.log(`Successfully processed product: ${product.name}`);
                }
                catch (error) {
                    if (error instanceof Error) {
                        this.logger.error(`Failed to process row: ${error.message}`);
                    }
                    else {
                        this.logger.error('Failed to process row: Unknown error');
                    }
                }
            }
            this.logger.log(`Successfully processed ${processedProducts} products`);
        }
        catch (error) {
            if (error instanceof Error) {
                this.logger.error(`Failed to scrape products from source ${sourceId}: ${error.message}`);
            }
            else {
                this.logger.error(`Failed to scrape products from source ${sourceId}: Unknown error`);
            }
            throw error;
        }
    }
    /**
     * Scrapes a single source on demand.
     *
     * @param {number} sourceId Identifier of the source to scrape.
     * @returns {Promise<void>}
     * @throws {Error} When no source with that id exists, or when scraping fails.
     */
    async manualScrape(sourceId) {
        const source = await this.prisma.source.findUnique({
            where: { id: sourceId },
        });
        if (!source) {
            throw new Error(`Source with ID ${sourceId} not found`);
        }
        return this.scrapeProducts(source.url, source.id);
    }
};
// Export the class as defined above; the export is reassigned below once the
// class-level decorators have been applied.
exports.ScraperService = ScraperService;
// Method decorators for `scrapeAllSources`: the Cron decorator with the
// EVERY_HOUR expression, plus emitted design-time type metadata
// (no parameters, returns a Promise).
__decorate([
(0, schedule_1.Cron)(schedule_1.CronExpression.EVERY_HOUR),
__metadata("design:type", Function),
__metadata("design:paramtypes", []),
__metadata("design:returntype", Promise)
], ScraperService.prototype, "scrapeAllSources", null);
// Class decorators: Injectable, plus constructor parameter type metadata
// (PrismaService, ConfigService). The decorated result replaces the local
// binding, the internal alias and the export in a single assignment.
exports.ScraperService = ScraperService = ScraperService_1 = __decorate([
(0, common_1.Injectable)(),
__metadata("design:paramtypes", [prisma_service_1.PrismaService,
config_1.ConfigService])
], ScraperService);
//# sourceMappingURL=scraper.service.js.map