245 lines
10 KiB
JavaScript
245 lines
10 KiB
JavaScript
"use strict";
|
|
/**
 * TypeScript decorator helper: applies `decorators` to a class (2 arguments)
 * or to a class member (3 or 4 arguments). An already-installed
 * `this.__decorate` is reused when present.
 *
 * @param {Function[]} decorators - Decorators to apply; falsy entries are skipped.
 * @param {object|Function} target - The class (class decorators) or the object owning the member.
 * @param {string|symbol} [key] - Member name, for member decorators.
 * @param {PropertyDescriptor|null} [desc] - Member descriptor; `null` means
 *   "look it up on `target`".
 * @returns {*} The decorated class or descriptor (whatever the last applied
 *   decorator produced, or the initial value if none returned anything).
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    // The number of arguments actually passed selects the decoration mode.
    const argCount = arguments.length;
    let result;
    if (argCount < 3) {
        result = target;
    }
    else {
        if (desc === null) {
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        // Delegate to a Reflect.decorate implementation when one is installed.
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        // Apply from last to first, so the decorator listed first runs last.
        for (let i = decorators.length - 1; i >= 0; i--) {
            const decorator = decorators[i];
            if (!decorator) {
                continue;
            }
            let decorated;
            if (argCount < 3) {
                decorated = decorator(result);
            }
            else if (argCount > 3) {
                decorated = decorator(target, key, result);
            }
            else {
                decorated = decorator(target, key);
            }
            // A decorator that returns nothing leaves the current value in place.
            result = decorated || result;
        }
    }
    // Only the 4-argument form (re)defines the member on the target.
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
/**
 * TypeScript metadata helper: builds a metadata decorator through
 * `Reflect.metadata` when that function is installed. An already-installed
 * `this.__metadata` is reused when present.
 *
 * @param {*} k - Metadata key (e.g. "design:type").
 * @param {*} v - Metadata value.
 * @returns {*} Whatever `Reflect.metadata(k, v)` returns, or `undefined` when
 *   `Reflect.metadata` is not available.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    if (typeof Reflect === "object" && typeof Reflect.metadata === "function") {
        return Reflect.metadata(k, v);
    }
    return undefined;
};
|
|
// Alias for the class; it is assigned together with the class binding so that
// code inside the class body can refer to the class itself.
var ScraperService_1;
// Flag the CommonJS exports object as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export up front; the real value is assigned once the class exists.
exports.ScraperService = void 0;
|
|
const common_1 = require("@nestjs/common");
|
|
const prisma_service_1 = require("../prisma/prisma.service");
|
|
const schedule_1 = require("@nestjs/schedule");
|
|
const cheerio = require("cheerio");
|
|
const axios_1 = require("axios");
|
|
const config_1 = require("@nestjs/config");
|
|
/**
 * Scrapes product/price tables from the configured sources and stores the
 * results through Prisma.
 *
 * Each source ID has its own table selector and column layout; only the IDs
 * handled in `getTableSelector` / `parseProductRow` are supported.
 */
let ScraperService = ScraperService_1 = class ScraperService {
    /** Upper bound, in milliseconds, for a single source page download. */
    static REQUEST_TIMEOUT_MS = 30000;
    prisma;
    config;
    /**
     * @param prisma - Prisma client wrapper used for all database access.
     * @param config - Configuration service (stored; not read by the methods in this class).
     */
    constructor(prisma, config) {
        this.prisma = prisma;
        this.config = config;
    }
    logger = new common_1.Logger(ScraperService_1.name);
    /**
     * Extracts a number from a price label by dropping every character that is
     * not a digit or a dot.
     *
     * NOTE(review): a decimal comma ("1,99") is stripped and therefore read as
     * 199 — confirm the scraped sites always use a dot.
     *
     * @param {string} price - Raw cell text.
     * @returns {number} The parsed value, or 0 when nothing numeric is left.
     */
    parsePrice(price) {
        const cleanPrice = price.replace(/[^\d.]/g, '');
        const value = parseFloat(cleanPrice);
        // Covers both "no digits at all" and residue such as "." that parses to NaN.
        return Number.isNaN(value) ? 0 : value;
    }
    /**
     * Parses a `day/month/year` string into a local-time Date.
     *
     * @param {string} dateStr - Text such as "15/03/2024".
     * @returns {Date|null} The date, or `null` when the input is not a string
     *   or does not yield a valid date (empty text, missing parts, non-numeric parts).
     */
    parseDate(dateStr) {
        if (typeof dateStr !== 'string') {
            return null;
        }
        const [day, month, year] = dateStr.split('/').map(Number);
        const parsed = new Date(year, month - 1, day);
        // `new Date(NaN, ...)` does not throw; it yields an Invalid Date that must
        // not reach the database layer.
        return Number.isNaN(parsed.getTime()) ? null : parsed;
    }
    /**
     * Parses a `start - end` period whose two halves are `day/month/year` dates.
     *
     * @param {string} dateRange - Text such as "01/02/2024 - 05/02/2024".
     * @returns {{start: (Date|null), end: (Date|null)}} Each bound is `null`
     *   when it is missing or unparseable.
     */
    parseDateRange(dateRange) {
        if (typeof dateRange !== 'string') {
            return { start: null, end: null };
        }
        const [startStr, endStr] = dateRange.split('-').map((s) => s.trim());
        return {
            start: this.parseDate(startStr),
            end: this.parseDate(endStr),
        };
    }
    /**
     * Returns the selector that locates the product table for a source.
     *
     * @param {number} sourceId - Source identifier.
     * @returns {string} Selector string.
     * @throws {Error} When the source ID has no known table layout.
     */
    getTableSelector(sourceId) {
        switch (sourceId) {
            case 2:
                return 'table:eq(2)';
            case 12:
                return '#product-table';
            default:
                throw new Error(`Unsupported source ID: ${sourceId}`);
        }
    }
    /**
     * Computes the discount as a whole-number percentage of the regular price.
     *
     * @param {number} regularPrice
     * @param {number|null} discountedPrice
     * @returns {number|null} Rounded percentage, or `null` when either price is
     *   missing or zero.
     */
    calculateDiscountPercentage(regularPrice, discountedPrice) {
        if (!discountedPrice || !regularPrice) {
            return null;
        }
        return Math.round(((regularPrice - discountedPrice) / regularPrice) * 100);
    }
    /**
     * Converts one table row into a normalized product record, using the
     * column layout of the given source.
     *
     * @param $ - Loaded cheerio document function.
     * @param rowElement - The `<tr>` element to read.
     * @param {number} sourceId - Source identifier selecting the column layout.
     * @returns {object} Record with name, prices, availability, description,
     *   category and promotion fields; absent optional values are `null`.
     * @throws {Error} When the source ID has no known column layout.
     */
    parseProductRow($, rowElement, sourceId) {
        const cells = $(rowElement).find('td');
        const getText = (index) => cells.eq(index).text().trim();
        if (sourceId === 2) {
            const { start, end } = this.parseDateRange(getText(9));
            return {
                name: getText(0),
                regularPrice: this.parsePrice(getText(1)),
                unitPrice: getText(2) || null,
                // Available only when the cell holds the Cyrillic word 'да' ("yes").
                availability: getText(3).toLowerCase() === 'да',
                description: getText(4) || '',
                category: 'Uncategorized',
                // No discount is recorded as null (not 0), matching source 12.
                discountedPrice: this.parsePrice(getText(6)) || null,
                discountPercentage: parseFloat(getText(7)) || null,
                promotionType: getText(8) || null,
                promotionStart: start,
                promotionEnd: end,
            };
        }
        if (sourceId === 12) {
            const name = getText(1);
            const description = getText(2) || '';
            const regularPrice = this.parsePrice(getText(3));
            const discountedPrice = this.parsePrice(getText(4)) || null;
            return {
                name,
                regularPrice,
                unitPrice: null,
                availability: true,
                description,
                category: 'Uncategorized',
                discountedPrice,
                discountPercentage: this.calculateDiscountPercentage(regularPrice, discountedPrice),
                promotionType: null,
                promotionStart: null,
                promotionEnd: null,
            };
        }
        throw new Error(`Unsupported source ID: ${sourceId}`);
    }
    /**
     * Scrapes every source stored in the database, one after another.
     * A failing source is logged and does not stop the remaining ones; a
     * failure to load the source list is logged as well. Never rejects.
     *
     * This method is registered as an hourly cron job by the decorator call
     * that follows the class definition.
     *
     * @returns {Promise<void>}
     */
    async scrapeAllSources() {
        try {
            const sources = await this.prisma.source.findMany();
            for (const source of sources) {
                try {
                    await this.scrapeProducts(source.url, source.id);
                    this.logger.log(`Successfully scraped data from source: ${source.name}`);
                }
                catch (error) {
                    this.logger.error(`Failed to scrape source ${source.name}:`, error);
                }
            }
        }
        catch (error) {
            this.logger.error('Failed to fetch sources:', error);
        }
    }
    /**
     * Downloads a source page, parses its product table and upserts every
     * product together with a new price entry.
     *
     * Rows without a product name are skipped. A row that fails to parse or
     * to save is logged and skipped; page-level failures (download, missing
     * table, unsupported source) are logged and rethrown.
     *
     * @param {string} sourceUrl - Page to download.
     * @param {number} sourceId - Source identifier (selects the table layout).
     * @returns {Promise<void>}
     * @throws The original error of any page-level failure.
     */
    async scrapeProducts(sourceUrl, sourceId) {
        const requestConfig = {
            // Without a timeout a stalled server would block the whole scrape run.
            timeout: ScraperService_1.REQUEST_TIMEOUT_MS,
            headers: {
                Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0',
            },
        };
        try {
            // Resolve the layout first so an unsupported source costs no HTTP request.
            const tableSelector = this.getTableSelector(sourceId);
            this.logger.log(`Fetching data from URL: ${sourceUrl}`);
            const response = await axios_1.default.get(sourceUrl, requestConfig);
            const $ = cheerio.load(response.data);
            const productTable = $(tableSelector);
            if (!productTable.length) {
                throw new Error('Product table not found');
            }
            // The first row is skipped (treated as the header row).
            const rows = productTable.find('tr').slice(1);
            this.logger.log(`Found ${rows.length} product rows`);
            let processedProducts = 0;
            for (const row of rows.toArray()) {
                try {
                    const scrapedProduct = this.parseProductRow($, row, sourceId);
                    if (!scrapedProduct.name) {
                        continue;
                    }
                    this.logger.log(`Processing product: ${scrapedProduct.name}`);
                    const product = await this.saveProduct(scrapedProduct, sourceId);
                    processedProducts++;
                    this.logger.log(`Successfully processed product: ${product.name}`);
                }
                catch (error) {
                    this.logger.error(`Failed to process row: ${this.describeError(error)}`);
                }
            }
            this.logger.log(`Successfully processed ${processedProducts} products`);
        }
        catch (error) {
            this.logger.error(`Failed to scrape products from source ${sourceId}: ${this.describeError(error)}`);
            throw error;
        }
    }
    /**
     * Scrapes a single source on demand.
     *
     * @param {number} sourceId - ID of the source row to scrape.
     * @returns {Promise<void>}
     * @throws {Error} When no source with that ID exists, or when scraping fails.
     */
    async manualScrape(sourceId) {
        const source = await this.prisma.source.findUnique({
            where: { id: sourceId },
        });
        if (!source) {
            throw new Error(`Source with ID ${sourceId} not found`);
        }
        return this.scrapeProducts(source.url, source.id);
    }
    /**
     * Helper: creates or updates the product identified by (name, sourceId)
     * and attaches a new price entry in both cases.
     */
    saveProduct(scrapedProduct, sourceId) {
        const { name, description, category, availability } = scrapedProduct;
        return this.prisma.product.upsert({
            where: {
                name_sourceId: { name, sourceId },
            },
            create: {
                name,
                description,
                category,
                availability,
                sourceId,
                prices: { create: this.buildPriceRecord(scrapedProduct, sourceId) },
            },
            update: {
                availability,
                description,
                category,
                prices: { create: this.buildPriceRecord(scrapedProduct, sourceId) },
            },
        });
    }
    /** Helper: picks the price/promotion fields of a scraped product for one price entry. */
    buildPriceRecord(scrapedProduct, sourceId) {
        return {
            regularPrice: scrapedProduct.regularPrice,
            discountedPrice: scrapedProduct.discountedPrice,
            discountPercentage: scrapedProduct.discountPercentage,
            unitPrice: scrapedProduct.unitPrice,
            promotionType: scrapedProduct.promotionType,
            promotionStart: scrapedProduct.promotionStart,
            promotionEnd: scrapedProduct.promotionEnd,
            sourceId,
        };
    }
    /** Helper: message of an Error, or 'Unknown error' for any other thrown value. */
    describeError(error) {
        return error instanceof Error ? error.message : 'Unknown error';
    }
};
|
|
exports.ScraperService = ScraperService;
|
|
__decorate([
|
|
(0, schedule_1.Cron)(schedule_1.CronExpression.EVERY_HOUR),
|
|
__metadata("design:type", Function),
|
|
__metadata("design:paramtypes", []),
|
|
__metadata("design:returntype", Promise)
|
|
], ScraperService.prototype, "scrapeAllSources", null);
|
|
exports.ScraperService = ScraperService = ScraperService_1 = __decorate([
|
|
(0, common_1.Injectable)(),
|
|
__metadata("design:paramtypes", [prisma_service_1.PrismaService,
|
|
config_1.ConfigService])
|
|
], ScraperService);
|
|
//# sourceMappingURL=scraper.service.js.map
|