import EPub from 'epub-gen'
import type {Options as BookDataModel} from 'epub-gen'
import type {BookServiceModel} from "../../models/book-service.model";
import type {BookInfoModel} from "../../models/book-info.model";
import type {BookChaptersModel} from "../../models/book-chapters.model";
import type {BookContentModel} from "../../models/book-content.model";
import {ErrorMsgModel} from "../../models/error-handler-service.model";
import {ErrorHandler} from "../ErrorHandler/index";
import {BrowserService} from "../BrowserService/index"; // Импорт класса BrowserService
import {CommonService} from "../CommonService/index";
import {Config, Prompt} from "prompt-sync";
import * as fs from 'fs';
import * as path from 'path';
/**
 * Pool of desktop and mobile browser User-Agent strings used for rotation.
 * One entry is picked at random for each browser session.
 */
const USER_AGENTS: readonly string[] = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.82',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/117.0',
  'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15',
  'Mozilla/5.0 (iPhone; CPU iPhone OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1',
  'Mozilla/5.0 (iPad; CPU OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1',
  'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
  'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/117.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 OPR/100.0.0.0',
];
/**
 * Collects book metadata, chapter lists and chapter texts through a browser
 * page obtained from `BrowserService`, keeps resumable download progress in
 * `<cwd>/progress` and builds EPUB files from the collected content.
 *
 * NOTE(review): `page` looks like a Puppeteer `Page` (`evaluate`, `reload`,
 * `waitForTimeout`). Callbacks passed to `page.evaluate` are written to be
 * self-contained (they use only DOM globals and their own arguments) because
 * such callbacks are normally executed inside the page, not in Node.js.
 */
export class BookService implements BookServiceModel {
  /** Number of attempts made before a page load is given up on. */
  private static readonly MAX_RETRIES = 3;
  /** File-name suffix of the per-book progress files. */
  private static readonly PROGRESS_SUFFIX = '_progress.json';

  constructor(
    private readonly $errorService: ErrorHandler,
    private readonly $browserService: BrowserService,
    private readonly $commonService: CommonService,
    private readonly $promptSync: (config?: Config) => Prompt
  ) {}

  /**
   * Opens the book page and reads its title, author and cover.
   *
   * Rate-limit failures (HTTP 429) are retried with an exponential pause
   * (30 s, 60 s, ...); any other failure is rethrown. The browser is closed on
   * every path, including a failure while reading the page.
   *
   * @param url Address of the book page.
   * @returns Book metadata; placeholders are used for a missing title, author or cover.
   */
  public async getBookInfo(url: string): Promise<BookInfoModel> {
    for (let attempt = 1; ; attempt++) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- browser type is owned by BrowserService
      let browser: any;
      try {
        const session = await this.$browserService.startBrowser();
        browser = session.browser;
        const page = session.page;
        await page.setUserAgent(this.pickUserAgent());
        await this.$browserService.gotoPage(page, url);

        const bookInfo = await page.evaluate(() => {
          const titleSelectors = ['h1.media-name__main', '.media-name__main', 'div.media-name__body > div.media-name__main', 'h1', '.title', '[data-media-name]'];
          const authorSelectors = ['.media-info-list__value a', 'div.media-info-list__item > div.media-info-list__value > a', '[data-media-author]', '.author a', '.media-info a'];

          // First non-empty text among the candidate selectors, or ''.
          const firstText = (selectors: string[]): string => {
            for (const selector of selectors) {
              const text = document.querySelector(selector)?.textContent?.trim();
              if (text) return text;
            }
            return '';
          };

          let title = firstText(titleSelectors);
          let author = firstText(authorSelectors);

          // Fall back to the second '·'-separated part of the document title.
          if (!title) {
            const pageTitle = document.title;
            if (pageTitle && pageTitle.includes('·')) {
              const parts = pageTitle.split('·');
              if (parts.length > 1) title = parts[1].trim().replace(' • RanobeLIB', '');
            }
          }
          if (!title) title = 'Неизвестное название';
          if (!author) author = 'Неизвестный автор';

          const coverImg = document.querySelector<HTMLImageElement>('div.media-sidebar__cover.paper > img, .media-cover img, .cover img');
          return {
            title,
            author,
            cover: coverImg?.src || 'https://aeroclub-issoire.fr/wp-content/uploads/2020/05/image-not-found.jpg',
            lang: 'ru',
            tocTitle: 'Содержание',
          };
        });

        await this.closeBrowserQuietly(browser);
        return bookInfo;
      } catch (error: unknown) {
        // Close before waiting so no browser stays open during the countdown.
        await this.closeBrowserQuietly(browser);
        const canRetry = attempt < BookService.MAX_RETRIES && this.isRateLimitError(error);
        if (!canRetry) throw error;
        const delaySeconds = 30 * Math.pow(2, attempt - 1);
        console.log(`⏸️ Ошибка 429 при получении информации. Ожидание ${delaySeconds} секунд...`);
        await this.showCountdown(delaySeconds);
      }
    }
  }

  /**
   * Builds the ordered chapter list of a book.
   *
   * Sources are tried in this order: the chapters API, links on the
   * `?section=chapters` page, and finally links on the main book page.
   * When nothing is found the error service is notified.
   *
   * @param url Address of the book page; the book id is taken from its path.
   * @returns Chapters with sequential ids; an empty array if none were found
   *          (assuming `$errorService.throwError` returns rather than throws).
   */
  public async getChapters(url: string): Promise<BookChaptersModel[]> {
    // Validate the URL first so an unusable address never opens a browser.
    const ids = BookService.parseBookId(url);
    if (!ids) {
      this.$errorService.throwError(ErrorMsgModel.ELEMENT_COULD_NOT_BE_FOUND, 'ID книги в URL');
      return [];
    }

    const cleanUrl = url.split('?')[0].replace(/\/$/, '');
    const { browser, page } = await this.$browserService.startBrowser();
    let chapters: BookChaptersModel[] = [];
    try {
      await page.setUserAgent(this.pickUserAgent());

      // Visit the main page first; a failure here is not fatal.
      try {
        await this.$browserService.gotoPage(page, url);
      } catch (error: unknown) {
        console.log(`⚠️ Не удалось загрузить основной URL: ${BookService.describeError(error)}`);
      }

      try {
        chapters = await this.collectChaptersFromChapterPage(page, cleanUrl, ids.bookId, ids.numericBookId);
      } catch {
        console.log('⚠️ Не удалось загрузить страницу глав, пробуем искать на основной странице...');
        try {
          chapters = await this.collectChaptersFromMainPage(page, url);
          console.log(`Найдено ${chapters.length} глав на основной странице`);
        } catch {
          console.log('❌ Не удалось найти главы даже на основной странице');
        }
      }
    } finally {
      await this.closeBrowserQuietly(browser);
    }

    if (chapters.length === 0) this.$errorService.throwError(ErrorMsgModel.ELEMENT_COULD_NOT_BE_FOUND, 'список глав книги');
    return chapters;
  }

  /**
   * Prints a once-per-second countdown on stdout and resolves when it ends.
   *
   * @param seconds Number of seconds to wait; values that are not positive resolve at once.
   */
  private async showCountdown(seconds: number): Promise<void> {
    if (!(seconds > 0)) return;
    for (let remaining = seconds; remaining > 0; remaining--) {
      process.stdout.write(`\r⏳ Ожидание: ${remaining} секунд...`);
      await new Promise<void>((resolve) => setTimeout(resolve, 1000));
    }
    process.stdout.write(`\r✅ Ожидание завершено! \n`);
  }

  /**
   * Loads one chapter page and returns the inner HTML of its text container.
   *
   * Up to {@link BookService.MAX_RETRIES} attempts are made, each with a fresh
   * browser. Before retrying after a rate-limit error (HTTP 429) the method
   * pauses for 20 seconds; no pause is made when no attempt is left.
   *
   * @param url Address of the chapter.
   * @param userAgent User-Agent to present; a random one is picked when omitted or empty.
   * @returns Chapter HTML, or `''` when the content could not be obtained.
   */
  public async getChapterContent(url: string, userAgent?: string): Promise<string> {
    for (let attempt = 1; attempt <= BookService.MAX_RETRIES; attempt++) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- browser type is owned by BrowserService
      let browser: any;
      try {
        const session = await this.$browserService.startBrowser();
        browser = session.browser;
        const page = session.page;
        await page.setUserAgent(userAgent || this.pickUserAgent());
        await this.$browserService.gotoPage(page, url);
        await page.waitForTimeout(2000);
        const html = await this.readChapterHtml(page);
        // A failing close must not discard content that was already fetched.
        await this.closeBrowserQuietly(browser);
        return html;
      } catch (error: unknown) {
        await this.closeBrowserQuietly(browser);
        if (attempt >= BookService.MAX_RETRIES) break;
        if (this.isRateLimitError(error)) {
          console.log(`\n⚠️ Ошибка 429! Ожидание 20 секунд...`);
          await this.showCountdown(20);
        }
      }
    }
    return '';
  }

  /**
   * Downloads every chapter that is not yet present in the saved progress.
   *
   * Progress is written to disk after every fifth stored chapter and once more
   * at the end, so an interrupted run can be resumed.
   *
   * @param bookChapters Chapters that should end up downloaded.
   * @param bookId Key of the progress file.
   * @param url Book address stored in the progress file (defaults to `''`).
   * @param allChapters Full chapter list stored in the progress file (defaults to `bookChapters`).
   * @param options `threads` is the number of chapters fetched concurrently (default 1);
   *                `delay` is the pause in milliseconds after each chapter (default 1000, 0 disables it).
   * @returns A summary object `{ content, hasRateLimitErrors, rateLimitErrorCount,
   *          totalChapters, loadedChapters }` on every path, including the case where
   *          nothing was left to download. The declared array type is kept for
   *          interface compatibility; read the chapters from `.content`.
   *          `hasRateLimitErrors` and `rateLimitErrorCount` are always `false` and `0`.
   */
  public async getAllBookContent(bookChapters: BookChaptersModel[], bookId: string, url?: string, allChapters?: BookChaptersModel[], options?: { threads: number, delay: number }): Promise<BookContentModel[]> {
    const requestedThreads = options?.threads;
    const threads = typeof requestedThreads === 'number' && requestedThreads >= 1 ? Math.floor(requestedThreads) : 1;
    // An explicit 0 means "no pause"; only missing or invalid values fall back to 1000 ms.
    const requestedDelay = options?.delay;
    const delayMs = typeof requestedDelay === 'number' && Number.isFinite(requestedDelay) && requestedDelay >= 0 ? requestedDelay : 1000;
    const currentUrl = url || '';
    const chaptersToSave = allChapters || bookChapters;

    const bookContent = this.loadProgress(bookId);
    const completedChapterIds = new Set(bookContent.map((ch) => ch.id));
    const remainingChapters = bookChapters.filter((ch) => !completedChapterIds.has(ch.id));
    console.log(`\n📚 Статистика: Всего ${bookChapters.length}, Загружено ${bookContent.length}, Осталось ${remainingChapters.length}`);

    if (remainingChapters.length > 0) {
      let successCount = bookContent.length;
      const processChapter = async (chapter: BookChaptersModel): Promise<void> => {
        const content = await this.getChapterContent(chapter.link, this.pickUserAgent());
        if (content && content.trim().length > 0) {
          bookContent.push({ data: content, id: chapter.id, title: chapter.title });
          successCount++;
          if (successCount % 5 === 0) this.saveProgress(bookId, bookContent, currentUrl, chaptersToSave);
        }
        if (delayMs > 0) await this.$commonService.delay(delayMs);
      };

      // Fixed-size worker pool: each worker takes the next unclaimed chapter.
      let nextIndex = 0;
      const worker = async (): Promise<void> => {
        while (nextIndex < remainingChapters.length) {
          const chapter = remainingChapters[nextIndex++];
          await processChapter(chapter);
        }
      };
      const workerCount = Math.min(threads, remainingChapters.length);
      await Promise.all(Array.from({ length: workerCount }, () => worker()));
    }

    bookContent.sort((a, b) => a.id - b.id);
    if (remainingChapters.length > 0) this.saveProgress(bookId, bookContent, currentUrl, chaptersToSave);

    const summary = {
      content: bookContent,
      hasRateLimitErrors: false,
      rateLimitErrorCount: 0,
      totalChapters: bookChapters.length,
      loadedChapters: bookContent.length,
    };
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- summary shape is what existing callers read
    return summary as any;
  }

  /**
   * Generates an EPUB file. When generation fails with a message mentioning
   * "network", the build is repeated with all images removed.
   *
   * @param bookData epub-gen options; `bookData.output` is the target path.
   * @returns Whatever the epub-gen promise resolves to.
   * @throws Any generation error whose message does not mention "network".
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  public async generateEpubFromData(bookData: BookDataModel): Promise<any> {
    try {
      const epub = new EPub(bookData, bookData.output);
      return await epub.promise;
    } catch (error: unknown) {
      if (BookService.describeError(error).toLowerCase().includes('network')) {
        return await this.generateEpubFromDataNoImages(bookData);
      }
      throw error;
    }
  }

  /**
   * Generates an EPUB file after stripping every `<img>` tag from the chapters.
   *
   * @param bookData epub-gen options; `bookData.output` is the target path.
   * @returns Whatever the epub-gen promise resolves to.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  public async generateEpubFromDataNoImages(bookData: BookDataModel): Promise<any> {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const noImagesContent = bookData.content.map((chapter: any) =>
      chapter.data ? { ...chapter, data: chapter.data.replace(/<img[^>]*>/gi, '') } : chapter
    );
    const epub = new EPub({ ...bookData, content: noImagesContent }, bookData.output);
    return await epub.promise;
  }

  /**
   * Writes the downloaded chapters to `<cwd>/progress/<bookId>_progress.json`.
   * Saving is best-effort: a failure is logged and never interrupts the download.
   */
  private saveProgress(bookId: string, completedChapters: BookContentModel[], url?: string, allChapters?: BookChaptersModel[]): void {
    try {
      const progressDir = BookService.progressDir();
      fs.mkdirSync(progressDir, { recursive: true });
      const payload = {
        timestamp: new Date().toISOString(),
        completedCount: completedChapters.length,
        chapters: completedChapters,
        url,
        allChapters,
      };
      fs.writeFileSync(path.join(progressDir, `${bookId}${BookService.PROGRESS_SUFFIX}`), JSON.stringify(payload, null, 2));
    } catch (error: unknown) {
      console.log(`⚠️ Не удалось сохранить прогресс: ${BookService.describeError(error)}`);
    }
  }

  /**
   * Lists the whole-book progress files found in `<cwd>/progress`.
   * Per-volume files (names containing `_том_`) and unreadable files are skipped.
   *
   * @returns One entry per readable progress file with its parsed JSON content.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  public findProgressFiles(): Array<{bookId: string, filePath: string, progressData: any}> {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const progressFiles: Array<{bookId: string, filePath: string, progressData: any}> = [];
    const suffix = BookService.PROGRESS_SUFFIX;
    try {
      const progressDir = BookService.progressDir();
      if (!fs.existsSync(progressDir)) return progressFiles;
      for (const file of fs.readdirSync(progressDir)) {
        if (!file.endsWith(suffix) || file.includes('_том_')) continue;
        const filePath = path.join(progressDir, file);
        try {
          progressFiles.push({
            // Strip only the trailing suffix, never an earlier occurrence.
            bookId: file.slice(0, -suffix.length),
            filePath,
            progressData: JSON.parse(fs.readFileSync(filePath, 'utf8')),
          });
        } catch {
          // Unreadable or malformed progress file: skip it and keep listing the rest.
        }
      }
    } catch {
      // The directory could not be listed: return what has been collected so far.
    }
    return progressFiles;
  }

  /**
   * Reads the chapters stored in the progress file of a book.
   *
   * @returns The stored chapters, or `[]` when the file is missing, unreadable
   *          or does not contain a `chapters` array.
   */
  private loadProgress(bookId: string): BookContentModel[] {
    try {
      const progressFile = path.join(BookService.progressDir(), `${bookId}${BookService.PROGRESS_SUFFIX}`);
      if (!fs.existsSync(progressFile)) return [];
      const parsed: unknown = JSON.parse(fs.readFileSync(progressFile, 'utf8'));
      const chapters = typeof parsed === 'object' && parsed !== null ? (parsed as { chapters?: unknown }).chapters : undefined;
      return Array.isArray(chapters) ? chapters : [];
    } catch (error: unknown) {
      console.log(`⚠️ Не удалось прочитать прогресс: ${BookService.describeError(error)}`);
      return [];
    }
  }

  /**
   * Extracts the volume number of a chapter from its title (`Том N`) or,
   * failing that, from its link (`/vN/`).
   *
   * @returns The volume number, or `null` when neither source contains one.
   */
  public getVolumeFromChapter(chapter: BookChaptersModel): number | null {
    const titleMatch = chapter.title.match(/Том (\d+)/);
    if (titleMatch) return parseInt(titleMatch[1], 10);
    const urlMatch = chapter.link.match(/\/v(\d+)\//);
    if (urlMatch) return parseInt(urlMatch[1], 10);
    return null;
  }

  /**
   * Selects chapters by volume.
   *
   * The pair `[-1, n]` selects the first `n` chapters regardless of volume.
   * Otherwise a chapter is kept when its volume is listed; chapters without a
   * detectable volume are treated as volume `-1`.
   */
  public filterChaptersByVolumes(chapters: BookChaptersModel[], selectedVolumes: number[]): BookChaptersModel[] {
    if (selectedVolumes.length === 2 && selectedVolumes[0] === -1) return chapters.slice(0, selectedVolumes[1]);
    // `??` rather than `||`: volume 0 is a real volume and must stay selectable.
    return chapters.filter((c) => selectedVolumes.includes(this.getVolumeFromChapter(c) ?? -1));
  }

  /**
   * Groups chapters by volume number, preserving their order inside each group.
   * Chapters without a detectable volume are left out.
   */
  public groupChaptersByVolumes(chapters: BookChaptersModel[]): Map<number, BookChaptersModel[]> {
    const groups = new Map<number, BookChaptersModel[]>();
    for (const chapter of chapters) {
      const volume = this.getVolumeFromChapter(chapter);
      if (volume === null) continue;
      const group = groups.get(volume);
      if (group) group.push(chapter);
      else groups.set(volume, [chapter]);
    }
    return groups;
  }

  /**
   * Downloads the book volume by volume and writes one EPUB file per volume.
   * A volume with no downloaded content is skipped; a volume whose EPUB
   * generation fails is reported and does not stop the remaining volumes.
   *
   * @param chapters Chapters of the whole book.
   * @param bookId Prefix of the progress keys and output file names.
   * @param bookInfo Metadata copied into every volume.
   * @param basePath Directory that receives the `.epub` files.
   * @param noImagesMode When `true`, images are stripped from every volume.
   * @param options Forwarded to {@link BookService.getAllBookContent}.
   * @returns Paths of the EPUB files that were created.
   */
  public async processVolumesByOne(chapters: BookChaptersModel[], bookId: string, bookInfo: BookInfoModel, basePath: string, noImagesMode: boolean = false, options?: { threads: number, delay: number }): Promise<string[]> {
    const volumeGroups = this.groupChaptersByVolumes(chapters);
    const volumes = Array.from(volumeGroups.keys()).sort((a, b) => a - b);
    const createdFiles: string[] = [];
    for (const volume of volumes) {
      const volumeChapters = volumeGroups.get(volume) ?? [];
      console.log(`\n🔥 === ТОМ ${volume} ===`);
      const volumeResult = await this.getAllBookContent(volumeChapters, `${bookId}_том_${volume}`, '', undefined, options);
      // getAllBookContent returns a summary object; a bare array is accepted too.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const volumeContent = (volumeResult as any).content || volumeResult;
      if (volumeContent.length === 0) continue;
      const volumeFilePath = `${basePath}/${bookId}_том_${volume}.epub`;
      const volumeBookOptions: BookDataModel = { ...bookInfo, title: `${bookInfo.title} - Том ${volume}`, content: volumeContent, output: volumeFilePath };
      try {
        if (noImagesMode) await this.generateEpubFromDataNoImages(volumeBookOptions);
        else await this.generateEpubFromData(volumeBookOptions);
        createdFiles.push(volumeFilePath);
        console.log(`✅ Том ${volume} сохранен.`);
      } catch (error: unknown) {
        console.log(`❌ Ошибка в томе ${volume}: ${BookService.describeError(error)}`);
      }
    }
    return createdFiles;
  }

  /** Picks a random entry of the User-Agent pool. */
  private pickUserAgent(): string {
    return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
  }

  /** Tells whether an error reports HTTP 429 through `statusCode` or its message. */
  private isRateLimitError(error: unknown): boolean {
    const message = BookService.describeError(error).toLowerCase();
    const status = typeof error === 'object' && error !== null ? (error as { statusCode?: unknown }).statusCode : undefined;
    return status === 429 || message.includes('429') || message.includes('too many requests');
  }

  /** Closes a browser if one was opened, ignoring any failure while closing. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- browser type is owned by BrowserService
  private async closeBrowserQuietly(browser: any): Promise<void> {
    if (!browser) return;
    try {
      await this.$browserService.closeBrowser(browser);
    } catch {
      // Best-effort cleanup: there is nothing useful to do if closing fails.
    }
  }

  /**
   * Reads chapters from the `?section=chapters` page: first through the
   * chapters API (numeric id, then full id), then from the links on the page.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- page type is owned by BrowserService
  private async collectChaptersFromChapterPage(page: any, cleanUrl: string, bookId: string, numericBookId: string): Promise<BookChaptersModel[]> {
    await this.$browserService.gotoPage(page, cleanUrl + '?section=chapters');
    await page.waitForTimeout(3000);

    // Scroll down until the page height stops growing (at most 20 steps).
    await page.evaluate(async () => {
      await new Promise<void>((resolve) => {
        let lastHeight = document.body.scrollHeight;
        let attempts = 0;
        const maxAttempts = 20;
        const scrollDown = () => {
          window.scrollTo(0, document.body.scrollHeight);
          attempts++;
          setTimeout(() => {
            const newHeight = document.body.scrollHeight;
            if (newHeight === lastHeight || attempts >= maxAttempts) resolve();
            else { lastHeight = newHeight; scrollDown(); }
          }, 500);
        };
        scrollDown();
      });
    });

    // The request is issued from inside the page; any failure yields null.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped API payload
    const fetchJson = (apiUrl: string): Promise<any> => page.evaluate(async (target: string) => {
      try {
        const response = await fetch(target);
        if (!response.ok) throw new Error();
        return await response.json();
      } catch (e) { return null; }
    }, apiUrl);
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const isEmpty = (payload: any): boolean => !payload || !payload.data || payload.data.length === 0;

    let chaptersData = await fetchJson(`https://api.cdnlibs.org/api/manga/${numericBookId}/chapters`);
    // Retry with the full id only when it differs from the numeric one.
    if (isEmpty(chaptersData) && bookId !== numericBookId) {
      chaptersData = await fetchJson(`https://api.cdnlibs.org/api/manga/${bookId}/chapters`);
    }

    let chapters: BookChaptersModel[] = [];
    if (chaptersData && Array.isArray(chaptersData.data)) {
      console.log(`✅ Получено глав через API: ${chaptersData.data.length}`);
      chapters = BookService.sortChaptersByTitle(BookService.chaptersFromApi(chaptersData.data, cleanUrl));
    }
    if (chapters.length > 0) return chapters;

    console.log('⚠️ API не вернул данные, парсим страницу...');
    return await page.evaluate(() => {
      let chapterLinks = document.querySelectorAll<HTMLAnchorElement>('a[href*="/read/"]');
      if (chapterLinks.length === 0) {
        const alts = ['.chapter-item a', '.chapters-list a', '.media-chapter a'];
        for (const sel of alts) {
          chapterLinks = document.querySelectorAll<HTMLAnchorElement>(sel);
          if (chapterLinks.length > 0) break;
        }
      }
      const found = Array.from(chapterLinks).map((link, i) => {
        let title = link.textContent?.trim() || `Глава ${i + 1}`;
        title = title.replace(/^\s*[-–—]\s*/, '').trim();
        return { id: i, title, link: link.href };
      }).filter((c) => c.link.includes('/read/'));

      // Order by volume, then by chapter; links without /vN/cM sort first.
      const keyOf = (href: string): number[] => {
        const m = href.match(/\/v(\d+)\/c([0-9.]+)/);
        if (!m) return [0, 0];
        const chapterNo = parseFloat(m[2]);
        return [parseInt(m[1], 10), isNaN(chapterNo) ? 0 : chapterNo];
      };
      found.sort((a, b) => {
        const ka = keyOf(a.link);
        const kb = keyOf(b.link);
        return ka[0] - kb[0] || ka[1] - kb[1];
      });
      found.forEach((c, i) => { c.id = i; });
      return found;
    });
  }

  /** Last-resort source: collects `/read/` links from the main book page, in page order. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- page type is owned by BrowserService
  private async collectChaptersFromMainPage(page: any, url: string): Promise<BookChaptersModel[]> {
    await this.$browserService.gotoPage(page, url);
    await page.waitForTimeout(2000);
    return await page.evaluate(() => {
      const chapterLinks = document.querySelectorAll<HTMLAnchorElement>('a[href*="/read/"]');
      return Array.from(chapterLinks).map((link, i) => ({
        id: i,
        title: link.textContent?.trim() || `Глава ${i + 1}`,
        link: link.href,
      })).filter((chapter) => chapter.link.includes('/read/'));
    });
  }

  /**
   * Returns the inner HTML of the first non-empty reader container, retrying
   * after a reload with progressively longer waits. Returns `''` when nothing is found.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- page type is owned by BrowserService
  private async readChapterHtml(page: any): Promise<string> {
    const extractAfter = async (waitMs: number): Promise<string | null> => {
      await page.waitForTimeout(waitMs);
      return await page.evaluate(() => {
        const selectors = ['main[data-reader-content]', 'main.l1_b[data-reader-content]', '[data-reader-content]', 'div.reader-container.container.container_center', '.reader-container', '.chapter-content', '.content', '.reader-content', 'main .container', '.text-content'];
        for (const selector of selectors) {
          const el = document.querySelector(selector);
          if (el && el.innerHTML.trim().length > 0) {
            // Reading `img.src` yields the resolved URL; writing it back stores that URL in the markup.
            el.querySelectorAll('img').forEach((img) => { img.src = img.src || ''; });
            return el.innerHTML;
          }
        }
        return null;
      });
    };

    let html = await extractAfter(0);
    if (!html) {
      await page.reload({ waitUntil: 'networkidle2' });
      html = await extractAfter(8000);
    }
    if (!html) {
      await page.reload({ waitUntil: 'networkidle0' });
      html = await extractAfter(15000);
    }
    return html ?? '';
  }

  /**
   * Extracts the book id from a book URL: the segment after `/book/`, or else
   * the first path segment that starts with a digit.
   *
   * @returns The full id and its leading numeric part, or `null` when no id is present.
   */
  private static parseBookId(url: string): { bookId: string; numericBookId: string } | null {
    const urlWithoutParams = url.split('?')[0];
    const match = urlWithoutParams.match(/\/book\/([^\/\?]+)/) ?? urlWithoutParams.match(/\/(\d+[^\/\?]*)/);
    if (!match) return null;
    const bookId = match[1].split('?')[0].split('&')[0];
    const numericMatch = bookId.match(/^(\d+)/);
    return { bookId, numericBookId: numericMatch ? numericMatch[1] : bookId };
  }

  /**
   * Converts chapter records of the chapters API into chapter models.
   * With several branches, the one with the smallest `branch_id` is used.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped API payload
  private static chaptersFromApi(apiChapters: any[], cleanUrl: string): BookChaptersModel[] {
    const baseUrl = cleanUrl.replace('/book/', '/');
    return apiChapters.map((chapter, index) => {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const branches: any[] = Array.isArray(chapter.branches) ? chapter.branches : [];
      const selectedBranch = branches.length > 0
        ? branches.reduce((prev, current) => (prev.branch_id < current.branch_id ? prev : current))
        : undefined;
      const readerUrl = `${baseUrl}/read/v${chapter.volume}/c${chapter.number}`;
      // NOTE(review): a chapter without branches gets a link without bid/ui —
      // confirm the reader accepts such a URL.
      const link = selectedBranch ? `${readerUrl}?bid=${selectedBranch.branch_id}&ui=${selectedBranch.id}` : readerUrl;
      const heading = `Том ${chapter.volume}, Глава ${chapter.number}`;
      const hasName = typeof chapter.name === 'string' && chapter.name.trim().length > 0;
      return { id: index, title: hasName ? `${heading}: ${chapter.name}` : heading, link };
    });
  }

  /**
   * Orders chapters by the volume and chapter numbers found in their titles
   * (`Том N, Глава M`) and renumbers their ids sequentially. Volume is compared
   * first, so chapter numbers of any size cannot spill into the next volume.
   */
  private static sortChaptersByTitle(chapters: BookChaptersModel[]): BookChaptersModel[] {
    const keyOf = (title: string): [number, number] => {
      const m = title.match(/Том (\d+), Глава ([0-9.]+)/);
      if (!m) return [0, 0];
      const chapterNo = parseFloat(m[2]);
      return [parseInt(m[1], 10), Number.isNaN(chapterNo) ? 0 : chapterNo];
    };
    const sorted = chapters
      .map((chapter) => ({ chapter, key: keyOf(chapter.title) }))
      .sort((a, b) => a.key[0] - b.key[0] || a.key[1] - b.key[1])
      .map((entry) => entry.chapter);
    sorted.forEach((chapter, index) => { chapter.id = index; });
    return sorted;
  }

  /** Directory that holds the progress files. */
  private static progressDir(): string {
    return path.join(process.cwd(), 'progress');
  }

  /** Human-readable text of a caught value. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
версия 2.8.2
Win 10 x64
Elib2Ebook-windows-latest-min.zip
Я сейчас использую ranobelib-parser, но модифицировал его под многопоточную загрузку с возможностью выбора timeout-а; окна Chrome (он обязательно должен быть установлен — через него парсятся страницы) скрыты.
модифицированная версия cloud.mail.ru
Вот фрагмент подключения на Node.js.
На каждый запрос берётся свой User-Agent из списка USER_AGENTS.
Это немного обманывает RanobeLIB при множественных запросах.