From 6ae507ec8f109e1202209d128359284fcc3d4583 Mon Sep 17 00:00:00 2001 From: Alexei Bezborodov Date: Sat, 21 Oct 2023 10:02:03 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=B4=D0=B3=D0=BE=D1=82=D0=BE?= =?UTF-8?q?=D0=B2=D0=BA=D0=B0=20=D0=BA=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B5=20=D1=82=D0=B5=D0=B3=D0=BE=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- news_parser.js | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/news_parser.js b/news_parser.js index 7290bc1..9a5e864 100644 --- a/news_parser.js +++ b/news_parser.js @@ -11,7 +11,7 @@ // @grant none // ==/UserScript== -// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) +// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) (function() { 'use strict'; @@ -77,7 +77,7 @@ return a_String; } - function GetContentInContainers(a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) { + function GetContentInContainers(a_GrubTextFunc, a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) { var elems = getByClass(baseClass, parent); if (!elems) { return 'Не удалось найти ' + baseClass; @@ -92,21 +92,17 @@ var content = ''; if (e.querySelectorAll) { - var children = e.querySelectorAll("*"); - if (children.length == 0 || e.innerText) { - if (e.innerText) { - content += Trim(e.textContent); + var children = e.querySelectorAll("*"); + if (children.length == 0 || e.innerText) { + content += a_GrubTextFunc(e); } - } - else { - for (let i = 0; i < children.length; i++) { - var c = children[i]; - if (c.innerText) { - content += Trim(c.textContent); + else { + for (let i = 0; i < children.length; i++) { + var c = children[i]; + content += a_GrubTextFunc(c); } } } - } if (a_ClearTextFunc) { content = a_ClearTextFunc(content); } @@ -155,6 +151,17 @@ return ClearTextFunc } + function GrubTextFuncTemplate() { + function GrubTextFunc(a_Element) { + var content = ''; + if (a_Element.innerText) { + content = Trim(a_Element.textContent); + } + return content; + } + return GrubTextFunc + } + function MakeContent() { var content = ''; var title_tag = 'h2'; @@ -162,19 +169,19 @@ var zero_tag = ''; var source_add = true; if (location.hostname == 'tass.ru') { - content += GetContentInContainers(title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center'); + content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center'); content += GetImageInContainers('Image_wrapper_.*', document.getElementById('content_box'), 'center'); - content += GetContentInContainers(p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. '])); + content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. '])); } else if (location.hostname == 'ria.ru') { - content += GetContentInContainers(title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center'); + content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center'); content += GetImageInContainers('photoview__open', document.getElementsByClassName('article__header')[0], 'center'); - content += GetContentInContainers(p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. '])); + content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. '])); } else if (location.hostname == 'zakonvremeni.ru') { - var title = GetContentInContainers(zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]); - var parent_category = GetContentInContainers(zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]); - var category = GetContentInContainers(zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]); + var title = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]); + var parent_category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]); + var category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]); var page = RemoveAfterSplitter(Trim(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL; source_add = false;