diff --git a/news_parser.js b/news_parser.js index 7290bc1..9a5e864 100644 --- a/news_parser.js +++ b/news_parser.js @@ -11,7 +11,7 @@ // @grant none // ==/UserScript== -// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) +// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) (function() { 'use strict'; @@ -77,7 +77,7 @@ return a_String; } - function GetContentInContainers(a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) { + function GetContentInContainers(a_GrubTextFunc, a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) { var elems = getByClass(baseClass, parent); if (!elems) { return 'Не удалось найти ' + baseClass; @@ -92,21 +92,17 @@ var content = ''; if (e.querySelectorAll) { - var children = e.querySelectorAll("*"); - if (children.length == 0 || e.innerText) { - if (e.innerText) { - content += Trim(e.textContent); + var children = e.querySelectorAll("*"); + if (children.length == 0 || e.innerText) { + content += a_GrubTextFunc(e); } - } - else { - for (let i = 0; i < children.length; i++) { - var c = children[i]; - if (c.innerText) { - content += Trim(c.textContent); + else { + for (let i = 0; i < children.length; i++) { + var c = children[i]; + content += a_GrubTextFunc(c); } } } - } if (a_ClearTextFunc) { content = a_ClearTextFunc(content); } @@ -155,6 +151,17 @@ return ClearTextFunc } + function GrubTextFuncTemplate() { + function GrubTextFunc(a_Element) { + var content = ''; + if (a_Element.innerText) { + content = Trim(a_Element.textContent); + } + return content; + } + return GrubTextFunc + } + function MakeContent() { var content = ''; var title_tag = 'h2'; @@ -162,19 +169,19 @@ var zero_tag = ''; var source_add = true; if (location.hostname == 'tass.ru') { - content += GetContentInContainers(title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center'); + content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center'); content += GetImageInContainers('Image_wrapper_.*', document.getElementById('content_box'), 'center'); - content += GetContentInContainers(p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. '])); + content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. '])); } else if (location.hostname == 'ria.ru') { - content += GetContentInContainers(title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center'); + content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center'); content += GetImageInContainers('photoview__open', document.getElementsByClassName('article__header')[0], 'center'); - content += GetContentInContainers(p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. '])); + content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. '])); } else if (location.hostname == 'zakonvremeni.ru') { - var title = GetContentInContainers(zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]); - var parent_category = GetContentInContainers(zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]); - var category = GetContentInContainers(zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]); + var title = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]); + var parent_category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]); + var category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]); var page = RemoveAfterSplitter(Trim(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL; source_add = false;