From b3bb8fdc8765a7d20627976a164ee804d397a130 Mon Sep 17 00:00:00 2001
From: Alexei
Date: Tue, 24 Oct 2023 19:54:57 +0300
Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B5=D1=84=D0=B0=D0=BA=D1=82=D0=BE?=
 =?UTF-8?q?=D1=80=D0=B8=D0=BD=D0=B3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (free-form area after the "---" marker, not part of the
commit message):

* GetContentInContainers changes its positional signature from
    (a_GrubTextFunc, a_OutTag, baseClass, parent, textAlign,
     a_ElementFilterFunc, a_ClearTextFunc)
  to
    (a_FinishWorkFunc, a_GrubTextFunc, baseClass, parent,
     a_ElementFilterFunc, a_ClearTextFunc)
  so every call site must drop the tag/alignment arguments and pass a
  function built by FinishWorkFuncTemplate(tag, align) first.
* Both ria.ru call sites are corrected to the new argument order: the
  title call passes 'article__title' as baseClass (it passed title_tag),
  and the paragraph call no longer carries the stale p_tag / 'justify'
  arguments that shifted baseClass, parent, filter and clear-text
  function into the wrong positions.
* The wrapper string ends with the closing tag '</' + a_OutTag + '>'
  (the text had been reduced to an empty string literal).
* FinishWorkFuncTemplate() called without arguments returns a function
  that passes content through unchanged; this replaces the former
  a_OutTag == '' case used by the zakonvremeni.ru branch.
* Leading whitespace inside the hunks was re-created; if context does
  not match the working tree, apply with `git apply --ignore-whitespace`.

 news_parser.js | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/news_parser.js b/news_parser.js
index 9a5e864..ea46d51 100644
--- a/news_parser.js
+++ b/news_parser.js
@@ -77,7 +77,19 @@
         return a_String;
     }
 
-    function GetContentInContainers(a_GrubTextFunc, a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) {
+    function FinishWorkFuncTemplate(a_OutTag, a_TextAlign) {
+        function FinishWorkFunc(a_Content, a_Element) {
+            if (a_OutTag && a_TextAlign) {
+                return '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + a_Content + '</' + a_OutTag + '>';
+            }
+            else {
+                return a_Content;
+            }
+        }
+        return FinishWorkFunc;
+    }
+
+    function GetContentInContainers(a_FinishWorkFunc, a_GrubTextFunc, baseClass, parent, a_ElementFilterFunc, a_ClearTextFunc) {
         var elems = getByClass(baseClass, parent);
         if (!elems) {
             return 'Не удалось найти ' + baseClass;
@@ -106,11 +118,12 @@
             if (a_ClearTextFunc) {
                 content = a_ClearTextFunc(content);
             }
-            if (a_OutTag == '') {
-                result += content;
+
+            if (a_FinishWorkFunc) {
+                result += a_FinishWorkFunc(content, e);
             }
             else {
-                result += '<' + a_OutTag + ' style = "text-align:' + textAlign + ';">' + content + '</' + a_OutTag + '>';
+                result += content;
             }
         }
         return result;
@@ -168,20 +181,23 @@
         var p_tag = 'p';
         var zero_tag = '';
         var source_add = true;
+        var title_func = FinishWorkFuncTemplate(title_tag, 'center');
+        var paragraph_func = FinishWorkFuncTemplate(p_tag, 'justify');
+        var zero_tag_func = FinishWorkFuncTemplate();
         if (location.hostname == 'tass.ru') {
-            content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center');
+            content += GetContentInContainers(title_func, GrubTextFuncTemplate(), 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'));
             content += GetImageInContainers('Image_wrapper_.*', document.getElementById('content_box'), 'center');
-            content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. ']));
+            content += GetContentInContainers(paragraph_func, GrubTextFuncTemplate(), 'Paragraph_paragraph.*', document.getElementById('content_box'), FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. ']));
         }
         else if (location.hostname == 'ria.ru') {
-            content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center');
+            content += GetContentInContainers(title_func, GrubTextFuncTemplate(), 'article__title', document.getElementsByClassName('article__header')[0]);
             content += GetImageInContainers('photoview__open', document.getElementsByClassName('article__header')[0], 'center');
-            content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. ']));
+            content += GetContentInContainers(paragraph_func, GrubTextFuncTemplate(), 'article__block', document.getElementsByClassName('article__body')[0], FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. ']));
         }
         else if (location.hostname == 'zakonvremeni.ru') {
-            var title = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]);
-            var parent_category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]);
-            var category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]);
+            var title = GetContentInContainers(zero_tag_func, GrubTextFuncTemplate(), 'page-header', document.getElementsByClassName('item-page')[0]);
+            var parent_category = GetContentInContainers(zero_tag_func, GrubTextFuncTemplate(), 'parent-category-name', document.getElementsByClassName('item-page')[0]);
+            var category = GetContentInContainers(zero_tag_func, GrubTextFuncTemplate(), 'category-name', document.getElementsByClassName('item-page')[0]);
             var page = RemoveAfterSplitter(Trim(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true);
             content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL;
             source_add = false;