From 79b00c6702e4356b9dedd7052befd1c3c2229f97 Mon Sep 17 00:00:00 2001 From: Alexei Bezborodov Date: Sat, 28 Oct 2023 18:54:05 +0300 Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B5=D0=B0=D0=BB=D0=B8=D0=B7=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0=D0=BD=D1=8B=20CNEWS=20#7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- news_parser.js | 68 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/news_parser.js b/news_parser.js index 11869a9..d11b76b 100644 --- a/news_parser.js +++ b/news_parser.js @@ -7,6 +7,7 @@ // @match https://tass.ru/* // @match https://ria.ru/* // @match https://rg.ru/* +// @match https://www.cnews.ru/* // @match https://russian.rt.com/* // @match https://zakonvremeni.ru/* // @icon https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico @@ -142,6 +143,10 @@ return true; } + function ElementCheckerFalse(a_Element) { + return false; + } + function ElementCheckerRia(a_Element) { if (a_Element.dataset.type == 'article' || a_Element.dataset.type == 'banner' || a_Element.dataset.type == 'media') { return false; @@ -157,6 +162,22 @@ return false; } + function SubElementCheckerToRemoveTemplate(a_Classes, a_NodeNames) { + function SubElementCheckerToRemove(a_Element) { + if (!a_Element || !a_Element.parentNode) { + return false; + } + if (CheckRegExp(GetElementClassName, '.*(' + a_Classes + ').*', a_Element)) { + return true; + } + if ((a_NodeNames && CheckRegExp(GetNodeName, '(' + a_NodeNames + ')', a_Element))) { + return true; + } + return false; + } + return SubElementCheckerToRemove + } + // Обработка элементов function GrubTextFuncTemplate() { @@ -182,15 +203,12 @@ parent.removeChild(a_Element); } - function GetClearHtml(a_Element, a_OutTag, a_TextAlign) { + function GetClearHtml(a_Element, a_OutTag, a_TextAlign, a_SubElementCheckerToRemove) { let clear_element = a_Element.cloneNode(true); let elements = clear_element.querySelectorAll('*'); elements.forEach(function (element) { - let for_rt_com = 'read-more|article__cover'; - let for_rg_com = 'portal|rg-incut|article-img|Section'; - let delete_in_rg = element.nodeName == 'RG-VIDEO' || element.nodeName == 'RG-INCUT'; - if (element && (element.parentNode && CheckRegExp(GetElementClassName, '.*(' + for_rt_com + '|' + for_rg_com + ').*', element) || delete_in_rg)) { + if (a_SubElementCheckerToRemove(element)) { element.parentNode.removeChild(element); } else { @@ -236,13 +254,13 @@ } - function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc) { + function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc, a_SubElementCheckerToRemove) { function FinishWorkFunc(a_Element, a_ElementIndex) { let out_tag = a_OutTag; if (a_Element && CheckRegExp(GetElementClassName, 'Title_title.*', a_Element)) { out_tag = 'h2'; } - let a_Content = GetClearHtml(a_Element, out_tag, a_TextAlign) + let a_Content = GetClearHtml(a_Element, out_tag, a_TextAlign, a_SubElementCheckerToRemove) if (a_ClearTextFunc) { a_Content = a_ClearTextFunc(a_Content, a_ElementIndex); } @@ -258,14 +276,14 @@ // Создание контента для стандартных новостей - function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_ClearTextPatterns) { + function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_SubElementCheckerToRemove, a_ClearTextPatterns) { const title_tag = 'h2'; const p_tag = 'p'; const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') const grub_func = GrubTextFuncTemplate(); let content = ''; - const paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns, true)); + const paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns, true), a_SubElementCheckerToRemove); content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_func, title_finish_text_func); content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center'); content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_func, paragraph_finish_text_func); @@ -291,6 +309,7 @@ 'Image_wrapper_.*', '(Paragraph_paragraph|Title_title).*', ElementCheckerTrue, + ElementCheckerFalse, ['. '] ); } @@ -313,6 +332,7 @@ 'photoview__open', 'article__block', ElementCheckerRia, + ElementCheckerFalse, [''] ); } @@ -328,7 +348,11 @@ 'PageArticleContent_title.*', 'PageArticleContent_image.*', '(PageContentCommonStyling_text|PageArticleContent_lead).*', - ElementCheckerRia, + ElementCheckerTrue, + SubElementCheckerToRemoveTemplate( + 'portal|rg-incut|article-img|Section', + 'RG-VIDEO|RG-INCUT' + ), [] ); } @@ -344,7 +368,29 @@ 'article__heading', 'article__cover article__cover_article-page', 'article__text', - ElementCheckerRia, + ElementCheckerTrue, + SubElementCheckerToRemoveTemplate( + 'read-more|article__cover' + ), + [] + ); + } + else if (location.hostname == 'www.cnews.ru') { + // test: https://www.cnews.ru/news/top/2023-10-27_rossiyane_sozdali_polnotsennyj + const base_element = document.getElementsByClassName('news_containere')[0]; + + content = MakeContentByNews( + base_element, + base_element, + document, + '!!!', // Нет названия + 'img-block', + 'news_container', + ElementCheckerTrue, + SubElementCheckerToRemoveTemplate( + 'article-top-author|article-menu_base|d-flex|img-block|NewsBodyLeftInclude|mobile-zone|other-news-note|cnLike|article-bottom-info|banner|comments_all', + 'NOINDEX|BR' + ), [] ); }