From d9055b0894a695506bbab1fdf23e4fa95c4bf704 Mon Sep 17 00:00:00 2001 From: Alexei Date: Wed, 25 Oct 2023 15:50:08 +0300 Subject: [PATCH] =?UTF-8?q?=D0=A2=D0=B8=D0=BF=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=8B=D1=85=20=D1=82=20=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=B0=20=D1=81=20rg.ru?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- news_parser.js | 131 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 49 deletions(-) diff --git a/news_parser.js b/news_parser.js index e49f5bb..e01e6ac 100644 --- a/news_parser.js +++ b/news_parser.js @@ -6,6 +6,7 @@ // @author AlexeiBv+mirocod@narod.ru // @match https://tass.ru/* // @match https://ria.ru/* +// @match https://rg.ru/* // @match https://zakonvremeni.ru/* // @icon https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico // @grant none @@ -22,12 +23,20 @@ return a_Element.className; } - function FindElementsByRegExp(a_GenElementNameFunc, a_RegExpPattern, a_ElementParent) { + function GetNodeName(a_Element) { + return a_Element.nodeName; + } + + function CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, a_Element) { + let re = new RegExp("(?:^|\\s)" + a_RegExpPattern + "(?!\\S)"); + return re.test(a_GetElementNameFunc(a_Element)); + } + + function FindElementsByRegExp(a_GetElementNameFunc, a_RegExpPattern, a_ElementParent) { a_ElementParent || (a_ElementParent=document); - var descendants = a_ElementParent.getElementsByTagName('*'), i=-1, e, result=[]; - var re = new RegExp("(?:^|\\s)" + a_RegExpPattern + "(?!\\S)"); - while (e=descendants[++i]) { - if (re.test(a_GenElementNameFunc(e))){ + let descendants = a_ElementParent.getElementsByTagName('*'), i=-1, e, result=[]; + while (e = descendants[++i]) { + if (CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, e)){ result.push(e); } } @@ -41,7 +50,7 @@ } function RemoveBeforeSplitter(a_String, a_Splitter) { - var index = a_String.indexOf(a_Splitter) + let index = a_String.indexOf(a_Splitter) if (index != -1) { return a_String.substring(index + a_Splitter.length); } @@ -49,9 +58,9 @@ } function RemoveAfterSplitter(a_String, a_Splitter, a_SaveSplitter) { - var index = a_String.indexOf(a_Splitter) + let index = a_String.indexOf(a_Splitter) if (index != -1) { - var spl_len = a_Splitter.length + let spl_len = a_Splitter.length if (!a_SaveSplitter) { spl_len = 0; } @@ -61,15 +70,15 @@ } function ClearUrl(a_Url) { - var separator = '?'; + const separator = '?'; return RemoveAfterSplitter(a_Url, separator, false); } function ClearTextFuncTemplate(a_RemoveBeforeList) { function ClearTextFunc(a_Content) { - var content = a_Content; + let content = a_Content; for (let i = 0; i < a_RemoveBeforeList.length; i++) { - var r = a_RemoveBeforeList[i]; + let r = a_RemoveBeforeList[i]; content = RemoveBeforeSplitter(content, r); } return content; @@ -80,12 +89,12 @@ // Работа с контейнерами function GetImageInContainers(a_Elements, a_TextAlign) { - var i; - var img_src = ''; - var re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); + let i; + let img_src = ''; + let re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); for (i in a_Elements) { - var e = a_Elements[i]; - var children = e.querySelectorAll("*"); + let e = a_Elements[i]; + let children = e.querySelectorAll("*"); for(let i = 0; i < children.length; i++){ var c = children[i]; if (c.nodeName == 'IMG' && re.test(c.src)) { @@ -100,11 +109,11 @@ } function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) { - var result = ''; + let result = ''; for (var i in a_Elements) { - var e = a_Elements[i]; + let e = a_Elements[i]; - var content = ''; + let content = ''; if (e.querySelectorAll) { var children = e.querySelectorAll("*"); if (children.length == 0 || e.innerText) { @@ -112,7 +121,7 @@ } else { for (let i = 0; i < children.length; i++) { - var c = children[i]; + let c = children[i]; content += a_GrubTextFunc(c); } } @@ -131,9 +140,9 @@ // Фильтрация элементов function FIlterElements(a_Elements, a_ElementChecker) { - var result = []; + let result = []; for (let i = 0; i < a_Elements.length; i++) { - var e = a_Elements[i]; + let e = a_Elements[i]; if (a_ElementChecker(e)) { result.push(e); } @@ -180,14 +189,23 @@ a_Content = a_ClearTextFunc(a_Content); } if (a_Element && a_Element.dataset && a_Element.dataset.type == 'list') { - var childrens = FindElementsByRegExp(GetElementClassName, 'article__list-item', a_Element); + let childrens = FindElementsByRegExp(GetElementClassName, 'article__list-item', a_Element); let content = ''; for (let i = 0; i < childrens.length; i++) { - var c = childrens[i]; + let c = childrens[i]; content += '
  • ' + GrubTextFuncTemplate()(c) + '
  • '; } a_Content = ''; } + if (a_Element && CheckRegExp(GetElementClassName, 'PageContentCommonStyling_text.*', a_Element)) { + let content = ''; + let childrens = FindElementsByRegExp(GetNodeName, 'P', a_Element); + for (let i = 0; i < childrens.length; i++) { + let c = childrens[i]; + content += FinishWorkFunc(GrubTextFuncTemplate()(c), c); + } + return content + } if (a_OutTag && a_TextAlign) { a_Content = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + a_Content + ''; @@ -204,13 +222,13 @@ // Создание контента для стандартных новостей function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_ClearTextPatterns) { - var title_tag = 'h2'; - var p_tag = 'p'; - var title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') - var grub_func = GrubTextFuncTemplate(); + const title_tag = 'h2'; + const p_tag = 'p'; + const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') + const grub_func = GrubTextFuncTemplate(); - var content = ''; - var paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns)); + let content = ''; + const paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns)); content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_func, title_finish_text_func); content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center'); content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_func, paragraph_finish_text_func); @@ -220,13 +238,13 @@ // Создание контента для сайта function MakeContent() { - var content = ''; - var source_add = true; - var zero_tag_func = FinishWorkFuncTemplate() - var grub_text_func = GrubTextFuncTemplate() + let content = ''; + let source_add = true; + const zero_tag_func = FinishWorkFuncTemplate() + const grub_text_func = GrubTextFuncTemplate() if (location.hostname == 'tass.ru') { - let base_element = document.getElementById('content_box'); + const base_element = document.getElementById('content_box'); content = MakeContentByNews( base_element, base_element, @@ -239,12 +257,12 @@ ); } else if (location.hostname == 'ria.ru') { - let base_element = document.getElementsByClassName('article__header')[0]; - var base_element_text = document.getElementsByClassName('article__body')[0]; - var tire = ['–', '—', '‒', '―', '⸺', '⸻']; - var clear_text = []; - for (var i in tire) { - var t = tire[i]; + const base_element = document.getElementsByClassName('article__header')[0]; + const base_element_text = document.getElementsByClassName('article__body')[0]; + const tire = ['–', '—', '‒', '―', '⸺', '⸻']; + let clear_text = []; + for (let i in tire) { + let t = tire[i]; clear_text.push(t + ' РИА Новости. '); } @@ -259,17 +277,32 @@ clear_text ); } + else if (location.hostname == 'rg.ru') { + const base_element = document.getElementsByClassName('article__header')[0]; + const base_element_text = document.getElementsByClassName('article__body')[0]; + + content = MakeContentByNews( + document, + document, + base_element_text, + 'PageArticleContent_title.*', + 'PageArticleContent_image.*', + '(PageContentCommonStyling_text|PageArticleContent_lead).*', + ElementCheckerRia, + [] + ); + } else if (location.hostname == 'zakonvremeni.ru') { - let base_element = document.getElementsByClassName('item-page')[0]; - var title = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func); - var parent_category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element), grub_text_func); - var category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'category-name', base_element), grub_text_func); - var page = RemoveAfterSplitter(TrimString(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); + const base_element = document.getElementsByClassName('item-page')[0]; + const title = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func); + const parent_category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element), grub_text_func); + const category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'category-name', base_element), grub_text_func); + const page = RemoveAfterSplitter(TrimString(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL; source_add = false; } - var result = ''; + let result = ''; if (content.length > 0) { result = '