// ==UserScript==
// @name         News parser
// @namespace    http://zakonvremeni.ru
// @version      0.2
// @description  Parse news
// @author       AlexeiBv+mirocod@narod.ru
// @match        https://tass.ru/*
// @match        https://ria.ru/*
// @match        https://rg.ru/*
// @match        https://www.cnews.ru/*
// @match        https://russian.rt.com/*
// @match        https://zakonvremeni.ru/*
// @icon         https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico
// @grant        none
// ==/UserScript==

// Public domain, 2023, Алексей Безбородов (Alexei Bezborodov)

(function() {
    'use strict';

    // The script only reads and modifies the current page; without a DOM
    // there is nothing to do.
    if (typeof document === 'undefined' || typeof location === 'undefined') {
        return;
    }

    // ---------------------------------------------------------------------
    // Element lookup by regular expression
    // ---------------------------------------------------------------------

    /**
     * Returns the element's class attribute as a plain string.
     *
     * For SVG elements `className` is an SVGAnimatedString object rather than
     * a string, which would be matched as "[object SVGAnimatedString]"; in
     * that case the raw `class` attribute is used instead.
     */
    function GetElementClassName(a_Element) {
        const class_name = a_Element.className;
        if (typeof class_name === 'string') {
            return class_name;
        }
        return (a_Element.getAttribute && a_Element.getAttribute('class')) || '';
    }

    /** Returns the element's `nodeName`. */
    function GetNodeName(a_Element) {
        return a_Element.nodeName;
    }

    /**
     * Tests the name produced by `a_GetElementNameFunc(a_Element)` against
     * `a_RegExpPattern`.
     *
     * The pattern is wrapped so that a match has to start at the beginning of
     * the name or right after whitespace, and must not be directly followed by
     * a non-whitespace character.
     *
     * @returns {boolean} true when the wrapped pattern matches.
     */
    function CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, a_Element) {
        const re = new RegExp('(?:^|\\s)' + a_RegExpPattern + '(?!\\S)');
        return re.test(a_GetElementNameFunc(a_Element));
    }

    /**
     * Collects every descendant of `a_ElementParent` whose name (as returned
     * by `a_GetElementNameFunc`) satisfies `CheckRegExp`.
     *
     * @param a_ElementParent search root; the whole document is searched when
     *        it is missing (undefined/null).
     * @returns {Element[]} matching elements in document order.
     */
    function FindElementsByRegExp(a_GetElementNameFunc, a_RegExpPattern, a_ElementParent) {
        const parent = a_ElementParent || document;
        const descendants = parent.getElementsByTagName('*');
        const result = [];
        for (let i = 0; i < descendants.length; i++) {
            const element = descendants[i];
            if (CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, element)) {
                result.push(element);
            }
        }
        return result;
    }

    // ---------------------------------------------------------------------
    // String helpers
    // ---------------------------------------------------------------------

    /** Strips leading and trailing whitespace; a falsy value yields ''. */
    function TrimString(str) {
        return (str || '').trim();
    }

    /**
     * Drops everything up to and including the first occurrence of
     * `a_Splitter`. The string is returned unchanged when the splitter is
     * not found.
     */
    function RemoveBeforeSplitter(a_String, a_Splitter) {
        const index = a_String.indexOf(a_Splitter);
        if (index === -1) {
            return a_String;
        }
        return a_String.substring(index + a_Splitter.length);
    }

    /**
     * Cuts the string at the first occurrence of `a_Splitter`.
     *
     * @param a_SaveSplitter when truthy the splitter itself is kept at the end
     *        of the result, otherwise it is dropped as well.
     * @returns the shortened string, or the input when the splitter is absent.
     */
    function RemoveAfterSplitter(a_String, a_Splitter, a_SaveSplitter) {
        const index = a_String.indexOf(a_Splitter);
        if (index === -1) {
            return a_String;
        }
        const kept_splitter_length = a_SaveSplitter ? a_Splitter.length : 0;
        return a_String.substring(0, index + kept_splitter_length);
    }

    /** Removes the query part (everything from the first '?') of a URL. */
    function ClearUrl(a_Url) {
        const separator = '?';
        return RemoveAfterSplitter(a_Url, separator, false);
    }

    /**
     * Builds a text-cleaning function that applies `RemoveBeforeSplitter` for
     * every entry of `a_RemoveBeforeList`, in order.
     *
     * @param a_OnlyFirstIndex when truthy, only the element with index 1 is
     *        cleaned; content of any other (non-zero) index is returned as is.
     */
    function ClearTextFuncTemplate(a_RemoveBeforeList, a_OnlyFirstIndex) {
        const remove_before_list = a_RemoveBeforeList || [];
        function ClearTextFunc(a_Content, a_ElementIndex) {
            if (a_OnlyFirstIndex && a_ElementIndex && a_ElementIndex !== 1) {
                return a_Content;
            }
            let content = a_Content;
            for (let i = 0; i < remove_before_list.length; i++) {
                content = RemoveBeforeSplitter(content, remove_before_list[i]);
            }
            return content;
        }
        return ClearTextFunc;
    }

    // ---------------------------------------------------------------------
    // Container helpers
    // ---------------------------------------------------------------------

    /**
     * Looks for an IMG whose `src` is an http(s) URL containing ".png" or
     * ".jpg" among the given elements and all of their descendants. When
     * several images qualify, the last one visited wins.
     *
     * @returns {string} HTML for the image, or '' when no image was found.
     */
    function GetImageInContainers(a_Elements, a_TextAlign) {
        // A regex literal keeps the dot escaped; building this pattern from a
        // plain string turns "\." into "." (any character).
        const image_url_re = /https?:\/\/.*\.(?:png|jpg)/;
        let img_src = '';
        for (const container of a_Elements) {
            const candidates = [container].concat(Array.from(container.querySelectorAll('*')));
            for (const candidate of candidates) {
                if (candidate.nodeName === 'IMG' && image_url_re.test(candidate.src)) {
                    img_src = candidate.src;
                }
            }
        }
        if (img_src.length === 0) {
            return '';
        }
        // NOTE(review): the literal returned here previously contained only
        // line breaks and ignored both img_src and a_TextAlign, so the found
        // image never reached the output. The markup below is the minimal
        // form that uses both values; confirm it against the intended layout.
        return '<p style = "text-align:' + a_TextAlign + ';"><img src="' + img_src + '"></p>';
    }

    /**
     * Concatenates `a_FinishWorkFunc(element, index)` for every element, with
     * indexes starting at 1.
     *
     * `a_GrubTextFunc` is accepted for call compatibility but is not used.
     */
    function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) {
        let result = '';
        let element_index = 1;
        for (const element of a_Elements) {
            result += a_FinishWorkFunc(element, element_index);
            element_index += 1;
        }
        return result;
    }

    // ---------------------------------------------------------------------
    // Element filtering
    // ---------------------------------------------------------------------

    /** Returns the elements for which `a_ElementChecker` returns a truthy value. */
    function FIlterElements(a_Elements, a_ElementChecker) {
        return Array.prototype.filter.call(a_Elements, function (element) {
            return a_ElementChecker(element);
        });
    }

    /** Checker that accepts every element. */
    function ElementCheckerTrue(a_Element) {
        return true;
    }

    /** Checker that rejects every element. */
    function ElementCheckerFalse(a_Element) {
        return false;
    }

    /**
     * Rejects ria.ru blocks whose `data-type` is article, banner, media or
     * video; everything else is accepted.
     */
    function ElementCheckerRia(a_Element) {
        const skipped_types = ['article', 'banner', 'media', 'video'];
        return skipped_types.indexOf(a_Element.dataset.type) === -1;
    }

    /**
     * Accepts elements marked with itemprop="articleBody".
     * The attribute is read explicitly: elements have no `itemprop` property.
     */
    function ElementCheckerZV(a_Element) {
        return a_Element.getAttribute('itemprop') === 'articleBody';
    }

    /**
     * Builds a checker that marks sub-elements for removal.
     *
     * @param a_Classes   '|'-separated alternatives matched anywhere inside
     *                    the element's class string.
     * @param a_NodeNames optional '|'-separated node names (upper case, as
     *                    passed by the callers in this file).
     * @returns function(element) -> boolean; false for a missing element or
     *          one without a parent node.
     */
    function SubElementCheckerToRemoveTemplate(a_Classes, a_NodeNames) {
        function SubElementCheckerToRemove(a_Element) {
            if (!a_Element || !a_Element.parentNode) {
                return false;
            }
            if (CheckRegExp(GetElementClassName, '.*(' + a_Classes + ').*', a_Element)) {
                return true;
            }
            if (a_NodeNames && CheckRegExp(GetNodeName, '(' + a_NodeNames + ')', a_Element)) {
                return true;
            }
            return false;
        }
        return SubElementCheckerToRemove;
    }

    // ---------------------------------------------------------------------
    // Element processing
    // ---------------------------------------------------------------------

    /**
     * Builds a function that returns the trimmed `textContent` of an element,
     * or '' when the element's `innerText` is empty.
     */
    function GrubTextFuncTemplate() {
        function GrubTextFunc(a_Element) {
            let content = '';
            if (a_Element.innerText) {
                content = TrimString(a_Element.textContent);
            }
            return content;
        }
        return GrubTextFunc;
    }

    /**
     * Removes the element's attributes in place.
     *
     * The previous implementation assigned to a misspelled `outherHTML`
     * property and therefore changed nothing. Replacing `outerHTML` is avoided
     * on purpose: it would swap the element for a new node and detach the
     * descendants a caller may still be iterating over.
     *
     * @param a_KeepAttributes optional list of attribute names to leave
     *        untouched; by default every attribute is removed.
     */
    function RemoveAllAttributes(a_Element, a_KeepAttributes) {
        const keep = a_KeepAttributes || [];
        // Snapshot the names first: `attributes` is live and shrinks while
        // attributes are being removed.
        const names = Array.from(a_Element.attributes, function (attribute) {
            return attribute.name;
        });
        names.forEach(function (name) {
            if (keep.indexOf(name) === -1) {
                a_Element.removeAttribute(name);
            }
        });
    }

    /**
     * Removes the element but keeps its children, moving them to the place
     * the element occupied. `a_Parent` is used only when the element reports
     * no parent node.
     */
    function RemoveCurrentElementSaveChild(a_Element, a_Parent) {
        const parent = a_Element.parentNode || a_Parent;
        if (!parent) {
            return;
        }
        while (a_Element.firstChild) {
            parent.insertBefore(a_Element.firstChild, a_Element);
        }
        parent.removeChild(a_Element);
    }

    /**
     * True when the HTML consists of nothing but whitespace and `&nbsp;`
     * entities (which is how `innerHTML` writes a non-breaking space).
     */
    function IsBlankHtml(a_Html) {
        return TrimString(a_Html).replace(/&nbsp;|\s/g, '') === '';
    }

    /**
     * Produces cleaned-up HTML for a copy of `a_Element`; the page itself is
     * not modified.
     *
     * Steps, in order:
     *  1. drop sub-elements accepted by `a_SubElementCheckerToRemove`, strip
     *     attributes from the rest;
     *  2. unwrap div/span/em/svg/path elements (children are kept);
     *  3. unwrap links whose host equals the current hostname;
     *  4. align p/h2 elements and drop blank ones;
     *  5. run `a_ClearTextFunc` on the resulting HTML, if given;
     *  6. wrap the result in `a_OutTag` when a tag and alignment are given
     *     and the result is not blank.
     *
     * @returns {string} the cleaned HTML.
     */
    function GetClearHtml(a_Element, a_OutTag, a_TextAlign, a_SubElementCheckerToRemove, a_ClearTextFunc, a_ElementIndex) {
        const clear_element = a_Element.cloneNode(true);

        clear_element.querySelectorAll('*').forEach(function (element) {
            if (a_SubElementCheckerToRemove && a_SubElementCheckerToRemove(element)) {
                element.parentNode.removeChild(element);
            } else {
                // href/src are kept: the link check below reads `host`, and
                // links/images without them would be useless in the output.
                RemoveAllAttributes(element, ['href', 'src']);
            }
        });

        const tags_to_delete = ['div', 'span', 'em', 'svg', 'path'];
        tags_to_delete.forEach(function (del_tag_name) {
            clear_element.querySelectorAll(del_tag_name).forEach(function (element) {
                RemoveCurrentElementSaveChild(element, clear_element);
            });
        });

        clear_element.querySelectorAll('a').forEach(function (element) {
            if (element.host === location.hostname) {
                RemoveCurrentElementSaveChild(element, clear_element);
            }
        });

        const tags_to_align = ['p', 'h2'];
        tags_to_align.forEach(function (align_tag_name) {
            clear_element.querySelectorAll(align_tag_name).forEach(function (element) {
                if (a_TextAlign) {
                    element.style.textAlign = a_TextAlign;
                }
                if (IsBlankHtml(element.innerHTML)) {
                    element.parentNode.removeChild(element);
                }
            });
        });

        let result = TrimString(clear_element.innerHTML);
        if (a_ClearTextFunc) {
            result = a_ClearTextFunc(result, a_ElementIndex);
        }
        if (a_OutTag && a_TextAlign && !IsBlankHtml(result)) {
            // The closing tag was missing, which left the element open.
            result = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + result + '</' + a_OutTag + '>';
        }
        return result;
    }

    /**
     * Builds the per-element finishing function used by
     * `GetContentInContainers`: it returns `GetClearHtml` output for the
     * element, switching the output tag to h2 for elements whose class
     * matches 'Title_title.*'.
     */
    function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc, a_SubElementCheckerToRemove) {
        function FinishWorkFunc(a_Element, a_ElementIndex) {
            let out_tag = a_OutTag;
            if (a_Element && CheckRegExp(GetElementClassName, 'Title_title.*', a_Element)) {
                out_tag = 'h2';
            }
            let content = GetClearHtml(a_Element, out_tag, a_TextAlign, a_SubElementCheckerToRemove, a_ClearTextFunc, a_ElementIndex);
            if (a_Element && a_Element.dataset && a_Element.dataset.type === 'quote') {
                // NOTE(review): quotes are only padded with line breaks here;
                // if wrapper markup was intended it is not present in this
                // literal - confirm.
                content = '\n' + content + '\n';
            }
            return content;
        }
        return FinishWorkFunc;
    }

    /** Finishing function for zakonvremeni.ru: plain trimmed text of the element. */
    function FinishWorkFuncZV(a_Element, a_ElementIndex) {
        return GrubTextFuncTemplate()(a_Element);
    }

    // ---------------------------------------------------------------------
    // Content building for standard news pages
    // ---------------------------------------------------------------------

    const title_tag = 'h2';
    const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center');

    /**
     * Assembles title + image + text HTML for one news page.
     *
     * @param a_BaseElementTitle, a_BaseElementImage, a_BaseElementText
     *        search roots for each part (the document is used when a root is
     *        missing).
     * @param a_TitleRegExpElementPattern, a_ImageRegExpElementPattern,
     *        a_TextRegExpElementPattern class-name patterns for each part.
     * @param a_ElementChecker filter applied to the text elements.
     * @param a_SubElementCheckerToRemove checker for sub-elements to drop
     *        from the text elements.
     * @param a_ClearTextPatterns splitters; in the first text element
     *        everything up to and including each splitter is removed.
     * @returns {string} concatenated HTML.
     */
    function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText,
                               a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern,
                               a_ElementChecker, a_SubElementCheckerToRemove, a_ClearTextPatterns) {
        const p_tag = 'p';
        const grub_func = GrubTextFuncTemplate();
        const paragraph_finish_text_func = FinishWorkFuncTemplate(
            p_tag,
            'justify',
            ClearTextFuncTemplate(a_ClearTextPatterns, true),
            a_SubElementCheckerToRemove
        );

        const title_elements = FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle);
        const image_elements = FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage);
        const text_elements = FIlterElements(
            FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText),
            a_ElementChecker
        );

        let content = '';
        content += GetContentInContainers(title_elements, grub_func, title_finish_text_func);
        content += GetImageInContainers(image_elements, 'center');
        content += GetContentInContainers(text_elements, grub_func, paragraph_finish_text_func);
        return content;
    }

    // ---------------------------------------------------------------------
    // Content building for the current site
    // ---------------------------------------------------------------------

    /**
     * Builds the parsed content for the page the script is running on,
     * choosing the selectors by `location.hostname`.
     *
     * @returns {string} the content, or '' when nothing was found or the host
     *          is not handled.
     */
    function MakeContent() {
        let content = '';
        // NOTE(review): source_add and host_name are assigned below but
        // nothing visible reads them; they look like inputs of a
        // source-attribution step that is not present. Kept so the per-site
        // values are not lost.
        let source_add = true;
        let host_name = null;
        const grub_text_func = GrubTextFuncTemplate();

        if (location.hostname === 'tass.ru') {
            // test: https://tass.ru/proisshestviya/19117971
            const base_element = document.getElementById('content_box');
            content = MakeContentByNews(
                base_element, base_element, base_element,
                '(ArticleHeader_titles|tass_pkg_title--variant_h1_default).*',
                'Image_wrapper_.*',
                '(Paragraph_paragraph|Title_title).*',
                ElementCheckerTrue,
                ElementCheckerFalse,
                // NOTE(review): this splitter was written with a raw line
                // break inside the literal; the text is preserved as is.
                ['. \n']
            );
        } else if (location.hostname === 'ria.ru') {
            // test: https://ria.ru/20231020/ssha-1904210900.html
            const base_element = document.getElementsByClassName('article__header')[0];
            const base_element_text = document.getElementsByClassName('article__body')[0];
            const tire = ['-', '–', '—', '‒', '―', '⸺', '⸻'];
            // NOTE(review): clear_text is built but not passed anywhere; the
            // call below uses [''], which removes nothing. Confirm which
            // splitter list is intended.
            const clear_text = tire.map(function (dash) {
                return dash + ' РИА Новости. ';
            });
            content = MakeContentByNews(
                base_element, base_element, base_element_text,
                'article__title',
                'photoview__open',
                'article__block',
                ElementCheckerRia,
                ElementCheckerFalse,
                ['']
            );
        } else if (location.hostname === 'rg.ru') {
            // test: https://rg.ru/2023/10/28/volontery-iz-evrosoiuza-privezli-dlia-zhitelej-donbassa-20-tonn-gumanitarnogo-gruza.html
            const base_element_text = document.getElementsByClassName('article__body')[0];
            content = MakeContentByNews(
                document, document, base_element_text,
                '.*Content_title.*',
                '.*Content_image.*',
                '(PageContentCommonStyling_text|.*Content_lead).*',
                ElementCheckerTrue,
                SubElementCheckerToRemoveTemplate(
                    'portal|rg-incut|article-img|Section',
                    'RG-VIDEO|RG-INCUT'
                ),
                []
            );
        } else if (location.hostname === 'russian.rt.com') {
            // test: https://russian.rt.com/business/article/1222163-centrobank-stavka-oktyabr-2023
            const base_element = document.getElementsByClassName('article article_article-page')[0];
            const base_element_text = document.getElementsByClassName('article__body')[0];
            content = MakeContentByNews(
                base_element, base_element, base_element_text,
                'article__heading',
                'article__cover article__cover_article-page',
                'article__text',
                ElementCheckerTrue,
                SubElementCheckerToRemoveTemplate(
                    'read-more|article__cover'
                ),
                []
            );
            if (content.length === 0) {
                // test: https://russian.rt.com/inotv/2023-10-27/DELFI-Latviya-budet-konfiskovivat-mashini
                const page_column = document.getElementsByClassName('left-column page')[0];
                // Guarded: a page without this column or heading used to
                // abort the whole script with a TypeError.
                const inotv_title = page_column ? page_column.getElementsByTagName('h1')[0] : undefined;
                const inotv_image = document.getElementsByTagName('figure')[0];
                const inotv_text = document.getElementsByTagName('article')[0];
                const title_html = inotv_title ? title_finish_text_func(inotv_title) : '';
                content = title_html + MakeContentByNews(
                    inotv_title, inotv_image, inotv_text,
                    '!!!!',
                    '.*',
                    'article-intro|article-body',
                    ElementCheckerTrue,
                    SubElementCheckerToRemoveTemplate(
                        'meta',
                        'IMG'
                    ),
                    []
                );
                if (content.length > 0) {
                    host_name = 'inotv';
                }
            }
        } else if (location.hostname === 'www.cnews.ru') {
            // test: https://www.cnews.ru/news/top/2023-10-27_rossiyane_sozdali_polnotsennyj
            // NOTE(review): 'news_containere' differs from the
            // 'news_container' pattern used below; if no element has this
            // class the lookups fall back to the whole document. Confirm.
            const base_element = document.getElementsByClassName('news_containere')[0];
            content = MakeContentByNews(
                base_element, base_element, document,
                '!!!', // No title
                'img-block',
                'news_container',
                ElementCheckerTrue,
                SubElementCheckerToRemoveTemplate(
                    'article-top-author|article-menu_base|d-flex|img-block|NewsBodyLeftInclude|mobile-zone|other-news-note|cnLike|article-bottom-info|banner|comments_all',
                    'NOINDEX|BR'
                ),
                []
            );
        } else if (location.hostname === 'zakonvremeni.ru') {
            const base_element = document.getElementsByClassName('item-page')[0];
            // Guarded: pages without an article body used to abort the
            // script with a TypeError; they now simply yield no content.
            const article_body = base_element ? base_element.querySelector('[itemprop=articleBody]') : null;
            if (article_body) {
                const title = GetContentInContainers(
                    FindElementsByRegExp(GetElementClassName, 'page-header', base_element),
                    grub_text_func, FinishWorkFuncZV);
                const parent_category = GetContentInContainers(
                    FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element),
                    grub_text_func, FinishWorkFuncZV);
                const category = GetContentInContainers(
                    FindElementsByRegExp(GetElementClassName, 'category-name', base_element),
                    grub_text_func, FinishWorkFuncZV);
                // Body text up to and including the first '.'.
                const page = RemoveAfterSplitter(TrimString(article_body.textContent), '.', true);
                content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL;
            }
            source_add = false;
        }

        // NOTE(review): this function used to return '' even when content
        // had been built, so nothing parsed ever reached the page. Returning
        // the content itself is the minimal repair; any wrapper markup or
        // source line that belonged here is not visible and must be restored
        // by someone who knows the intended output.
        return content;
    }

    const content = MakeContent();
    const logo = document.createElement('div');
    // NOTE(review): the wrapper consists of line breaks only, exactly as the
    // literal was written; confirm whether container markup is expected.
    logo.innerHTML = '\n' + content + '\n';
    document.body.insertBefore(logo, document.body.firstChild);
})();