// ==UserScript==
// @name         News parser
// @namespace    http://zakonvremeni.ru
// @version      0.2
// @description  Parse news
// @author       AlexeiBv+mirocod@narod.ru
// @match        https://tass.ru/*
// @match        https://ria.ru/*
// @match        https://rg.ru/*
// @match        https://zakonvremeni.ru/*
// @icon         https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico
// @grant        none
// ==/UserScript==

// Public domain, 2023, Alexei Bezborodov (Алексей Безбородов)

// NOTE(review): the copy of this script under review had been collapsed onto a
// few lines and had the HTML markup stripped out of its string literals. Every
// place where markup was reconstructed is marked with NOTE(review) below and
// should be compared against the upstream script.

(function() {
    'use strict';

    // The script only makes sense inside a page; do nothing when there is no DOM.
    if (typeof document === 'undefined') {
        return;
    }

    // ---- Element lookup by regular expression ----

    /**
     * Returns the class attribute of an element as a plain string.
     * SVG elements expose `className` as an object with a `baseVal` string, so
     * that case is unwrapped to keep the regular-expression test meaningful.
     */
    function GetElementClassName(a_Element) {
        const class_name = a_Element.className;
        if (typeof class_name === 'string') {
            return class_name;
        }
        if (class_name && typeof class_name.baseVal === 'string') {
            return class_name.baseVal;
        }
        return '';
    }

    /** Returns the node name of an element (e.g. 'P', 'IMG'). */
    function GetNodeName(a_Element) {
        return a_Element.nodeName;
    }

    /**
     * Builds the matcher used for element names: the pattern must start at the
     * beginning of the string or after whitespace and must be followed by
     * whitespace or the end of the string. The pattern is wrapped in a
     * non-capturing group so that a top-level `|` stays inside the anchors.
     */
    function MakeNameRegExp(a_RegExpPattern) {
        return new RegExp('(?:^|\\s)(?:' + a_RegExpPattern + ')(?!\\S)');
    }

    /**
     * Tests whether the name produced by a_GetElementNameFunc(a_Element)
     * matches a_RegExpPattern as a whitespace-delimited token.
     */
    function CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, a_Element) {
        return MakeNameRegExp(a_RegExpPattern).test(a_GetElementNameFunc(a_Element));
    }

    /**
     * Collects all descendants of a_ElementParent (or of the whole document
     * when the parent is missing) whose name matches a_RegExpPattern.
     * Returns a plain array in document order.
     */
    function FindElementsByRegExp(a_GetElementNameFunc, a_RegExpPattern, a_ElementParent) {
        const parent = a_ElementParent || document;
        // Compile once instead of once per visited element.
        const re = MakeNameRegExp(a_RegExpPattern);
        const result = [];
        for (const e of Array.from(parent.getElementsByTagName('*'))) {
            if (re.test(a_GetElementNameFunc(e))) {
                result.push(e);
            }
        }
        return result;
    }

    // ---- String helpers ----

    /** Trims leading and trailing whitespace; falsy input yields ''. */
    function TrimString(s) {
        return (s || '').trim();
    }

    /**
     * Returns the part of a_String after the first occurrence of a_Splitter,
     * or a_String unchanged when the splitter does not occur.
     */
    function RemoveBeforeSplitter(a_String, a_Splitter) {
        const index = a_String.indexOf(a_Splitter);
        if (index === -1) {
            return a_String;
        }
        return a_String.substring(index + a_Splitter.length);
    }

    /**
     * Returns the part of a_String before the first occurrence of a_Splitter
     * (including the splitter itself when a_SaveSplitter is truthy), or
     * a_String unchanged when the splitter does not occur.
     */
    function RemoveAfterSplitter(a_String, a_Splitter, a_SaveSplitter) {
        const index = a_String.indexOf(a_Splitter);
        if (index === -1) {
            return a_String;
        }
        const kept_splitter_length = a_SaveSplitter ? a_Splitter.length : 0;
        return a_String.substring(0, index + kept_splitter_length);
    }

    /** Drops the query string (everything from the first '?') from a URL. */
    function ClearUrl(a_Url) {
        const separator = '?';
        return RemoveAfterSplitter(a_Url, separator, false);
    }

    /**
     * Escapes text for insertion into HTML. `&`, `<` and `>` are always
     * escaped; double quotes are escaped too when a_ForAttribute is truthy.
     */
    function EscapeHtml(a_Text, a_ForAttribute) {
        let text = String(a_Text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
        if (a_ForAttribute) {
            text = text.replace(/"/g, '&quot;');
        }
        return text;
    }

    /**
     * Returns a function that, for every entry of a_RemoveBeforeList in order,
     * cuts the text up to and including the first occurrence of that entry.
     */
    function ClearTextFuncTemplate(a_RemoveBeforeList) {
        const remove_before_list = a_RemoveBeforeList || [];
        function ClearTextFunc(a_Content) {
            let content = a_Content;
            for (const splitter of remove_before_list) {
                content = RemoveBeforeSplitter(content, splitter);
            }
            return content;
        }
        return ClearTextFunc;
    }

    // ---- Container processing ----

    /**
     * Looks through the descendants of every container for <img> elements
     * whose src is an http(s) URL containing '.png' or '.jpg', and returns
     * markup for the last such image, or '' when none is found.
     */
    function GetImageInContainers(a_Elements, a_TextAlign) {
        // A regex literal keeps the escaped dot; the previous string-built
        // pattern lost the backslash and matched any character there.
        const re = /https?:\/\/.*\.(?:png|jpg)/;
        let img_src = '';
        for (const e of a_Elements) {
            for (const img of Array.from(e.querySelectorAll('img'))) {
                if (re.test(img.src)) {
                    img_src = img.src;
                }
            }
        }
        if (img_src.length === 0) {
            return '';
        }
        // NOTE(review): the original markup of this literal was stripped; it is
        // reconstructed from the otherwise unused a_TextAlign and img_src.
        return '<p style = "text-align:' + a_TextAlign + ';"><img src="' +
            EscapeHtml(img_src, true) + '" /></p>';
    }

    /**
     * Extracts text from every container with a_GrubTextFunc and, when
     * a_FinishWorkFunc is given, post-processes each container's text with it.
     * A container without descendants, or one that has visible text of its
     * own, is read as a whole; otherwise its descendants are read one by one.
     * Returns the concatenation of all per-container results.
     */
    function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) {
        let result = '';
        for (const e of a_Elements) {
            let content = '';
            if (e.querySelectorAll) {
                const children = e.querySelectorAll('*');
                if (children.length === 0 || e.innerText) {
                    content += a_GrubTextFunc(e);
                } else {
                    for (const c of Array.from(children)) {
                        content += a_GrubTextFunc(c);
                    }
                }
            }
            result += a_FinishWorkFunc ? a_FinishWorkFunc(content, e) : content;
        }
        return result;
    }

    // ---- Element filtering ----

    /** Returns the elements for which a_ElementChecker returns a truthy value. */
    function FilterElements(a_Elements, a_ElementChecker) {
        return Array.from(a_Elements).filter(function(e) {
            return a_ElementChecker(e);
        });
    }

    /** Accepts every element. */
    function ElementCheckerTrue(a_Element) {
        return true;
    }

    /** Rejects blocks whose data-type is 'article' or 'banner'. */
    function ElementCheckerRia(a_Element) {
        const type = a_Element.dataset ? a_Element.dataset.type : undefined;
        return !(type === 'article' || type === 'banner');
    }

    /**
     * Accepts only elements marked with itemprop="articleBody".
     * `itemprop` is read as an attribute because elements do not expose it as
     * a DOM property.
     */
    function ElementCheckerZV(a_Element) {
        return a_Element.getAttribute('itemprop') === 'articleBody';
    }

    // ---- Element processing ----

    /**
     * Returns a function that yields the trimmed textContent of an element,
     * or '' when the element's innerText is empty.
     */
    function GrubTextFuncTemplate() {
        function GrubTextFunc(a_Element) {
            if (!a_Element.innerText) {
                return '';
            }
            return TrimString(a_Element.textContent);
        }
        return GrubTextFunc;
    }

    /**
     * Returns a function that turns the raw text of an element into an HTML
     * fragment.
     *
     * @param a_OutTag        tag to wrap the text in (applied only together with a_TextAlign)
     * @param a_TextAlign     value for the wrapper's text-align style
     * @param a_ClearTextFunc optional function applied to the raw text first
     */
    function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc) {
        const grub_func = GrubTextFuncTemplate();

        function FinishWorkFunc(a_Content, a_Element) {
            let content = a_ClearTextFunc ? a_ClearTextFunc(a_Content) : a_Content;
            // The text comes from the page and ends up inside markup.
            content = EscapeHtml(content);

            if (a_Element && a_Element.dataset && a_Element.dataset.type === 'list') {
                const items = FindElementsByRegExp(GetElementClassName, 'article__list-item', a_Element);
                let list_items = '';
                for (const item of items) {
                    list_items += '<li>' + EscapeHtml(grub_func(item)) + '</li>';
                }
                // NOTE(review): list markup was stripped from the reviewed copy
                // (the collected items were discarded); <ul>/<li> is a
                // reconstruction. As in the original control flow, the list
                // still passes through the wrapping steps below.
                content = '<ul>' + list_items + '</ul>';
            }

            if (a_Element && CheckRegExp(GetElementClassName, 'PageContentCommonStyling_text.*', a_Element)) {
                // Such a container is rendered paragraph by paragraph instead
                // of as one block of text.
                let paragraphs = '';
                for (const p of FindElementsByRegExp(GetNodeName, 'P', a_Element)) {
                    paragraphs += FinishWorkFunc(grub_func(p), p);
                }
                return paragraphs;
            }

            if (a_OutTag && a_TextAlign) {
                content = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' +
                    content + '</' + a_OutTag + '>';
            }

            if (a_Element && a_Element.dataset && a_Element.dataset.type === 'quote') {
                // NOTE(review): wrapper markup was stripped; <blockquote> is a
                // reconstruction based on the 'quote' block type.
                content = '<blockquote>' + content + '</blockquote>';
            }
            return content;
        }
        return FinishWorkFunc;
    }

    // ---- Content creation for standard news pages ----

    /**
     * Builds the HTML for a news article: a centered <h2> title, the article
     * image, and justified <p> paragraphs.
     *
     * Elements are located by class-name patterns inside the given base
     * elements (a missing base element means "search the whole document").
     * Text blocks are filtered with a_ElementChecker, and each entry of
     * a_ClearTextPatterns is used to cut a leading agency prefix.
     */
    function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText,
                               a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern,
                               a_ElementChecker, a_ClearTextPatterns) {
        const title_tag = 'h2';
        const p_tag = 'p';
        const grub_func = GrubTextFuncTemplate();
        const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center');
        const paragraph_finish_text_func = FinishWorkFuncTemplate(
            p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns));

        const title_elements = FindElementsByRegExp(
            GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle);
        const image_elements = FindElementsByRegExp(
            GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage);
        const text_elements = FilterElements(
            FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText),
            a_ElementChecker);

        let content = '';
        content += GetContentInContainers(title_elements, grub_func, title_finish_text_func);
        content += GetImageInContainers(image_elements, 'center');
        content += GetContentInContainers(text_elements, grub_func, paragraph_finish_text_func);
        return content;
    }

    // ---- Content creation for the current site ----

    /**
     * Builds the output for the current page depending on location.hostname.
     * Returns '' when the host is not supported or nothing could be extracted.
     */
    function MakeContent() {
        let content = '';
        let source_add = true;
        const grub_text_func = GrubTextFuncTemplate();

        if (location.hostname === 'tass.ru') {
            const base_element = document.getElementById('content_box');
            content = MakeContentByNews(
                base_element, base_element, base_element,
                'tass_pkg_title--variant_h1_default.*',
                'Image_wrapper_.*',
                'Paragraph_paragraph.*',
                ElementCheckerTrue,
                ['/ТАСС/. ']
            );
        } else if (location.hostname === 'ria.ru') {
            const base_element = document.getElementsByClassName('article__header')[0];
            const base_element_text = document.getElementsByClassName('article__body')[0];
            // The agency prefix may be typed with any of these dash characters.
            const dashes = ['-', '–', '—', '‒', '―', '⸺', '⸻'];
            const clear_text = dashes.map(function(dash) {
                return dash + ' РИА Новости. ';
            });
            content = MakeContentByNews(
                base_element, base_element, base_element_text,
                'article__title',
                'photoview__open',
                'article__block',
                ElementCheckerRia,
                clear_text
            );
        } else if (location.hostname === 'rg.ru') {
            const base_element_text = document.getElementsByClassName('article__body')[0];
            content = MakeContentByNews(
                document, document, base_element_text,
                'PageArticleContent_title.*',
                'PageArticleContent_image.*',
                '(PageContentCommonStyling_text|PageArticleContent_lead).*',
                ElementCheckerRia,
                []
            );
        } else if (location.hostname === 'zakonvremeni.ru') {
            const base_element = document.getElementsByClassName('item-page')[0];
            const article_body = base_element ? base_element.querySelector('[itemprop=articleBody]') : null;
            // Not every page of the site is an article; skip the ones that are not.
            if (article_body) {
                const title = GetContentInContainers(
                    FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func);
                const parent_category = GetContentInContainers(
                    FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element), grub_text_func);
                const category = GetContentInContainers(
                    FindElementsByRegExp(GetElementClassName, 'category-name', base_element), grub_text_func);
                // Only the text up to and including the first '.' is kept.
                const page = RemoveAfterSplitter(TrimString(article_body.textContent), '.', true);
                content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL;
                source_add = false;
            }
        }

        let result = '';
        if (content.length > 0) {
            result = content;
            if (source_add) {
                // NOTE(review): the original source-attribution markup was
                // stripped; this link is a reconstruction based on the
                // otherwise unused source_add flag and ClearUrl helper.
                const url = EscapeHtml(ClearUrl(document.URL), true);
                result += '<p style = "text-align:justify;"><a href="' + url + '">' + url + '</a></p>';
            }
        }
        return result;
    }

    const content = MakeContent();
    if (content.length > 0) {
        // NOTE(review): the original wrapper markup was stripped. The result is
        // shown in a read-only textarea through `.value`, so the generated
        // text is never interpreted as markup of the host page.
        const logo = document.createElement('div');
        const output = document.createElement('textarea');
        output.readOnly = true;
        output.rows = 10;
        output.style.width = '100%';
        output.value = content;
        logo.appendChild(output);
        document.body.insertBefore(logo, document.body.firstChild);
    }
})();