diff --git a/news_parser.js b/news_parser.js index ea46d51..19ae73f 100644 --- a/news_parser.js +++ b/news_parser.js @@ -1,7 +1,7 @@ // ==UserScript== // @name News parser // @namespace http://zakonvremeni.ru -// @version 0.1 +// @version 0.2 // @description Parse news // @author AlexeiBv+mirocod@narod.ru // @match https://tass.ru/* @@ -11,49 +11,32 @@ // @grant none // ==/UserScript== -// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) +// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) (function() { 'use strict'; - function getByClass (className, parent) { - parent || (parent=document); - var descendants=parent.getElementsByTagName('*'), i=-1, e, result=[]; - var re = new RegExp("(?:^|\\s)" + className + "(?!\\S)"); + // Поиск элементов по регулярному выражению + + function GetElementClassName(a_Element) { + return a_Element.className; + } + + function FindElementsByRegExp(a_GenElementNameFunc, a_RegExpPattern, a_ElementParent) { + a_ElementParent || (a_ElementParent=document); + var descendants = a_ElementParent.getElementsByTagName('*'), i=-1, e, result=[]; + var re = new RegExp("(?:^|\\s)" + a_RegExpPattern + "(?!\\S)"); while (e=descendants[++i]) { - if (re.test(e.className)){ + if (re.test(a_GenElementNameFunc(e))){ result.push(e); } } return result; } - function GetImageInContainers(baseClass, parent, textAlign) { - var elems = getByClass(baseClass, parent); - if (!elems) { - return ''; - } + // Работа со строками - var i; - var img_src = ''; - var re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); - for (i in elems) { - var e = elems[i]; - var children = e.querySelectorAll("*"); - for(let i = 0; i < children.length; i++){ - var c = children[i]; - if (c.nodeName == 'IMG' && re.test(c.src)) { - img_src = c.src; - } - } - } - if (img_src.length > 0) { - return '

'; - } - return ''; - } - - function Trim(s) { + function TrimString(s) { return ( s || '' ).replace( /^\s+|\s+$/g, '' ); } @@ -77,8 +60,11 @@ return a_String; } - function FinishWorkFuncTemplate(a_OutTag, a_TextAlign) { + function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc) { function FinishWorkFunc(a_Content, a_Element) { + if (a_ClearTextFunc) { + a_Content = a_ClearTextFunc(a_Content); + } if (a_OutTag && a_TextAlign) { return '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + a_Content + ''; } @@ -89,18 +75,49 @@ return FinishWorkFunc } - function GetContentInContainers(a_FinishWorkFunc, a_GrubTextFunc, baseClass, parent, a_ElementFilterFunc, a_ClearTextFunc) { - var elems = getByClass(baseClass, parent); - if (!elems) { - return 'Не удалось найти ' + baseClass; + function ClearUrl(a_Url) { + var separator = '?'; + return RemoveAfterSplitter(a_Url, separator, false); + } + + function ClearTextFuncTemplate(a_RemoveBeforeList) { + function ClearTextFunc(a_Content) { + var content = a_Content; + for (let i = 0; i < a_RemoveBeforeList.length; i++) { + var r = a_RemoveBeforeList[i]; + content = RemoveBeforeSplitter(content, r); + } + return content; } + return ClearTextFunc + } - var result = ''; - for (var i in elems) { - var e = elems[i]; - if (a_ElementFilterFunc && !a_ElementFilterFunc(e)) { - continue; + // Работа с контейнерами + + function GetImageInContainers(a_Elements, a_TextAlign) { + var i; + var img_src = ''; + var re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); + for (i in a_Elements) { + var e = a_Elements[i]; + var children = e.querySelectorAll("*"); + for(let i = 0; i < children.length; i++){ + var c = children[i]; + if (c.nodeName == 'IMG' && re.test(c.src)) { + img_src = c.src; + } } + } + if (img_src.length > 0) { + return '

'; + } + return ''; + } + + function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) { + var result = ''; + for (var i in a_Elements) { + var e = a_Elements[i]; var content = ''; if (e.querySelectorAll) { @@ -115,9 +132,6 @@ } } } - if (a_ClearTextFunc) { - content = a_ClearTextFunc(content); - } if (a_FinishWorkFunc) { result += a_FinishWorkFunc(content, e); @@ -129,76 +143,107 @@ return result; } - function ClearUrl(a_Url) { - var separator = '?'; - return RemoveAfterSplitter(a_Url, separator, false); + // Фильтрация элементов + + function FIlterElements(a_Elements, a_ElementChecker) { + var result = []; + for (let i = 0; i < a_Elements.length; i++) { + var e = a_Elements[i]; + if (a_ElementChecker(e)) { + result.push(e); + } + } + + return result; } - function FIlterTrue(element) { + function ElementCheckerTrue(a_Element) { return true; } - function FIlterRia(element) { - if (element.dataset.type == 'text' || element.dataset.type == 'quote' || element.dataset.type == 'list') { + function ElementCheckerRia(a_Element) { + if (a_Element.dataset.type == 'text' || a_Element.dataset.type == 'quote' || a_Element.dataset.type == 'list') { return true; } + return false; } - function FIlterZV(element) { - if (element.itemprop == 'articleBody') { + function ElementCheckerZV(a_Element) { + if (a_Element.itemprop == 'articleBody') { return true; } return false; } - function ClearTextFuncTemplate(a_RemoveBeforeList) { - function ClearTextFunc(a_Content) { - var content = a_Content; - for (let i = 0; i < a_RemoveBeforeList.length; i++) { - var r = a_RemoveBeforeList[i]; - content = RemoveBeforeSplitter(content, r); - } - return content; - } - return ClearTextFunc - } - function GrubTextFuncTemplate() { function GrubTextFunc(a_Element) { var content = ''; if (a_Element.innerText) { - content = Trim(a_Element.textContent); + content = TrimString(a_Element.textContent); } return content; } return GrubTextFunc } - function MakeContent() { - var content = ''; + // Создание контента для стандартных новостей + + function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_ClearTextPatterns) { var title_tag = 'h2'; var p_tag = 'p'; - var zero_tag = ''; + var title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') + var grub_text_func = GrubTextFuncTemplate() + + var content = ''; + var paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns)); + content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_text_func, title_finish_text_func); + content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center'); + content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_text_func, paragraph_finish_text_func); + return content; + } + + // Создание контента для сайта + + function MakeContent() { + var content = ''; var source_add = true; - var title_func = FinishWorkFuncTemplate(title_tag, 'center') - var paragraph_func = FinishWorkFuncTemplate(p_tag, 'justify') var zero_tag_func = FinishWorkFuncTemplate() + var grub_text_func = GrubTextFuncTemplate() + if (location.hostname == 'tass.ru') { - content += GetContentInContainers(title_func, GrubTextFuncTemplate(), 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box')); - content += GetImageInContainers('Image_wrapper_.*', document.getElementById('content_box'), 'center'); - content += GetContentInContainers(paragraph_func, GrubTextFuncTemplate(), 'Paragraph_paragraph.*', document.getElementById('content_box'), FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. '])); + let base_element = document.getElementById('content_box'); + content = MakeContentByNews( + base_element, + base_element, + base_element, + 'tass_pkg_title--variant_h1_default.*', + 'Image_wrapper_.*', + 'Paragraph_paragraph.*', + ElementCheckerTrue, + ['/ТАСС/. '] + ); } else if (location.hostname == 'ria.ru') { - content += GetContentInContainers(title_func, GrubTextFuncTemplate(), title_tag, document.getElementsByClassName('article__header')[0]); - content += GetImageInContainers('photoview__open', document.getElementsByClassName('article__header')[0], 'center'); - content += GetContentInContainers(paragraph_func, GrubTextFuncTemplate(), p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. '])); + let base_element = document.getElementsByClassName('article__header')[0]; + var base_element_text = document.getElementsByClassName('article__body')[0]; + content = MakeContentByNews( + base_element, + base_element, + base_element_text, + 'article__title', + 'photoview__open', + 'article__block', + ElementCheckerRia, + ['– РИА Новости. ', '— РИА Новости. '] + ); } else if (location.hostname == 'zakonvremeni.ru') { - var title = GetContentInContainers(zero_tag_func, GrubTextFuncTemplate(), 'page-header', document.getElementsByClassName('item-page')[0]); - var parent_category = GetContentInContainers(zero_tag_func, GrubTextFuncTemplate(), 'parent-category-name', document.getElementsByClassName('item-page')[0]); - var category = GetContentInContainers(zero_tag_func, GrubTextFuncTemplate(), 'category-name', document.getElementsByClassName('item-page')[0]); - var page = RemoveAfterSplitter(Trim(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); + let base_element = document.getElementsByClassName('item-page')[0]; + var title = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func); + var parent_category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element), grub_text_func); + var category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'category-name', base_element), grub_text_func); + var page = RemoveAfterSplitter(TrimString(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL; source_add = false; }