// ==UserScript==
// @name News parser
// @namespace http://zakonvremeni.ru
// @version 0.1
// @description Parse news
// @author AlexeiBv+mirocod@narod.ru
// @match https://tass.ru/*
// @match https://ria.ru/*
// @match https://zakonvremeni.ru/*
// @icon https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico
// @grant none
// ==/UserScript==
// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) <AlexeiBv+mirocod_platform_bot@narod.ru>
( function ( ) {
'use strict' ;
/**
 * Collects every descendant of `parent` whose class attribute contains a
 * class name matching `className`.
 *
 * `className` is spliced into a regular expression unescaped, so it may carry
 * regex syntax (for example a trailing `.*`).
 *
 * @param {string} className - Class name or regex fragment to look for.
 * @param {Element|Document} [parent] - Search root; `document` when falsy.
 * @returns {Element[]} Matching elements in document order.
 */
function getByClass(className, parent) {
  const root = parent || document;
  const candidates = root.getElementsByTagName('*');
  const pattern = new RegExp("(?:^|\\s)" + className + "(?!\\S)");
  const matches = [];
  // Walk until the first missing entry, i.e. the end of the collection.
  for (let idx = 0; candidates[idx]; idx += 1) {
    const node = candidates[idx];
    if (pattern.test(node.className)) {
      matches.push(node);
    }
  }
  return matches;
}
/**
 * Builds an HTML paragraph that shows an image found inside the elements
 * whose class matches `baseClass`.
 *
 * Every descendant `IMG` of every matching element is inspected; when several
 * have an http(s) URL containing `.png` or `.jpg`, the last one wins.
 *
 * @param {string} baseClass - Class pattern forwarded to getByClass.
 * @param {Element|Document} parent - Search root forwarded to getByClass.
 * @param {string} textAlign - Value written into the paragraph's text-align.
 * @returns {string} `<p><img></p>` markup, or '' when no image qualifies.
 */
function GetImageInContainers(baseClass, parent, textAlign) {
  const elems = getByClass(baseClass, parent);
  if (!elems) {
    return '';
  }
  // A regex literal keeps `\.` as a literal dot; built from a plain string the
  // backslash is dropped and the dot would match any character.
  const imageUrlPattern = /https?:\/\/.*\.(?:png|jpg)/;
  let imageSrc = '';
  // Iterate values, not keys: for...in would also visit enumerable properties
  // that a page script may have added to Array.prototype.
  for (const container of Array.from(elems)) {
    if (!container || typeof container.querySelectorAll !== 'function') {
      continue;
    }
    for (const node of Array.from(container.querySelectorAll('*'))) {
      if (node.nodeName === 'IMG' && imageUrlPattern.test(node.src)) {
        imageSrc = node.src;
      }
    }
  }
  if (imageSrc.length > 0) {
    return `<p style = "text-align:${textAlign};"><img src = "${imageSrc}" width = "600px"/></p>`;
  }
  return '';
}
/**
 * Strips leading and trailing whitespace from a string.
 *
 * @param {string} [s] - Text to trim; any falsy value is treated as ''.
 * @returns {string} The trimmed text.
 */
function Trim(s) {
  const text = s ? s : '';
  const trimmed = text.replace(/^\s+|\s+$/g, '');
  return trimmed;
}
/**
 * Drops everything up to and including the first occurrence of a splitter.
 *
 * @param {string} a_String - Text to cut.
 * @param {string} a_Splitter - Marker to search for.
 * @returns {string} The text after the first `a_Splitter`, or `a_String`
 *   unchanged when the marker does not occur.
 */
function RemoveBeforeSplitter(a_String, a_Splitter) {
  const index = a_String.indexOf(a_Splitter);
  if (index === -1) {
    return a_String;
  }
  return a_String.substring(index + a_Splitter.length);
}
/**
 * Cuts a string at the first occurrence of a splitter.
 *
 * @param {string} a_String - Text to cut.
 * @param {string} a_Splitter - Marker to search for.
 * @param {boolean} [a_SaveSplitter] - When truthy the marker itself is kept at
 *   the end of the result; otherwise the result stops right before it.
 * @returns {string} The leading part of `a_String`, or `a_String` unchanged
 *   when the marker does not occur.
 */
function RemoveAfterSplitter(a_String, a_Splitter, a_SaveSplitter) {
  const index = a_String.indexOf(a_Splitter);
  if (index === -1) {
    return a_String;
  }
  const end = a_SaveSplitter ? index + a_Splitter.length : index;
  return a_String.substring(0, end);
}
/**
 * Extracts text from every element whose class matches `baseClass` and joins
 * the pieces, optionally wrapping each piece in an HTML tag.
 *
 * For each matching element that passes `a_ElementFilterFunc`:
 *  - if it has no descendants or a non-empty `innerText`, the text is grabbed
 *    from the element itself;
 *  - otherwise the text is grabbed from each descendant and concatenated.
 * The piece is then passed through `a_ClearTextFunc` (when given) and appended
 * to the result, wrapped in `a_OutTag` unless that is the empty string.
 *
 * @param {function(Element): string} a_GrubTextFunc - Extracts text from one element.
 * @param {string} a_OutTag - Wrapper tag name, or '' for no wrapper.
 * @param {string} baseClass - Class pattern forwarded to getByClass.
 * @param {Element|Document} parent - Search root forwarded to getByClass.
 * @param {string} [textAlign] - Value for the wrapper's text-align style.
 * @param {function(Element): boolean} [a_ElementFilterFunc] - Keeps an element
 *   when it returns a truthy value.
 * @param {function(string): string} [a_ClearTextFunc] - Post-processes each piece.
 * @returns {string} Concatenated output; '' when nothing matched.
 */
function GetContentInContainers(a_GrubTextFunc, a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) {
  const elems = getByClass(baseClass, parent);
  if (!elems) {
    return 'Не удалось найти ' + baseClass;
  }
  let result = '';
  // Iterate values, not keys: for...in would also visit enumerable properties
  // that a page script may have added to Array.prototype and emit an empty
  // wrapper for each of them.
  for (const element of Array.from(elems)) {
    if (a_ElementFilterFunc && !a_ElementFilterFunc(element)) {
      continue;
    }
    let content = '';
    if (element.querySelectorAll) {
      const children = element.querySelectorAll('*');
      if (children.length === 0 || element.innerText) {
        content += a_GrubTextFunc(element);
      } else {
        for (const child of Array.from(children)) {
          content += a_GrubTextFunc(child);
        }
      }
    }
    if (a_ClearTextFunc) {
      content = a_ClearTextFunc(content);
    }
    if (a_OutTag === '') {
      result += content;
    } else {
      result += `<${a_OutTag} style = "text-align:${textAlign};">${content}</${a_OutTag}>`;
    }
  }
  return result;
}
/**
 * Removes the query string from a URL by cutting at the first '?'.
 *
 * @param {string} a_Url - URL to clean.
 * @returns {string} The URL up to, but not including, the first '?'; the
 *   URL unchanged when it contains no '?'.
 */
function ClearUrl(a_Url) {
  const separator = '?';
  return RemoveAfterSplitter(a_Url, separator, false);
}
/**
 * Element filter that accepts everything.
 *
 * @param {Element} element - Ignored.
 * @returns {boolean} Always `true`.
 */
function FIlterTrue(element) {
  const accepted = true;
  return accepted;
}
/**
 * Element filter that keeps only blocks whose `data-type` is one of
 * `text`, `quote` or `list`.
 *
 * @param {HTMLElement} element - Element whose `dataset.type` is inspected.
 * @returns {boolean} `true` for the three accepted block types, else `false`.
 */
function FIlterRia(element) {
  const blockType = element.dataset.type;
  switch (blockType) {
    case 'text':
    case 'quote':
    case 'list':
      return true;
    default:
      return false;
  }
}
/**
 * Element filter that keeps only the element marked `itemprop="articleBody"`.
 *
 * The value is read from a plain `itemprop` property and, failing that, from
 * the `itemprop` attribute via `getAttribute`; DOM elements expose the
 * microdata value as an attribute, not as a lowercase `itemprop` property.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} `true` when the element's itemprop is `articleBody`.
 */
function FIlterZV(element) {
  if (!element) {
    return false;
  }
  if (element.itemprop === 'articleBody') {
    return true;
  }
  return typeof element.getAttribute === 'function'
    && element.getAttribute('itemprop') === 'articleBody';
}
/**
 * Creates a text cleaner that strips leading boilerplate from a string.
 *
 * The returned function applies RemoveBeforeSplitter once per marker, in list
 * order, each time to the output of the previous step.
 *
 * @param {string[]} [a_RemoveBeforeList] - Markers passed to
 *   RemoveBeforeSplitter one after another; an omitted list means no cleaning.
 * @returns {function(string): string} The cleaner function.
 */
function ClearTextFuncTemplate(a_RemoveBeforeList = []) {
  function ClearTextFunc(a_Content) {
    let content = a_Content;
    for (const marker of a_RemoveBeforeList) {
      content = RemoveBeforeSplitter(content, marker);
    }
    return content;
  }
  return ClearTextFunc;
}
/**
 * Creates the text extractor used for a single element.
 *
 * The returned function yields the trimmed `textContent` of the element, but
 * only when the element's `innerText` is non-empty; otherwise it yields ''.
 * NOTE(review): gating on innerText while returning textContent looks
 * deliberate — confirm before unifying the two.
 *
 * @returns {function(Element): string} The extractor function.
 */
function GrubTextFuncTemplate() {
  function GrubTextFunc(a_Element) {
    if (!a_Element || !a_Element.innerText) {
      return '';
    }
    return Trim(a_Element.textContent);
  }
  return GrubTextFunc;
}
/**
 * Builds the markup of a `<textarea id="news_content">` pre-filled with the
 * article scraped from the current page.
 *
 * - tass.ru / ria.ru: an `h2` title, an optional image paragraph, the article
 *   paragraphs and a trailing "Источник" link to the page URL without its
 *   query string.
 * - zakonvremeni.ru: plain text made of title, categories, the article body
 *   up to and including its first '.', and the page URL.
 *
 * The scraped text is escaped (`&`, `<`) before being placed between the
 * textarea tags, because the script assigns this markup through `innerHTML`:
 * unescaped, a literal `</textarea>` in the page text would end the field
 * early and entity-like text would be decoded.
 *
 * @returns {string} Textarea markup, or '' when nothing was collected.
 */
function MakeContent() {
  const title_tag = 'h2';
  const p_tag = 'p';
  const zero_tag = '';
  const host = location.hostname;
  let content = '';
  let source_add = true;

  // Makes text safe as the character data of a <textarea>.
  function escapeForTextarea(text) {
    return text.replace(/&/g, '&amp;').replace(/</g, '&lt;');
  }

  if (host === 'tass.ru') {
    const contentBox = document.getElementById('content_box');
    content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'tass_pkg_title--variant_h1_default.*', contentBox, 'center');
    content += GetImageInContainers('Image_wrapper_.*', contentBox, 'center');
    content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'Paragraph_paragraph.*', contentBox, 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. ']));
  } else if (host === 'ria.ru') {
    const articleHeader = document.getElementsByClassName('article__header')[0];
    const articleBody = document.getElementsByClassName('article__body')[0];
    content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'article__title', articleHeader, 'center');
    content += GetImageInContainers('photoview__open', articleHeader, 'center');
    content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'article__block', articleBody, 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. ']));
  } else if (host === 'zakonvremeni.ru') {
    const itemPage = document.getElementsByClassName('item-page')[0];
    const bodyNode = itemPage ? itemPage.querySelector('[itemprop=articleBody]') : null;
    // Pages without an article body yield no content instead of throwing.
    if (bodyNode) {
      const title = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'page-header', itemPage);
      const parent_category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'parent-category-name', itemPage);
      const category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'category-name', itemPage);
      const page = RemoveAfterSplitter(Trim(bodyNode.textContent), '.', true);
      content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL;
    }
    source_add = false;
  }

  if (content.length === 0) {
    return '';
  }
  let fieldText = content;
  if (source_add) {
    fieldText += '<p style="text-align: justify;">Источник: <a href = "' + ClearUrl(document.URL) + '">' + location.hostname + '</a></p>';
  }
  return '<textarea id = "news_content" rows="10" cols="100">' + escapeForTextarea(fieldText) + '</textarea>';
}
// Build the news markup and place it, wrapped in a centred 800px container,
// as the very first node of the page body.
const newsMarkup = MakeContent();
const panel = document.createElement("div");
const wrapperOpen = '<div style="margin: 0pt auto; width: 800px; text-align: center;">';
const wrapperClose = '</div>';
panel.innerHTML = wrapperOpen + newsMarkup + wrapperClose;
const pageBody = document.body;
pageBody.insertBefore(panel, pageBody.firstChild);
} ) ( ) ;