Browse Source

Тип переменных и работа с rg.ru

master
Alexei 1 year ago
parent
commit
d9055b0894
  1. 131
      news_parser.js

131
news_parser.js

@ -6,6 +6,7 @@
// @author AlexeiBv+mirocod@narod.ru // @author AlexeiBv+mirocod@narod.ru
// @match https://tass.ru/* // @match https://tass.ru/*
// @match https://ria.ru/* // @match https://ria.ru/*
// @match https://rg.ru/*
// @match https://zakonvremeni.ru/* // @match https://zakonvremeni.ru/*
// @icon https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico // @icon https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico
// @grant none // @grant none
@ -22,12 +23,20 @@
return a_Element.className; return a_Element.className;
} }
function FindElementsByRegExp(a_GenElementNameFunc, a_RegExpPattern, a_ElementParent) { function GetNodeName(a_Element) {
return a_Element.nodeName;
}
/**
 * Tests whether the name extracted from an element matches a pattern on
 * whitespace boundaries.
 *
 * The match must begin at the start of the name or right after a whitespace
 * character, and must end at the end of the name or right before a whitespace
 * character.
 *
 * @param {function(*): string} a_GetElementNameFunc - Extracts the string to
 *     test from the element (for example its class name or node name).
 * @param {string} a_RegExpPattern - Regular-expression source to look for.
 * @param {*} a_Element - Element handed to a_GetElementNameFunc.
 * @returns {boolean} true when the extracted name matches the pattern.
 */
function CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, a_Element) {
    // The pattern is wrapped in a non-capturing group so that a top-level
    // alternation ("a|b") stays between both boundary checks. Without the
    // group it would parse as "(?:^|\s)a" OR "b(?!\S)", dropping one boundary
    // from each alternative. A non-capturing group does not shift the
    // numbering of capture groups inside the caller's pattern.
    const re = new RegExp("(?:^|\\s)(?:" + a_RegExpPattern + ")(?!\\S)");
    return re.test(a_GetElementNameFunc(a_Element));
}
function FindElementsByRegExp(a_GetElementNameFunc, a_RegExpPattern, a_ElementParent) {
a_ElementParent || (a_ElementParent=document); a_ElementParent || (a_ElementParent=document);
var descendants = a_ElementParent.getElementsByTagName('*'), i=-1, e, result=[]; let descendants = a_ElementParent.getElementsByTagName('*'), i=-1, e, result=[];
var re = new RegExp("(?:^|\\s)" + a_RegExpPattern + "(?!\\S)"); while (e = descendants[++i]) {
while (e=descendants[++i]) { if (CheckRegExp(a_GetElementNameFunc, a_RegExpPattern, e)){
if (re.test(a_GenElementNameFunc(e))){
result.push(e); result.push(e);
} }
} }
@ -41,7 +50,7 @@
} }
function RemoveBeforeSplitter(a_String, a_Splitter) { function RemoveBeforeSplitter(a_String, a_Splitter) {
var index = a_String.indexOf(a_Splitter) let index = a_String.indexOf(a_Splitter)
if (index != -1) { if (index != -1) {
return a_String.substring(index + a_Splitter.length); return a_String.substring(index + a_Splitter.length);
} }
@ -49,9 +58,9 @@
} }
function RemoveAfterSplitter(a_String, a_Splitter, a_SaveSplitter) { function RemoveAfterSplitter(a_String, a_Splitter, a_SaveSplitter) {
var index = a_String.indexOf(a_Splitter) let index = a_String.indexOf(a_Splitter)
if (index != -1) { if (index != -1) {
var spl_len = a_Splitter.length let spl_len = a_Splitter.length
if (!a_SaveSplitter) { if (!a_SaveSplitter) {
spl_len = 0; spl_len = 0;
} }
@ -61,15 +70,15 @@
} }
function ClearUrl(a_Url) { function ClearUrl(a_Url) {
var separator = '?'; const separator = '?';
return RemoveAfterSplitter(a_Url, separator, false); return RemoveAfterSplitter(a_Url, separator, false);
} }
function ClearTextFuncTemplate(a_RemoveBeforeList) { function ClearTextFuncTemplate(a_RemoveBeforeList) {
function ClearTextFunc(a_Content) { function ClearTextFunc(a_Content) {
var content = a_Content; let content = a_Content;
for (let i = 0; i < a_RemoveBeforeList.length; i++) { for (let i = 0; i < a_RemoveBeforeList.length; i++) {
var r = a_RemoveBeforeList[i]; let r = a_RemoveBeforeList[i];
content = RemoveBeforeSplitter(content, r); content = RemoveBeforeSplitter(content, r);
} }
return content; return content;
@ -80,12 +89,12 @@
// Работа с контейнерами // Работа с контейнерами
function GetImageInContainers(a_Elements, a_TextAlign) { function GetImageInContainers(a_Elements, a_TextAlign) {
var i; let i;
var img_src = ''; let img_src = '';
var re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); let re = new RegExp("(https?:\/\/.*\.(?:png|jpg))");
for (i in a_Elements) { for (i in a_Elements) {
var e = a_Elements[i]; let e = a_Elements[i];
var children = e.querySelectorAll("*"); let children = e.querySelectorAll("*");
for(let i = 0; i < children.length; i++){ for(let i = 0; i < children.length; i++){
var c = children[i]; var c = children[i];
if (c.nodeName == 'IMG' && re.test(c.src)) { if (c.nodeName == 'IMG' && re.test(c.src)) {
@ -100,11 +109,11 @@
} }
function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) { function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) {
var result = ''; let result = '';
for (var i in a_Elements) { for (var i in a_Elements) {
var e = a_Elements[i]; let e = a_Elements[i];
var content = ''; let content = '';
if (e.querySelectorAll) { if (e.querySelectorAll) {
var children = e.querySelectorAll("*"); var children = e.querySelectorAll("*");
if (children.length == 0 || e.innerText) { if (children.length == 0 || e.innerText) {
@ -112,7 +121,7 @@
} }
else { else {
for (let i = 0; i < children.length; i++) { for (let i = 0; i < children.length; i++) {
var c = children[i]; let c = children[i];
content += a_GrubTextFunc(c); content += a_GrubTextFunc(c);
} }
} }
@ -131,9 +140,9 @@
// Фильтрация элементов // Фильтрация элементов
function FIlterElements(a_Elements, a_ElementChecker) { function FIlterElements(a_Elements, a_ElementChecker) {
var result = []; let result = [];
for (let i = 0; i < a_Elements.length; i++) { for (let i = 0; i < a_Elements.length; i++) {
var e = a_Elements[i]; let e = a_Elements[i];
if (a_ElementChecker(e)) { if (a_ElementChecker(e)) {
result.push(e); result.push(e);
} }
@ -180,14 +189,23 @@
a_Content = a_ClearTextFunc(a_Content); a_Content = a_ClearTextFunc(a_Content);
} }
if (a_Element && a_Element.dataset && a_Element.dataset.type == 'list') { if (a_Element && a_Element.dataset && a_Element.dataset.type == 'list') {
var childrens = FindElementsByRegExp(GetElementClassName, 'article__list-item', a_Element); let childrens = FindElementsByRegExp(GetElementClassName, 'article__list-item', a_Element);
let content = ''; let content = '';
for (let i = 0; i < childrens.length; i++) { for (let i = 0; i < childrens.length; i++) {
var c = childrens[i]; let c = childrens[i];
content += '<li>' + GrubTextFuncTemplate()(c) + '</li>'; content += '<li>' + GrubTextFuncTemplate()(c) + '</li>';
} }
a_Content = '<ul>' + content + '</ul>'; a_Content = '<ul>' + content + '</ul>';
} }
if (a_Element && CheckRegExp(GetElementClassName, 'PageContentCommonStyling_text.*', a_Element)) {
let content = '';
let childrens = FindElementsByRegExp(GetNodeName, 'P', a_Element);
for (let i = 0; i < childrens.length; i++) {
let c = childrens[i];
content += FinishWorkFunc(GrubTextFuncTemplate()(c), c);
}
return content
}
if (a_OutTag && a_TextAlign) { if (a_OutTag && a_TextAlign) {
a_Content = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + a_Content + '</' + a_OutTag + '>'; a_Content = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + a_Content + '</' + a_OutTag + '>';
@ -204,13 +222,13 @@
// Создание контента для стандартных новостей // Создание контента для стандартных новостей
function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_ClearTextPatterns) { function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_ClearTextPatterns) {
var title_tag = 'h2'; const title_tag = 'h2';
var p_tag = 'p'; const p_tag = 'p';
var title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center')
var grub_func = GrubTextFuncTemplate(); const grub_func = GrubTextFuncTemplate();
var content = ''; let content = '';
var paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns)); const paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns));
content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_func, title_finish_text_func); content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_func, title_finish_text_func);
content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center'); content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center');
content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_func, paragraph_finish_text_func); content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_func, paragraph_finish_text_func);
@ -220,13 +238,13 @@
// Создание контента для сайта // Создание контента для сайта
function MakeContent() { function MakeContent() {
var content = ''; let content = '';
var source_add = true; let source_add = true;
var zero_tag_func = FinishWorkFuncTemplate() const zero_tag_func = FinishWorkFuncTemplate()
var grub_text_func = GrubTextFuncTemplate() const grub_text_func = GrubTextFuncTemplate()
if (location.hostname == 'tass.ru') { if (location.hostname == 'tass.ru') {
let base_element = document.getElementById('content_box'); const base_element = document.getElementById('content_box');
content = MakeContentByNews( content = MakeContentByNews(
base_element, base_element,
base_element, base_element,
@ -239,12 +257,12 @@
); );
} }
else if (location.hostname == 'ria.ru') { else if (location.hostname == 'ria.ru') {
let base_element = document.getElementsByClassName('article__header')[0]; const base_element = document.getElementsByClassName('article__header')[0];
var base_element_text = document.getElementsByClassName('article__body')[0]; const base_element_text = document.getElementsByClassName('article__body')[0];
var tire = ['–', '—', '‒', '―', '⸺', '⸻']; const tire = ['–', '—', '‒', '―', '⸺', '⸻'];
var clear_text = []; let clear_text = [];
for (var i in tire) { for (let i in tire) {
var t = tire[i]; let t = tire[i];
clear_text.push(t + ' РИА Новости. '); clear_text.push(t + ' РИА Новости. ');
} }
@ -259,17 +277,32 @@
clear_text clear_text
); );
} }
else if (location.hostname == 'rg.ru') {
const base_element = document.getElementsByClassName('article__header')[0];
const base_element_text = document.getElementsByClassName('article__body')[0];
content = MakeContentByNews(
document,
document,
base_element_text,
'PageArticleContent_title.*',
'PageArticleContent_image.*',
'(PageContentCommonStyling_text|PageArticleContent_lead).*',
ElementCheckerRia,
[]
);
}
else if (location.hostname == 'zakonvremeni.ru') { else if (location.hostname == 'zakonvremeni.ru') {
let base_element = document.getElementsByClassName('item-page')[0]; const base_element = document.getElementsByClassName('item-page')[0];
var title = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func); const title = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func);
var parent_category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element), grub_text_func); const parent_category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'parent-category-name', base_element), grub_text_func);
var category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'category-name', base_element), grub_text_func); const category = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'category-name', base_element), grub_text_func);
var page = RemoveAfterSplitter(TrimString(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true); const page = RemoveAfterSplitter(TrimString(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true);
content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL; content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL;
source_add = false; source_add = false;
} }
var result = ''; let result = '';
if (content.length > 0) { if (content.length > 0) {
result = '<textarea id = "news_content" rows="10" cols="100">' + content; result = '<textarea id = "news_content" rows="10" cols="100">' + content;
if (source_add) { if (source_add) {
@ -280,8 +313,8 @@
return result; return result;
} }
var content = MakeContent(); let content = MakeContent();
var logo = document.createElement("div"); let logo = document.createElement("div");
logo.innerHTML = '<div style="margin: 0pt auto; width: 800px; text-align: center;">' + content + '</div>'; logo.innerHTML = '<div style="margin: 0pt auto; width: 800px; text-align: center;">' + content + '</div>';
document.body.insertBefore(logo, document.body.firstChild); document.body.insertBefore(logo, document.body.firstChild);

Loading…
Cancel
Save