Browse Source

В первом приближении реализована поддержка rt.com #6

master
parent
commit
0b8d0ce4ca
  1. 50
      news_parser.js

50
news_parser.js

@ -7,6 +7,7 @@
// @match https://tass.ru/*
// @match https://ria.ru/*
// @match https://rg.ru/*
// @match https://russian.rt.com/*
// @match https://zakonvremeni.ru/*
// @icon https://icons.duckduckgo.com/ip2/zakonvremeni.ru.ico
// @grant none
@ -178,6 +179,15 @@
function GetClearHtml(a_Element, a_OutTag, a_TextAlign) {
let clear_element = a_Element.cloneNode(true);
let elements = clear_element.querySelectorAll('*');
elements.forEach(function (element) {
RemoveAllAttributes(element);
if (element && element.parentNode && element.nodeName == 'DIV' && CheckRegExp(GetElementClassName, '.*(read-more|article__cover).*', element)) {
element.parentNode.removeChild(element);
}
});
let tags_to_delete = ['div', 'span', 'em', 'svg', 'path'];
tags_to_delete.forEach(function (del_tag_name) {
@ -187,19 +197,25 @@
});
});
let elements = clear_element.querySelectorAll('*');
elements.forEach(function (element) {
RemoveAllAttributes(element);
});
elements = clear_element.querySelectorAll('a');
//let re = new RegExp("(https?:\/\/.*)");
elements.forEach(function (element) {
if (element.host == location.hostname) {
RemoveCurrentElementSaveChild(element, clear_element);
}
});
let tags_to_align = ['p', 'h2'];
tags_to_align.forEach(function (align_tag_name) {
let elements = clear_element.querySelectorAll(align_tag_name);
elements.forEach(function (element) {
element.style.textAlign = a_TextAlign;
if (element.innerHTML == ' ' || element.innerHTML == '') {
element.parentNode.removeChild(element);
}
});
});
let result = clear_element.innerHTML;
if (a_OutTag && a_TextAlign) {
result = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + result + '</' + a_OutTag + '>';
@ -218,12 +234,12 @@
if (a_ClearTextFunc) {
a_Content = a_ClearTextFunc(a_Content);
}
if (a_Element && CheckRegExp(GetElementClassName, 'PageContentCommonStyling_text.*', a_Element)) {
if (a_Element && CheckRegExp(GetElementClassName, '(PageContentCommonStyling_text.*)', a_Element)) {
let content = '';
let childrens = FindElementsByRegExp(GetNodeName, 'P', a_Element);
let childrens = FindElementsByRegExp(GetNodeName, '(P)', a_Element);
for (let i = 0; i < childrens.length; i++) {
let c = childrens[i];
content += FinishWorkFunc(GrubTextFuncTemplate()(c), c);
content += FinishWorkFunc(c);
}
return content;
}
@ -312,6 +328,22 @@
[]
);
}
else if (location.hostname == 'russian.rt.com') {
// test: https://russian.rt.com/business/article/1222163-centrobank-stavka-oktyabr-2023
const base_element = document.getElementsByClassName('article article_article-page')[0];
const base_element_text = document.getElementsByClassName('article__body')[0];
content = MakeContentByNews(
base_element,
base_element,
base_element_text,
'article__heading',
'article__cover article__cover_article-page',
'article__text',
ElementCheckerRia,
[]
);
}
else if (location.hostname == 'zakonvremeni.ru') {
const base_element = document.getElementsByClassName('item-page')[0];
const title = GetContentInContainers(FindElementsByRegExp(GetElementClassName, 'page-header', base_element), grub_text_func);

Loading…
Cancel
Save