Browse Source

Сайт ИННОТВ и фильтрация пустых тегов. #8

master
parent
commit
2cb8c45d24
  1. 44
      news_parser.js

44
news_parser.js

@ -47,8 +47,8 @@
// Работа со строками // Работа со строками
function TrimString(s) { function TrimString(str) {
return ( s || '' ).replace( /^\s+|\s+$/g, '' ); return ( str || '' ).replace( /^\s+|\s+$/g, '' );
} }
function RemoveBeforeSplitter(a_String, a_Splitter) { function RemoveBeforeSplitter(a_String, a_Splitter) {
@ -99,9 +99,12 @@
let re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); let re = new RegExp("(https?:\/\/.*\.(?:png|jpg))");
for (i in a_Elements) { for (i in a_Elements) {
let e = a_Elements[i]; let e = a_Elements[i];
if (e.nodeName == 'IMG' && re.test(e.src)) {
img_src = e.src;
}
let children = e.querySelectorAll("*"); let children = e.querySelectorAll("*");
for(let i = 0; i < children.length; i++){ for(let i = 0; i < children.length; i++){
var c = children[i]; let c = children[i];
if (c.nodeName == 'IMG' && re.test(c.src)) { if (c.nodeName == 'IMG' && re.test(c.src)) {
img_src = c.src; img_src = c.src;
} }
@ -208,7 +211,7 @@
let elements = clear_element.querySelectorAll('*'); let elements = clear_element.querySelectorAll('*');
elements.forEach(function (element) { elements.forEach(function (element) {
if (a_SubElementCheckerToRemove(element)) { if (a_SubElementCheckerToRemove && a_SubElementCheckerToRemove(element)) {
element.parentNode.removeChild(element); element.parentNode.removeChild(element);
} }
else { else {
@ -240,14 +243,14 @@
let elements = clear_element.querySelectorAll(align_tag_name); let elements = clear_element.querySelectorAll(align_tag_name);
elements.forEach(function (element) { elements.forEach(function (element) {
element.style.textAlign = a_TextAlign; element.style.textAlign = a_TextAlign;
if (element.innerHTML == '&nbsp;' || element.innerHTML == '') { if (TrimString(element.innerHTML).replace('&nbsp;', '') == '') {
element.parentNode.removeChild(element); element.parentNode.removeChild(element);
} }
}); });
}); });
let result = clear_element.innerHTML; let result = TrimString(clear_element.innerHTML);
if (a_OutTag && a_TextAlign) { if (a_OutTag && a_TextAlign && TrimString(result).replace('&nbsp;', '') != '') {
result = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + result + '</' + a_OutTag + '>'; result = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + result + '</' + a_OutTag + '>';
} }
return result; return result;
@ -275,11 +278,11 @@
} }
// Создание контента для стандартных новостей // Создание контента для стандартных новостей
const title_tag = 'h2';
const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center')
function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_SubElementCheckerToRemove, a_ClearTextPatterns) { function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_SubElementCheckerToRemove, a_ClearTextPatterns) {
const title_tag = 'h2';
const p_tag = 'p'; const p_tag = 'p';
const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center')
const grub_func = GrubTextFuncTemplate(); const grub_func = GrubTextFuncTemplate();
let content = ''; let content = '';
@ -374,6 +377,29 @@
), ),
[] []
); );
// Fallback extraction: runs only when `content` is still empty at this point
// (presumably the preceding MakeContentByNews attempt matched nothing — confirm
// against the start of this branch, which is outside this view).
if (content.length == 0) {
// test: https://russian.rt.com/inotv/2023-10-27/DELFI-Latviya-budet-konfiskovivat-mashini
// Title source: the first <h1> inside the first element carrying both the
// 'left-column' and 'page' classes.
// NOTE(review): if no such element exists, `[0]` is undefined and the chained
// .getElementsByTagName call throws a TypeError — confirm this layout is
// guaranteed whenever this fallback is reached.
const base_element_title = document.getElementsByClassName('left-column page')[0].getElementsByTagName("h1")[0];
// Image source: the first <figure> in the document.
const base_element_image = document.getElementsByTagName("figure")[0];
// Text source: the first <article> in the document.
const base_element_text = document.getElementsByTagName("article")[0];
// The title is rendered separately through title_finish_text_func and
// prepended to whatever MakeContentByNews produces for image and text.
content = title_finish_text_func(base_element_title) +
MakeContentByNews(
base_element_title,
base_element_image,
base_element_text,
// a_TitleRegExpElementPattern — presumably chosen to match nothing, since the
// title has already been emitted above; TODO confirm.
'!!!!',
// a_ImageRegExpElementPattern
'.*',
// a_TextRegExpElementPattern
'article-intro|article-body',
// a_ElementChecker
ElementCheckerTrue,
// a_SubElementCheckerToRemove, built from 'meta' and 'IMG' (exact matching
// rules live in SubElementCheckerToRemoveTemplate, not visible here).
SubElementCheckerToRemoveTemplate(
'meta',
'IMG'
),
// a_ClearTextPatterns — none for this layout.
[]
);
}
} }
else if (location.hostname == 'www.cnews.ru') { else if (location.hostname == 'www.cnews.ru') {
// test: https://www.cnews.ru/news/top/2023-10-27_rossiyane_sozdali_polnotsennyj // test: https://www.cnews.ru/news/top/2023-10-27_rossiyane_sozdali_polnotsennyj

Loading…
Cancel
Save