From 2cb8c45d2459fd63b5c4a521881cb7ad538e0869 Mon Sep 17 00:00:00 2001 From: Alexei Bezborodov Date: Sat, 28 Oct 2023 21:44:46 +0300 Subject: [PATCH] =?UTF-8?q?=D0=A1=D0=B0=D0=B9=D1=82=20=D0=98=D0=9D=D0=9D?= =?UTF-8?q?=D0=9E=D0=A2=D0=92=20=D0=B8=20=D1=84=D0=B8=D0=BB=D1=8C=D1=82?= =?UTF-8?q?=D1=80=D0=B0=D1=86=D0=B8=D1=8F=20=D0=BF=D1=83=D1=81=D1=82=D1=8B?= =?UTF-8?q?=D1=85=20=D1=82=D0=B5=D0=B3=D0=BE=D0=B2.=20#8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- news_parser.js | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/news_parser.js b/news_parser.js index d11b76b..aebf395 100644 --- a/news_parser.js +++ b/news_parser.js @@ -47,8 +47,8 @@ // Работа со строками - function TrimString(s) { - return ( s || '' ).replace( /^\s+|\s+$/g, '' ); + function TrimString(str) { + return ( str || '' ).replace( /^\s+|\s+$/g, '' ); } function RemoveBeforeSplitter(a_String, a_Splitter) { @@ -99,9 +99,12 @@ let re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); for (i in a_Elements) { let e = a_Elements[i]; + if (e.nodeName == 'IMG' && re.test(e.src)) { + img_src = e.src; + } let children = e.querySelectorAll("*"); for(let i = 0; i < children.length; i++){ - var c = children[i]; + let c = children[i]; if (c.nodeName == 'IMG' && re.test(c.src)) { img_src = c.src; } @@ -208,7 +211,7 @@ let elements = clear_element.querySelectorAll('*'); elements.forEach(function (element) { - if (a_SubElementCheckerToRemove(element)) { + if (a_SubElementCheckerToRemove && a_SubElementCheckerToRemove(element)) { element.parentNode.removeChild(element); } else { @@ -240,14 +243,14 @@ let elements = clear_element.querySelectorAll(align_tag_name); elements.forEach(function (element) { element.style.textAlign = a_TextAlign; - if (element.innerHTML == ' ' || element.innerHTML == '') { + if (TrimString(element.innerHTML).replace(' ', '') == '') { element.parentNode.removeChild(element); } }); }); - let result = clear_element.innerHTML; - if (a_OutTag && a_TextAlign) { + let result = TrimString(clear_element.innerHTML); + if (a_OutTag && a_TextAlign && TrimString(result).replace(' ', '') != '') { result = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + result + ''; } return result; @@ -275,11 +278,11 @@ } // Создание контента для стандартных новостей + const title_tag = 'h2'; + const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_SubElementCheckerToRemove, a_ClearTextPatterns) { - const title_tag = 'h2'; const p_tag = 'p'; - const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') const grub_func = GrubTextFuncTemplate(); let content = ''; @@ -374,6 +377,29 @@ ), [] ); + + if (content.length == 0) { + // test: https://russian.rt.com/inotv/2023-10-27/DELFI-Latviya-budet-konfiskovivat-mashini + const base_element_title = document.getElementsByClassName('left-column page')[0].getElementsByTagName("h1")[0]; + const base_element_image = document.getElementsByTagName("figure")[0]; + const base_element_text = document.getElementsByTagName("article")[0]; + + content = title_finish_text_func(base_element_title) + + MakeContentByNews( + base_element_title, + base_element_image, + base_element_text, + '!!!!', + '.*', + 'article-intro|article-body', + ElementCheckerTrue, + SubElementCheckerToRemoveTemplate( + 'meta', + 'IMG' + ), + [] + ); + } } else if (location.hostname == 'www.cnews.ru') { // test: https://www.cnews.ru/news/top/2023-10-27_rossiyane_sozdali_polnotsennyj