diff --git a/news_parser.js b/news_parser.js index d11b76b..aebf395 100644 --- a/news_parser.js +++ b/news_parser.js @@ -47,8 +47,8 @@ // Работа со строками - function TrimString(s) { - return ( s || '' ).replace( /^\s+|\s+$/g, '' ); + function TrimString(str) { + return ( str || '' ).replace( /^\s+|\s+$/g, '' ); } function RemoveBeforeSplitter(a_String, a_Splitter) { @@ -99,9 +99,12 @@ let re = new RegExp("(https?:\/\/.*\.(?:png|jpg))"); for (i in a_Elements) { let e = a_Elements[i]; + if (e.nodeName == 'IMG' && re.test(e.src)) { + img_src = e.src; + } let children = e.querySelectorAll("*"); for(let i = 0; i < children.length; i++){ - var c = children[i]; + let c = children[i]; if (c.nodeName == 'IMG' && re.test(c.src)) { img_src = c.src; } @@ -208,7 +211,7 @@ let elements = clear_element.querySelectorAll('*'); elements.forEach(function (element) { - if (a_SubElementCheckerToRemove(element)) { + if (a_SubElementCheckerToRemove && a_SubElementCheckerToRemove(element)) { element.parentNode.removeChild(element); } else { @@ -240,14 +243,14 @@ let elements = clear_element.querySelectorAll(align_tag_name); elements.forEach(function (element) { element.style.textAlign = a_TextAlign; - if (element.innerHTML == ' ' || element.innerHTML == '') { + if (TrimString(element.innerHTML).replace(' ', '') == '') { element.parentNode.removeChild(element); } }); }); - let result = clear_element.innerHTML; - if (a_OutTag && a_TextAlign) { + let result = TrimString(clear_element.innerHTML); + if (a_OutTag && a_TextAlign && TrimString(result).replace(' ', '') != '') { result = '<' + a_OutTag + ' style = "text-align:' + a_TextAlign + ';">' + result + ''; } return result; @@ -275,11 +278,11 @@ } // Создание контента для стандартных новостей + const title_tag = 'h2'; + const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') function MakeContentByNews(a_BaseElementTitle, a_BaseElementImage, a_BaseElementText, a_TitleRegExpElementPattern, a_ImageRegExpElementPattern, a_TextRegExpElementPattern, a_ElementChecker, a_SubElementCheckerToRemove, a_ClearTextPatterns) { - const title_tag = 'h2'; const p_tag = 'p'; - const title_finish_text_func = FinishWorkFuncTemplate(title_tag, 'center') const grub_func = GrubTextFuncTemplate(); let content = ''; @@ -374,6 +377,29 @@ ), [] ); + + if (content.length == 0) { + // test: https://russian.rt.com/inotv/2023-10-27/DELFI-Latviya-budet-konfiskovivat-mashini + const base_element_title = document.getElementsByClassName('left-column page')[0].getElementsByTagName("h1")[0]; + const base_element_image = document.getElementsByTagName("figure")[0]; + const base_element_text = document.getElementsByTagName("article")[0]; + + content = title_finish_text_func(base_element_title) + + MakeContentByNews( + base_element_title, + base_element_image, + base_element_text, + '!!!!', + '.*', + 'article-intro|article-body', + ElementCheckerTrue, + SubElementCheckerToRemoveTemplate( + 'meta', + 'IMG' + ), + [] + ); + } } else if (location.hostname == 'www.cnews.ru') { // test: https://www.cnews.ru/news/top/2023-10-27_rossiyane_sozdali_polnotsennyj