|
|
@ -75,8 +75,11 @@ |
|
|
|
return RemoveAfterSplitter(a_Url, separator, false); |
|
|
|
return RemoveAfterSplitter(a_Url, separator, false); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
function ClearTextFuncTemplate(a_RemoveBeforeList) { |
|
|
|
function ClearTextFuncTemplate(a_RemoveBeforeList, a_OnlyFirstIndex) { |
|
|
|
function ClearTextFunc(a_Content) { |
|
|
|
function ClearTextFunc(a_Content, a_ElementIndex) { |
|
|
|
|
|
|
|
if (a_OnlyFirstIndex && a_ElementIndex && a_ElementIndex != 1) { |
|
|
|
|
|
|
|
return a_Content; |
|
|
|
|
|
|
|
} |
|
|
|
let content = a_Content; |
|
|
|
let content = a_Content; |
|
|
|
for (let i = 0; i < a_RemoveBeforeList.length; i++) { |
|
|
|
for (let i = 0; i < a_RemoveBeforeList.length; i++) { |
|
|
|
let r = a_RemoveBeforeList[i]; |
|
|
|
let r = a_RemoveBeforeList[i]; |
|
|
@ -111,10 +114,12 @@ |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Builds one string by running a finishing callback over every element of a
 * list and concatenating the callback results in list order.
 *
 * @param {ArrayLike<*>|Iterable<*>|null|undefined} a_Elements
 *     Elements to process. Arrays, NodeLists and other array-like or iterable
 *     collections are accepted; null/undefined is treated as an empty list.
 * @param {*} a_GrubTextFunc
 *     Not used by this function; kept in the signature so existing call
 *     sites that pass three arguments keep working.
 * @param {function(*, number): string} a_FinishWorkFunc
 *     Called once per element as a_FinishWorkFunc(element, elementIndex),
 *     where elementIndex is the 1-based position of the element in the list.
 * @returns {string} Concatenation of every a_FinishWorkFunc result, or an
 *     empty string when there are no elements.
 */
function GetContentInContainers(a_Elements, a_GrubTextFunc, a_FinishWorkFunc) {
    if (a_Elements == null) {
        return '';
    }

    // Snapshot into a real array and walk it by position. A for...in loop
    // enumerates property keys, which on a NodeList/HTMLCollection also
    // yields 'length', 'item', 'entries', ... and on arrays picks up any
    // enumerable inherited properties.
    const elements = Array.from(a_Elements);

    let result = '';
    for (let i = 0; i < elements.length; i++) {
        // The callback receives a 1-based index so it can single out the
        // first element of the list.
        result += a_FinishWorkFunc(elements[i], i + 1);
    }
    return result;
}
|
|
@ -182,11 +187,17 @@ |
|
|
|
|
|
|
|
|
|
|
|
let elements = clear_element.querySelectorAll('*'); |
|
|
|
let elements = clear_element.querySelectorAll('*'); |
|
|
|
elements.forEach(function (element) { |
|
|
|
elements.forEach(function (element) { |
|
|
|
RemoveAllAttributes(element); |
|
|
|
let for_rt_com = 'read-more|article__cover'; |
|
|
|
element.removeAttribute('class'); |
|
|
|
let for_rg_com = 'portal|rg-incut|article-img|Section'; |
|
|
|
if (element && element.parentNode && element.nodeName == 'DIV' && CheckRegExp(GetElementClassName, '.*(read-more|article__cover).*', element)) { |
|
|
|
let delete_in_rg = element.nodeName == 'RG-VIDEO' || element.nodeName == 'RG-INCUT'; |
|
|
|
|
|
|
|
if (element && (element.parentNode && CheckRegExp(GetElementClassName, '.*(' + for_rt_com + '|' + for_rg_com + ').*', element) || delete_in_rg)) { |
|
|
|
element.parentNode.removeChild(element); |
|
|
|
element.parentNode.removeChild(element); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
else { |
|
|
|
|
|
|
|
RemoveAllAttributes(element); |
|
|
|
|
|
|
|
element.removeAttribute('class'); |
|
|
|
|
|
|
|
element.removeAttribute('id'); |
|
|
|
|
|
|
|
} |
|
|
|
}); |
|
|
|
}); |
|
|
|
|
|
|
|
|
|
|
|
let tags_to_delete = ['div', 'span', 'em', 'svg', 'path']; |
|
|
|
let tags_to_delete = ['div', 'span', 'em', 'svg', 'path']; |
|
|
@ -226,23 +237,14 @@ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc) { |
|
|
|
function FinishWorkFuncTemplate(a_OutTag, a_TextAlign, a_ClearTextFunc) { |
|
|
|
function FinishWorkFunc(a_Element) { |
|
|
|
function FinishWorkFunc(a_Element, a_ElementIndex) { |
|
|
|
let out_tag = a_OutTag; |
|
|
|
let out_tag = a_OutTag; |
|
|
|
if (a_Element && CheckRegExp(GetElementClassName, 'Title_title.*', a_Element)) { |
|
|
|
if (a_Element && CheckRegExp(GetElementClassName, 'Title_title.*', a_Element)) { |
|
|
|
out_tag = 'h2'; |
|
|
|
out_tag = 'h2'; |
|
|
|
} |
|
|
|
} |
|
|
|
let a_Content = GetClearHtml(a_Element, out_tag, a_TextAlign) |
|
|
|
let a_Content = GetClearHtml(a_Element, out_tag, a_TextAlign) |
|
|
|
if (a_ClearTextFunc) { |
|
|
|
if (a_ClearTextFunc) { |
|
|
|
a_Content = a_ClearTextFunc(a_Content); |
|
|
|
a_Content = a_ClearTextFunc(a_Content, a_ElementIndex); |
|
|
|
} |
|
|
|
|
|
|
|
if (a_Element && CheckRegExp(GetElementClassName, '(PageContentCommonStyling_text.*)', a_Element)) { |
|
|
|
|
|
|
|
let content = ''; |
|
|
|
|
|
|
|
let childrens = FindElementsByRegExp(GetNodeName, '(P)', a_Element); |
|
|
|
|
|
|
|
for (let i = 0; i < childrens.length; i++) { |
|
|
|
|
|
|
|
let c = childrens[i]; |
|
|
|
|
|
|
|
content += FinishWorkFunc(c); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return content; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (a_Element && a_Element.dataset && a_Element.dataset.type == 'quote') { |
|
|
|
if (a_Element && a_Element.dataset && a_Element.dataset.type == 'quote') { |
|
|
@ -263,7 +265,7 @@ |
|
|
|
const grub_func = GrubTextFuncTemplate(); |
|
|
|
const grub_func = GrubTextFuncTemplate(); |
|
|
|
|
|
|
|
|
|
|
|
let content = ''; |
|
|
|
let content = ''; |
|
|
|
const paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns)); |
|
|
|
const paragraph_finish_text_func = FinishWorkFuncTemplate(p_tag, 'justify', ClearTextFuncTemplate(a_ClearTextPatterns, true)); |
|
|
|
content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_func, title_finish_text_func); |
|
|
|
content += GetContentInContainers(FindElementsByRegExp(GetElementClassName, a_TitleRegExpElementPattern, a_BaseElementTitle), grub_func, title_finish_text_func); |
|
|
|
content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center'); |
|
|
|
content += GetImageInContainers(FindElementsByRegExp(GetElementClassName, a_ImageRegExpElementPattern, a_BaseElementImage), 'center'); |
|
|
|
content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_func, paragraph_finish_text_func); |
|
|
|
content += GetContentInContainers(FIlterElements(FindElementsByRegExp(GetElementClassName, a_TextRegExpElementPattern, a_BaseElementText), a_ElementChecker), grub_func, paragraph_finish_text_func); |
|
|
@ -289,7 +291,7 @@ |
|
|
|
'Image_wrapper_.*', |
|
|
|
'Image_wrapper_.*', |
|
|
|
'(Paragraph_paragraph|Title_title).*', |
|
|
|
'(Paragraph_paragraph|Title_title).*', |
|
|
|
ElementCheckerTrue, |
|
|
|
ElementCheckerTrue, |
|
|
|
['/ТАСС/. '] |
|
|
|
['. '] |
|
|
|
); |
|
|
|
); |
|
|
|
} |
|
|
|
} |
|
|
|
else if (location.hostname == 'ria.ru') { |
|
|
|
else if (location.hostname == 'ria.ru') { |
|
|
@ -315,6 +317,7 @@ |
|
|
|
); |
|
|
|
); |
|
|
|
} |
|
|
|
} |
|
|
|
else if (location.hostname == 'rg.ru') { |
|
|
|
else if (location.hostname == 'rg.ru') { |
|
|
|
|
|
|
|
// test: https://rg.ru/2023/10/28/volontery-iz-evrosoiuza-privezli-dlia-zhitelej-donbassa-20-tonn-gumanitarnogo-gruza.html
|
|
|
|
const base_element = document.getElementsByClassName('article__header')[0]; |
|
|
|
const base_element = document.getElementsByClassName('article__header')[0]; |
|
|
|
const base_element_text = document.getElementsByClassName('article__body')[0]; |
|
|
|
const base_element_text = document.getElementsByClassName('article__body')[0]; |
|
|
|
|
|
|
|
|
|
|
|