Browse Source

Подготовка к обработке тегов

master
parent
commit
6ae507ec8f
  1. 37
      news_parser.js

37
news_parser.js

@ -11,7 +11,7 @@
// @grant none
// ==/UserScript==
// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) <AlexeiBv+mirocod_zv@narod.ru>
// Общественное достояние, 2023, Алексей Безбородов (Alexei Bezborodov) <AlexeiBv+mirocod_platform_bot@narod.ru>
(function() {
'use strict';
@ -77,7 +77,7 @@
return a_String;
}
function GetContentInContainers(a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) {
function GetContentInContainers(a_GrubTextFunc, a_OutTag, baseClass, parent, textAlign, a_ElementFilterFunc, a_ClearTextFunc) {
var elems = getByClass(baseClass, parent);
if (!elems) {
return 'Не удалось найти ' + baseClass;
@ -94,16 +94,12 @@
if (e.querySelectorAll) {
var children = e.querySelectorAll("*");
if (children.length == 0 || e.innerText) {
if (e.innerText) {
content += Trim(e.textContent);
}
content += a_GrubTextFunc(e);
}
else {
for (let i = 0; i < children.length; i++) {
var c = children[i];
if (c.innerText) {
content += Trim(c.textContent);
}
content += a_GrubTextFunc(c);
}
}
}
@ -155,6 +151,17 @@
return ClearTextFunc
}
// Builds the text-extraction callback used when collecting container content.
// The returned function takes a DOM element and gives back its textContent
// passed through Trim (defined elsewhere in this file), or an empty string
// when the element's innerText is falsy.
function GrubTextFuncTemplate() {
    return function GrubTextFunc(a_Element) {
        // innerText is only used as the "has text" gate; the value itself
        // is always taken from textContent.
        if (!a_Element.innerText) {
            return '';
        }
        return Trim(a_Element.textContent);
    };
}
function MakeContent() {
var content = '';
var title_tag = 'h2';
@ -162,19 +169,19 @@
var zero_tag = '';
var source_add = true;
if (location.hostname == 'tass.ru') {
content += GetContentInContainers(title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center');
content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'tass_pkg_title--variant_h1_default.*', document.getElementById('content_box'), 'center');
content += GetImageInContainers('Image_wrapper_.*', document.getElementById('content_box'), 'center');
content += GetContentInContainers(p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. ']));
content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'Paragraph_paragraph.*', document.getElementById('content_box'), 'justify', FIlterTrue, ClearTextFuncTemplate(['/ТАСС/. ']));
}
else if (location.hostname == 'ria.ru') {
content += GetContentInContainers(title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center');
content += GetContentInContainers(GrubTextFuncTemplate(), title_tag, 'article__title', document.getElementsByClassName('article__header')[0], 'center');
content += GetImageInContainers('photoview__open', document.getElementsByClassName('article__header')[0], 'center');
content += GetContentInContainers(p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. ']));
content += GetContentInContainers(GrubTextFuncTemplate(), p_tag, 'article__block', document.getElementsByClassName('article__body')[0], 'justify', FIlterRia, ClearTextFuncTemplate(['– РИА Новости. ', '— РИА Новости. ']));
}
else if (location.hostname == 'zakonvremeni.ru') {
var title = GetContentInContainers(zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]);
var parent_category = GetContentInContainers(zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]);
var category = GetContentInContainers(zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]);
var title = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'page-header', document.getElementsByClassName('item-page')[0]);
var parent_category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'parent-category-name', document.getElementsByClassName('item-page')[0]);
var category = GetContentInContainers(GrubTextFuncTemplate(), zero_tag, 'category-name', document.getElementsByClassName('item-page')[0]);
var page = RemoveAfterSplitter(Trim(document.getElementsByClassName('item-page')[0].querySelector('[itemprop=articleBody]').textContent), '.', true);
content = title + '\n' + parent_category + ' ' + category + '\n\n' + page + '\n' + document.URL;
source_add = false;

Loading…
Cancel
Save