feat: habr navigation

fix: dompurify
fix: middleware processing
This commit is contained in:
Artemy
2024-05-15 17:06:51 +03:00
parent 8be52a0237
commit a98f532cf6
7 changed files with 269 additions and 32 deletions

View File

@@ -8,7 +8,7 @@ const Readability = new Engine(
);
Readability.route('*path', async (input, ro: Route<{ path: string }>) => {
const reader = new OReadability(input.document);
const reader = new OReadability(input.document.cloneNode(true) as Document);
const parsed = reader.parse();
if (!parsed) {

View File

@@ -8,7 +8,7 @@ export const engineList = [
import * as middlewares from './middlewares';
export { middlewares };
export const middlewareList = [middlewares.Highlight];
export const middlewareList = [middlewares.Highlight, middlewares.HabrNav];
import { compile } from 'html-to-text';
export const html2text = compile({

View File

@@ -1,3 +1,4 @@
import Highlight from './highlight';
import { HabrNav } from './navigation';
export { Highlight };
export { Highlight, HabrNav };

View File

@@ -0,0 +1,27 @@
import { Middleware, JSX } from '@txtdot/sdk';
/**
 * Middleware registered for `habr.com` that prepends a list of navigation
 * links to the page content.
 *
 * The links are built from elements in the source document matching
 * `.tm-main-menu__item`.
 */
const HabrNav = new Middleware('Habr Nav', 'Adds navigation in habr pages', [
  'habr.com',
]);

/**
 * Builds the navigation list and places it before the existing content.
 *
 * @param input - handler input; only `input.document` is read here.
 * @param ro - route argument supplied by the SDK; unused by this middleware.
 * @param out - output produced so far; every field except `content` is
 *   passed through untouched.
 * @returns `out` unchanged when no usable menu link is found, otherwise a
 *   copy of `out` whose `content` starts with the navigation list.
 */
HabrNav.use(async (input, ro, out) => {
  const links = [...input.document.querySelectorAll('.tm-main-menu__item')]
    .map((item) => ({
      href: item.getAttribute('href'),
      label: item.textContent,
    }))
    // getAttribute returns null when the attribute is absent; an anchor
    // without a target is a dead link, so such items are skipped.
    .filter((link) => link.href !== null);

  // Nothing to show: do not prepend an empty <ul> to the page.
  if (links.length === 0) {
    return out;
  }

  return {
    ...out,
    content: (
      <>
        <ul>
          {links.map((link) => (
            <li>
              <a href={link.href}>{link.label}</a>
            </li>
          ))}
        </ul>
        {out.content}
      </>
    ),
  };
});
export { HabrNav };

View File

@@ -26,19 +26,18 @@
"@txtdot/plugins": "workspace:*",
"@txtdot/sdk": "workspace:*",
"axios": "^1.6.8",
"dompurify": "^3.1.2",
"dotenv": "^16.3.1",
"ejs": "^3.1.10",
"fastify": "^4.26.2",
"iconv-lite": "^0.6.3",
"ip-range-check": "^0.2.0",
"isomorphic-dompurify": "^2.10.0",
"json-schema-to-ts": "^3.0.1",
"linkedom": "^0.18.0",
"micromatch": "^4.0.5",
"sharp": "^0.33.3"
},
"devDependencies": {
"@types/dompurify": "^3.0.5",
"@types/ejs": "^3.1.5",
"@types/jsdom": "^21.1.6",
"@types/micromatch": "^4.0.7",

View File

@@ -1,6 +1,5 @@
import axios, { oaxios } from './types/axios';
import micromatch from 'micromatch';
import DOMPurify from 'dompurify';
import { Readable } from 'stream';
import { NotHtmlMimetypeError } from './errors/main';
import { decodeStream, parseEncodingName } from './utils/http';
@@ -11,6 +10,7 @@ import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
import config from './config';
import { parseHTML } from 'linkedom';
import { html2text } from './utils/html2text';
import DOMPurify from 'isomorphic-dompurify';
interface IEngineId {
[key: string]: number;
@@ -70,7 +70,15 @@ export class Distributor {
remoteUrl
);
const output = await engine.handle(input);
let output = await engine.handle(input);
// Sanitize output before middlewares, because middlewares can add unsafe tags
output = {
...output,
content: DOMPurify.sanitize(output.content),
};
output = await this.processMiddlewares(urlObj.hostname, input, output);
const dom = parseHTML(output.content);
@@ -78,7 +86,6 @@ export class Distributor {
const stdTextContent = dom.document.documentElement.textContent;
// post-process
// TODO: generate dom in handler and not parse here twice
replaceHref(
dom.document,
requestUrl,
@@ -87,28 +94,14 @@ export class Distributor {
redirectPath
);
const purify = DOMPurify(dom);
const purified_content = purify.sanitize(dom.document.toString());
const purified = {
...output,
content: purified_content,
};
const processed = await this.processMiddlewares(
urlObj.hostname,
input,
purified
);
const title = processed.title || dom.document.title;
const lang = processed.lang || dom.document.documentElement.lang;
const title = output.title || dom.document.title;
const lang = output.lang || dom.document.documentElement.lang;
const textContent =
html2text(stdTextContent, processed, title) ||
html2text(stdTextContent, output, title) ||
'Text output cannot be generated.';
return {
content: processed.content,
content: output.content,
textContent,
title,
lang,