feat: habr navigation
fix: dompurify
fix: middleware processing
This commit is contained in:
@@ -8,7 +8,7 @@ const Readability = new Engine(
|
||||
);
|
||||
|
||||
Readability.route('*path', async (input, ro: Route<{ path: string }>) => {
|
||||
const reader = new OReadability(input.document);
|
||||
const reader = new OReadability(input.document.cloneNode(true) as Document);
|
||||
const parsed = reader.parse();
|
||||
|
||||
if (!parsed) {
|
||||
|
@@ -8,7 +8,7 @@ export const engineList = [
|
||||
|
||||
import * as middlewares from './middlewares';
|
||||
export { middlewares };
|
||||
export const middlewareList = [middlewares.Highlight];
|
||||
export const middlewareList = [middlewares.Highlight, middlewares.HabrNav];
|
||||
|
||||
import { compile } from 'html-to-text';
|
||||
export const html2text = compile({
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import Highlight from './highlight';
|
||||
import { HabrNav } from './navigation';
|
||||
|
||||
export { Highlight };
|
||||
export { Highlight, HabrNav };
|
||||
|
27
packages/plugins/src/middlewares/navigation.tsx
Normal file
27
packages/plugins/src/middlewares/navigation.tsx
Normal file
@@ -0,0 +1,27 @@
|
||||
import { Middleware, JSX } from '@txtdot/sdk';
|
||||
|
||||
/**
 * Middleware that prepends a list of Habr main-menu links to the page content.
 *
 * Constructed with the name 'Habr Nav', a short description, and the host
 * list `['habr.com']`.
 */
const HabrNav = new Middleware('Habr Nav', 'Adds navigation in habr pages', [
  'habr.com',
]);

/**
 * Reads every `.tm-main-menu__item` element from the source document and
 * renders them as a `<ul>` of links placed before the existing content.
 *
 * Items without an `href` or without visible text are dropped, and when no
 * usable item remains the output is returned untouched instead of emitting
 * an empty list.
 */
HabrNav.use(async (input, ro, out) => {
  const links = [...input.document.querySelectorAll('.tm-main-menu__item')]
    .map((item) => ({
      // getAttribute returns null when the attribute is absent
      href: item.getAttribute('href'),
      // textContent may be null and usually carries layout whitespace
      label: item.textContent?.trim() ?? '',
    }))
    .filter(
      (link): link is { href: string; label: string } =>
        link.href !== null && link.href !== '' && link.label !== ''
    );

  // Nothing to prepend: keep the upstream output exactly as it was.
  if (links.length === 0) {
    return out;
  }

  return {
    ...out,
    content: (
      <>
        <ul>
          {links.map((link) => (
            <li>
              <a href={link.href}>{link.label}</a>
            </li>
          ))}
        </ul>
        {out.content}
      </>
    ),
  };
});

export { HabrNav };
|
@@ -26,19 +26,18 @@
|
||||
"@txtdot/plugins": "workspace:*",
|
||||
"@txtdot/sdk": "workspace:*",
|
||||
"axios": "^1.6.8",
|
||||
"dompurify": "^3.1.2",
|
||||
"dotenv": "^16.3.1",
|
||||
"ejs": "^3.1.10",
|
||||
"fastify": "^4.26.2",
|
||||
"iconv-lite": "^0.6.3",
|
||||
"ip-range-check": "^0.2.0",
|
||||
"isomorphic-dompurify": "^2.10.0",
|
||||
"json-schema-to-ts": "^3.0.1",
|
||||
"linkedom": "^0.18.0",
|
||||
"micromatch": "^4.0.5",
|
||||
"sharp": "^0.33.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/dompurify": "^3.0.5",
|
||||
"@types/ejs": "^3.1.5",
|
||||
"@types/jsdom": "^21.1.6",
|
||||
"@types/micromatch": "^4.0.7",
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import axios, { oaxios } from './types/axios';
|
||||
import micromatch from 'micromatch';
|
||||
import DOMPurify from 'dompurify';
|
||||
import { Readable } from 'stream';
|
||||
import { NotHtmlMimetypeError } from './errors/main';
|
||||
import { decodeStream, parseEncodingName } from './utils/http';
|
||||
@@ -11,6 +10,7 @@ import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
|
||||
import config from './config';
|
||||
import { parseHTML } from 'linkedom';
|
||||
import { html2text } from './utils/html2text';
|
||||
import DOMPurify from 'isomorphic-dompurify';
|
||||
|
||||
interface IEngineId {
|
||||
[key: string]: number;
|
||||
@@ -70,7 +70,15 @@ export class Distributor {
|
||||
remoteUrl
|
||||
);
|
||||
|
||||
const output = await engine.handle(input);
|
||||
let output = await engine.handle(input);
|
||||
|
||||
// Sanitize output before middlewares, because middlewares can add unsafe tags
|
||||
output = {
|
||||
...output,
|
||||
content: DOMPurify.sanitize(output.content),
|
||||
};
|
||||
|
||||
output = await this.processMiddlewares(urlObj.hostname, input, output);
|
||||
|
||||
const dom = parseHTML(output.content);
|
||||
|
||||
@@ -78,7 +86,6 @@ export class Distributor {
|
||||
const stdTextContent = dom.document.documentElement.textContent;
|
||||
|
||||
// post-process
|
||||
// TODO: generate dom in handler and not parse here twice
|
||||
replaceHref(
|
||||
dom.document,
|
||||
requestUrl,
|
||||
@@ -87,28 +94,14 @@ export class Distributor {
|
||||
redirectPath
|
||||
);
|
||||
|
||||
const purify = DOMPurify(dom);
|
||||
const purified_content = purify.sanitize(dom.document.toString());
|
||||
|
||||
const purified = {
|
||||
...output,
|
||||
content: purified_content,
|
||||
};
|
||||
|
||||
const processed = await this.processMiddlewares(
|
||||
urlObj.hostname,
|
||||
input,
|
||||
purified
|
||||
);
|
||||
|
||||
const title = processed.title || dom.document.title;
|
||||
const lang = processed.lang || dom.document.documentElement.lang;
|
||||
const title = output.title || dom.document.title;
|
||||
const lang = output.lang || dom.document.documentElement.lang;
|
||||
const textContent =
|
||||
html2text(stdTextContent, processed, title) ||
|
||||
html2text(stdTextContent, output, title) ||
|
||||
'Text output cannot be generated.';
|
||||
|
||||
return {
|
||||
content: processed.content,
|
||||
content: output.content,
|
||||
textContent,
|
||||
title,
|
||||
lang,
|
||||
|
Reference in New Issue
Block a user