feat: middlewares

feat: highlighter
This commit is contained in:
Artemy
2024-05-15 16:09:01 +03:00
parent 752939d099
commit d477de027a
15 changed files with 209 additions and 46 deletions

View File

@ -1,7 +1,5 @@
import { Readability as OReadability } from '@mozilla/readability'; import { Readability as OReadability } from '@mozilla/readability';
import { Engine, EngineParseError, Route } from '@txtdot/sdk';
import { Engine, EngineParseError } from '@txtdot/sdk';
import { parseHTML } from 'linkedom';
const Readability = new Engine( const Readability = new Engine(
'Readability', 'Readability',
@ -9,7 +7,7 @@ const Readability = new Engine(
['*'] ['*']
); );
Readability.route('*path', async (input, ro) => { Readability.route('*path', async (input, ro: Route<{ path: string }>) => {
const reader = new OReadability(input.document); const reader = new OReadability(input.document);
const parsed = reader.parse(); const parsed = reader.parse();

View File

@ -1,6 +1,5 @@
import { Engine, JSX } from '@txtdot/sdk'; import { Engine, JSX } from '@txtdot/sdk';
import { HandlerInput, Route } from '@txtdot/sdk'; import { HandlerInput, Route } from '@txtdot/sdk';
import { parseHTML } from 'linkedom';
import { PageFooter, ResultItem } from '../components/searchers'; import { PageFooter, ResultItem } from '../components/searchers';
const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [ const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [

View File

@ -1,15 +1,16 @@
import * as engines from './engines'; import * as engines from './engines';
export { engines }; export { engines };
export const engineList = [ export const engineList = [
engines.StackOverflow, engines.StackOverflow,
engines.SearX, engines.SearX,
engines.Readability, engines.Readability,
]; ];
import { compile } from 'html-to-text'; import * as middlewares from './middlewares';
export { middlewares };
export const middlewareList = [middlewares.Highlight];
import { compile } from 'html-to-text';
export const html2text = compile({ export const html2text = compile({
longWordSplit: { longWordSplit: {
forceWrapOnLimit: true, forceWrapOnLimit: true,

View File

@ -0,0 +1,39 @@
import { Middleware, JSX } from '@txtdot/sdk';
/**
 * Middleware that adds highlight.js assets to pages containing code.
 * Declared for the `'*'` domain pattern.
 */
const Highlight = new Middleware(
  'highlight',
  'Highlights code with highlight.js',
  ['*']
);

Highlight.use(async (_input, _route, output) => {
  // Nothing to highlight: hand the engine output back untouched.
  if (!output.content.includes('<code')) {
    return output;
  }

  // Keep every other output field and swap in the wrapped content.
  const highlighted = <Highlighter content={output.content} />;
  return { ...output, content: highlighted };
});
/**
 * JSX component that prepends the highlight.js assets to a piece of markup.
 *
 * The fragment contains, in order:
 *  - a `<style>` element importing the highlight.js 11.9.0 `atom-one-light`
 *    theme and, for `screen and (prefers-color-scheme: dark)`, the
 *    `atom-one-dark` theme, both from the jsDelivr CDN;
 *  - a `<script>` element loading `highlight.min.js` from the same CDN;
 *  - an inline `<script>` that calls `hljs.highlightAll()`;
 *  - the original `content`.
 *
 * @param content - Markup to place after the injected assets.
 * @returns Whatever the project's JSX factory produces for the fragment.
 *   NOTE(review): presumably an HTML string, since the caller assigns it to
 *   the output's `content` field — confirm against the JSX runtime.
 */
function Highlighter({ content }: { content: string }) {
return (
<>
<style>
@import
"https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/atom-one-light.min.css";
@import
"https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/atom-one-dark.min.css"
screen and (prefers-color-scheme: dark);
</style>
<script
src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/highlight.min.js"
type="text/javascript"
/>
<script>hljs.highlightAll();</script>
{content}
</>
);
}
export default Highlight;

View File

@ -0,0 +1,3 @@
import Highlight from './highlight';
export { Highlight };

View File

@ -34,7 +34,7 @@ export class Engine {
} }
async handle(input: HandlerInput): Promise<EngineOutput> { async handle(input: HandlerInput): Promise<EngineOutput> {
const url = new URL(input.getUrl()); const url = new URL(input.url);
const path = url.pathname + url.search + url.hash; const path = url.pathname + url.search + url.hash;
for (const route of this.routes) { for (const route of this.routes) {
const match = route.route.match(path); const match = route.route.match(path);

View File

@ -24,9 +24,7 @@ export function createElement(
}) })
.join(' '); .join(' ');
return inner.length === 0 return `<${name} ${propsstr}>${content}</${name}>`;
? `<${name} ${propsstr}/>`
: `<${name} ${propsstr}>${content}</${name}>`;
} else if (typeof name === 'function') { } else if (typeof name === 'function') {
return name(props, content); return name(props, content);
} else { } else {

View File

@ -1,4 +1,5 @@
import { Engine } from './engine'; import { Engine } from './engine';
import { Middleware } from './middleware';
import { import {
EngineParseError, EngineParseError,
@ -16,17 +17,22 @@ import {
HandlerOutput, HandlerOutput,
Route, Route,
handlerSchema, handlerSchema,
EngineOutput,
MiddleFunction,
} from './types/handler'; } from './types/handler';
import * as JSX from './jsx'; import * as JSX from './jsx';
export { export {
Engine, Engine,
Middleware,
EngineParseError, EngineParseError,
NoHandlerFoundError, NoHandlerFoundError,
TxtDotError, TxtDotError,
EngineFunction, EngineFunction,
MiddleFunction,
EngineMatch, EngineMatch,
EngineOutput,
Engines, Engines,
RouteValues, RouteValues,
EnginesMatch, EnginesMatch,

View File

@ -0,0 +1,61 @@
import Route from 'route-parser';
import {
HandlerInput,
RouteValues,
EngineOutput,
MiddleFunction,
} from './types/handler';
/**
 * A single registered middleware step: a handler paired with the route
 * pattern that decides whether the handler runs for a given path.
 */
interface IMiddle<TParams extends RouteValues> {
  /** Function invoked when `route` matches the requested path. */
  handler: MiddleFunction<TParams>;
  /** Compiled `route-parser` pattern the path is matched against. */
  route: Route;
}
/**
 * A named collection of route-bound handlers that post-process an engine's
 * output.
 *
 * Handlers are registered with {@link Middleware.route} (specific path
 * pattern) or {@link Middleware.use} (every path) and are executed by
 * {@link Middleware.handle} in registration order.
 */
export class Middleware {
  /** Identifier of the middleware. */
  name: string;
  /** Human-readable description. */
  description: string;
  /** Host patterns this middleware is declared for; not read by this class. */
  domains: string[];
  /** Registered handlers, in registration order. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  middles: IMiddle<any>[] = [];

  /**
   * @param name - Identifier of the middleware.
   * @param description - Human-readable description.
   * @param domains - Host patterns the middleware is declared for
   *   (defaults to an empty list).
   */
  constructor(name: string, description: string, domains: string[] = []) {
    this.domains = domains;
    this.name = name;
    this.description = description;
  }

  /**
   * Registers a handler that runs only for paths matching `path`.
   *
   * @param path - `route-parser` pattern.
   * @param handler - Function that receives the current output and returns
   *   the transformed output.
   */
  route<TParams extends RouteValues>(
    path: string,
    handler: MiddleFunction<TParams>
  ) {
    this.middles.push({ route: new Route<TParams>(path), handler });
  }

  /**
   * Registers a handler under the catch-all `*path` pattern.
   *
   * @param handler - Function that receives the current output and returns
   *   the transformed output.
   */
  use<TParams extends RouteValues>(handler: MiddleFunction<TParams>) {
    this.middles.push({ route: new Route<{ path: string }>('*path'), handler });
  }

  /**
   * Runs every registered handler whose route matches the input URL's
   * path (pathname + search + hash), in registration order.
   *
   * Each handler receives the output produced by the previous matching
   * handler, so transformations compose instead of overwriting each other.
   *
   * @param input - Handler input; only its `url` is read here.
   * @param out - Engine output to start from.
   * @returns The output after all matching handlers ran, or `out` itself
   *   when none matched.
   * @throws TypeError if `input.url` is not a valid absolute URL
   *   (raised by the `URL` constructor).
   */
  async handle(input: HandlerInput, out: EngineOutput): Promise<EngineOutput> {
    const url = new URL(input.url);
    const path = url.pathname + url.search + url.hash;

    let processedOut = out;

    for (const middle of this.middles) {
      const match = middle.route.match(path);
      if (!match) continue;

      processedOut = await middle.handler(
        input,
        {
          q: match,
          reverse: (req) => middle.route.reverse(req),
        },
        // Feed the accumulated result, not the original `out`, so earlier
        // handlers' changes are not discarded.
        processedOut
      );
    }

    return processedOut;
  }
}

View File

@ -2,26 +2,30 @@ import { parseHTML } from 'linkedom';
import { Engine } from '../engine'; import { Engine } from '../engine';
export class HandlerInput { export class HandlerInput {
private data: string; private _data: string;
private url: string; private _url: string;
private window?: Window; private _window?: Window;
constructor(data: string, url: string) { constructor(data: string, url: string) {
this.data = data; this._data = data;
this.url = url; this._url = url;
} }
getUrl(): string { get url(): string {
return this.url; return this._url;
}
get data(): string {
return this._data;
} }
get document(): Document { get document(): Document {
if (this.window) { if (this._window) {
return this.window.document; return this._window.document;
} }
this.window = parseHTML(this.data); this._window = parseHTML(this._data);
return this.window.document; return this._window.document;
} }
} }
@ -75,6 +79,12 @@ export type EngineFunction<TParams extends RouteValues> = (
ro: Route<TParams> ro: Route<TParams>
) => Promise<EngineOutput>; ) => Promise<EngineOutput>;
export type MiddleFunction<TParams extends RouteValues> = (
input: HandlerInput,
ro: Route<TParams>,
out: EngineOutput
) => Promise<EngineOutput>;
export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[]; export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[];
export interface Route<TParams extends RouteValues> { export interface Route<TParams extends RouteValues> {

View File

@ -1,5 +1,5 @@
import { IAppConfig } from '../types/pluginConfig'; import { IAppConfig } from '../types/pluginConfig';
import { engineList, html2text } from '@txtdot/plugins'; import { engineList, middlewareList, html2text } from '@txtdot/plugins';
/** /**
* Configuration of plugins * Configuration of plugins
@ -7,6 +7,7 @@ import { engineList, html2text } from '@txtdot/plugins';
*/ */
const plugin_config: IAppConfig = { const plugin_config: IAppConfig = {
engines: [...engineList], engines: [...engineList],
middlewares: [...middlewareList],
html2text, html2text,
}; };

View File

@ -6,7 +6,7 @@ import { NotHtmlMimetypeError } from './errors/main';
import { decodeStream, parseEncodingName } from './utils/http'; import { decodeStream, parseEncodingName } from './utils/http';
import replaceHref from './utils/replace-href'; import replaceHref from './utils/replace-href';
import { Engine } from '@txtdot/sdk'; import { Engine, EngineOutput, Middleware } from '@txtdot/sdk';
import { HandlerInput, HandlerOutput } from '@txtdot/sdk'; import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
import config from './config'; import config from './config';
import { parseHTML } from 'linkedom'; import { parseHTML } from 'linkedom';
@ -18,14 +18,25 @@ interface IEngineId {
export class Distributor { export class Distributor {
engines_id: IEngineId = {}; engines_id: IEngineId = {};
fallback: Engine[] = []; engines_fallback: Engine[] = [];
list: string[] = []; engines_list: string[] = [];
middles_id: IEngineId = {};
middles_fallback: Middleware[] = [];
middles_list: string[] = [];
constructor() {} constructor() {}
engine(engine: Engine) { engine(engine: Engine) {
this.engines_id[engine.name] = this.list.length; this.engines_id[engine.name] = this.engines_list.length;
this.fallback.push(engine); this.engines_fallback.push(engine);
this.list.push(engine.name); this.engines_list.push(engine.name);
}
middleware(middleware: Middleware) {
this.middles_id[middleware.name] = this.middles_list.length;
this.middles_fallback.push(middleware);
this.middles_list.push(middleware.name);
} }
async handlePage( async handlePage(
@ -54,13 +65,13 @@ export class Distributor {
const engine = this.getFallbackEngine(urlObj.hostname, engineName); const engine = this.getFallbackEngine(urlObj.hostname, engineName);
const output = await engine.handle( const input = new HandlerInput(
new HandlerInput( await decodeStream(data, parseEncodingName(mime)),
await decodeStream(data, parseEncodingName(mime)), remoteUrl
remoteUrl
)
); );
const output = await engine.handle(input);
const dom = parseHTML(output.content); const dom = parseHTML(output.content);
// Get text content before link replacement, because in text format we need original links // Get text content before link replacement, because in text format we need original links
@ -77,15 +88,27 @@ export class Distributor {
); );
const purify = DOMPurify(dom); const purify = DOMPurify(dom);
const content = purify.sanitize(dom.document.toString()); const purified_content = purify.sanitize(dom.document.toString());
const title = output.title || dom.document.title;
const lang = output.lang || dom.document.documentElement.lang; const purified = {
...output,
content: purified_content,
};
const processed = await this.processMiddlewares(
urlObj.hostname,
input,
purified
);
const title = processed.title || dom.document.title;
const lang = processed.lang || dom.document.documentElement.lang;
const textContent = const textContent =
html2text(stdTextContent, output, title) || html2text(stdTextContent, processed, title) ||
'Text output cannot be generated.'; 'Text output cannot be generated.';
return { return {
content, content: processed.content,
textContent, textContent,
title, title,
lang, lang,
@ -94,15 +117,31 @@ export class Distributor {
getFallbackEngine(host: string, specified?: string): Engine { getFallbackEngine(host: string, specified?: string): Engine {
if (specified) { if (specified) {
return this.fallback[this.engines_id[specified]]; return this.engines_fallback[this.engines_id[specified]];
} }
for (const engine of this.fallback) { for (const engine of this.engines_fallback) {
if (micromatch.isMatch(host, engine.domains)) { if (micromatch.isMatch(host, engine.domains)) {
return engine; return engine;
} }
} }
return this.fallback[0]; return this.engines_fallback[0];
}
async processMiddlewares(
host: string,
input: HandlerInput,
output: EngineOutput
): Promise<EngineOutput> {
let processed_output = output;
for (const middle of this.middles_fallback) {
if (micromatch.isMatch(host, middle.domains)) {
processed_output = await middle.handle(input, processed_output);
}
}
return processed_output;
} }
} }

View File

@ -7,5 +7,9 @@ for (const engine of plugin_config.engines) {
distributor.engine(engine); distributor.engine(engine);
} }
export const engineList = distributor.list; for (const middleware of plugin_config.middlewares || []) {
distributor.middleware(middleware);
}
export const engineList = distributor.engines_list;
export { distributor }; export { distributor };

View File

@ -8,7 +8,7 @@ import config from '../../config';
export default async function configurationRoute(fastify: FastifyInstance) { export default async function configurationRoute(fastify: FastifyInstance) {
fastify.get('/configuration', { schema: indexSchema }, async (_, reply) => { fastify.get('/configuration', { schema: indexSchema }, async (_, reply) => {
return reply.view('/templates/configuration.ejs', { return reply.view('/templates/configuration.ejs', {
engines: distributor.fallback, engines: distributor.engines_fallback,
config, config,
}); });
}); });

View File

@ -1,4 +1,4 @@
import { Engine } from '@txtdot/sdk'; import { Engine, Middleware } from '@txtdot/sdk';
type Html2TextConverter = (html: string) => string; type Html2TextConverter = (html: string) => string;
@ -7,6 +7,10 @@ export interface IAppConfig {
* List of engines, ordered * List of engines, ordered
*/ */
engines: Engine[]; engines: Engine[];
/**
* List of middlewares, ordered
*/
middlewares?: Middleware[];
/** /**
* HTML to text converter, if engine doesn't support text * HTML to text converter, if engine doesn't support text
*/ */