* refactor: move engines to the sdk

* refactor: move engines to plugins

* refactor: move engines to plugins

* fix: prettier
This commit is contained in:
Artemy Egorov 2024-04-27 19:21:41 +03:00 committed by GitHub
parent 4460d3df1d
commit c04ea407ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 2889 additions and 4648 deletions

1
.prettierignore Normal file
View File

@ -0,0 +1 @@
pnpm-lock.yaml

View File

@ -18,11 +18,13 @@ Mozilla's Readability library is used under the hood.
- Server-side page simplification - Server-side page simplification
- Media proxy - Media proxy
- Image compression with Sharp - Image compression with Sharp
- Rendering client-side apps (Vanilla, React, Vue, etc) with [webder](https://github.com/TxtDot/webder)
- Search with SearXNG - Search with SearXNG
- Custom parsers for StackOverflow and SearXNG - Custom parsers for StackOverflow and SearXNG
- Handy API endpoints - Handy API endpoints
- No client JavaScript - No client JavaScript
- Some kind of Material Design 3 - Some kind of Material Design 3
- Customization with plugins, see [@txtdot/sdk](https://github.com/TxtDot/sdk) and [@txtdot/plugins](https://github.com/TxtDot/plugins)
## Running ## Running

4280
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,8 @@
"@fastify/swagger": "^8.14.0", "@fastify/swagger": "^8.14.0",
"@fastify/swagger-ui": "^3.0.0", "@fastify/swagger-ui": "^3.0.0",
"@fastify/view": "^9.0.0", "@fastify/view": "^9.0.0",
"@mozilla/readability": "^0.5.0", "@txtdot/plugins": "^1.0.0",
"@txtdot/sdk": "^1.1.1",
"axios": "^1.6.8", "axios": "^1.6.8",
"dompurify": "^3.1.0", "dompurify": "^3.1.0",
"dotenv": "^16.3.1", "dotenv": "^16.3.1",
@ -20,7 +21,6 @@
"json-schema-to-ts": "^3.0.1", "json-schema-to-ts": "^3.0.1",
"linkedom": "^0.16.11", "linkedom": "^0.16.11",
"micromatch": "^4.0.5", "micromatch": "^4.0.5",
"route-parser": "^0.0.5",
"sharp": "^0.33.3" "sharp": "^0.33.3"
}, },
"devDependencies": { "devDependencies": {
@ -29,7 +29,6 @@
"@types/jsdom": "^21.1.6", "@types/jsdom": "^21.1.6",
"@types/micromatch": "^4.0.7", "@types/micromatch": "^4.0.7",
"@types/node": "^20.12.7", "@types/node": "^20.12.7",
"@types/route-parser": "^0.1.7",
"@typescript-eslint/eslint-plugin": "^7.7.0", "@typescript-eslint/eslint-plugin": "^7.7.0",
"@typescript-eslint/parser": "^7.7.0", "@typescript-eslint/parser": "^7.7.0",
"clean-css-cli": "^5.6.3", "clean-css-cli": "^5.6.3",
@ -52,8 +51,7 @@
"keywords": [], "keywords": [],
"authors": [ "authors": [
"Artemy Egorov <me@artegoser.ru> https://github.com/artegoser", "Artemy Egorov <me@artegoser.ru> https://github.com/artegoser",
"DarkCat09 <darkcat09@vivaldi.net> https://dc09.ru/", "DarkCat09 <darkcat09@vivaldi.net> https://dc09.ru/"
"megahomyak https://github.com/megahomyak"
], ],
"license": "MIT" "license": "MIT"
} }

2870
pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,9 @@
import { FastifyReply, FastifyRequest } from 'fastify'; import { FastifyReply, FastifyRequest } from 'fastify';
import { NotHtmlMimetypeError, TxtDotError } from './main'; import { NotHtmlMimetypeError } from './main';
import { getFastifyError } from './validation'; import { getFastifyError } from './validation';
import { TxtDotError } from '@txtdot/sdk/dist/types/errors';
import { IGetSchema } from '../types/requests/browser'; import { IGetSchema } from '../types/requests/browser';
import getConfig from '../config/main'; import getConfig from '../config/main';

View File

@ -1,29 +1,5 @@
import getConfig from '../config/main'; import getConfig from '../config/main';
import { TxtDotError } from '@txtdot/sdk/dist/types/errors';
/**
 * Abstract base for application errors that carry a numeric code, a short
 * name and a human-readable description.
 *
 * The description is also handed to `Error`, so it is available as
 * `message` too.
 */
export abstract class TxtDotError extends Error {
  /**
   * @param code numeric code stored on the error
   * @param name short identifier stored as the error's `name`
   * @param description explanatory text; doubles as the `Error` message
   */
  constructor(
    public code: number,
    public name: string,
    public description: string
  ) {
    super(description);
  }
}
/**
 * Error with code 422 and name `EngineParseError`.
 */
export class EngineParseError extends TxtDotError {
  /**
   * @param message detail appended after the `Parse error: ` prefix
   */
  constructor(message: string) {
    const description = `Parse error: ${message}`;
    super(422, 'EngineParseError', description);
  }
}
/**
 * Error with code 404 and name `NoHandlerFoundError`.
 */
export class NoHandlerFoundError extends TxtDotError {
  /**
   * @param message detail appended after the `No handler found for: ` prefix
   */
  constructor(message: string) {
    const description = `No handler found for: ${message}`;
    super(404, 'NoHandlerFoundError', description);
  }
}
export class LocalResourceError extends TxtDotError { export class LocalResourceError extends TxtDotError {
constructor() { constructor() {

View File

@ -1,19 +1,15 @@
import { IHandlerOutput } from './handler.interface';
import axios, { oaxios } from '../types/axios'; import axios, { oaxios } from '../types/axios';
import micromatch from 'micromatch'; import micromatch from 'micromatch';
import DOMPurify from 'dompurify'; import DOMPurify from 'dompurify';
import { Readable } from 'stream'; import { Readable } from 'stream';
import { NotHtmlMimetypeError } from '../errors/main'; import { NotHtmlMimetypeError } from '../errors/main';
import { HandlerInput } from './handler-input';
import { decodeStream, parseEncodingName } from '../utils/http'; import { decodeStream, parseEncodingName } from '../utils/http';
import replaceHref from '../utils/replace-href'; import replaceHref from '../utils/replace-href';
import { parseHTML } from 'linkedom'; import { parseHTML } from 'linkedom';
import { Engine } from './engine';
import getConfig from '../config/main'; import getConfig from '../config/main';
import { Engine } from '@txtdot/sdk';
import { HandlerInput, IHandlerOutput } from '@txtdot/sdk/dist/types/handler';
interface IEngineId { interface IEngineId {
[key: string]: number; [key: string]: number;
@ -64,7 +60,7 @@ export class Distributor {
); );
// post-process // post-process
// TODO: generate dom in handler and not parse here twice
const dom = parseHTML(output.content); const dom = parseHTML(output.content);
replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath); replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath);

View File

@ -1,47 +0,0 @@
import Route from 'route-parser';
import { HandlerInput } from './handler-input';
import { IHandlerOutput } from './handler.interface';
import { NoHandlerFoundError } from '../errors/main';
import { EngineFunction, RouteValues } from '../types/handlers';
/**
 * One registered route: the compiled `route-parser` pattern together with
 * the engine function that is called when the pattern matches.
 */
type IRoute<TParams extends RouteValues> = {
  route: Route;
  handler: EngineFunction<TParams>;
};
/**
 * A named collection of route handlers together with the domains it is
 * registered for.
 */
export class Engine {
  name: string;
  description: string;
  domains: string[];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  routes: IRoute<any>[] = [];

  /**
   * @param name engine name (also used in the "no handler" error text)
   * @param description free-form description of the engine
   * @param domains domains this engine is declared for; defaults to none
   */
  constructor(name: string, description: string, domains: string[] = []) {
    this.domains = domains;
    this.name = name;
    this.description = description;
  }

  /**
   * Registers `handler` for the given `route-parser` path pattern.
   * Routes are tried in registration order by {@link Engine.handle}.
   */
  route<TParams extends RouteValues>(
    path: string,
    handler: EngineFunction<TParams>
  ) {
    const compiled = new Route<TParams>(path);
    this.routes.push({ route: compiled, handler });
  }

  /**
   * Runs the first registered route whose pattern matches the input URL's
   * path + query + hash.
   *
   * @throws NoHandlerFoundError when no registered route matches
   */
  async handle(input: HandlerInput): Promise<IHandlerOutput> {
    const { pathname, search, hash } = new URL(input.getUrl());
    const path = pathname + search + hash;

    for (const entry of this.routes) {
      const params = entry.route.match(path);
      if (!params) {
        continue;
      }

      return await entry.handler(input, {
        q: params,
        reverse: (req) => entry.route.reverse(req),
      });
    }

    throw new NoHandlerFoundError(`${path}. [${this.name}]`);
  }
}

View File

@ -1,28 +0,0 @@
import { Readability } from '@mozilla/readability';
import { EngineParseError } from '../../errors/main';
import { Engine } from '../engine';
// Catch-all engine: declared for every domain ('*') and every path.
const ReadabilityEngine = new Engine(
  'Readability',
  'Engine for parsing content with Readability',
  ['*']
);

// Feed the parsed DOM to Readability and forward the fields it extracted.
ReadabilityEngine.route('*path', async (input, ro) => {
  const { document } = input.parseDom().window;
  const article = new Readability(document).parse();

  if (!article) {
    throw new EngineParseError(`(${ro.q.path}). [${ReadabilityEngine.name}]`);
  }

  const { content, textContent, title, lang } = article;
  return { content, textContent, title, lang };
});

export default ReadabilityEngine;

View File

@ -1,59 +0,0 @@
import { Route } from '../../types/handlers';
import { Engine } from '../engine';
import { HandlerInput } from '../handler-input';
// Domain patterns this engine is declared for.
const searxDomains = ['searx.*'];

const SearXEngine = new Engine(
  'SearX',
  "Engine for searching with 'SearXNG'",
  searxDomains
);
/**
 * Escapes the characters that are significant in HTML text and in
 * double-quoted attribute values.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Converts a SearXNG result page into a simplified list of results with
 * previous/next page links.
 *
 * @param input handler input whose DOM is the fetched result page
 * @param ro matched route; `q.search` is the query, `q.pageno` the optional
 *   1-based page number
 * @returns HTML content, a plain-text rendering, a title and the page
 *   language
 */
async function search(
  input: HandlerInput,
  ro: Route<{ search: string; pageno?: string }>
) {
  const document = input.parseDom().window.document;
  const search = ro.q.search;

  // A missing, empty or non-numeric page number falls back to the first
  // page instead of leaking NaN into the title and the pager links.
  const requestedPage = Number.parseInt(ro.q.pageno ?? '1', 10);
  const page =
    Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 1;

  const page_footer = `${
    page !== 1
      ? `<a href="${ro.reverse({ search, pageno: page - 1 })}">Previous </a>|`
      : ''
  }<a href="${ro.reverse({ search, pageno: page + 1 })}"> Next</a>`;

  const articles = Array.from(document.querySelectorAll('.result'));

  const articles_parsed = articles.map((a) => {
    // Any of these elements may be absent from a result; treat a missing
    // one as an empty string rather than throwing.
    const link = a.getElementsByClassName('url_wrapper')[0] as
      | HTMLAnchorElement
      | undefined;
    const parsed = {
      url: link?.href ?? '',
      title: a.getElementsByTagName('h3')[0]?.textContent ?? '',
      content: a.getElementsByClassName('content')[0]?.textContent ?? '',
    };

    return {
      // The values are decoded text taken from a remote page, so they are
      // escaped before being placed back into markup.
      html: `<a href="${escapeHtml(parsed.url)}">${escapeHtml(
        parsed.title
      )}</a><p>${escapeHtml(parsed.content)}</p><hr>`,
      text: `${parsed.title} (${parsed.url})\n${parsed.content}\n---\n\n`,
    };
  });

  const content = `${articles_parsed
    .map((a) => a.html)
    .join('')}${page_footer}`;
  const textContent = articles_parsed.map((a) => a.text).join('');

  return {
    content,
    textContent,
    title: `${search} - Searx - Page ${page}`,
    lang: document.documentElement.lang,
  };
}
// Registered in this order so the pattern with a page number is tried first.
for (const pattern of [
  '/search?q=:search&pageno=:pageno',
  '/search?q=:search',
]) {
  SearXEngine.route(pattern, search);
}
export default SearXEngine;

View File

@ -1,22 +0,0 @@
import { Engine } from '../../engine';
import questions from './questions';
import users from './users';
// Domain patterns this engine is declared for.
const stackExchangeDomains: string[] = [
  'stackoverflow.com',
  '*.stackoverflow.com',
  '*.stackexchange.com',
  'askubuntu.com',
  'stackapps.com',
  'mathoverflow.net',
  'superuser.com',
  'serverfault.com',
];

const soEngine = new Engine(
  'StackOverflow',
  "Engine for 'StackOverflow'. Available routes: '/questions/' and '/users/'",
  stackExchangeDomains
);

// Question pages first, then user profile pages.
soEngine.route('/questions/:id/*slug', questions);
soEngine.route('/users/:id/*slug', users);
export default soEngine;

View File

@ -1,49 +0,0 @@
import { Route } from '../../../types/handlers';
import { HandlerInput } from '../../handler-input';
/**
 * Renders a question page as the question post followed by its answers.
 *
 * @param input handler input whose DOM is the fetched question page
 * @param ro matched route; `q.id` and `q.slug` come from the URL
 * @returns HTML content, a placeholder text rendering, the question title
 *   and the page language
 */
async function questions(
  input: HandlerInput,
  ro: Route<{ id: string; slug: string }>
) {
  const document = input.parseDom().window.document;

  const questionEl = document.getElementById('question');
  const question = postParser(questionEl);

  // The title is plain text, like the title returned by the users handler,
  // so read the decoded text rather than entity-encoded markup.
  const title =
    document.querySelector('.question-hyperlink')?.textContent || '';

  const allAnswers = [...document.querySelectorAll('.answer')];
  const answers = allAnswers.map((a) => postParser(a));

  return {
    content: `${question}<hr>${answers.length} answers <hr>${answers.join(
      '<hr>'
    )}`,
    textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
    title,
    lang: document.documentElement.lang,
  };
}
/**
 * Renders one post (question or answer) as an HTML fragment: vote count
 * heading, post body, then one heading per signature block.
 *
 * @param el the post element, or `null` when it was not found
 * @returns the HTML fragment, or an empty string for a missing element
 */
function postParser(el: Element | null): string {
  if (el == null) {
    return '';
  }

  const body = el.querySelector('.js-post-body')?.innerHTML || '';
  const voteCount = el.querySelector('.js-vote-count')?.textContent || '';

  const signatures = Array.from(
    el.querySelectorAll('.post-signature'),
    (signature) => {
      const profileLink = signature.querySelector(
        '.user-details a'
      ) as HTMLAnchorElement | null;
      const userName = profileLink?.textContent || '';
      const userUrl = profileLink?.href || '';
      const userTitle =
        signature.querySelector('.user-action-time')?.textContent || '';

      // Only link to the author when a profile URL is present.
      const author = userUrl ? ` by <a href="${userUrl}">${userName}</a>` : '';
      return `<h4>${userTitle}${author}</h4>`;
    }
  );

  return `<h3>${voteCount} votes</h3>${body}${signatures.join('')}`;
}
export default questions;

View File

@ -1,37 +0,0 @@
import { Route } from '../../../types/handlers';
import { HandlerInput } from '../../handler-input';
/**
 * Renders a user profile page as the profile summary text followed by the
 * list of the user's top posts.
 *
 * @param input handler input whose DOM is the fetched profile page
 * @param ro matched route; `q.id` and `q.slug` come from the URL
 * @returns HTML content, a placeholder text rendering, the page title and
 *   the page language
 */
async function users(
  input: HandlerInput,
  ro: Route<{ id: string; slug: string }>
) {
  const { document } = input.parseDom().window;

  const summaryEl = document.querySelector('.md\\:ai-start > div:nth-child(2)');
  const userInfo = summaryEl?.textContent || '';

  const postList = document.querySelector('#js-top-posts > div:nth-child(2)');
  const postRows = Array.from(postList?.children || []);

  const renderedPosts = postRows.map((row) => {
    const link = row.querySelector('a');
    const title = link?.textContent || '';
    const url = link?.href || '';
    const votes = row.querySelector('.s-badge__votes')?.textContent || '';
    const type =
      row.querySelector('.iconAnswer, .iconQuestion')?.textContent || '';

    return `<strong>${type} (${votes}) </strong><a href="${url}">${title}</a>`;
  });
  const topPosts = renderedPosts.join('<br/>');

  return {
    content: `${userInfo}<hr><h3>Top Posts</h3>${topPosts}`,
    textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
    title: document.querySelector('title')?.textContent || '',
    lang: document.documentElement.lang,
  };
}
export default users;

View File

@ -1,25 +0,0 @@
import { parseHTML } from 'linkedom';
/**
 * Holds the raw page data and its URL, and parses the data into a DOM on
 * first request.
 */
export class HandlerInput {
  // Parsed DOM, filled in by the first parseDom() call and reused afterwards.
  private dom?: Window;

  /**
   * @param data raw page source to be parsed
   * @param url URL associated with the data
   */
  constructor(
    private data: string,
    private url: string
  ) {}

  /** Returns the URL given at construction. */
  getUrl(): string {
    return this.url;
  }

  /** Returns the DOM for the data, parsing it only once. */
  parseDom(): Window {
    this.dom ??= parseHTML(this.data);
    return this.dom;
  }
}

View File

@ -1,24 +0,0 @@
/**
 * Shape of the value a handler produces for a page.
 */
export interface IHandlerOutput {
// Main content string. NOTE(review): presumably HTML markup — confirm against the handlers.
content: string;
// Text rendering of the same page.
textContent: string;
// Optional page title.
title?: string;
// Optional language value. NOTE(review): presumably a language code — confirm.
lang?: string;
}
// Builds a fresh `{ type: 'string' }` schema node for each property.
const stringProperty = () => ({ type: 'string' });

/**
 * JSON-schema description of a handler output object: four string
 * properties named content, textContent, title and lang.
 */
export const handlerSchema = {
  type: 'object',
  properties: {
    content: stringProperty(),
    textContent: stringProperty(),
    title: stringProperty(),
    lang: stringProperty(),
  },
};

View File

@ -1,13 +1,11 @@
import { Distributor } from './distributor'; import { Distributor } from './distributor';
import Readability from './engines/readability'; import { engines } from '@txtdot/plugins';
import SearX from './engines/searx';
import StackOverflow from './engines/stackoverflow/main';
const distributor = new Distributor(); const distributor = new Distributor();
distributor.engine(StackOverflow); distributor.engine(engines.StackOverflow);
distributor.engine(SearX); distributor.engine(engines.SearX);
distributor.engine(Readability); distributor.engine(engines.Readability);
export const engineList = distributor.list; export const engineList = distributor.list;
export default distributor; export default distributor;

View File

@ -1,31 +0,0 @@
// import Route from 'route-parser';
import { Engine } from '../handlers/engine';
import { HandlerInput } from '../handlers/handler-input';
import { IHandlerOutput } from '../handlers/handler.interface';
/** Dictionary of engines indexed by a string key. */
export interface Engines {
[key: string]: Engine;
}
/** Associates one pattern, or a list of patterns, with an engine function. */
export type EngineMatch<TParams extends RouteValues> = {
pattern: string | string[];
engine: EngineFunction<TParams>;
};
/**
 * String-keyed map of string values; used as the constraint for route
 * parameter types throughout this file.
 */
export interface RouteValues {
[key: string]: string;
}
/**
 * Asynchronous engine function: receives the handler input and the matched
 * route, and resolves to the handler output.
 */
export type EngineFunction<TParams extends RouteValues> = (
input: HandlerInput,
ro: Route<TParams>
) => Promise<IHandlerOutput>;
/** List of pattern-to-engine-function associations. */
export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[];
/** Route information handed to an engine function. */
export interface Route<TParams extends RouteValues> {
// Parameter values for the route, typed as TParams.
q: TParams;
// Takes a value for every key of TParams and returns a string, or false.
// NOTE(review): presumably builds a path from the values and returns false when it cannot — confirm against route-parser.
reverse: (req: { [K in keyof TParams]: string | number | boolean }) =>
| string
| false;
}

View File

@ -1,8 +1,8 @@
import { FastifySchema, FastifyRequest } from 'fastify'; import { FastifySchema, FastifyRequest } from 'fastify';
import { IApiError, errorResponseSchema } from '../../errors/api'; import { IApiError, errorResponseSchema } from '../../errors/api';
import { handlerSchema } from '../../handlers/handler.interface';
import { engineList } from '../../handlers/main'; import { engineList } from '../../handlers/main';
import { FromSchema } from 'json-schema-to-ts'; import { FromSchema } from 'json-schema-to-ts';
import { handlerSchema } from '@txtdot/sdk/dist/types/handler';
export interface IApiResponse<T> { export interface IApiResponse<T> {
data?: T; data?: T;