refactor: engine output
This commit is contained in:
parent
494d1c8134
commit
bdf625bb1f
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@txtdot/plugins",
|
||||
"version": "1.1.1",
|
||||
"version": "2.0.0",
|
||||
"description": "Official txtdot plugins",
|
||||
"main": "dist/lib.js",
|
||||
"types": "dist/lib.d.ts",
|
||||
@ -19,6 +19,7 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"linkedom": "^0.18.0",
|
||||
"@txtdot/sdk": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
@ -1,6 +1,7 @@
|
||||
import { Readability as OReadability } from '@mozilla/readability';
|
||||
|
||||
import { Engine, EngineParseError } from '@txtdot/sdk';
|
||||
import { parseHTML } from 'linkedom';
|
||||
|
||||
const Readability = new Engine(
|
||||
'Readability',
|
||||
@ -9,7 +10,7 @@ const Readability = new Engine(
|
||||
);
|
||||
|
||||
Readability.route('*path', async (input, ro) => {
|
||||
const reader = new OReadability(input.parseDom().window.document);
|
||||
const reader = new OReadability(input.document);
|
||||
const parsed = reader.parse();
|
||||
|
||||
if (!parsed) {
|
||||
@ -17,7 +18,7 @@ Readability.route('*path', async (input, ro) => {
|
||||
}
|
||||
|
||||
return {
|
||||
content: parsed.content,
|
||||
document: parseHTML(parsed.content).document,
|
||||
textContent: parsed.textContent,
|
||||
title: parsed.title,
|
||||
lang: parsed.lang,
|
||||
|
@ -1,5 +1,6 @@
|
||||
import { Engine } from '@txtdot/sdk';
|
||||
import { HandlerInput, Route } from '@txtdot/sdk';
|
||||
import { parseHTML } from 'linkedom';
|
||||
|
||||
const SearX = new Engine('SearX', "Engine for searching with 'SearXNG'", [
|
||||
'searx.*',
|
||||
@ -9,7 +10,7 @@ async function search(
|
||||
input: HandlerInput,
|
||||
ro: Route<{ search: string; pageno?: string }>
|
||||
) {
|
||||
const document = input.parseDom().window.document;
|
||||
const document = input.document;
|
||||
const search = ro.q.search;
|
||||
const page = parseInt(ro.q.pageno || '1');
|
||||
|
||||
@ -45,7 +46,7 @@ async function search(
|
||||
const textContent = articles_parsed.map((a) => a.text).join('');
|
||||
|
||||
return {
|
||||
content,
|
||||
document: parseHTML(content).document,
|
||||
textContent,
|
||||
title: `${search} - Searx - Page ${page}`,
|
||||
lang: document.documentElement.lang,
|
||||
|
@ -1,10 +1,11 @@
|
||||
import { HandlerInput, Route } from '@txtdot/sdk';
|
||||
import { parseHTML } from 'linkedom';
|
||||
|
||||
async function questions(
|
||||
input: HandlerInput,
|
||||
ro: Route<{ id: string; slug: string }>
|
||||
) {
|
||||
const document = input.parseDom().window.document;
|
||||
const document = input.document;
|
||||
|
||||
const questionEl = document.getElementById('question');
|
||||
const question = postParser(questionEl);
|
||||
@ -15,9 +16,9 @@ async function questions(
|
||||
const answers = allAnswers.map((a) => postParser(a));
|
||||
|
||||
return {
|
||||
content: `${question}<hr>${answers.length} answers <hr>${answers.join(
|
||||
'<hr>'
|
||||
)}`,
|
||||
document: parseHTML(
|
||||
`${question}<hr>${answers.length} answers <hr>${answers.join('<hr>')}`
|
||||
).document,
|
||||
textContent: `${ro.q.id}/${ro.q.slug}\nText output not supported`, // TODO
|
||||
title,
|
||||
lang: document.documentElement.lang,
|
||||
|
@ -1,10 +1,11 @@
|
||||
import { HandlerInput, Route } from '@txtdot/sdk';
|
||||
import { parseHTML } from 'linkedom';
|
||||
|
||||
async function users(
|
||||
input: HandlerInput,
|
||||
ro: Route<{ id: string; slug: string }>
|
||||
) {
|
||||
const document = input.parseDom().window.document;
|
||||
const document = input.document;
|
||||
|
||||
const userInfo =
|
||||
document.querySelector('.md\\:ai-start > div:nth-child(2)')?.textContent ||
|
||||
@ -26,7 +27,8 @@ async function users(
|
||||
.join('<br/>');
|
||||
|
||||
return {
|
||||
content: `${userInfo}<hr><h3>Top Posts</h3>${topPosts}`,
|
||||
document: parseHTML(`${userInfo}<hr><h3>Top Posts</h3>${topPosts}`)
|
||||
.document,
|
||||
textContent: `${ro.q.id}/${ro.q.slug}\n`, // TODO
|
||||
title: document.querySelector('title')?.textContent || '',
|
||||
lang: document.documentElement.lang,
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@txtdot/sdk",
|
||||
"version": "1.1.2",
|
||||
"version": "2.0.0",
|
||||
"description": "SDK for creating plugins for TxtDot",
|
||||
"main": "dist/lib.js",
|
||||
"types": "dist/lib.d.ts",
|
||||
|
@ -2,9 +2,9 @@ import Route from 'route-parser';
|
||||
|
||||
import {
|
||||
HandlerInput,
|
||||
IHandlerOutput,
|
||||
EngineFunction,
|
||||
RouteValues,
|
||||
EngineOutput,
|
||||
} from './types/handler';
|
||||
|
||||
import { NoHandlerFoundError } from './types/errors';
|
||||
@ -33,7 +33,7 @@ export class Engine {
|
||||
this.routes.push({ route: new Route<TParams>(path), handler });
|
||||
}
|
||||
|
||||
async handle(input: HandlerInput): Promise<IHandlerOutput> {
|
||||
async handle(input: HandlerInput): Promise<EngineOutput> {
|
||||
const url = new URL(input.getUrl());
|
||||
const path = url.pathname + url.search + url.hash;
|
||||
for (const route of this.routes) {
|
||||
|
@ -13,7 +13,7 @@ import {
|
||||
RouteValues,
|
||||
EnginesMatch,
|
||||
HandlerInput,
|
||||
IHandlerOutput,
|
||||
HandlerOutput,
|
||||
Route,
|
||||
handlerSchema,
|
||||
} from './types/handler';
|
||||
@ -29,7 +29,7 @@ export {
|
||||
RouteValues,
|
||||
EnginesMatch,
|
||||
HandlerInput,
|
||||
IHandlerOutput,
|
||||
HandlerOutput,
|
||||
Route,
|
||||
handlerSchema,
|
||||
};
|
||||
|
@ -4,7 +4,7 @@ import { Engine } from '../engine';
|
||||
export class HandlerInput {
|
||||
private data: string;
|
||||
private url: string;
|
||||
private dom?: Window;
|
||||
private window?: Window;
|
||||
|
||||
constructor(data: string, url: string) {
|
||||
this.data = data;
|
||||
@ -15,23 +15,30 @@ export class HandlerInput {
|
||||
return this.url;
|
||||
}
|
||||
|
||||
parseDom(): Window {
|
||||
if (this.dom) {
|
||||
return this.dom;
|
||||
get document(): Document {
|
||||
if (this.window) {
|
||||
return this.window.document;
|
||||
}
|
||||
|
||||
this.dom = parseHTML(this.data);
|
||||
return this.dom;
|
||||
this.window = parseHTML(this.data);
|
||||
return this.window.document;
|
||||
}
|
||||
}
|
||||
|
||||
export interface IHandlerOutput {
|
||||
export interface HandlerOutput {
|
||||
content: string;
|
||||
textContent: string;
|
||||
title?: string;
|
||||
lang?: string;
|
||||
}
|
||||
|
||||
export interface EngineOutput {
|
||||
document: Document;
|
||||
textContent?: string;
|
||||
title?: string;
|
||||
lang?: string;
|
||||
}
|
||||
|
||||
export const handlerSchema = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
@ -66,7 +73,7 @@ export interface RouteValues {
|
||||
export type EngineFunction<TParams extends RouteValues> = (
|
||||
input: HandlerInput,
|
||||
ro: Route<TParams>
|
||||
) => Promise<IHandlerOutput>;
|
||||
) => Promise<EngineOutput>;
|
||||
|
||||
export type EnginesMatch<TParams extends RouteValues> = EngineMatch<TParams>[];
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@txtdot/server",
|
||||
"version": "1.7.0",
|
||||
"version": "1.8.0",
|
||||
"private": true,
|
||||
"description": "txtdot is an HTTP proxy that parses only text, links and pictures from pages reducing internet bandwidth usage, removing ads and heavy scripts",
|
||||
"main": "dist/app.js",
|
||||
|
@ -5,10 +5,9 @@ import { Readable } from 'stream';
|
||||
import { NotHtmlMimetypeError } from './errors/main';
|
||||
import { decodeStream, parseEncodingName } from './utils/http';
|
||||
import replaceHref from './utils/replace-href';
|
||||
import { parseHTML } from 'linkedom';
|
||||
|
||||
import { Engine } from '@txtdot/sdk';
|
||||
import { HandlerInput, IHandlerOutput } from '@txtdot/sdk';
|
||||
import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
|
||||
import config from './config';
|
||||
|
||||
interface IEngineId {
|
||||
@ -32,7 +31,7 @@ export class Distributor {
|
||||
requestUrl: URL, // proxy URL
|
||||
engineName?: string,
|
||||
redirectPath: string = 'get'
|
||||
): Promise<IHandlerOutput> {
|
||||
): Promise<HandlerOutput> {
|
||||
const urlObj = new URL(remoteUrl);
|
||||
|
||||
const webder_url = config.env.third_party.webder_url;
|
||||
@ -61,13 +60,23 @@ export class Distributor {
|
||||
|
||||
// post-process
|
||||
// TODO: generate dom in handler and not parse here twice
|
||||
const dom = parseHTML(output.content);
|
||||
replaceHref(dom, requestUrl, new URL(remoteUrl), engineName, redirectPath);
|
||||
replaceHref(
|
||||
output.document,
|
||||
requestUrl,
|
||||
new URL(remoteUrl),
|
||||
engineName,
|
||||
redirectPath
|
||||
);
|
||||
|
||||
const purify = DOMPurify(dom.window);
|
||||
output.content = purify.sanitize(dom.document.toString());
|
||||
const purify = DOMPurify();
|
||||
const content = purify.sanitize(output.document.toString());
|
||||
|
||||
return output;
|
||||
return {
|
||||
content,
|
||||
textContent: output.textContent || output.document.textContent || '',
|
||||
title: output.title,
|
||||
lang: output.lang,
|
||||
};
|
||||
}
|
||||
|
||||
getFallbackEngine(host: string, specified?: string): Engine {
|
||||
|
@ -2,13 +2,12 @@ import config from '../config';
|
||||
import { generateParserUrl, generateProxyUrl } from './generate';
|
||||
|
||||
export default function replaceHref(
|
||||
dom: Window,
|
||||
doc: Document,
|
||||
requestUrl: URL,
|
||||
remoteUrl: URL,
|
||||
engine?: string,
|
||||
redirectPath: string = 'get'
|
||||
) {
|
||||
const doc: Document = dom.window.document;
|
||||
const parserUrl = (href: string) =>
|
||||
generateParserUrl(requestUrl, remoteUrl, href, engine, redirectPath);
|
||||
|
||||
|
72
pnpm-lock.yaml
generated
72
pnpm-lock.yaml
generated
@ -13,7 +13,7 @@ importers:
|
||||
version: 20.12.11
|
||||
'@typescript-eslint/eslint-plugin':
|
||||
specifier: ^7.7.0
|
||||
version: 7.8.0(@typescript-eslint/parser@7.8.0)(eslint@8.57.0)(typescript@5.4.5)
|
||||
version: 7.8.0(@typescript-eslint/parser@7.8.0(eslint@8.57.0)(typescript@5.4.5))(eslint@8.57.0)(typescript@5.4.5)
|
||||
'@typescript-eslint/parser':
|
||||
specifier: ^7.7.0
|
||||
version: 7.8.0(eslint@8.57.0)(typescript@5.4.5)
|
||||
@ -22,7 +22,7 @@ importers:
|
||||
version: 8.57.0
|
||||
lerna:
|
||||
specifier: ^8.1.2
|
||||
version: 8.1.2
|
||||
version: 8.1.2(encoding@0.1.13)
|
||||
prettier:
|
||||
specifier: ^3.1.1
|
||||
version: 3.2.5
|
||||
@ -41,6 +41,9 @@ importers:
|
||||
'@txtdot/sdk':
|
||||
specifier: workspace:*
|
||||
version: link:../sdk
|
||||
linkedom:
|
||||
specifier: ^0.18.0
|
||||
version: 0.18.0
|
||||
devDependencies:
|
||||
typescript:
|
||||
specifier: ^5.4.5
|
||||
@ -3378,12 +3381,12 @@ snapshots:
|
||||
dependencies:
|
||||
'@sinclair/typebox': 0.27.8
|
||||
|
||||
'@lerna/create@8.1.2(typescript@5.4.5)':
|
||||
'@lerna/create@8.1.2(encoding@0.1.13)(typescript@5.4.5)':
|
||||
dependencies:
|
||||
'@npmcli/run-script': 7.0.2
|
||||
'@nx/devkit': 18.3.4(nx@18.3.4)
|
||||
'@octokit/plugin-enterprise-rest': 6.0.1
|
||||
'@octokit/rest': 19.0.11
|
||||
'@octokit/rest': 19.0.11(encoding@0.1.13)
|
||||
byte-size: 8.1.1
|
||||
chalk: 4.1.0
|
||||
clone-deep: 4.0.1
|
||||
@ -3413,7 +3416,7 @@ snapshots:
|
||||
make-dir: 4.0.0
|
||||
minimatch: 3.0.5
|
||||
multimatch: 5.0.0
|
||||
node-fetch: 2.6.7
|
||||
node-fetch: 2.6.7(encoding@0.1.13)
|
||||
npm-package-arg: 8.1.1
|
||||
npm-packlist: 5.1.1
|
||||
npm-registry-fetch: 14.0.5
|
||||
@ -3578,11 +3581,11 @@ snapshots:
|
||||
|
||||
'@octokit/auth-token@3.0.4': {}
|
||||
|
||||
'@octokit/core@4.2.4':
|
||||
'@octokit/core@4.2.4(encoding@0.1.13)':
|
||||
dependencies:
|
||||
'@octokit/auth-token': 3.0.4
|
||||
'@octokit/graphql': 5.0.6
|
||||
'@octokit/request': 6.2.8
|
||||
'@octokit/graphql': 5.0.6(encoding@0.1.13)
|
||||
'@octokit/request': 6.2.8(encoding@0.1.13)
|
||||
'@octokit/request-error': 3.0.3
|
||||
'@octokit/types': 9.3.2
|
||||
before-after-hook: 2.2.3
|
||||
@ -3596,9 +3599,9 @@ snapshots:
|
||||
is-plain-object: 5.0.0
|
||||
universal-user-agent: 6.0.1
|
||||
|
||||
'@octokit/graphql@5.0.6':
|
||||
'@octokit/graphql@5.0.6(encoding@0.1.13)':
|
||||
dependencies:
|
||||
'@octokit/request': 6.2.8
|
||||
'@octokit/request': 6.2.8(encoding@0.1.13)
|
||||
'@octokit/types': 9.3.2
|
||||
universal-user-agent: 6.0.1
|
||||
transitivePeerDependencies:
|
||||
@ -3608,19 +3611,19 @@ snapshots:
|
||||
|
||||
'@octokit/plugin-enterprise-rest@6.0.1': {}
|
||||
|
||||
'@octokit/plugin-paginate-rest@6.1.2(@octokit/core@4.2.4)':
|
||||
'@octokit/plugin-paginate-rest@6.1.2(@octokit/core@4.2.4(encoding@0.1.13))':
|
||||
dependencies:
|
||||
'@octokit/core': 4.2.4
|
||||
'@octokit/core': 4.2.4(encoding@0.1.13)
|
||||
'@octokit/tsconfig': 1.0.2
|
||||
'@octokit/types': 9.3.2
|
||||
|
||||
'@octokit/plugin-request-log@1.0.4(@octokit/core@4.2.4)':
|
||||
'@octokit/plugin-request-log@1.0.4(@octokit/core@4.2.4(encoding@0.1.13))':
|
||||
dependencies:
|
||||
'@octokit/core': 4.2.4
|
||||
'@octokit/core': 4.2.4(encoding@0.1.13)
|
||||
|
||||
'@octokit/plugin-rest-endpoint-methods@7.2.3(@octokit/core@4.2.4)':
|
||||
'@octokit/plugin-rest-endpoint-methods@7.2.3(@octokit/core@4.2.4(encoding@0.1.13))':
|
||||
dependencies:
|
||||
'@octokit/core': 4.2.4
|
||||
'@octokit/core': 4.2.4(encoding@0.1.13)
|
||||
'@octokit/types': 10.0.0
|
||||
|
||||
'@octokit/request-error@3.0.3':
|
||||
@ -3629,23 +3632,23 @@ snapshots:
|
||||
deprecation: 2.3.1
|
||||
once: 1.4.0
|
||||
|
||||
'@octokit/request@6.2.8':
|
||||
'@octokit/request@6.2.8(encoding@0.1.13)':
|
||||
dependencies:
|
||||
'@octokit/endpoint': 7.0.6
|
||||
'@octokit/request-error': 3.0.3
|
||||
'@octokit/types': 9.3.2
|
||||
is-plain-object: 5.0.0
|
||||
node-fetch: 2.6.7
|
||||
node-fetch: 2.6.7(encoding@0.1.13)
|
||||
universal-user-agent: 6.0.1
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
|
||||
'@octokit/rest@19.0.11':
|
||||
'@octokit/rest@19.0.11(encoding@0.1.13)':
|
||||
dependencies:
|
||||
'@octokit/core': 4.2.4
|
||||
'@octokit/plugin-paginate-rest': 6.1.2(@octokit/core@4.2.4)
|
||||
'@octokit/plugin-request-log': 1.0.4(@octokit/core@4.2.4)
|
||||
'@octokit/plugin-rest-endpoint-methods': 7.2.3(@octokit/core@4.2.4)
|
||||
'@octokit/core': 4.2.4(encoding@0.1.13)
|
||||
'@octokit/plugin-paginate-rest': 6.1.2(@octokit/core@4.2.4(encoding@0.1.13))
|
||||
'@octokit/plugin-request-log': 1.0.4(@octokit/core@4.2.4(encoding@0.1.13))
|
||||
'@octokit/plugin-rest-endpoint-methods': 7.2.3(@octokit/core@4.2.4(encoding@0.1.13))
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
|
||||
@ -3771,7 +3774,7 @@ snapshots:
|
||||
|
||||
'@types/trusted-types@2.0.7': {}
|
||||
|
||||
'@typescript-eslint/eslint-plugin@7.8.0(@typescript-eslint/parser@7.8.0)(eslint@8.57.0)(typescript@5.4.5)':
|
||||
'@typescript-eslint/eslint-plugin@7.8.0(@typescript-eslint/parser@7.8.0(eslint@8.57.0)(typescript@5.4.5))(eslint@8.57.0)(typescript@5.4.5)':
|
||||
dependencies:
|
||||
'@eslint-community/regexpp': 4.10.0
|
||||
'@typescript-eslint/parser': 7.8.0(eslint@8.57.0)(typescript@5.4.5)
|
||||
@ -3786,6 +3789,7 @@ snapshots:
|
||||
natural-compare: 1.4.0
|
||||
semver: 7.6.2
|
||||
ts-api-utils: 1.3.0(typescript@5.4.5)
|
||||
optionalDependencies:
|
||||
typescript: 5.4.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
@ -3798,6 +3802,7 @@ snapshots:
|
||||
'@typescript-eslint/visitor-keys': 7.8.0
|
||||
debug: 4.3.4
|
||||
eslint: 8.57.0
|
||||
optionalDependencies:
|
||||
typescript: 5.4.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
@ -3814,6 +3819,7 @@ snapshots:
|
||||
debug: 4.3.4
|
||||
eslint: 8.57.0
|
||||
ts-api-utils: 1.3.0(typescript@5.4.5)
|
||||
optionalDependencies:
|
||||
typescript: 5.4.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
@ -3830,6 +3836,7 @@ snapshots:
|
||||
minimatch: 9.0.4
|
||||
semver: 7.6.2
|
||||
ts-api-utils: 1.3.0(typescript@5.4.5)
|
||||
optionalDependencies:
|
||||
typescript: 5.4.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
@ -3909,11 +3916,11 @@ snapshots:
|
||||
indent-string: 4.0.0
|
||||
|
||||
ajv-formats@2.1.1(ajv@8.13.0):
|
||||
dependencies:
|
||||
optionalDependencies:
|
||||
ajv: 8.13.0
|
||||
|
||||
ajv-formats@3.0.1(ajv@8.13.0):
|
||||
dependencies:
|
||||
optionalDependencies:
|
||||
ajv: 8.13.0
|
||||
|
||||
ajv@6.12.6:
|
||||
@ -4300,6 +4307,7 @@ snapshots:
|
||||
js-yaml: 4.1.0
|
||||
parse-json: 5.2.0
|
||||
path-type: 4.0.0
|
||||
optionalDependencies:
|
||||
typescript: 5.4.5
|
||||
|
||||
cross-spawn@7.0.3:
|
||||
@ -5138,13 +5146,13 @@ snapshots:
|
||||
|
||||
kind-of@6.0.3: {}
|
||||
|
||||
lerna@8.1.2:
|
||||
lerna@8.1.2(encoding@0.1.13):
|
||||
dependencies:
|
||||
'@lerna/create': 8.1.2(typescript@5.4.5)
|
||||
'@lerna/create': 8.1.2(encoding@0.1.13)(typescript@5.4.5)
|
||||
'@npmcli/run-script': 7.0.2
|
||||
'@nx/devkit': 18.3.4(nx@18.3.4)
|
||||
'@octokit/plugin-enterprise-rest': 6.0.1
|
||||
'@octokit/rest': 19.0.11
|
||||
'@octokit/rest': 19.0.11(encoding@0.1.13)
|
||||
byte-size: 8.1.1
|
||||
chalk: 4.1.0
|
||||
clone-deep: 4.0.1
|
||||
@ -5180,7 +5188,7 @@ snapshots:
|
||||
make-dir: 4.0.0
|
||||
minimatch: 3.0.5
|
||||
multimatch: 5.0.0
|
||||
node-fetch: 2.6.7
|
||||
node-fetch: 2.6.7(encoding@0.1.13)
|
||||
npm-package-arg: 8.1.1
|
||||
npm-packlist: 5.1.1
|
||||
npm-registry-fetch: 14.0.5
|
||||
@ -5506,9 +5514,11 @@ snapshots:
|
||||
|
||||
node-cleanup@2.1.2: {}
|
||||
|
||||
node-fetch@2.6.7:
|
||||
node-fetch@2.6.7(encoding@0.1.13):
|
||||
dependencies:
|
||||
whatwg-url: 5.0.0
|
||||
optionalDependencies:
|
||||
encoding: 0.1.13
|
||||
|
||||
node-gyp@10.1.0:
|
||||
dependencies:
|
||||
|
Loading…
x
Reference in New Issue
Block a user