parent
04322ea3c5
commit
9a6819e652
@ -19,7 +19,6 @@ Readability.route('*path', async (input, ro) => {
|
||||
|
||||
return {
|
||||
content: parsed.content,
|
||||
textContent: parsed.textContent,
|
||||
title: parsed.title,
|
||||
lang: parsed.lang,
|
||||
};
|
||||
|
@ -30,6 +30,7 @@
|
||||
"dotenv": "^16.3.1",
|
||||
"ejs": "^3.1.10",
|
||||
"fastify": "^4.26.2",
|
||||
"html-to-text": "^9.0.5",
|
||||
"iconv-lite": "^0.6.3",
|
||||
"ip-range-check": "^0.2.0",
|
||||
"json-schema-to-ts": "^3.0.1",
|
||||
@ -40,6 +41,7 @@
|
||||
"devDependencies": {
|
||||
"@types/dompurify": "^3.0.5",
|
||||
"@types/ejs": "^3.1.5",
|
||||
"@types/html-to-text": "^9.0.4",
|
||||
"@types/jsdom": "^21.1.6",
|
||||
"@types/micromatch": "^4.0.7",
|
||||
"clean-css-cli": "^5.6.3",
|
||||
|
@ -1,5 +1,6 @@
|
||||
import { IAppConfig } from '../types/appConfig';
|
||||
import { engineList } from '@txtdot/plugins';
|
||||
import { compile } from 'html-to-text';
|
||||
|
||||
/**
|
||||
* Configuration of plugins
|
||||
@ -7,6 +8,7 @@ import { engineList } from '@txtdot/plugins';
|
||||
*/
|
||||
// Plugin configuration object; it is the module's default export.
const plugin_config: IAppConfig = {
|
||||
// Shallow copy of the engine list exported by '@txtdot/plugins'.
engines: [...engineList],
|
||||
// HTML-to-text converter produced by html-to-text's `compile`, called here with no options.
html2text: compile(),
|
||||
};
|
||||
|
||||
export default plugin_config;
|
||||
|
@ -10,6 +10,7 @@ import { Engine } from '@txtdot/sdk';
|
||||
import { HandlerInput, HandlerOutput } from '@txtdot/sdk';
|
||||
import config from './config';
|
||||
import { parseHTML } from 'linkedom';
|
||||
import { html2text } from './utils/html2text';
|
||||
|
||||
interface IEngineId {
|
||||
[key: string]: number;
|
||||
@ -78,7 +79,7 @@ export class Distributor {
|
||||
return {
|
||||
content,
|
||||
textContent:
|
||||
output.textContent || dom.document.documentElement.textContent || '',
|
||||
html2text(output, dom.document) || 'Text output cannot be generated.',
|
||||
title: output.title || dom.document.title,
|
||||
lang: output.lang || dom.document.documentElement.lang,
|
||||
};
|
||||
|
@ -1,5 +1,8 @@
|
||||
import { Engine } from '@txtdot/sdk';
|
||||
|
||||
/** Function that takes an HTML string and returns a string. */
type Html2TextConverter = (html: string) => string;
|
||||
|
||||
/** Shape of the application's plugin configuration. */
export interface IAppConfig {
|
||||
/** Engines made available through this configuration. */
engines: Engine[];
|
||||
/** Optional HTML-to-text converter; may be left unset. */
html2text?: Html2TextConverter;
|
||||
}
|
||||
|
9
packages/server/src/utils/html2text.ts
Normal file
9
packages/server/src/utils/html2text.ts
Normal file
@ -0,0 +1,9 @@
|
||||
import { EngineOutput } from '@txtdot/sdk/dist/types/handler';
|
||||
import config from '../config';
|
||||
|
||||
/**
 * Picks the plain-text representation for an engine result.
 *
 * Resolution order:
 * 1. `output.textContent`, when it is truthy;
 * 2. the converter at `config.plugin.html2text`, applied to `output.content`,
 *    when such a converter is configured;
 * 3. the text content of the document's root element.
 *
 * @param output - Engine result; its `textContent` and `content` are read.
 * @param doc - Document used as the last-resort source of text.
 * @returns The selected text. Never `null`: a missing root `textContent`
 *   is normalised to an empty string.
 */
export function html2text(output: EngineOutput, doc: Document): string {
  if (output.textContent) return output.textContent;

  if (config.plugin.html2text) {
    return config.plugin.html2text(output.content);
  }

  // `textContent` is typed `string | null`; coalesce so callers always get a string.
  return doc.documentElement.textContent ?? '';
}
|
72
pnpm-lock.yaml
generated
72
pnpm-lock.yaml
generated
@ -103,6 +103,9 @@ importers:
|
||||
fastify:
|
||||
specifier: ^4.26.2
|
||||
version: 4.27.0
|
||||
html-to-text:
|
||||
specifier: ^9.0.5
|
||||
version: 9.0.5
|
||||
iconv-lite:
|
||||
specifier: ^0.6.3
|
||||
version: 0.6.3
|
||||
@ -128,6 +131,9 @@ importers:
|
||||
'@types/ejs':
|
||||
specifier: ^3.1.5
|
||||
version: 3.1.5
|
||||
'@types/html-to-text':
|
||||
specifier: ^9.0.4
|
||||
version: 9.0.4
|
||||
'@types/jsdom':
|
||||
specifier: ^21.1.6
|
||||
version: 21.1.6
|
||||
@ -543,6 +549,9 @@ packages:
|
||||
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
|
||||
engines: {node: '>=14'}
|
||||
|
||||
'@selderee/plugin-htmlparser2@0.11.0':
|
||||
resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==}
|
||||
|
||||
'@sigstore/bundle@1.1.0':
|
||||
resolution: {integrity: sha512-PFutXEy0SmQxYI4texPw3dd2KewuNqv7OuK1ZFtY2fM754yhvG2KdgwIhRnoEE2uHdtdGNQ8s0lb94dW9sELog==}
|
||||
engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
|
||||
@ -615,6 +624,9 @@ packages:
|
||||
'@types/ejs@3.1.5':
|
||||
resolution: {integrity: sha512-nv+GSx77ZtXiJzwKdsASqi+YQ5Z7vwHsTP0JY2SiQgjGckkBRKZnk8nIM+7oUZ1VCtuTz0+By4qVR7fqzp/Dfg==}
|
||||
|
||||
'@types/html-to-text@9.0.4':
|
||||
resolution: {integrity: sha512-pUY3cKH/Nm2yYrEmDlPR1mR7yszjGx4DrwPjQ702C4/D5CwHuZTgZdIdwPkRbcuhs7BAh2L5rg3CL5cbRiGTCQ==}
|
||||
|
||||
'@types/jsdom@21.1.6':
|
||||
resolution: {integrity: sha512-/7kkMsC+/kMs7gAYmmBR9P0vGTnOoLhQhyhQJSlXGI5bzTHp6xdo0TtKWQAsz6pmSAeVqKSbqeyP6hytqr9FDw==}
|
||||
|
||||
@ -1170,6 +1182,10 @@ packages:
|
||||
deep-is@0.1.4:
|
||||
resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==}
|
||||
|
||||
deepmerge@4.3.1:
|
||||
resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
defaults@1.0.4:
|
||||
resolution: {integrity: sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A==}
|
||||
|
||||
@ -1637,6 +1653,13 @@ packages:
|
||||
html-escaper@3.0.3:
|
||||
resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==}
|
||||
|
||||
html-to-text@9.0.5:
|
||||
resolution: {integrity: sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg==}
|
||||
engines: {node: '>=14'}
|
||||
|
||||
htmlparser2@8.0.2:
|
||||
resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==}
|
||||
|
||||
htmlparser2@9.1.0:
|
||||
resolution: {integrity: sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==}
|
||||
|
||||
@ -1932,6 +1955,9 @@ packages:
|
||||
resolution: {integrity: sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
leac@0.6.0:
|
||||
resolution: {integrity: sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==}
|
||||
|
||||
lerna@8.1.2:
|
||||
resolution: {integrity: sha512-RCyBAn3XsqqvHbz3TxLfD7ylqzCi1A2UJnFEZmhURgx589vM3qYWQa/uOMeEEf565q6cAdtmulITciX1wgkAtw==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
@ -2427,6 +2453,9 @@ packages:
|
||||
parse5@7.1.2:
|
||||
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
|
||||
|
||||
parseley@0.12.1:
|
||||
resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
|
||||
|
||||
path-exists@3.0.0:
|
||||
resolution: {integrity: sha512-bpC7GYwiDYQ4wYLe+FA8lhRjhQCMcQGuSgGGqDkg/QerRWw9CmGRT0iSOVRSZJ29NMLZgIzqaljJ63oaL4NIJQ==}
|
||||
engines: {node: '>=4'}
|
||||
@ -2461,6 +2490,9 @@ packages:
|
||||
pause-stream@0.0.11:
|
||||
resolution: {integrity: sha512-e3FBlXLmN/D1S+zHzanP4E/4Z60oFAa3O051qt1pxa7DEJWKAyil6upYVXCWadEnuoqa4Pkc9oUx9zsxYeRv8A==}
|
||||
|
||||
peberminta@0.9.0:
|
||||
resolution: {integrity: sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==}
|
||||
|
||||
picocolors@1.0.0:
|
||||
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
|
||||
|
||||
@ -2738,6 +2770,9 @@ packages:
|
||||
secure-json-parse@2.7.0:
|
||||
resolution: {integrity: sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==}
|
||||
|
||||
selderee@0.11.0:
|
||||
resolution: {integrity: sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==}
|
||||
|
||||
semver@5.7.2:
|
||||
resolution: {integrity: sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==}
|
||||
hasBin: true
|
||||
@ -3701,6 +3736,11 @@ snapshots:
|
||||
'@pkgjs/parseargs@0.11.0':
|
||||
optional: true
|
||||
|
||||
'@selderee/plugin-htmlparser2@0.11.0':
|
||||
dependencies:
|
||||
domhandler: 5.0.3
|
||||
selderee: 0.11.0
|
||||
|
||||
'@sigstore/bundle@1.1.0':
|
||||
dependencies:
|
||||
'@sigstore/protobuf-specs': 0.2.1
|
||||
@ -3780,6 +3820,8 @@ snapshots:
|
||||
|
||||
'@types/ejs@3.1.5': {}
|
||||
|
||||
'@types/html-to-text@9.0.4': {}
|
||||
|
||||
'@types/jsdom@21.1.6':
|
||||
dependencies:
|
||||
'@types/node': 20.12.11
|
||||
@ -4387,6 +4429,8 @@ snapshots:
|
||||
|
||||
deep-is@0.1.4: {}
|
||||
|
||||
deepmerge@4.3.1: {}
|
||||
|
||||
defaults@1.0.4:
|
||||
dependencies:
|
||||
clone: 1.0.4
|
||||
@ -4902,6 +4946,21 @@ snapshots:
|
||||
|
||||
html-escaper@3.0.3: {}
|
||||
|
||||
html-to-text@9.0.5:
|
||||
dependencies:
|
||||
'@selderee/plugin-htmlparser2': 0.11.0
|
||||
deepmerge: 4.3.1
|
||||
dom-serializer: 2.0.0
|
||||
htmlparser2: 8.0.2
|
||||
selderee: 0.11.0
|
||||
|
||||
htmlparser2@8.0.2:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.1.0
|
||||
entities: 4.5.0
|
||||
|
||||
htmlparser2@9.1.0:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
@ -5194,6 +5253,8 @@ snapshots:
|
||||
|
||||
kind-of@6.0.3: {}
|
||||
|
||||
leac@0.6.0: {}
|
||||
|
||||
lerna@8.1.2(encoding@0.1.13):
|
||||
dependencies:
|
||||
'@lerna/create': 8.1.2(encoding@0.1.13)(typescript@5.4.5)
|
||||
@ -5925,6 +5986,11 @@ snapshots:
|
||||
dependencies:
|
||||
entities: 4.5.0
|
||||
|
||||
parseley@0.12.1:
|
||||
dependencies:
|
||||
leac: 0.6.0
|
||||
peberminta: 0.9.0
|
||||
|
||||
path-exists@3.0.0: {}
|
||||
|
||||
path-exists@4.0.0: {}
|
||||
@ -5950,6 +6016,8 @@ snapshots:
|
||||
dependencies:
|
||||
through: 2.3.8
|
||||
|
||||
peberminta@0.9.0: {}
|
||||
|
||||
picocolors@1.0.0: {}
|
||||
|
||||
picomatch@2.3.1: {}
|
||||
@ -6225,6 +6293,10 @@ snapshots:
|
||||
|
||||
secure-json-parse@2.7.0: {}
|
||||
|
||||
selderee@0.11.0:
|
||||
dependencies:
|
||||
parseley: 0.12.1
|
||||
|
||||
semver@5.7.2: {}
|
||||
|
||||
semver@7.6.2: {}
|
||||
|
Loading…
x
Reference in New Issue
Block a user