feat: google parsing and search

This commit is contained in:
Artemy 2023-08-15 13:05:06 +03:00
parent 239994ac5e
commit 7000189d6d
4 changed files with 45 additions and 5 deletions

35
src/handlers/google.ts Normal file
View File

@ -0,0 +1,35 @@
import { DOMWindow } from "jsdom";
import { IHandlerOutput } from "./handler.interface";
/**
 * Handler for Google search result pages.
 *
 * Collects the result anchors under `#rso`, renders each one as a paragraph
 * containing a link, and prepends a small search form so a new query can be
 * submitted through this proxy.
 *
 * A page with no matching anchors is not an error: the output then contains
 * only the search form.
 *
 * @param window - DOM window of the already-fetched Google page.
 * @returns Handler output with the HTML listing in `content`, a plain-text
 *   listing in `textContent`, the document title and the document language.
 */
export default async function google(
  window: DOMWindow
): Promise<IHandlerOutput> {
  // querySelectorAll always returns a (possibly empty) list, never null, so
  // no existence check is needed here.
  const anchors = Array.from(
    window.document.querySelectorAll(
      "#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)"
    )
  ) as HTMLAnchorElement[];

  const htmlItems: string[] = [];
  const textItems: string[] = [];
  for (const anchor of anchors) {
    // The result heading is expected to be the anchor's second child node.
    const heading = anchor.childNodes[1] as HTMLElement | undefined;
    // Skip anchors without an element in that position (missing child or a
    // text node) instead of crashing or printing "undefined".
    if (!heading || typeof heading.innerHTML !== "string") {
      continue;
    }
    htmlItems.push(`<p><a href="${anchor.href}">${heading.innerHTML}</a></p>`);
    textItems.push(`${heading.textContent ?? ""} (${anchor.href})`);
  }

  // Both the query and the full target URL are percent-encoded so that
  // characters such as "&", "#" or "+" in the query survive the round trip
  // through the proxy's own `url` query parameter.
  const searchForm = `
<div id="searchform" method="get">
<input type="text" name="q" id="q">
<input type="button" value="Search" onclick="window.location.href = '/?url=' + encodeURIComponent('https://www.google.com/search?q=' + encodeURIComponent(document.getElementById('q').value));">
</div>
`;

  return {
    content: `${searchForm}${htmlItems.join("")}`,
    textContent: textItems.join("\n"),
    title: window.document.title,
    lang: window.document.documentElement.lang,
  };
}

View File

@ -4,8 +4,8 @@ import axios from "../types/axios";
import { JSDOM } from "jsdom";
import readability from "./readability";
import google from "./google";
import { DOMWindow } from "jsdom";
export default async function handlePage(
url: string,
originalUrl: string,
@ -16,12 +16,13 @@ export default async function handlePage(
}
const response = await axios.get(url);
const window = new JSDOM(response.data, { url: url }).window;
const UrlParsed = new URL(originalUrl);
[...window.document.getElementsByTagName("a")].forEach((link) => {
link.href = `${UrlParsed.origin}/?url=${link.href}${
engine && `&engine=${engine}`
engine ? `&engine=${engine}` : ""
}`;
});
@ -30,7 +31,8 @@ export default async function handlePage(
}
const host = new URL(url).hostname;
return fallback[host](window) || fallback["*"](window);
return fallback[host]?.(window) || fallback["*"](window);
}
interface Engines {
@ -41,10 +43,12 @@ type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
// Registry of the available page handlers, keyed by engine name.
export const engines: Engines = {
readability,
google,
};
// Names of all registered engines, derived from the keys of `engines`.
export const engineList: string[] = Object.keys(engines);
// Default handler selection keyed by the hostname of the requested URL;
// the "*" entry is the handler used when no host-specific entry exists.
const fallback: Engines = {
"www.google.com": engines.google,
"*": engines.readability,
};

View File

@ -7,7 +7,7 @@ import { generateOriginUrl } from "../utils";
export default async function mainRoute(fastify: FastifyInstance) {
fastify.get("/", async (request: GetRequest, reply) => {
const remoteUrl = request.query.url;
const engine = request.query.engine || "readability";
const engine = request.query.engine;
let format: string;

View File

@ -2,6 +2,7 @@ import axios from "axios";
export default axios.create({
headers: {
"User-Agent": "txtdot",
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
},
});