feat: google parsing and search
This commit is contained in:
parent
239994ac5e
commit
7000189d6d
35
src/handlers/google.ts
Normal file
35
src/handlers/google.ts
Normal file
@ -0,0 +1,35 @@
|
||||
import { DOMWindow } from "jsdom";
|
||||
import { IHandlerOutput } from "./handler.interface";
|
||||
|
||||
/**
 * Selector used to pick the result anchors inside Google's `#rso` container.
 * NOTE(review): this mirrors Google's current markup and will need updating
 * whenever that markup changes.
 */
const GOOGLE_RESULT_SELECTOR =
  "#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)";

/**
 * Search box rendered above the results. The button navigates back to this
 * proxy with the Google search URL as the `url` parameter. Both the query and
 * the wrapped URL are percent-encoded so characters such as `&`, `#` or `+`
 * in the query survive the round trip.
 */
const GOOGLE_SEARCH_FORM = `
<div id="searchform">
<input type="text" name="q" id="q">
<input type="button" value="Search" onclick="window.location.href = '/?url=' + encodeURIComponent('https://www.google.com/search?q=' + encodeURIComponent(document.getElementById('q').value));">
</div>
`;

/** Escapes the characters that are significant in HTML text and attribute values. */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}

/** Renders one result anchor as a paragraph holding a single, properly closed link. */
function renderGoogleResult(anchor: HTMLAnchorElement): string {
  // The title is expected to be the anchor's second child node. That node may
  // be absent or may not be an element, in which case `innerHTML` is undefined
  // and the anchor's own text is used instead.
  const heading = anchor.childNodes[1] as HTMLHeadingElement | undefined;
  const title = heading?.innerHTML ?? escapeHtml(anchor.textContent ?? "");

  return `<p><a href="${escapeHtml(anchor.href)}">${title}</a></p>`;
}

/**
 * Handler for Google result pages: extracts the result links and renders them
 * as a plain list of links below a small search form.
 *
 * When the selector matches nothing (for example on a page without results)
 * the output contains only the search form; no error is thrown.
 * `querySelectorAll` always returns a list, never `null`, so a null check on
 * its result can never fire.
 *
 * @param window - DOM window of the fetched page.
 * @returns Rendered HTML (`content`), the plain text of the results
 *   (`textContent`), the document title and the document language.
 */
export default async function google(
  window: DOMWindow
): Promise<IHandlerOutput> {
  const anchors = Array.from(
    window.document.querySelectorAll(GOOGLE_RESULT_SELECTOR)
  ).map((node) => node as HTMLAnchorElement);

  const links = anchors.map(renderGoogleResult);

  return {
    content: `${GOOGLE_SEARCH_FORM}${links.join("")}`,
    // Real values instead of the literal placeholder strings.
    textContent: anchors.map((anchor) => anchor.textContent ?? "").join("\n"),
    title: window.document.title,
    lang: window.document.documentElement.lang,
  };
}
|
@ -4,8 +4,8 @@ import axios from "../types/axios";
|
||||
import { JSDOM } from "jsdom";
|
||||
|
||||
import readability from "./readability";
|
||||
import google from "./google";
|
||||
import { DOMWindow } from "jsdom";
|
||||
|
||||
export default async function handlePage(
|
||||
url: string,
|
||||
originalUrl: string,
|
||||
@ -16,12 +16,13 @@ export default async function handlePage(
|
||||
}
|
||||
|
||||
const response = await axios.get(url);
|
||||
|
||||
const window = new JSDOM(response.data, { url: url }).window;
|
||||
const UrlParsed = new URL(originalUrl);
|
||||
|
||||
[...window.document.getElementsByTagName("a")].forEach((link) => {
|
||||
link.href = `${UrlParsed.origin}/?url=${link.href}${
|
||||
engine && `&engine=${engine}`
|
||||
engine ? `&engine=${engine}` : ""
|
||||
}`;
|
||||
});
|
||||
|
||||
@ -30,7 +31,8 @@ export default async function handlePage(
|
||||
}
|
||||
|
||||
const host = new URL(url).hostname;
|
||||
return fallback[host](window) || fallback["*"](window);
|
||||
|
||||
return fallback[host]?.(window) || fallback["*"](window);
|
||||
}
|
||||
|
||||
interface Engines {
|
||||
@ -41,10 +43,12 @@ type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
|
||||
|
||||
/**
 * Page handlers that can be selected by name. Each one takes the DOM window of
 * the fetched page and resolves to the handler output.
 */
export const engines: Engines = {
  readability: readability,
  google: google,
};

/** Names of all registered engines, taken from the keys of `engines`. */
export const engineList: string[] = Object.keys(engines);

/**
 * Handler chosen from the hostname of the requested URL. The `"*"` entry is
 * used for every hostname that has no dedicated entry.
 */
const fallback: Engines = {
  "www.google.com": google,
  "*": readability,
};
|
||||
|
@ -7,7 +7,7 @@ import { generateOriginUrl } from "../utils";
|
||||
export default async function mainRoute(fastify: FastifyInstance) {
|
||||
fastify.get("/", async (request: GetRequest, reply) => {
|
||||
const remoteUrl = request.query.url;
|
||||
const engine = request.query.engine || "readability";
|
||||
const engine = request.query.engine;
|
||||
|
||||
let format: string;
|
||||
|
||||
|
@ -2,6 +2,7 @@ import axios from "axios";
|
||||
|
||||
/**
 * Shared axios instance used for outgoing page requests.
 *
 * Only one `User-Agent` entry is kept: with two entries for the same key the
 * later one silently overrides the earlier one, and TypeScript rejects
 * duplicate property names in an object literal.
 */
export default axios.create({
  headers: {
    // Desktop Firefox User-Agent.
    // NOTE(review): presumably some sites serve different markup to unknown
    // agents - confirm before changing this value.
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
  },
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user