feat: google parsing and search
This commit is contained in:
parent
239994ac5e
commit
7000189d6d
35
src/handlers/google.ts
Normal file
35
src/handlers/google.ts
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
import { DOMWindow } from "jsdom";
|
||||||
|
import { IHandlerOutput } from "./handler.interface";
|
||||||
|
|
||||||
|
/**
 * Escapes text so it can be interpolated into HTML markup or a double-quoted
 * HTML attribute without breaking out of its context.
 *
 * @param value - Raw, untrusted text.
 * @returns The text with `&`, `"`, `<` and `>` replaced by character references.
 */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Handler for Google search pages: extracts the result title links from the
 * parsed page and renders them as a plain list below a minimal search box.
 *
 * A page without any matching result still resolves successfully and yields
 * only the search box; the handler does not throw for an empty result list.
 *
 * @param window - Window of the already parsed Google page.
 * @returns Simplified HTML (`content`), a plain-text listing of the results
 *   (`textContent`), the page title and the document language.
 */
export default async function google(
  window: DOMWindow
): Promise<IHandlerOutput> {
  // querySelectorAll never returns null; an empty list simply means that no
  // results were found, so no null guard is needed here.
  const anchors = [
    ...window.document.querySelectorAll(
      "#rso > div > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > div:nth-child(1) > a:nth-child(1)"
    ),
  ];

  const results = anchors.flatMap((node) => {
    const anchor = node as HTMLAnchorElement;
    // The title heading is expected to be the second child node of the
    // anchor. It may be missing, or not be an element at all, so fall back to
    // the anchor's own text instead of crashing or rendering "undefined".
    const heading = anchor.childNodes[1] as HTMLElement | undefined;
    const fallbackText = anchor.textContent ?? "";
    const html = heading?.innerHTML ?? escapeHtml(fallbackText);
    const text = (heading?.textContent ?? fallbackText).trim();

    // A link without a target or without a visible label is useless.
    if (!anchor.href || html.trim() === "") {
      return [];
    }
    return [{ href: anchor.href, html, text }];
  });

  const content = results
    .map(
      (result) =>
        `<p><a href="${escapeHtml(result.href)}">${result.html}</a></p>`
    )
    .join("");

  // The target URL is itself passed as the `url` query parameter of this
  // service, so the search terms are encoded once for Google and the complete
  // Google URL is encoded once more for the outer query string.
  const searchForm = `
    <div id="searchform">
      <input type="text" name="q" id="q">
      <input type="button" value="Search" onclick="window.location.href = '/?url=' + encodeURIComponent('https://www.google.com/search?q=' + encodeURIComponent(document.getElementById('q').value));">
    </div>
  `;

  return {
    content: `${searchForm}${content}`,
    textContent: results
      .map((result) => `${result.text}\n${result.href}`)
      .join("\n\n"),
    title: window.document.title,
    lang: window.document.documentElement.lang,
  };
}
|
@ -4,8 +4,8 @@ import axios from "../types/axios";
|
|||||||
import { JSDOM } from "jsdom";
|
import { JSDOM } from "jsdom";
|
||||||
|
|
||||||
import readability from "./readability";
|
import readability from "./readability";
|
||||||
|
import google from "./google";
|
||||||
import { DOMWindow } from "jsdom";
|
import { DOMWindow } from "jsdom";
|
||||||
|
|
||||||
export default async function handlePage(
|
export default async function handlePage(
|
||||||
url: string,
|
url: string,
|
||||||
originalUrl: string,
|
originalUrl: string,
|
||||||
@ -16,12 +16,13 @@ export default async function handlePage(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const response = await axios.get(url);
|
const response = await axios.get(url);
|
||||||
|
|
||||||
const window = new JSDOM(response.data, { url: url }).window;
|
const window = new JSDOM(response.data, { url: url }).window;
|
||||||
const UrlParsed = new URL(originalUrl);
|
const UrlParsed = new URL(originalUrl);
|
||||||
|
|
||||||
[...window.document.getElementsByTagName("a")].forEach((link) => {
|
[...window.document.getElementsByTagName("a")].forEach((link) => {
|
||||||
link.href = `${UrlParsed.origin}/?url=${link.href}${
|
link.href = `${UrlParsed.origin}/?url=${link.href}${
|
||||||
engine && `&engine=${engine}`
|
engine ? `&engine=${engine}` : ""
|
||||||
}`;
|
}`;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -30,7 +31,8 @@ export default async function handlePage(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const host = new URL(url).hostname;
|
const host = new URL(url).hostname;
|
||||||
return fallback[host](window) || fallback["*"](window);
|
|
||||||
|
return fallback[host]?.(window) || fallback["*"](window);
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Engines {
|
interface Engines {
|
||||||
@ -41,10 +43,12 @@ type EngineFunction = (window: DOMWindow) => Promise<IHandlerOutput>;
|
|||||||
|
|
||||||
export const engines: Engines = {
|
export const engines: Engines = {
|
||||||
readability,
|
readability,
|
||||||
|
google,
|
||||||
};
|
};
|
||||||
|
|
||||||
export const engineList: string[] = Object.keys(engines);
|
export const engineList: string[] = Object.keys(engines);
|
||||||
|
|
||||||
const fallback: Engines = {
|
const fallback: Engines = {
|
||||||
|
"www.google.com": engines.google,
|
||||||
"*": engines.readability,
|
"*": engines.readability,
|
||||||
};
|
};
|
||||||
|
@ -7,7 +7,7 @@ import { generateOriginUrl } from "../utils";
|
|||||||
export default async function mainRoute(fastify: FastifyInstance) {
|
export default async function mainRoute(fastify: FastifyInstance) {
|
||||||
fastify.get("/", async (request: GetRequest, reply) => {
|
fastify.get("/", async (request: GetRequest, reply) => {
|
||||||
const remoteUrl = request.query.url;
|
const remoteUrl = request.query.url;
|
||||||
const engine = request.query.engine || "readability";
|
const engine = request.query.engine;
|
||||||
|
|
||||||
let format: string;
|
let format: string;
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ import axios from "axios";
|
|||||||
|
|
||||||
export default axios.create({
|
export default axios.create({
|
||||||
headers: {
|
headers: {
|
||||||
"User-Agent": "txtdot",
|
"User-Agent":
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user