el.xwx.moe/lib/api/preservationScheme/handleReadablility.ts

42 lines
1.3 KiB
TypeScript
Raw Normal View History

2024-03-26 00:38:08 -05:00
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import DOMPurify from "dompurify";
import { prisma } from "../db";
import createFile from "../storage/createFile";
import { Link } from "@prisma/client";
2024-06-27 11:39:03 -05:00
/**
 * Builds a reader-mode version of a page and stores it for the given link.
 *
 * The raw HTML is sanitized with DOMPurify and parsed with Mozilla
 * Readability. When the parsed article contains text, the article is written
 * as JSON to `archives/<collectionId>/<linkId>_readability.json` and the link
 * row is updated with that path and the whitespace-normalized text.
 *
 * @param content - Raw HTML of the page to process.
 * @param link - Link record the content belongs to; `id` and `url` are read.
 * @returns A promise that resolves once the file and the link row have been
 *   written, or immediately when there is nothing to store.
 */
const handleReadablility = async (content: string, link: Link) => {
  // DOMPurify needs a DOM implementation; an empty JSDOM window provides one.
  const window = new JSDOM("").window;
  const purify = DOMPurify(window);
  const cleanedUpContent = purify.sanitize(content);

  // Pass `undefined` rather than "" when the link has no URL: JSDOM parses
  // the `url` option and rejects an empty string as an invalid URL.
  const dom = new JSDOM(cleanedUpContent, { url: link.url || undefined });
  const article = new Readability(dom.window.document).parse();

  // Replace line breaks first, then collapse runs of spaces. Doing it the
  // other way round re-introduces consecutive spaces wherever a line break
  // sat next to a space.
  const articleText = article?.textContent
    .replace(/(\r\n|\n|\r)/gm, " ") // strip out line breaks
    .replace(/ +(?= )/g, ""); // strip out multiple spaces

  // Nothing readable was extracted (no article, or empty text).
  if (!articleText) return;

  // Re-read the collection id from the database instead of using the value
  // on `link`.
  // NOTE(review): presumably because the link may have moved collections
  // since `link` was loaded — confirm.
  const collectionId = (
    await prisma.link.findUnique({
      where: { id: link.id },
      select: { collectionId: true },
    })
  )?.collectionId;

  // The link no longer exists: skip instead of writing an orphaned file under
  // `archives/undefined/` and then failing on the update below.
  if (collectionId == null) return;

  const filePath = `archives/${collectionId}/${link.id}_readability.json`;

  await createFile({
    data: JSON.stringify(article),
    filePath,
  });

  await prisma.link.update({
    where: { id: link.id },
    data: {
      readable: filePath,
      textContent: articleText,
    },
  });
};
2024-06-27 11:39:03 -05:00
export default handleReadablility;