2024-03-26 00:38:08 -05:00
|
|
|
import { Readability } from "@mozilla/readability";
|
|
|
|
import { JSDOM } from "jsdom";
|
|
|
|
import DOMPurify from "dompurify";
|
|
|
|
import { prisma } from "../db";
|
|
|
|
import createFile from "../storage/createFile";
|
|
|
|
import { Link } from "@prisma/client";
|
|
|
|
|
2024-06-27 11:39:03 -05:00
|
|
|
/**
 * Builds a reader-mode archive of a page with Mozilla Readability and stores
 * it for the given link.
 *
 * The HTML is sanitized with DOMPurify, parsed by Readability, serialized to
 * JSON and written through `createFile` to
 * `archives/<collectionId>/<linkId>_readability.json`. The link row is then
 * updated with that path and with the whitespace-normalized article text.
 *
 * Nothing is stored when Readability yields no text, when the link row can no
 * longer be found, or when the serialized article is larger than
 * `READABILITY_MAX_BUFFER` MiB (defaults to 1).
 *
 * @param content - Raw HTML of the page to archive.
 * @param link - Link record the archive belongs to; its `id` and `url` are read.
 */
const handleReadablility = async (content: string, link: Link) => {
  // DOMPurify needs a window object to work with outside of a browser.
  const window = new JSDOM("").window;
  const purify = DOMPurify(window);
  const cleanedUpContent = purify.sanitize(content);

  // jsdom rejects an empty string as `url`, so the option is only passed when
  // the link actually has one.
  const dom = new JSDOM(
    cleanedUpContent,
    link.url ? { url: link.url } : undefined
  );
  const article = new Readability(dom.window.document).parse();

  // Line breaks are turned into spaces first so that the runs of spaces they
  // create are collapsed by the second pass as well.
  const articleText = article?.textContent
    .replace(/(\r\n|\n|\r)/gm, " ") // strip out line breaks
    .replace(/ +(?= )/g, ""); // strip out multiple spaces

  if (!articleText) return;

  // Re-read the collection id from the database rather than trusting the
  // `link` argument.
  const collectionId = (
    await prisma.link.findUnique({
      where: { id: link.id },
      select: { collectionId: true },
    })
  )?.collectionId;

  // Without a row there is nothing to update, and writing the file would
  // leave it orphaned under `archives/undefined/`.
  if (collectionId == null)
    return console.error("Error archiving as Readability: Link not found");

  const data = JSON.stringify(article);

  if (
    Buffer.byteLength(data, "utf8") >
    1024 * 1024 * Number(process.env.READABILITY_MAX_BUFFER || 1)
  )
    return console.error(
      "Error archiving as Readability: Buffer size exceeded"
    );

  const filePath = `archives/${collectionId}/${link.id}_readability.json`;

  await createFile({
    data,
    filePath,
  });

  await prisma.link.update({
    where: { id: link.id },
    data: {
      readable: filePath,
      textContent: articleText,
    },
  });
};
|
|
|
|
|
2024-06-27 11:39:03 -05:00
|
|
|
// Default export of this module: the Readability archiving handler.
export default handleReadablility;
|