diff --git a/components/Modal/Link/PreservedFormats.tsx b/components/Modal/Link/PreservedFormats.tsx index de0a6de..939f0c6 100644 --- a/components/Modal/Link/PreservedFormats.tsx +++ b/components/Modal/Link/PreservedFormats.tsx @@ -28,7 +28,7 @@ export default function PreservedFormats() { }, [links]); useEffect(() => { - let interval: NodeJS.Timer | undefined; + let interval: any; if (link?.screenshotPath === "pending" || link?.pdfPath === "pending") { let isPublicRoute = router.pathname.startsWith("/public") ? true diff --git a/pages/public/links/[id].tsx b/pages/public/links/[id].tsx index 918f4dd..c26cc1c 100644 --- a/pages/public/links/[id].tsx +++ b/pages/public/links/[id].tsx @@ -81,7 +81,7 @@ export default function Index() { }, [link]); useEffect(() => { - let interval: NodeJS.Timer | undefined; + let interval: any; if ( link?.screenshotPath === "pending" || link?.pdfPath === "pending" || diff --git a/scripts/lib/urlHandler.ts b/scripts/lib/urlHandler.ts index 4127fb8..41514af 100644 --- a/scripts/lib/urlHandler.ts +++ b/scripts/lib/urlHandler.ts @@ -5,35 +5,39 @@ import sendToWayback from "../../lib/api/sendToWayback"; import { Readability } from "@mozilla/readability"; import { JSDOM } from "jsdom"; import DOMPurify from "dompurify"; +import { Collection, Link, User } from "@prisma/client"; -export default async function urlHandler( - linkId: number, - url: string, - userId: number -) { - const user = await prisma.user.findUnique({ where: { id: userId } }); +type LinksAndCollectionAndOwner = Link & { + collection: Collection & { + owner: User; + }; +}; + +export default async function urlHandler(link: LinksAndCollectionAndOwner) { + const user = link.collection?.owner; const targetLink = await prisma.link.update({ - where: { id: linkId }, + where: { id: link.id }, data: { - screenshotPath: user?.archiveAsScreenshot ? "pending" : null, - pdfPath: user?.archiveAsPDF ? "pending" : null, + screenshotPath: user.archiveAsScreenshot ? 
"pending" : null, + pdfPath: user.archiveAsPDF ? "pending" : null, readabilityPath: "pending", lastPreserved: new Date().toISOString(), }, }); - // Archive.org + // archive.org - if (user?.archiveAsWaybackMachine) sendToWayback(url); + if (user.archiveAsWaybackMachine && link.url) sendToWayback(link.url); - if (user?.archiveAsPDF || user?.archiveAsScreenshot) { - const browser = await chromium.launch(); + if (user.archiveAsPDF || user.archiveAsScreenshot) { + const browser = await chromium.launch({ headless: true }); const context = await browser.newContext(devices["Desktop Chrome"]); const page = await context.newPage(); try { - await page.goto(url, { waitUntil: "domcontentloaded" }); + link.url && + (await page.goto(link.url, { waitUntil: "domcontentloaded" })); const content = await page.content(); @@ -48,7 +52,7 @@ export default async function urlHandler( // console.log(doc); // return createFile({ // data: doc, - // filePath: `archives/${targetLink.collectionId}/${linkId}.mhtml`, + // filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`, // }); // }; @@ -59,7 +63,7 @@ export default async function urlHandler( const window = new JSDOM("").window; const purify = DOMPurify(window); const cleanedUpContent = purify.sanitize(content); - const dom = new JSDOM(cleanedUpContent, { url: url }); + const dom = new JSDOM(cleanedUpContent, { url: link.url || "" }); const article = new Readability(dom.window.document).parse(); const articleText = article?.textContent @@ -68,13 +72,13 @@ export default async function urlHandler( await createFile({ data: JSON.stringify(article), - filePath: `archives/${targetLink.collectionId}/${linkId}_readability.json`, + filePath: `archives/${targetLink.collectionId}/${link.id}_readability.json`, }); await prisma.link.update({ - where: { id: linkId }, + where: { id: link.id }, data: { - readabilityPath: `archives/${targetLink.collectionId}/${linkId}_readability.json`, + readabilityPath: 
`archives/${targetLink.collectionId}/${link.id}_readability.json`, textContent: articleText, }, }); @@ -87,16 +91,20 @@ export default async function urlHandler( .catch((e) => (faulty = true)); const linkExists = await prisma.link.findUnique({ - where: { id: linkId }, + where: { id: link.id }, }); if (linkExists && !faulty) { + const processingPromises = []; + if (user.archiveAsScreenshot) { const screenshot = await page.screenshot({ fullPage: true }); - await createFile({ - data: screenshot, - filePath: `archives/${linkExists.collectionId}/${linkId}.png`, - }); + processingPromises.push( + createFile({ + data: screenshot, + filePath: `archives/${linkExists.collectionId}/${link.id}.png`, + }) + ); } if (user.archiveAsPDF) { @@ -106,27 +114,30 @@ export default async function urlHandler( printBackground: true, margin: { top: "15px", bottom: "15px" }, }); - - await createFile({ - data: pdf, - filePath: `archives/${linkExists.collectionId}/${linkId}.pdf`, - }); + processingPromises.push( + createFile({ + data: pdf, + filePath: `archives/${linkExists.collectionId}/${link.id}.pdf`, + }) + ); } + await Promise.allSettled(processingPromises); + await prisma.link.update({ - where: { id: linkId }, + where: { id: link.id }, data: { screenshotPath: user.archiveAsScreenshot - ? `archives/${linkExists.collectionId}/${linkId}.png` + ? `archives/${linkExists.collectionId}/${link.id}.png` : null, pdfPath: user.archiveAsPDF - ? `archives/${linkExists.collectionId}/${linkId}.pdf` + ? 
`archives/${linkExists.collectionId}/${link.id}.pdf` : null, }, }); } else if (faulty) { await prisma.link.update({ - where: { id: linkId }, + where: { id: link.id }, data: { screenshotPath: null, pdfPath: null, diff --git a/scripts/worker.ts b/scripts/worker.ts index 750b561..025be51 100644 --- a/scripts/worker.ts +++ b/scripts/worker.ts @@ -1,13 +1,20 @@ +import { Collection, Link, User } from "@prisma/client"; import { prisma } from "../lib/api/db"; import urlHandler from "./lib/urlHandler"; const args = process.argv.slice(2).join(" "); -const archiveTakeCount = Number(process.env.ARCHIVE_TAKE_COUNT || "") || 1; +console.log(process.env.NEXTAUTH_URL); -// Function to process links for a given user -async function processLinksForUser() { - // Fetch the first 'maxLinksPerUser' links for the user +const archiveTakeCount = Number(process.env.ARCHIVE_TAKE_COUNT || "") || 5; + +type LinksAndCollectionAndOwner = Link & { + collection: Collection & { + owner: User; + }; +}; + +async function processBatch() { const links = await prisma.link.findMany({ where: { OR: [ @@ -19,6 +26,15 @@ async function processLinksForUser() { }, screenshotPath: null, }, + { + collection: { + owner: { + archiveAsScreenshot: true, + }, + }, + screenshotPath: "pending", + }, + /////////////////////// { collection: { owner: { @@ -27,56 +43,79 @@ async function processLinksForUser() { }, pdfPath: null, }, + { + collection: { + owner: { + archiveAsPDF: true, + }, + }, + pdfPath: "pending", + }, + /////////////////////// { readabilityPath: null, }, - ], - collection: { - owner: { - archiveAsPDF: true, - archiveAsScreenshot: true, + { + readabilityPath: "pending", }, - }, + ], }, take: archiveTakeCount, orderBy: { createdAt: "asc" }, include: { - collection: true, + collection: { + include: { + owner: true, + }, + }, }, }); - // Process each link using the urlHandler function - for (const link of links) { + const archiveLink = async (link: LinksAndCollectionAndOwner) => { try { console.log( - 
`Processing link ${link.id} for user ${link.collection.ownerId}` + "\x1b[34m%s\x1b[0m", + `Processing link ${link.url} for user ${link.collection.ownerId}` ); - await urlHandler(link.id, link.url || "", link.collection.ownerId); + await urlHandler(link); + + console.log( + "\x1b[34m%s\x1b[0m", + `Succeeded processing link ${link.url} for user ${link.collection.ownerId}.` + ); } catch (error) { console.error( - `Error processing link ${link.id} for user ${link.collection.ownerId}:`, + "\x1b[34m%s\x1b[0m", + `Error processing link ${link.url} for user ${link.collection.ownerId}:`, error ); } - } + }; + + // Process each link in the batch concurrently + const processingPromises = links.map((e) => archiveLink(e)); + + await Promise.allSettled(processingPromises); } -const intervalInMinutes = 10; // Set the interval for the worker to run +const intervalInMinutes = Number(process.env.ARCHIVE_SCRIPT_INTERVAL) || 10; -// Main function to iterate over all users and process their links -async function processLinksForAllUsers() { - console.log("Starting the link processing task"); - try { - const users = await prisma.user.findMany(); // Fetch all users - for (const user of users) { - await processLinksForUser(); // Process links for each user +function delay(sec: number) { + return new Promise((resolve) => setTimeout(resolve, sec * 1000)); +} + +async function init() { + console.log("\x1b[34m%s\x1b[0m", "Starting the link processing task"); + while (true) { + try { + await processBatch(); + await delay(intervalInMinutes * 60); + } catch (error) { + console.error("\x1b[34m%s\x1b[0m", "Error processing links:", error); + await delay(intervalInMinutes * 60); } - } catch (error) { - console.error("Error processing links for users:", error); } - setTimeout(processLinksForAllUsers, intervalInMinutes * 60000); } -// Initial run -processLinksForAllUsers(); +init();