diff --git a/.env.sample b/.env.sample
index 70576b3..c3174e6 100644
--- a/.env.sample
+++ b/.env.sample
@@ -15,7 +15,7 @@ NEXT_PUBLIC_DISABLE_REGISTRATION=
NEXT_PUBLIC_CREDENTIALS_ENABLED=
DISABLE_NEW_SSO_USERS=
RE_ARCHIVE_LIMIT=
-NEXT_PUBLIC_MAX_UPLOAD_SIZE=
+NEXT_PUBLIC_MAX_FILE_SIZE=
# AWS S3 Settings
SPACES_KEY=
diff --git a/components/ModalContent/UploadFileModal.tsx b/components/ModalContent/UploadFileModal.tsx
index f6e7af4..a2a128b 100644
--- a/components/ModalContent/UploadFileModal.tsx
+++ b/components/ModalContent/UploadFileModal.tsx
@@ -174,7 +174,7 @@ export default function UploadFileModal({ onClose }: Props) {
/>
- PDF, PNG, JPG (Up to {process.env.NEXT_PUBLIC_MAX_UPLOAD_SIZE || 30}
+ PDF, PNG, JPG (Up to {process.env.NEXT_PUBLIC_MAX_FILE_SIZE || 30}
MB)
diff --git a/lib/api/archiveHandler.ts b/lib/api/archiveHandler.ts
new file mode 100644
index 0000000..9ae230f
--- /dev/null
+++ b/lib/api/archiveHandler.ts
@@ -0,0 +1,272 @@
+import { chromium, devices } from "playwright";
+import { prisma } from "./db";
+import createFile from "./storage/createFile";
+import sendToWayback from "./sendToWayback";
+import { Readability } from "@mozilla/readability";
+import { JSDOM } from "jsdom";
+import DOMPurify from "dompurify";
+import { Collection, Link, User } from "@prisma/client";
+import validateUrlSize from "./validateUrlSize";
+import {
+ pdfAvailable,
+ readabilityAvailable,
+ screenshotAvailable,
+} from "../shared/getArchiveValidity";
+
+type LinksAndCollectionAndOwner = Link & {
+ collection: Collection & {
+ owner: User;
+ };
+};
+
+/**
+ * Archives one link. Images and PDFs (detected from the URL's content type)
+ * are downloaded as-is. Web pages are rendered only when the owner enabled
+ * screenshot or PDF archiving, producing a readability extract plus those
+ * formats. Anything not produced is marked "unavailable"; errors are rethrown.
+ * @param link Link to archive, with its collection and the collection owner.
+ */
+export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
+  const browser = await chromium.launch();
+  const context = await browser.newContext(devices["Desktop Chrome"]);
+  const page = await context.newPage();
+
+  try {
+    const validatedUrl = link.url ? await validateUrlSize(link.url) : undefined;
+    if (validatedUrl === null) throw "File is too large to be stored.";
+
+    const contentType = validatedUrl?.get("content-type");
+    let linkType = "url";
+    let imageExtension = "png";
+
+    if (!link.url) linkType = link.type;
+    else if (contentType === "application/pdf") linkType = "pdf";
+    else if (contentType?.startsWith("image")) {
+      linkType = "image";
+      if (contentType === "image/jpeg") imageExtension = "jpeg"; // else "png"
+    }
+
+    const user = link.collection?.owner;
+
+    // send to archive.org
+    if (user.archiveAsWaybackMachine && link.url) sendToWayback(link.url);
+
+    const targetLink = await prisma.link.update({
+      where: { id: link.id },
+      data: {
+        type: linkType,
+        screenshotPath: user.archiveAsScreenshot ? "pending" : undefined,
+        pdfPath: user.archiveAsPDF ? "pending" : undefined,
+        readabilityPath: "pending",
+        lastPreserved: new Date().toISOString(),
+      },
+    });
+
+    if (linkType === "image") {
+      await imageHandler(link, imageExtension); // archive image (jpeg/png)
+      return;
+    } else if (linkType === "pdf") {
+      await pdfHandler(link); // archive pdf
+      return;
+    } else if (user.archiveAsPDF || user.archiveAsScreenshot) {
+      // archive url
+      link.url &&
+        (await page.goto(link.url, { waitUntil: "domcontentloaded" }));
+      const content = await page.content();
+
+      // TODO Webarchive
+      // const session = await page.context().newCDPSession(page);
+      // const doc = await session.send("Page.captureSnapshot", {
+      //   format: "mhtml",
+      // });
+      // const saveDocLocally = (doc: any) => {
+      //   console.log(doc);
+      //   return createFile({
+      //     data: doc,
+      //     filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`,
+      //   });
+      // };
+      // saveDocLocally(doc.data);
+
+      // Readability
+      const window = new JSDOM("").window;
+      const purify = DOMPurify(window);
+      const cleanedUpContent = purify.sanitize(content);
+      const dom = new JSDOM(cleanedUpContent, { url: link.url || "" });
+      const article = new Readability(dom.window.document).parse();
+      const articleText = article?.textContent
+        .replace(/ +(?= )/g, "") // strip out multiple spaces
+        .replace(/(\r\n|\n|\r)/gm, " "); // strip out line breaks
+      if (articleText && articleText !== "") {
+        await createFile({
+          data: JSON.stringify(article),
+          filePath: `archives/${targetLink.collectionId}/${link.id}_readability.json`,
+        });
+
+        await prisma.link.update({
+          where: { id: link.id },
+          data: {
+            readabilityPath: `archives/${targetLink.collectionId}/${link.id}_readability.json`,
+            textContent: articleText,
+          },
+        });
+      }
+
+      // Screenshot/PDF
+      await page.evaluate(
+        autoScroll,
+        Number(process.env.AUTOSCROLL_TIMEOUT) || 30
+      );
+      // Check if the user hasn't deleted the link by the time we're done scrolling
+      const linkExists = await prisma.link.findUnique({
+        where: { id: link.id },
+      });
+      if (linkExists) {
+        const processingPromises = [];
+        if (user.archiveAsScreenshot) {
+          processingPromises.push(
+            page.screenshot({ fullPage: true }).then((screenshot) => {
+              return createFile({
+                data: screenshot,
+                filePath: `archives/${linkExists.collectionId}/${link.id}.png`,
+              });
+            })
+          );
+        }
+        if (user.archiveAsPDF) {
+          processingPromises.push(
+            page
+              .pdf({
+                width: "1366px",
+                height: "1931px",
+                printBackground: true,
+                margin: { top: "15px", bottom: "15px" },
+              })
+              .then((pdf) => {
+                return createFile({
+                  data: pdf,
+                  filePath: `archives/${linkExists.collectionId}/${link.id}.pdf`,
+                });
+              })
+          );
+        }
+        await Promise.allSettled(processingPromises);
+        await prisma.link.update({
+          where: { id: link.id },
+          data: {
+            screenshotPath: user.archiveAsScreenshot
+              ? `archives/${linkExists.collectionId}/${link.id}.png`
+              : undefined,
+            pdfPath: user.archiveAsPDF
+              ? `archives/${linkExists.collectionId}/${link.id}.pdf`
+              : undefined,
+          },
+        });
+      }
+    }
+  } catch (err) {
+    console.log(err);
+    console.log("Failed Link details:", link);
+    throw err;
+  } finally {
+    const finalLink = await prisma.link.findUnique({ where: { id: link.id } });
+
+    if (finalLink) {
+      // Images are stored under screenshotPath and PDFs under pdfPath, so each
+      // of those types keeps that one slot; web pages ("url") may use all three.
+      const { type } = finalLink;
+      const hasReadability =
+        type === "url" &&
+        !!finalLink.textContent &&
+        readabilityAvailable(finalLink);
+      const hasScreenshot =
+        (type === "url" || type === "image") && screenshotAvailable(finalLink);
+      const hasPdf =
+        (type === "url" || type === "pdf") && pdfAvailable(finalLink);
+
+      await prisma.link.update({
+        where: { id: link.id },
+        data: {
+          readabilityPath: hasReadability ? undefined : "unavailable",
+          screenshotPath: hasScreenshot ? undefined : "unavailable",
+          pdfPath: hasPdf ? undefined : "unavailable",
+        },
+      });
+    }
+
+    await browser.close();
+  }
+}
+
+/**
+ * Evaluated in the page: scrolls to the bottom in 100px steps (every 100ms),
+ * then back to the top. Rejects if that takes over AUTOSCROLL_TIMEOUT seconds.
+ */
+const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
+  await new Promise<void>((resolve, reject) => {
+    let totalHeight = 0;
+    const scrollDown = setInterval(() => {
+      const scrollHeight = document.body.scrollHeight;
+      window.scrollBy(0, 100);
+      totalHeight += 100;
+      if (totalHeight >= scrollHeight) {
+        clearInterval(scrollDown);
+        clearTimeout(timeout); // done: cancel the pending timeout rejection
+        window.scroll(0, 0);
+        resolve();
+      }
+    }, 100);
+    const timeout = setTimeout(() => {
+      clearInterval(scrollDown); // stop scrolling once we give up
+      reject(new Error(`Webpage was too long to be archived.`));
+    }, AUTOSCROLL_TIMEOUT * 1000);
+  });
+};
+
+/**
+ * Downloads the image a link points to and stores it in the archive folder of
+ * the link's collection, recording the stored file under `screenshotPath`.
+ * Nothing is written if the link no longer exists once the download is done.
+ * @param extension File extension used for the stored image (e.g. "png").
+ */
+const imageHandler = async ({ url, id }: Link, extension: string) => {
+  const response = await fetch(url as string);
+  const blob = await response.blob();
+  const bytes = Buffer.from(await blob.arrayBuffer());
+
+  // The link may have been deleted while the download was running.
+  const current = await prisma.link.findUnique({ where: { id } });
+  if (!current) return;
+
+  const filePath = `archives/${current.collectionId}/${id}.${extension}`;
+  await createFile({ data: bytes, filePath });
+
+  await prisma.link.update({
+    where: { id },
+    data: { screenshotPath: filePath },
+  });
+};
+
+/**
+ * Downloads the PDF a link points to and stores it in the archive folder of
+ * the link's collection, recording the stored file under `pdfPath`.
+ * Nothing is written if the link no longer exists once the download is done.
+ */
+const pdfHandler = async ({ url, id }: Link) => {
+  const response = await fetch(url as string);
+  const blob = await response.blob();
+  const bytes = Buffer.from(await blob.arrayBuffer());
+
+  // The link may have been deleted while the download was running.
+  const current = await prisma.link.findUnique({ where: { id } });
+  if (!current) return;
+
+  // One path serves both the stored file and the database record.
+  const filePath = `archives/${current.collectionId}/${id}.pdf`;
+  await createFile({ data: bytes, filePath });
+
+  await prisma.link.update({
+    where: { id },
+    data: { pdfPath: filePath },
+  });
+};
diff --git a/lib/api/controllers/links/postLink.ts b/lib/api/controllers/links/postLink.ts
index eca8a04..912e972 100644
--- a/lib/api/controllers/links/postLink.ts
+++ b/lib/api/controllers/links/postLink.ts
@@ -1,13 +1,10 @@
import { prisma } from "@/lib/api/db";
import { LinkIncludingShortenedCollectionAndTags } from "@/types/global";
import getTitle from "@/lib/shared/getTitle";
-import urlHandler from "@/lib/api/urlHandler";
import { UsersAndCollections } from "@prisma/client";
import getPermission from "@/lib/api/getPermission";
import createFolder from "@/lib/api/storage/createFolder";
-import pdfHandler from "../../pdfHandler";
import validateUrlSize from "../../validateUrlSize";
-import imageHandler from "../../imageHandler";
export default async function postLink(
link: LinkIncludingShortenedCollectionAndTags,
@@ -113,37 +110,5 @@ export default async function postLink(
createFolder({ filePath: `archives/${newLink.collectionId}` });
- newLink.url && linkType === "url"
- ? urlHandler(newLink.id, newLink.url, userId)
- : undefined;
-
- newLink.url && linkType === "pdf"
- ? pdfHandler(newLink.id, newLink.url)
- : undefined;
-
- newLink.url && linkType === "image"
- ? imageHandler(newLink.id, newLink.url, imageExtension)
- : undefined;
-
- !newLink.url && linkType === "pdf"
- ? await prisma.link.update({
- where: { id: newLink.id },
- data: {
- pdfPath: "pending",
- lastPreserved: new Date().toISOString(),
- },
- })
- : undefined;
-
- !newLink.url && linkType === "image"
- ? await prisma.link.update({
- where: { id: newLink.id },
- data: {
- screenshotPath: "pending",
- lastPreserved: new Date().toISOString(),
- },
- })
- : undefined;
-
return { response: newLink, status: 200 };
}
diff --git a/lib/api/imageHandler.ts b/lib/api/imageHandler.ts
index b9437dd..bff89c3 100644
--- a/lib/api/imageHandler.ts
+++ b/lib/api/imageHandler.ts
@@ -9,9 +9,9 @@ export default async function imageHandler(
extension: string,
file?: string
) {
- const pdf = await fetch(url as string).then((res) => res.blob());
+ const image = await fetch(url as string).then((res) => res.blob());
- const buffer = Buffer.from(await pdf.arrayBuffer());
+ const buffer = Buffer.from(await image.arrayBuffer());
const linkExists = await prisma.link.findUnique({
where: { id: linkId },
diff --git a/lib/api/urlHandler.ts b/lib/api/urlHandler.ts
deleted file mode 100644
index 41d7441..0000000
--- a/lib/api/urlHandler.ts
+++ /dev/null
@@ -1,172 +0,0 @@
-import { chromium, devices } from "playwright";
-import { prisma } from "@/lib/api/db";
-import createFile from "@/lib/api/storage/createFile";
-import sendToWayback from "./sendToWayback";
-import { Readability } from "@mozilla/readability";
-import { JSDOM } from "jsdom";
-import DOMPurify from "dompurify";
-
-export default async function urlHandler(
- linkId: number,
- url: string,
- userId: number
-) {
- const user = await prisma.user.findUnique({ where: { id: userId } });
-
- const targetLink = await prisma.link.update({
- where: { id: linkId },
- data: {
- screenshotPath: user?.archiveAsScreenshot ? "pending" : null,
- pdfPath: user?.archiveAsPDF ? "pending" : null,
- readabilityPath: "pending",
- lastPreserved: new Date().toISOString(),
- },
- });
-
- // archive.org
-
- if (user?.archiveAsWaybackMachine) sendToWayback(url);
-
- if (user?.archiveAsPDF || user?.archiveAsScreenshot) {
- const browser = await chromium.launch();
- const context = await browser.newContext(devices["Desktop Chrome"]);
- const page = await context.newPage();
-
- try {
- await page.goto(url, { waitUntil: "domcontentloaded" });
-
- const content = await page.content();
-
- // TODO
- // const session = await page.context().newCDPSession(page);
-
- // const doc = await session.send("Page.captureSnapshot", {
- // format: "mhtml",
- // });
-
- // const saveDocLocally = (doc: any) => {
- // console.log(doc);
- // return createFile({
- // data: doc,
- // filePath: `archives/${targetLink.collectionId}/${linkId}.mhtml`,
- // });
- // };
-
- // saveDocLocally(doc.data);
-
- // Readability
-
- const window = new JSDOM("").window;
- const purify = DOMPurify(window);
- const cleanedUpContent = purify.sanitize(content);
- const dom = new JSDOM(cleanedUpContent, { url: url });
- const article = new Readability(dom.window.document).parse();
-
- const articleText = article?.textContent
- .replace(/ +(?= )/g, "") // strip out multiple spaces
- .replace(/(\r\n|\n|\r)/gm, " "); // strip out line breaks
-
- await createFile({
- data: JSON.stringify(article),
- filePath: `archives/${targetLink.collectionId}/${linkId}_readability.json`,
- });
-
- await prisma.link.update({
- where: { id: linkId },
- data: {
- readabilityPath: `archives/${targetLink.collectionId}/${linkId}_readability.json`,
- textContent: articleText,
- },
- });
-
- // Screenshot/PDF
-
- let faulty = false;
-
- await page
- .evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30)
- .catch((err) => {
- console.log(err);
- faulty = true;
- });
-
- const linkExists = await prisma.link.findUnique({
- where: { id: linkId },
- });
-
- if (linkExists && !faulty) {
- if (user.archiveAsScreenshot) {
- const screenshot = await page.screenshot({ fullPage: true });
- await createFile({
- data: screenshot,
- filePath: `archives/${linkExists.collectionId}/${linkId}.png`,
- });
- }
-
- if (user.archiveAsPDF) {
- const pdf = await page.pdf({
- width: "1366px",
- height: "1931px",
- printBackground: true,
- margin: { top: "15px", bottom: "15px" },
- });
-
- await createFile({
- data: pdf,
- filePath: `archives/${linkExists.collectionId}/${linkId}.pdf`,
- });
- }
-
- await prisma.link.update({
- where: { id: linkId },
- data: {
- screenshotPath: user.archiveAsScreenshot
- ? `archives/${linkExists.collectionId}/${linkId}.png`
- : null,
- pdfPath: user.archiveAsPDF
- ? `archives/${linkExists.collectionId}/${linkId}.pdf`
- : null,
- },
- });
- } else if (faulty) {
- await prisma.link.update({
- where: { id: linkId },
- data: {
- screenshotPath: null,
- pdfPath: null,
- },
- });
- }
- } catch (err) {
- console.log(err);
- throw err;
- } finally {
- await browser.close();
- }
- }
-}
-
-const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
- const timeoutPromise = new Promise((_, reject) => {
- setTimeout(() => {
- reject(new Error(`Webpage was too long to be archived.`));
- }, AUTOSCROLL_TIMEOUT * 1000);
- });
-
- const scrollingPromise = new Promise((resolve) => {
- let totalHeight = 0;
- let distance = 100;
- let scrollDown = setInterval(() => {
- let scrollHeight = document.body.scrollHeight;
- window.scrollBy(0, distance);
- totalHeight += distance;
- if (totalHeight >= scrollHeight) {
- clearInterval(scrollDown);
- window.scroll(0, 0);
- resolve();
- }
- }, 100);
- });
-
- await Promise.race([scrollingPromise, timeoutPromise]);
-};
diff --git a/lib/api/validateUrlSize.ts b/lib/api/validateUrlSize.ts
index cde7e33..02b5bcc 100644
--- a/lib/api/validateUrlSize.ts
+++ b/lib/api/validateUrlSize.ts
@@ -4,7 +4,8 @@ export default async function validateUrlSize(url: string) {
const totalSizeMB =
Number(response.headers.get("content-length")) / Math.pow(1024, 2);
- if (totalSizeMB > 50) return null;
+ if (totalSizeMB > (Number(process.env.NEXT_PUBLIC_MAX_FILE_SIZE) || 30))
+ return null;
else return response.headers;
} catch (err) {
console.log(err);
diff --git a/lib/shared/getArchiveValidity.ts b/lib/shared/getArchiveValidity.ts
index 0650e7c..d2146d3 100644
--- a/lib/shared/getArchiveValidity.ts
+++ b/lib/shared/getArchiveValidity.ts
@@ -1,11 +1,9 @@
-import { Link } from "@prisma/client";
-
export function screenshotAvailable(link: any) {
return (
link &&
link.screenshotPath &&
link.screenshotPath !== "pending" &&
- link.screenshotPath !== "failed"
+ link.screenshotPath !== "unavailable"
);
}
@@ -14,7 +12,7 @@ export function pdfAvailable(link: any) {
link &&
link.pdfPath &&
link.pdfPath !== "pending" &&
- link.pdfPath !== "failed"
+ link.pdfPath !== "unavailable"
);
}
@@ -23,6 +21,6 @@ export function readabilityAvailable(link: any) {
link &&
link.readabilityPath &&
link.readabilityPath !== "pending" &&
- link.readabilityPath !== "failed"
+ link.readabilityPath !== "unavailable"
);
}
diff --git a/pages/api/v1/archives/[linkId].ts b/pages/api/v1/archives/[linkId].ts
index 8686c15..07760be 100644
--- a/pages/api/v1/archives/[linkId].ts
+++ b/pages/api/v1/archives/[linkId].ts
@@ -81,7 +81,7 @@ export default async function Index(req: NextApiRequest, res: NextApiResponse) {
// // await uploadHandler(linkId, )
- // const MAX_UPLOAD_SIZE = Number(process.env.NEXT_PUBLIC_MAX_UPLOAD_SIZE);
+ // const MAX_UPLOAD_SIZE = Number(process.env.NEXT_PUBLIC_MAX_FILE_SIZE);
// const form = formidable({
// maxFields: 1,
diff --git a/pages/api/v1/links/[id]/archive/index.ts b/pages/api/v1/links/[id]/archive/index.ts
index 6c82aed..fe8aaff 100644
--- a/pages/api/v1/links/[id]/archive/index.ts
+++ b/pages/api/v1/links/[id]/archive/index.ts
@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from "next";
-import urlHandler from "@/lib/api/urlHandler";
+import urlHandler from "@/lib/api/archiveHandler";
import { prisma } from "@/lib/api/db";
import verifyUser from "@/lib/api/verifyUser";
import isValidUrl from "@/lib/shared/isValidUrl";
diff --git a/scripts/lib/urlHandler.ts b/scripts/lib/urlHandler.ts
deleted file mode 100644
index c9bdd85..0000000
--- a/scripts/lib/urlHandler.ts
+++ /dev/null
@@ -1,176 +0,0 @@
-import { chromium, devices } from "playwright";
-import { prisma } from "../../lib/api/db";
-import createFile from "../../lib/api/storage/createFile";
-import sendToWayback from "../../lib/api/sendToWayback";
-import { Readability } from "@mozilla/readability";
-import { JSDOM } from "jsdom";
-import DOMPurify from "dompurify";
-import { Collection, Link, User } from "@prisma/client";
-
-type LinksAndCollectionAndOwner = Link & {
- collection: Collection & {
- owner: User;
- };
-};
-
-export default async function urlHandler(link: LinksAndCollectionAndOwner) {
- const user = link.collection?.owner;
-
- const targetLink = await prisma.link.update({
- where: { id: link.id },
- data: {
- screenshotPath: user.archiveAsScreenshot ? "pending" : null,
- pdfPath: user.archiveAsPDF ? "pending" : null,
- readabilityPath: "pending",
- lastPreserved: new Date().toISOString(),
- },
- });
-
- // archive.org
-
- if (user.archiveAsWaybackMachine && link.url) sendToWayback(link.url);
-
- if (user.archiveAsPDF || user.archiveAsScreenshot) {
- const browser = await chromium.launch();
- const context = await browser.newContext(devices["Desktop Chrome"]);
- const page = await context.newPage();
-
- try {
- link.url &&
- (await page.goto(link.url, { waitUntil: "domcontentloaded" }));
-
- const content = await page.content();
-
- // TODO
- // const session = await page.context().newCDPSession(page);
- // const doc = await session.send("Page.captureSnapshot", {
- // format: "mhtml",
- // });
- // const saveDocLocally = (doc: any) => {
- // console.log(doc);
- // return createFile({
- // data: doc,
- // filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`,
- // });
- // };
- // saveDocLocally(doc.data);
-
- // Readability
-
- const window = new JSDOM("").window;
- const purify = DOMPurify(window);
- const cleanedUpContent = purify.sanitize(content);
- const dom = new JSDOM(cleanedUpContent, { url: link.url || "" });
- const article = new Readability(dom.window.document).parse();
-
- const articleText = article?.textContent
- .replace(/ +(?= )/g, "") // strip out multiple spaces
- .replace(/(\r\n|\n|\r)/gm, " "); // strip out line breaks
-
- await createFile({
- data: JSON.stringify(article),
- filePath: `archives/${targetLink.collectionId}/${link.id}_readability.json`,
- });
-
- await prisma.link.update({
- where: { id: link.id },
- data: {
- readabilityPath: `archives/${targetLink.collectionId}/${link.id}_readability.json`,
- textContent: articleText,
- },
- });
-
- // Screenshot/PDF
-
- let faulty = false;
- await page
- .evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30)
- .catch((e) => (faulty = true));
-
- const linkExists = await prisma.link.findUnique({
- where: { id: link.id },
- });
-
- if (linkExists && !faulty) {
- const processingPromises = [];
-
- if (user.archiveAsScreenshot) {
- const screenshot = await page.screenshot({ fullPage: true });
- processingPromises.push(
- createFile({
- data: screenshot,
- filePath: `archives/${linkExists.collectionId}/${link.id}.png`,
- })
- );
- }
-
- if (user.archiveAsPDF) {
- const pdf = await page.pdf({
- width: "1366px",
- height: "1931px",
- printBackground: true,
- margin: { top: "15px", bottom: "15px" },
- });
- processingPromises.push(
- createFile({
- data: pdf,
- filePath: `archives/${linkExists.collectionId}/${link.id}.pdf`,
- })
- );
- }
-
- await Promise.allSettled(processingPromises);
-
- await prisma.link.update({
- where: { id: link.id },
- data: {
- screenshotPath: user.archiveAsScreenshot
- ? `archives/${linkExists.collectionId}/${link.id}.png`
- : null,
- pdfPath: user.archiveAsPDF
- ? `archives/${linkExists.collectionId}/${link.id}.pdf`
- : null,
- },
- });
- } else if (faulty) {
- await prisma.link.update({
- where: { id: link.id },
- data: {
- screenshotPath: null,
- pdfPath: null,
- },
- });
- }
- } catch (err) {
- console.log(err);
- throw err;
- } finally {
- await browser.close();
- }
- }
-}
-
-const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
- const timeoutPromise = new Promise((_, reject) => {
- setTimeout(() => {
- reject(new Error(`Webpage was too long to be archived.`));
- }, AUTOSCROLL_TIMEOUT * 1000);
- });
-
- const scrollingPromise = new Promise((resolve) => {
- let totalHeight = 0;
- let distance = 100;
- let scrollDown = setInterval(() => {
- let scrollHeight = document.body.scrollHeight;
- window.scrollBy(0, distance);
- totalHeight += distance;
- if (totalHeight >= scrollHeight) {
- clearInterval(scrollDown);
- window.scroll(0, 0);
- resolve();
- }
- }, 100);
- });
-
- await Promise.race([scrollingPromise, timeoutPromise]);
-};
diff --git a/scripts/worker.ts b/scripts/worker.ts
index 213fa7c..c5c3be8 100644
--- a/scripts/worker.ts
+++ b/scripts/worker.ts
@@ -1,11 +1,9 @@
import { Collection, Link, User } from "@prisma/client";
import { prisma } from "../lib/api/db";
-import urlHandler from "./lib/urlHandler";
+import archiveHandler from "../lib/api/archiveHandler";
const args = process.argv.slice(2).join(" ");
-console.log(process.env.NEXTAUTH_URL);
-
const archiveTakeCount = Number(process.env.ARCHIVE_TAKE_COUNT || "") || 5;
type LinksAndCollectionAndOwner = Link & {
@@ -136,7 +134,7 @@ async function processBatch() {
`Processing link ${link.url} for user ${link.collection.ownerId}`
);
- await urlHandler(link);
+ await archiveHandler(link);
console.log(
"\x1b[34m%s\x1b[0m",
@@ -152,9 +150,12 @@ async function processBatch() {
};
// Process each link in the batch concurrently
- const processingPromises = [...linksOldToNew, ...linksNewToOld].map((e) =>
- archiveLink(e)
- );
+ const processingPromises = [...linksOldToNew, ...linksNewToOld]
+ // Make sure we don't process the same link twice
+ .filter((value, index, self) => {
+ return self.findIndex((item) => item.id === value.id) === index;
+ })
+ .map((e) => archiveLink(e));
await Promise.allSettled(processingPromises);
}
diff --git a/types/enviornment.d.ts b/types/enviornment.d.ts
index e4d39cf..b34d57a 100644
--- a/types/enviornment.d.ts
+++ b/types/enviornment.d.ts
@@ -9,7 +9,7 @@ declare global {
STORAGE_FOLDER?: string;
AUTOSCROLL_TIMEOUT?: string;
RE_ARCHIVE_LIMIT?: string;
- NEXT_PUBLIC_MAX_UPLOAD_SIZE?: string;
+ NEXT_PUBLIC_MAX_FILE_SIZE?: string;
SPACES_KEY?: string;
SPACES_SECRET?: string;