- {unescapeString(link?.name || link?.description || "")}
+ {unescapeString(
+ link?.name || link?.description || link?.url || ""
+ )}
{link?.url ? (
{
+ // console.log(doc);
+ // return createFile({
+ // data: doc,
+ // filePath: `archives/${targetLink.collectionId}/${linkId}.mhtml`,
+ // });
+ // };
+
+ // saveDocLocally(doc.data);
+
+ // Readability
+
+ const window = new JSDOM("").window;
+ const purify = DOMPurify(window);
+ const cleanedUpContent = purify.sanitize(content);
+ const dom = new JSDOM(cleanedUpContent, { url: url });
+ const article = new Readability(dom.window.document).parse();
+
+ const articleText = article?.textContent
+ .replace(/ +(?= )/g, "") // strip out multiple spaces
+ .replace(/(\r\n|\n|\r)/gm, " "); // strip out line breaks
+
+ await createFile({
+ data: JSON.stringify(article),
+ filePath: `archives/${targetLink.collectionId}/${linkId}_readability.json`,
+ });
+
+ await prisma.link.update({
+ where: { id: linkId },
+ data: {
+ readabilityPath: `archives/${targetLink.collectionId}/${linkId}_readability.json`,
+ textContent: articleText,
+ },
+ });
+
+ // Screenshot/PDF
+
+ let faulty = false;
+ await page
+ .evaluate(autoScroll, Number(process.env.AUTOSCROLL_TIMEOUT) || 30)
+ .catch((e) => (faulty = true));
+
+ const linkExists = await prisma.link.findUnique({
+ where: { id: linkId },
+ });
+
+ if (linkExists && !faulty) {
+ if (user.archiveAsScreenshot) {
+ const screenshot = await page.screenshot({ fullPage: true });
+ await createFile({
+ data: screenshot,
+ filePath: `archives/${linkExists.collectionId}/${linkId}.png`,
+ });
+ }
+
+ if (user.archiveAsPDF) {
+ const pdf = await page.pdf({
+ width: "1366px",
+ height: "1931px",
+ printBackground: true,
+ margin: { top: "15px", bottom: "15px" },
+ });
+
+ await createFile({
+ data: pdf,
+ filePath: `archives/${linkExists.collectionId}/${linkId}.pdf`,
+ });
+ }
+
+ await prisma.link.update({
+ where: { id: linkId },
+ data: {
+ screenshotPath: user.archiveAsScreenshot
+ ? `archives/${linkExists.collectionId}/${linkId}.png`
+ : null,
+ pdfPath: user.archiveAsPDF
+ ? `archives/${linkExists.collectionId}/${linkId}.pdf`
+ : null,
+ },
+ });
+ } else if (faulty) {
+ await prisma.link.update({
+ where: { id: linkId },
+ data: {
+ screenshotPath: null,
+ pdfPath: null,
+ },
+ });
+ }
+ } catch (err) {
+ console.log(err);
+ throw err;
+ } finally {
+ await browser.close();
+ }
+ }
+}
+
+/**
+ * Scrolls the page down in 100px steps every 100ms until the scrolled distance
+ * reaches `document.body.scrollHeight`, then jumps back to the top.
+ *
+ * This function is handed to `page.evaluate`, so it must stay self-contained:
+ * it may only use its argument and browser globals (`document`, `window`).
+ *
+ * @param AUTOSCROLL_TIMEOUT Maximum time, in seconds, the scrolling may take.
+ * @returns A promise that resolves once the bottom has been reached.
+ * @throws Error("Webpage was too long to be archived.") when the timeout
+ *   elapses before the bottom is reached.
+ */
+const autoScroll = async (AUTOSCROLL_TIMEOUT: number) => {
+  let timeoutTimer: ReturnType<typeof setTimeout> | undefined;
+  let scrollTimer: ReturnType<typeof setInterval> | undefined;
+
+  const timeoutPromise = new Promise<void>((_, reject) => {
+    timeoutTimer = setTimeout(() => {
+      reject(new Error(`Webpage was too long to be archived.`));
+    }, AUTOSCROLL_TIMEOUT * 1000);
+  });
+
+  const scrollingPromise = new Promise<void>((resolve) => {
+    const distance = 100;
+    let totalHeight = 0;
+    scrollTimer = setInterval(() => {
+      // Re-read the height on every tick: lazily loaded content can grow the page.
+      const scrollHeight = document.body.scrollHeight;
+      window.scrollBy(0, distance);
+      totalHeight += distance;
+      if (totalHeight >= scrollHeight) {
+        clearInterval(scrollTimer);
+        window.scroll(0, 0);
+        resolve();
+      }
+    }, 100);
+  });
+
+  try {
+    await Promise.race([scrollingPromise, timeoutPromise]);
+  } finally {
+    // Whichever promise settled first, stop the other timer so the page does
+    // not keep scrolling after a timeout and no stale timeout stays armed.
+    clearTimeout(timeoutTimer);
+    clearInterval(scrollTimer);
+  }
+};
diff --git a/lib/api/migration/migrateToV2.js b/scripts/migration/migrateToV2.js
similarity index 100%
rename from lib/api/migration/migrateToV2.js
rename to scripts/migration/migrateToV2.js
diff --git a/scripts/start.ts b/scripts/start.ts
deleted file mode 100644
index d64ba78..0000000
--- a/scripts/start.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-import shell from "shelljs";
-import urlHandler from "../lib/api/urlHandler";
-
-const command = process.argv[2];
-
-const args = process.argv.slice(3).join(" ");
-
-if (!command) {
- console.log("Please provide a command to run. (start, dev, etc.)");
- process.exit(1);
-}
-
-shell.exec(`yarn ${command || ""} ${args || ""}`);
diff --git a/scripts/worker.ts b/scripts/worker.ts
new file mode 100644
index 0000000..750b561
--- /dev/null
+++ b/scripts/worker.ts
@@ -0,0 +1,82 @@
+import { prisma } from "../lib/api/db";
+import urlHandler from "./lib/urlHandler";
+
+// Command-line arguments after the script path, joined into one string.
+// NOTE(review): not referenced anywhere in the visible part of this script — confirm whether it is still needed.
+const args = process.argv.slice(2).join(" ");
+
+// Number of links fetched per processing batch, read from ARCHIVE_TAKE_COUNT.
+// Falls back to 1 when the variable is unset, empty, non-numeric or 0.
+const archiveTakeCount = Number(process.env.ARCHIVE_TAKE_COUNT || "") || 1;
+
+/**
+ * Archives pending links, oldest first, by passing each one to `urlHandler`.
+ *
+ * A link is pending when any of the following holds:
+ *  - its owner has `archiveAsScreenshot` enabled and `screenshotPath` is null,
+ *  - its owner has `archiveAsPDF` enabled and `pdfPath` is null,
+ *  - `readabilityPath` is null.
+ *
+ * At most `archiveTakeCount` links are handled per call. A failure on one link
+ * is logged and does not stop the remaining links from being processed.
+ *
+ * @param ownerId Restricts the batch to links in collections owned by this
+ *   user. When omitted, links of every user are considered.
+ */
+async function processLinksForUser(ownerId?: Parameters<typeof urlHandler>[2]) {
+  const links = await prisma.link.findMany({
+    where: {
+      // An `undefined` value makes Prisma skip this condition entirely.
+      collection: { ownerId },
+      // The per-format owner preferences live inside the OR branches only, so
+      // a user who disabled one format still gets the other outputs.
+      OR: [
+        {
+          collection: { owner: { archiveAsScreenshot: true } },
+          screenshotPath: null,
+        },
+        {
+          collection: { owner: { archiveAsPDF: true } },
+          pdfPath: null,
+        },
+        {
+          readabilityPath: null,
+        },
+      ],
+    },
+    take: archiveTakeCount,
+    orderBy: { createdAt: "asc" },
+    include: {
+      collection: true,
+    },
+  });
+
+  // Links are processed one at a time, in creation order.
+  for (const link of links) {
+    try {
+      console.log(
+        `Processing link ${link.id} for user ${link.collection.ownerId}`
+      );
+
+      await urlHandler(link.id, link.url || "", link.collection.ownerId);
+    } catch (error) {
+      console.error(
+        `Error processing link ${link.id} for user ${link.collection.ownerId}:`,
+        error
+      );
+    }
+  }
+}
+
+const intervalInMinutes = 10; // Delay between the end of one run and the start of the next
+
+/**
+ * Runs one archiving pass over every user, then schedules the next pass
+ * `intervalInMinutes` later.
+ *
+ * Errors are logged rather than rethrown, so the next pass is always scheduled.
+ */
+async function processLinksForAllUsers() {
+  console.log("Starting the link processing task");
+  try {
+    // Only the id is needed to scope each user's batch.
+    const users = await prisma.user.findMany({ select: { id: true } });
+    for (const user of users) {
+      await processLinksForUser(user.id);
+    }
+  } catch (error) {
+    console.error("Error processing links for users:", error);
+  }
+  setTimeout(processLinksForAllUsers, intervalInMinutes * 60000);
+}
+
+// Initial run; failures are handled inside the function, so the promise is
+// intentionally not awaited.
+void processLinksForAllUsers();
diff --git a/yarn.lock b/yarn.lock
index 813b83e..03b95c3 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -630,6 +630,13 @@
"@babel/helper-validator-identifier" "^7.19.1"
to-fast-properties "^2.0.0"
+"@cspotcode/source-map-support@^0.8.0":
+ version "0.8.1"
+ resolved "https://registry.yarnpkg.com/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz#00629c35a688e05a88b1cda684fb9d5e73f000a1"
+ integrity sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==
+ dependencies:
+ "@jridgewell/trace-mapping" "0.3.9"
+
"@emotion/babel-plugin@^11.10.6":
version "11.10.6"
resolved "https://registry.yarnpkg.com/@emotion/babel-plugin/-/babel-plugin-11.10.6.tgz#a68ee4b019d661d6f37dec4b8903255766925ead"
@@ -859,6 +866,11 @@
resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz#2203b118c157721addfe69d47b70465463066d78"
integrity sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w==
+"@jridgewell/resolve-uri@^3.0.3":
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz#c08679063f279615a3326583ba3a90d1d82cc721"
+ integrity sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==
+
"@jridgewell/set-array@^1.0.1":
version "1.1.2"
resolved "https://registry.yarnpkg.com/@jridgewell/set-array/-/set-array-1.1.2.tgz#7c6cf998d6d20b914c0a55a91ae928ff25965e72"
@@ -874,6 +886,14 @@
resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32"
integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==
+"@jridgewell/trace-mapping@0.3.9":
+ version "0.3.9"
+ resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz#6534fd5933a53ba7cbf3a17615e273a0d1273ff9"
+ integrity sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==
+ dependencies:
+ "@jridgewell/resolve-uri" "^3.0.3"
+ "@jridgewell/sourcemap-codec" "^1.4.10"
+
"@jridgewell/trace-mapping@^0.3.9":
version "0.3.18"
resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.18.tgz#25783b2086daf6ff1dcb53c9249ae480e4dd4cd6"
@@ -1478,6 +1498,26 @@
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-2.0.0.tgz#f544a148d3ab35801c1f633a7441fd87c2e484bf"
integrity sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==
+"@tsconfig/node10@^1.0.7":
+ version "1.0.9"
+ resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.9.tgz#df4907fc07a886922637b15e02d4cebc4c0021b2"
+ integrity sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==
+
+"@tsconfig/node12@^1.0.7":
+ version "1.0.11"
+ resolved "https://registry.yarnpkg.com/@tsconfig/node12/-/node12-1.0.11.tgz#ee3def1f27d9ed66dac6e46a295cffb0152e058d"
+ integrity sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==
+
+"@tsconfig/node14@^1.0.0":
+ version "1.0.3"
+ resolved "https://registry.yarnpkg.com/@tsconfig/node14/-/node14-1.0.3.tgz#e4386316284f00b98435bf40f72f75a09dabf6c1"
+ integrity sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==
+
+"@tsconfig/node16@^1.0.2":
+ version "1.0.4"
+ resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.4.tgz#0b92dcc0cc1c81f6f306a381f28e31b1a56536e9"
+ integrity sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==
+
"@types/bcrypt@^5.0.0":
version "5.0.0"
resolved "https://registry.yarnpkg.com/@types/bcrypt/-/bcrypt-5.0.0.tgz#a835afa2882d165aff5690893db314eaa98b9f20"
@@ -1665,6 +1705,16 @@ acorn-jsx@^5.3.2:
resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz#7ed5bb55908b3b2f1bc55c6af1653bada7f07937"
integrity sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==
+acorn-walk@^8.1.1:
+ version "8.3.1"
+ resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-8.3.1.tgz#2f10f5b69329d90ae18c58bf1fa8fccd8b959a43"
+ integrity sha512-TgUZgYvqZprrl7YldZNoa9OciCAyZR+Ejm9eXzKCmjsF5IKp/wgQ7Z/ZpjpGTIUPwrHQIcYeI8qDh4PsEwxMbw==
+
+acorn@^8.4.1:
+ version "8.11.2"
+ resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.2.tgz#ca0d78b51895be5390a5903c5b3bdcdaf78ae40b"
+ integrity sha512-nc0Axzp/0FILLEVsm4fNwLCwMttvhEI263QtVPQcbpfZZ3ts0hLsZGOpE6czNlid7CJ9MlyH8reXkpsf3YUY4w==
+
acorn@^8.9.0:
version "8.9.0"
resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.9.0.tgz#78a16e3b2bcc198c10822786fa6679e245db5b59"
@@ -1737,6 +1787,11 @@ arg@4.1.0:
resolved "https://registry.yarnpkg.com/arg/-/arg-4.1.0.tgz#583c518199419e0037abb74062c37f8519e575f0"
integrity sha512-ZWc51jO3qegGkVh8Hwpv636EkbesNV5ZNQPCtRa+0qytRYPEs9IYT9qITY9buezqUH5uqyzlWLcufrzU2rffdg==
+arg@^4.1.0:
+ version "4.1.3"
+ resolved "https://registry.yarnpkg.com/arg/-/arg-4.1.3.tgz#269fc7ad5b8e42cb63c896d5666017261c144089"
+ integrity sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==
+
arg@^5.0.2:
version "5.0.2"
resolved "https://registry.yarnpkg.com/arg/-/arg-5.0.2.tgz#c81433cc427c92c4dcf4865142dbca6f15acd59c"
@@ -2155,6 +2210,11 @@ cosmiconfig@^7.0.0:
path-type "^4.0.0"
yaml "^1.10.0"
+create-require@^1.1.0:
+ version "1.1.1"
+ resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
+ integrity sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==
+
cross-spawn@^7.0.2, cross-spawn@^7.0.3:
version "7.0.3"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
@@ -2352,6 +2412,11 @@ didyoumean@^1.2.2:
resolved "https://registry.yarnpkg.com/didyoumean/-/didyoumean-1.2.2.tgz#989346ffe9e839b4555ecf5666edea0d3e8ad037"
integrity sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==
+diff@^4.0.1:
+ version "4.0.2"
+ resolved "https://registry.yarnpkg.com/diff/-/diff-4.0.2.tgz#60f3aecb89d5fae520c11aa19efc2bb982aade7d"
+ integrity sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==
+
dir-glob@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f"
@@ -3731,6 +3796,11 @@ make-dir@^3.1.0:
dependencies:
semver "^6.0.0"
+make-error@^1.1.1:
+ version "1.3.6"
+ resolved "https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2"
+ integrity sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==
+
memoize-one@^6.0.0:
version "6.0.0"
resolved "https://registry.yarnpkg.com/memoize-one/-/memoize-one-6.0.0.tgz#b2591b871ed82948aee4727dc6abceeeac8c1045"
@@ -5069,6 +5139,25 @@ ts-interface-checker@^0.1.9:
resolved "https://registry.yarnpkg.com/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz#784fd3d679722bc103b1b4b8030bcddb5db2a699"
integrity sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==
+ts-node@^10.9.2:
+ version "10.9.2"
+ resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-10.9.2.tgz#70f021c9e185bccdca820e26dc413805c101c71f"
+ integrity sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==
+ dependencies:
+ "@cspotcode/source-map-support" "^0.8.0"
+ "@tsconfig/node10" "^1.0.7"
+ "@tsconfig/node12" "^1.0.7"
+ "@tsconfig/node14" "^1.0.0"
+ "@tsconfig/node16" "^1.0.2"
+ acorn "^8.4.1"
+ acorn-walk "^8.1.1"
+ arg "^4.1.0"
+ create-require "^1.1.0"
+ diff "^4.0.1"
+ make-error "^1.1.1"
+ v8-compile-cache-lib "^3.0.1"
+ yn "3.1.1"
+
tsconfig-paths@^3.14.1:
version "3.14.1"
resolved "https://registry.yarnpkg.com/tsconfig-paths/-/tsconfig-paths-3.14.1.tgz#ba0734599e8ea36c862798e920bcf163277b137a"
@@ -5212,6 +5301,11 @@ uuid@^8.3.2:
resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2"
integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==
+v8-compile-cache-lib@^3.0.1:
+ version "3.0.1"
+ resolved "https://registry.yarnpkg.com/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz#6336e8d71965cb3d35a1bbb7868445a7c05264bf"
+ integrity sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==
+
verror@1.10.0:
version "1.10.0"
resolved "https://registry.yarnpkg.com/verror/-/verror-1.10.0.tgz#3a105ca17053af55d6e270c1f8288682e18da400"
@@ -5356,6 +5450,11 @@ yaml@^2.1.1:
resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.3.1.tgz#02fe0975d23cd441242aa7204e09fc28ac2ac33b"
integrity sha512-2eHWfjaoXgTBC2jNM1LRef62VQa0umtvRiDSk6HSzW7RvS5YtkabJrwYLLEKWBc8a5U2PTSCs+dJjUTJdlHsWQ==
+yn@3.1.1:
+ version "3.1.1"
+ resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"
+ integrity sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==
+
yocto-queue@^0.1.0:
version "0.1.0"
resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b"