diff --git a/lib/api/archiveHandler.ts b/lib/api/archiveHandler.ts
index c5e5f09..b158d97 100644
--- a/lib/api/archiveHandler.ts
+++ b/lib/api/archiveHandler.ts
@@ -86,17 +86,18 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
             image:
               user.archiveAsScreenshot && !link.image?.startsWith("archive")
                 ? "pending"
-                : "unavailable",
+                : undefined,
             pdf:
               user.archiveAsPDF && !link.pdf?.startsWith("archive")
                 ? "pending"
-                : "unavailable",
+                : undefined,
+            monolith:
+              user.archiveAsMonolith && !link.monolith?.startsWith("archive")
+                ? "pending"
+                : undefined,
             readable: !link.readable?.startsWith("archive")
               ? "pending"
               : undefined,
-            monolith: !link.monolith?.startsWith("archive")
-              ? "pending"
-              : undefined,
             preview: !link.readable?.startsWith("archive")
               ? "pending"
               : undefined,
diff --git a/scripts/migration/indexArchives.js b/scripts/migration/indexArchives.js
new file mode 100644
index 0000000..6f362da
--- /dev/null
+++ b/scripts/migration/indexArchives.js
@@ -0,0 +1,116 @@
+// This script looks at every link and checks whether its preservations exist in storage
+// (the S3-compatible bucket when SPACES_* is configured, otherwise the local filesystem).
+// If a preservation exists, it updates the link with the path to it.
+// If it doesn't, the link is left untouched.
+
+const { S3 } = require("@aws-sdk/client-s3");
+const { PrismaClient } = require("@prisma/client");
+const { existsSync } = require("fs");
+const util = require("util");
+
+const prisma = new PrismaClient();
+
+const STORAGE_FOLDER = process.env.STORAGE_FOLDER || "data";
+
+const s3Client =
+  process.env.SPACES_ENDPOINT &&
+  process.env.SPACES_REGION &&
+  process.env.SPACES_KEY &&
+  process.env.SPACES_SECRET
+    ? new S3({
+        forcePathStyle: false,
+        endpoint: process.env.SPACES_ENDPOINT,
+        region: process.env.SPACES_REGION,
+        credentials: {
+          accessKeyId: process.env.SPACES_KEY,
+          secretAccessKey: process.env.SPACES_SECRET,
+        },
+      })
+    : undefined;
+
+// Promisified HeadObject, built once instead of on every lookup.
+const headObjectAsync = s3Client
+  ? util.promisify(s3Client.headObject.bind(s3Client))
+  : undefined;
+
+// Every preservation kind to index, in the order it is processed.
+// JPEG comes after PNG, so a JPEG screenshot wins when both files exist.
+const PRESERVATIONS = [
+  { label: "PDF", field: "pdf", suffix: ".pdf" },
+  { label: "screenshot (PNG)", field: "image", suffix: ".png" },
+  { label: "screenshot (JPEG)", field: "image", suffix: ".jpeg" },
+  { label: "readable view", field: "readable", suffix: "_readability.json" },
+  { label: "webpage", field: "monolith", suffix: ".html" },
+];
+
+/**
+ * Checks whether a stored file exists.
+ *
+ * @param {string} path Storage key relative to the bucket / storage folder.
+ * @returns {Promise<boolean>} true when the file exists, false otherwise.
+ */
+async function checkFileExistence(path) {
+  if (!headObjectAsync) {
+    // existsSync never throws; it returns false on any error.
+    return existsSync(STORAGE_FOLDER + "/" + path);
+  }
+
+  // One millisecond delay to avoid rate limiting
+  await new Promise((resolve) => setTimeout(resolve, 1));
+
+  try {
+    await headObjectAsync({
+      Bucket: process.env.SPACES_BUCKET_NAME,
+      Key: path,
+    });
+    return true;
+  } catch (err) {
+    const isMissing =
+      err?.name === "NotFound" || err?.$metadata?.httpStatusCode === 404;
+
+    // A missing object is the expected "not preserved" case. Anything else
+    // (bad credentials, network failure, ...) is still treated as "not found"
+    // so the run continues, but it is reported instead of being hidden.
+    if (!isMissing) console.error(`Error checking ${path}:`, err);
+
+    return false;
+  }
+}
+
+/**
+ * Walks every link once per preservation kind and stores the path of each
+ * preservation that exists in storage on the link.
+ */
+async function indexArchives() {
+  try {
+    // Only the ids are needed to build the storage paths.
+    const links = await prisma.link.findMany({
+      select: { id: true, collectionId: true },
+      orderBy: { id: "asc" },
+    });
+
+    for (const { label, field, suffix } of PRESERVATIONS) {
+      for (const link of links) {
+        const path = `archives/${link.collectionId}/${link.id}${suffix}`;
+
+        if (await checkFileExistence(path)) {
+          await prisma.link.update({
+            where: { id: link.id },
+            data: { [field]: path },
+          });
+          console.log(`${link.id}: indexed ${label} (${path})`);
+        } else {
+          console.log(`${link.id}: no ${label}`);
+        }
+      }
+    }
+  } finally {
+    // Release the database connection even when a lookup or update throws.
+    await prisma.$disconnect();
+  }
+}
+
+indexArchives().catch((e) => {
+  console.error(e);
+  process.exit(1);
+});