Add Single file archive method.

This commit is contained in:
András Rutkai 2024-03-15 19:41:41 +01:00
parent 5990d4ce2d
commit 5fe6a5b19a
32 changed files with 211 additions and 31 deletions

View File

@ -45,6 +45,10 @@ PROXY_BYPASS=
PDF_MARGIN_TOP=
PDF_MARGIN_BOTTOM=
# Singlefile archive settings
SINGLEFILE_ARCHIVE_COMMAND= # single-file "{{URL}}" --dump-content
SINGLEFILE_ARCHIVE_HTTP_API= # http://singlefile:3000/
#
# SSO Providers
#

View File

@ -57,7 +57,7 @@ We've forked the old version from the current repository into [this repo](https:
## Features
- 📸 Auto capture a screenshot, PDF, and readable view of each webpage.
- 📸 Auto capture a screenshot, PDF, single HTML file, and readable view of each webpage.
- 🏛️ Send your webpage to Wayback Machine ([archive.org](https://archive.org)) for a snapshot. (Optional)
- 📂 Organize links by collection, sub-collection, name, description and multiple tags.
- 👥 Collaborate on gathering links in a collection.

View File

@ -37,6 +37,7 @@ export default function CollectionCard({ collection, className }: Props) {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
@ -52,6 +53,7 @@ export default function CollectionCard({ collection, className }: Props) {
username: account.username as string,
image: account.image as string,
archiveAsScreenshot: account.archiveAsScreenshot as boolean,
archiveAsSinglefile: account.archiveAsSinglefile as boolean,
archiveAsPDF: account.archiveAsPDF as boolean,
});
}

View File

@ -43,6 +43,8 @@ export default function LinkGroupedIconURL({
<i className={`bi-file-earmark-pdf`}></i>
) : link.type === "image" ? (
<i className={`bi-file-earmark-image`}></i>
) : link.type === "singlefile" ? (
<i className={`bi-filetype-html`}></i>
) : undefined}
<p className="truncate bg-white text-black mr-1">
<p className="text-sm">{shortendURL}</p>

View File

@ -42,6 +42,8 @@ export default function LinkIcon({
<i className={`bi-file-earmark-pdf ${iconClasses}`}></i>
) : link.type === "image" ? (
<i className={`bi-file-earmark-image ${iconClasses}`}></i>
) : link.type === "singlefile" ? (
<i className={`bi-filetype-html ${iconClasses}`}></i>
) : undefined}
</>
);

View File

@ -65,6 +65,7 @@ export default function EditCollectionSharingModal({
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});

View File

@ -29,6 +29,7 @@ export default function NewLinkModal({ onClose }: Props) {
image: "",
pdf: "",
readable: "",
singlefile: "",
textContent: "",
collection: {
name: "",

View File

@ -12,6 +12,7 @@ import { useSession } from "next-auth/react";
import {
pdfAvailable,
readabilityAvailable,
singlefileAvailable,
screenshotAvailable,
} from "@/lib/shared/getArchiveValidity";
import PreservedFormatRow from "@/components/PreserverdFormatRow";
@ -42,6 +43,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
@ -59,6 +61,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
username: account.username as string,
image: account.image as string,
archiveAsScreenshot: account.archiveAsScreenshot as boolean,
// Singlefile has its own account flag; it must not be derived from the screenshot setting.
archiveAsSinglefile: account.archiveAsSinglefile as boolean,
archiveAsPDF: account.archiveAsPDF as boolean,
});
}
@ -73,6 +76,9 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
// Each enabled format must have left the "pending" state before the link counts
// as ready. Screenshots are stored on link.image, so that is the field checked
// for the screenshot setting (link.pdf belongs to the PDF setting below).
(collectionOwner.archiveAsScreenshot === true
  ? link.image && link.image !== "pending"
  : true) &&
(collectionOwner.archiveAsSinglefile === true
  ? link.singlefile && link.singlefile !== "pending"
  : true) &&
(collectionOwner.archiveAsPDF === true
  ? link.pdf && link.pdf !== "pending"
  : true) &&
@ -109,7 +115,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
clearInterval(interval);
}
};
}, [link?.image, link?.pdf, link?.readable]);
}, [link?.image, link?.pdf, link?.readable, link?.singlefile]);
const updateArchive = async () => {
const load = toast.loading("Sending request...");
@ -140,7 +146,8 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
{isReady() &&
(screenshotAvailable(link) ||
pdfAvailable(link) ||
readabilityAvailable(link)) ? (
readabilityAvailable(link) ||
singlefileAvailable(link)) ? (
<p className="mb-3">
The following formats are available for this link:
</p>
@ -183,6 +190,16 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
activeLink={link}
/>
) : undefined}
{singlefileAvailable(link) ? (
<PreservedFormatRow
name={"Singlefile"}
icon={"bi-filetype-html"}
format={ArchivedFormat.singlefile}
activeLink={link}
downloadable={true}
/>
) : undefined}
</>
) : (
<div

View File

@ -31,6 +31,7 @@ export default function UploadFileModal({ onClose }: Props) {
image: "",
pdf: "",
readable: "",
singlefile: "",
textContent: "",
collection: {
name: "",
@ -101,7 +102,7 @@ export default function UploadFileModal({ onClose }: Props) {
const submit = async () => {
if (!submitLoader && file) {
let fileType: ArchivedFormat | null = null;
let linkType: "url" | "image" | "pdf" | null = null;
let linkType: "url" | "image" | "singlefile" | "pdf" | null = null;
if (file?.type === "image/jpg" || file.type === "image/jpeg") {
fileType = ArchivedFormat.jpeg;
@ -109,6 +110,9 @@ export default function UploadFileModal({ onClose }: Props) {
} else if (file.type === "image/png") {
fileType = ArchivedFormat.png;
linkType = "image";
} else if (file.type === "text/html") {
fileType = ArchivedFormat.singlefile;
linkType = "singlefile";
} else if (file.type === "application/pdf") {
fileType = ArchivedFormat.pdf;
linkType = "pdf";
@ -165,13 +169,13 @@ export default function UploadFileModal({ onClose }: Props) {
<label className="btn h-10 btn-sm w-full border border-neutral-content hover:border-neutral-content flex justify-between">
<input
type="file"
accept=".pdf,.png,.jpg,.jpeg"
accept=".pdf,.png,.jpg,.jpeg,.html"
className="cursor-pointer custom-file-input"
onChange={(e) => e.target.files && setFile(e.target.files[0])}
/>
</label>
<p className="text-xs font-semibold mt-2">
PDF, PNG, JPG (Up to {process.env.NEXT_PUBLIC_MAX_FILE_SIZE || 30}
PDF, PNG, JPG, HTML (Up to {process.env.NEXT_PUBLIC_MAX_FILE_SIZE || 30}
MB)
</p>
</div>

View File

@ -1,10 +1,6 @@
import React, { useEffect, useState } from "react";
import useLinkStore from "@/store/links";
import {
ArchivedFormat,
LinkIncludingShortenedCollectionAndTags,
} from "@/types/global";
import toast from "react-hot-toast";
import { ArchivedFormat, LinkIncludingShortenedCollectionAndTags } from "@/types/global";
import Link from "next/link";
import { useRouter } from "next/router";
import { useSession } from "next-auth/react";
@ -61,7 +57,7 @@ export default function PreservedFormatRow({
clearInterval(interval);
}
};
}, [link?.image, link?.pdf, link?.readable]);
}, [link?.image, link?.pdf, link?.readable, link?.singlefile]);
const handleDownload = () => {
const path = `/api/v1/archives/${link?.id}?format=${format}`;
@ -69,10 +65,10 @@ export default function PreservedFormatRow({
.then((response) => {
if (response.ok) {
// Create a temporary link and click it to trigger the download
const link = document.createElement("a");
link.href = path;
link.download = format === ArchivedFormat.pdf ? "PDF" : "Screenshot";
link.click();
const anchorElement = document.createElement("a");
anchorElement.href = path;
anchorElement.download = format === ArchivedFormat.singlefile ? (link.name ?? 'index') : format === ArchivedFormat.pdf ? "PDF" : "Screenshot";
anchorElement.click();
} else {
console.error("Failed to download file");
}

View File

@ -65,9 +65,11 @@ export default function ReadableView({ link }: Props) {
(link?.image === "pending" ||
link?.pdf === "pending" ||
link?.readable === "pending" ||
link?.singlefile === "pending" ||
!link?.image ||
!link?.pdf ||
!link?.readable)
!link?.readable ||
!link?.singlefile)
) {
interval = setInterval(() => getLink(link.id as number), 5000);
} else {
@ -81,7 +83,7 @@ export default function ReadableView({ link }: Props) {
clearInterval(interval);
}
};
}, [link?.image, link?.pdf, link?.readable]);
}, [link?.image, link?.pdf, link?.readable, link?.singlefile]);
const rgbToHex = (r: number, g: number, b: number): string =>
"#" +

View File

@ -19,3 +19,6 @@ services:
- ./data:/data/data
depends_on:
- postgres
singlefile:
image: rutkai/single-file-web:latest
container_name: singlefile

View File

@ -9,6 +9,9 @@ import { Collection, Link, User } from "@prisma/client";
import validateUrlSize from "./validateUrlSize";
import removeFile from "./storage/removeFile";
import Jimp from "jimp";
import { execSync } from "child_process";
import axios from "axios";
import { Agent } from "http";
import createFolder from "./storage/createFolder";
type LinksAndCollectionAndOwner = Link & {
@ -93,6 +96,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
readable: !link.readable?.startsWith("archive")
? "pending"
: undefined,
singlefile: !link.singlefile?.startsWith("archive")
? "pending"
: undefined,
preview: !link.readable?.startsWith("archive")
? "pending"
: undefined,
@ -113,19 +119,46 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
const content = await page.content();
// TODO single file
// const session = await page.context().newCDPSession(page);
// const doc = await session.send("Page.captureSnapshot", {
// format: "mhtml",
// });
// const saveDocLocally = (doc: any) => {
// console.log(doc);
// return createFile({
// data: doc,
// filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`,
// });
// };
// saveDocLocally(doc.data);
// Singlefile
// Saves the page as one self-contained HTML file, produced either by a local
// command (SINGLEFILE_ARCHIVE_COMMAND, with a {{URL}} placeholder) or by an
// HTTP service (SINGLEFILE_ARCHIVE_HTTP_API). Failures are logged and the
// step is skipped; they never abort the rest of the archiving run.
if (user.archiveAsSinglefile && !link.singlefile?.startsWith("archive")) {
  const command = process.env.SINGLEFILE_ARCHIVE_COMMAND;
  const httpApi = process.env.SINGLEFILE_ARCHIVE_HTTP_API;
  const singlefilePath = `archives/${targetLink.collectionId}/${link.id}.html`;

  // link.url is user-supplied. Only absolute http(s) URLs are handed to the
  // archiver, so schemes such as file: can never be captured.
  let pageUrl: URL | undefined;
  try {
    const candidate = new URL(link.url ?? "");
    if (candidate.protocol === "http:" || candidate.protocol === "https:") {
      pageUrl = candidate;
    }
  } catch {
    // Not a parseable absolute URL; reported below.
  }

  if (!command && !httpApi) {
    console.error("No SINGLEFILE_ARCHIVE_COMMAND or SINGLEFILE_ARCHIVE_HTTP_API defined.");
  } else if (command && !command.includes("{{URL}}")) {
    console.error("Invalid SINGLEFILE_ARCHIVE_COMMAND. Missing {{URL}}");
  } else if (!pageUrl) {
    console.error("Skipping Singlefile archive: link URL is not a valid http(s) URL.");
  } else if (command) {
    try {
      // execSync runs the template through a shell, so the URL must not carry
      // shell syntax. Percent-encode everything outside a conservative URL
      // allowlist: this removes $, backticks, backslashes and both quote
      // characters, which makes the value inert inside a quoted placeholder
      // such as "{{URL}}". The template is expected to quote the placeholder;
      // characters like & and ; cannot be encoded without changing the URL.
      const shellSafeUrl = pageUrl.href.replace(
        /[^A-Za-z0-9\-._~:\/?#\[\]@&=+,;%]/g,
        (ch) =>
          "%" + ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, "0")
      );

      // split/join substitutes every placeholder and, unlike String.replace
      // with a string replacement, never interprets "$" sequences.
      // NOTE(review): execSync blocks the event loop until the command exits
      // or the 60s timeout fires; consider an async exec if that matters here.
      const html = execSync(command.split("{{URL}}").join(shellSafeUrl), {
        timeout: 60000,
        maxBuffer: 1024 * 1024 * 100,
      });

      await createFile({
        data: html,
        filePath: singlefilePath,
      });
    } catch (err) {
      console.error("Error running SINGLEFILE_ARCHIVE_COMMAND:", err);
    }
  } else if (httpApi) {
    try {
      // Encode the form body explicitly so it really matches the declared
      // Content-Type, independent of how the installed axios version
      // serialises plain objects.
      const response = await axios.post(
        httpApi,
        new URLSearchParams({ url: pageUrl.href }).toString(),
        {
          headers: {
            "Content-Type": "application/x-www-form-urlencoded",
          },
          httpAgent: new Agent({ keepAlive: false }),
        }
      );

      await createFile({
        data: response.data,
        filePath: singlefilePath,
      });
    } catch (err) {
      console.error("Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API:", err);
    }
  }
}
// Readability
const window = new JSDOM("").window;
@ -284,6 +317,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
image: user.archiveAsScreenshot
? `archives/${linkExists.collectionId}/${link.id}.png`
: undefined,
singlefile: user.archiveAsSinglefile
? `archives/${linkExists.collectionId}/${link.id}.html`
: undefined,
pdf: user.archiveAsPDF
? `archives/${linkExists.collectionId}/${link.id}.pdf`
: undefined,
@ -314,6 +350,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
image: !finalLink.image?.startsWith("archives")
? "unavailable"
: undefined,
singlefile: !finalLink.singlefile?.startsWith("archives")
? "unavailable"
: undefined,
pdf: !finalLink.pdf?.startsWith("archives")
? "unavailable"
: undefined,
@ -324,6 +363,7 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
});
else {
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.png` });
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.html` });
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.pdf` });
removeFile({
filePath: `archives/${link.collectionId}/${link.id}_readability.json`,

View File

@ -52,6 +52,9 @@ export default async function deleteLinksById(
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
});
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
});
}
return { response: deletedLinks, status: 200 };

View File

@ -30,6 +30,9 @@ export default async function deleteLink(userId: number, linkId: number) {
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
});
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
});
return { response: deleteLink, status: 200 };
}

View File

@ -160,6 +160,11 @@ export default async function updateLinkById(
`archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
`archives/${data.collection.id}/${linkId}_readability.json`
);
await moveFile(
`archives/${collectionIsAccessible?.id}/${linkId}.html`,
`archives/${data.collection.id}/${linkId}.html`
);
}
return { response: updatedLink, status: 200 };

View File

@ -75,6 +75,7 @@ export default async function getPublicUser(
username: lessSensitiveInfo.username,
image: lessSensitiveInfo.image,
archiveAsScreenshot: lessSensitiveInfo.archiveAsScreenshot,
archiveAsSinglefile: lessSensitiveInfo.archiveAsSinglefile,
archiveAsPDF: lessSensitiveInfo.archiveAsPDF,
};

View File

@ -187,6 +187,7 @@ export default async function updateUserById(
(value, index, self) => self.indexOf(value) === index
),
archiveAsScreenshot: data.archiveAsScreenshot,
archiveAsSinglefile: data.archiveAsSinglefile,
archiveAsPDF: data.archiveAsPDF,
archiveAsWaybackMachine: data.archiveAsWaybackMachine,
linksRouteTo: data.linksRouteTo,

View File

@ -10,6 +10,7 @@ import util from "util";
type ReturnContentTypes =
| "text/plain"
| "text/html"
| "image/jpeg"
| "image/png"
| "application/pdf"
@ -61,6 +62,8 @@ export default async function readFile(filePath: string) {
contentType = "image/png";
} else if (filePath.endsWith("_readability.json")) {
contentType = "application/json";
} else if (filePath.endsWith(".html")) {
contentType = "text/html";
} else {
// if (filePath.endsWith(".jpg"))
contentType = "image/jpeg";
@ -88,6 +91,8 @@ export default async function readFile(filePath: string) {
contentType = "image/png";
} else if (filePath.endsWith("_readability.json")) {
contentType = "application/json";
} else if (filePath.endsWith(".html")) {
contentType = "text/html";
} else {
// if (filePath.endsWith(".jpg"))
contentType = "image/jpeg";

View File

@ -7,6 +7,7 @@ import { LinksRouteTo } from "@prisma/client";
import {
pdfAvailable,
readabilityAvailable,
singlefileAvailable,
screenshotAvailable,
} from "../shared/getArchiveValidity";
@ -27,6 +28,10 @@ export const generateLinkHref = (
if (!readabilityAvailable(link)) return link.url || "";
return `/preserved/${link?.id}?format=${ArchivedFormat.readability}`;
case LinksRouteTo.SINGLEFILE:
if (!singlefileAvailable(link)) return link.url || "";
return `/preserved/${link?.id}?format=${ArchivedFormat.singlefile}`;
case LinksRouteTo.SCREENSHOT:
if (!screenshotAvailable(link)) return link.url || "";

View File

@ -28,6 +28,17 @@ export function readabilityAvailable(
);
}
/**
 * Tells whether a link has a Singlefile archive that can be opened.
 *
 * @param link - The link whose `singlefile` field is inspected.
 * @returns `true` when `singlefile` holds a value other than the `"pending"`
 * and `"unavailable"` markers, `false` for those markers, and otherwise the
 * falsy value that stopped the check (the link itself or its `singlefile`).
 */
export function singlefileAvailable(
  link: LinkIncludingShortenedCollectionAndTags
) {
  if (!link) return link;

  const archive = link.singlefile;
  if (!archive) return archive;

  if (archive === "pending") return false;
  return archive !== "unavailable";
}
export function previewAvailable(link: any) {
return (
link &&

View File

@ -27,6 +27,7 @@ export default async function Index(req: NextApiRequest, res: NextApiResponse) {
else if (format === ArchivedFormat.jpeg) suffix = ".jpeg";
else if (format === ArchivedFormat.pdf) suffix = ".pdf";
else if (format === ArchivedFormat.readability) suffix = "_readability.json";
else if (format === ArchivedFormat.singlefile) suffix = ".html";
//@ts-ignore
if (!linkId || !suffix)

View File

@ -76,6 +76,7 @@ const deleteArchivedFiles = async (link: Link & { collection: Collection }) => {
image: null,
pdf: null,
readable: null,
singlefile: null,
preview: null,
},
});
@ -89,6 +90,9 @@ const deleteArchivedFiles = async (link: Link & { collection: Collection }) => {
await removeFile({
filePath: `archives/${link.collection.id}/${link.id}_readability.json`,
});
await removeFile({
filePath: `archives/${link.collection.id}/${link.id}.html`,
});
await removeFile({
filePath: `archives/preview/${link.collection.id}/${link.id}.png`,
});

View File

@ -61,6 +61,7 @@ export default function Index() {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
@ -78,6 +79,7 @@ export default function Index() {
username: account.username as string,
image: account.image as string,
archiveAsScreenshot: account.archiveAsScreenshot as boolean,
// Take the Singlefile flag from the account's own Singlefile setting, not from the screenshot one.
archiveAsSinglefile: account.archiveAsSinglefile as boolean,
archiveAsPDF: account.archiveAsPDF as boolean,
});
}

View File

@ -36,6 +36,12 @@ export default function Index() {
{link && Number(router.query.format) === ArchivedFormat.readability && (
<ReadableView link={link} />
)}
{link && Number(router.query.format) === ArchivedFormat.singlefile && (
  // The archive is third-party (or user-uploaded) HTML served from this app's
  // own /api path. The empty sandbox stops any script inside it from running
  // with the app's origin.
  <iframe
    src={`/api/v1/archives/${link.id}?format=${ArchivedFormat.singlefile}`}
    className="w-full h-screen border-none"
    sandbox=""
  ></iframe>
)}
{link && Number(router.query.format) === ArchivedFormat.pdf && (
<iframe
src={`/api/v1/archives/${link.id}?format=${ArchivedFormat.pdf}`}

View File

@ -53,6 +53,7 @@ export default function PublicCollections() {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});

View File

@ -20,6 +20,8 @@ export default function Appearance() {
useState<boolean>(false);
const [archiveAsScreenshot, setArchiveAsScreenshot] =
useState<boolean>(false);
const [archiveAsSinglefile, setArchiveAsSinglefile] =
useState<boolean>(false);
const [archiveAsPDF, setArchiveAsPDF] = useState<boolean>(false);
const [archiveAsWaybackMachine, setArchiveAsWaybackMachine] =
useState<boolean>(false);
@ -31,6 +33,7 @@ export default function Appearance() {
setUser({
...account,
archiveAsScreenshot,
archiveAsSinglefile,
archiveAsPDF,
archiveAsWaybackMachine,
linksRouteTo,
@ -39,6 +42,7 @@ export default function Appearance() {
}, [
account,
archiveAsScreenshot,
archiveAsSinglefile,
archiveAsPDF,
archiveAsWaybackMachine,
linksRouteTo,
@ -52,6 +56,7 @@ export default function Appearance() {
useEffect(() => {
if (!objectIsEmpty(account)) {
setArchiveAsScreenshot(account.archiveAsScreenshot);
setArchiveAsSinglefile(account.archiveAsSinglefile);
setArchiveAsPDF(account.archiveAsPDF);
setArchiveAsWaybackMachine(account.archiveAsWaybackMachine);
setLinksRouteTo(account.linksRouteTo);
@ -129,6 +134,12 @@ export default function Appearance() {
onClick={() => setArchiveAsScreenshot(!archiveAsScreenshot)}
/>
<Checkbox
label="Singlefile"
state={archiveAsSinglefile}
onClick={() => setArchiveAsSinglefile(!archiveAsSinglefile)}
/>
<Checkbox
label="PDF"
state={archiveAsPDF}
@ -207,6 +218,22 @@ export default function Appearance() {
<span className="label-text">Open Readable, if available</span>
</label>
<label
className="label cursor-pointer flex gap-2 justify-start w-fit"
tabIndex={0}
role="button"
>
<input
type="radio"
name="link-preference-radio"
className="radio checked:bg-primary"
value="Singlefile"
checked={linksRouteTo === LinksRouteTo.SINGLEFILE}
onChange={() => setLinksRouteTo(LinksRouteTo.SINGLEFILE)}
/>
<span className="label-text">Open Singlefile, if available</span>
</label>
<label
className="label cursor-pointer flex gap-2 justify-start w-fit"
tabIndex={0}

View File

@ -0,0 +1,8 @@
-- Schema changes for the Singlefile archive format: a new routing option,
-- a per-user flag, and a per-link column.
-- AlterEnum
-- Adds SINGLEFILE as a value of the "LinksRouteTo" enum.
ALTER TYPE "LinksRouteTo" ADD VALUE 'SINGLEFILE';
-- AlterTable
-- Per-user flag; NOT NULL with DEFAULT false, so existing users get false.
-- NOTE(review): the Prisma model for User declares archiveAsSinglefile with
-- @default(true); confirm which default is intended and align the two.
ALTER TABLE "User" ADD COLUMN "archiveAsSinglefile" BOOLEAN NOT NULL DEFAULT false;
-- AlterTable
-- Nullable text column on "Link" for the Singlefile archive.
ALTER TABLE "Link" ADD COLUMN "singlefile" text;

View File

@ -45,6 +45,7 @@ model User {
linksRouteTo LinksRouteTo @default(ORIGINAL)
preventDuplicateLinks Boolean @default(false)
archiveAsScreenshot Boolean @default(true)
archiveAsSinglefile Boolean @default(true)
archiveAsPDF Boolean @default(true)
archiveAsWaybackMachine Boolean @default(false)
isPrivate Boolean @default(false)
@ -56,6 +57,7 @@ enum LinksRouteTo {
ORIGINAL
PDF
READABLE
SINGLEFILE
SCREENSHOT
}
@ -127,6 +129,7 @@ model Link {
image String?
pdf String?
readable String?
singlefile String?
lastPreserved DateTime?
createdAt DateTime @default(now())
updatedAt DateTime @default(now()) @updatedAt

View File

@ -38,6 +38,13 @@ async function processBatch() {
{
readable: "pending",
},
///////////////////////
{
singlefile: null,
},
{
singlefile: "pending",
},
],
},
take: archiveTakeCount,
@ -75,6 +82,13 @@ async function processBatch() {
{
readable: "pending",
},
///////////////////////
{
singlefile: null,
},
{
singlefile: "pending",
},
],
},
take: archiveTakeCount,

View File

@ -46,6 +46,10 @@ declare global {
PDF_MARGIN_TOP?: string;
PDF_MARGIN_BOTTOM?: string;
// Singlefile archive settings
SINGLEFILE_ARCHIVE_COMMAND?: string;
SINGLEFILE_ARCHIVE_HTTP_API?: string;
//
// SSO Providers
//

View File

@ -128,12 +128,14 @@ export enum ArchivedFormat {
jpeg,
pdf,
readability,
singlefile,
}
/**
 * Kinds of link, as a numeric enum.
 *
 * The values are written out explicitly; they are the same ones the compiler
 * assigns implicitly (starting at 0, in declaration order).
 */
export enum LinkType {
  url = 0,
  pdf = 1,
  image = 2,
  singlefile = 3,
}
export enum TokenExpiry {