diff --git a/.env.sample b/.env.sample
index 14f74df..98ce893 100644
--- a/.env.sample
+++ b/.env.sample
@@ -45,6 +45,10 @@ PROXY_BYPASS=
PDF_MARGIN_TOP=
PDF_MARGIN_BOTTOM=
+# Singlefile archive settings
+SINGLEFILE_ARCHIVE_COMMAND= # single-file "{{URL}}" --dump-content
+SINGLEFILE_ARCHIVE_HTTP_API= # http://singlefile:3000/
+
#
# SSO Providers
#
diff --git a/README.md b/README.md
index cd3725a..c3f0bc2 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ We've forked the old version from the current repository into [this repo](https:
## Features
-- 📸 Auto capture a screenshot, PDF, and readable view of each webpage.
+- 📸 Auto capture a screenshot, PDF, single HTML file, and readable view of each webpage.
- 🏛️ Send your webpage to Wayback Machine ([archive.org](https://archive.org)) for a snapshot. (Optional)
- 📂 Organize links by collection, sub-collection, name, description and multiple tags.
- 👥 Collaborate on gathering links in a collection.
diff --git a/components/CollectionCard.tsx b/components/CollectionCard.tsx
index 1a6cc31..9f09a77 100644
--- a/components/CollectionCard.tsx
+++ b/components/CollectionCard.tsx
@@ -37,6 +37,7 @@ export default function CollectionCard({ collection, className }: Props) {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
+ archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
@@ -52,6 +53,7 @@ export default function CollectionCard({ collection, className }: Props) {
username: account.username as string,
image: account.image as string,
archiveAsScreenshot: account.archiveAsScreenshot as boolean,
+ archiveAsSinglefile: account.archiveAsSinglefile as boolean,
archiveAsPDF: account.archiveAsPDF as boolean,
});
}
diff --git a/components/LinkViews/LinkComponents/LinkGroupedIconURL.tsx b/components/LinkViews/LinkComponents/LinkGroupedIconURL.tsx
index 2d34f9c..4921c3d 100644
--- a/components/LinkViews/LinkComponents/LinkGroupedIconURL.tsx
+++ b/components/LinkViews/LinkComponents/LinkGroupedIconURL.tsx
@@ -43,6 +43,8 @@ export default function LinkGroupedIconURL({
) : link.type === "image" ? (
+ ) : link.type === "singlefile" ? (
+
) : undefined}
{shortendURL}
diff --git a/components/LinkViews/LinkComponents/LinkIcon.tsx b/components/LinkViews/LinkComponents/LinkIcon.tsx
index 70101e9..bbf6146 100644
--- a/components/LinkViews/LinkComponents/LinkIcon.tsx
+++ b/components/LinkViews/LinkComponents/LinkIcon.tsx
@@ -42,6 +42,8 @@ export default function LinkIcon({
) : link.type === "image" ? (
+ ) : link.type === "singlefile" ? (
+
) : undefined}
>
);
diff --git a/components/ModalContent/EditCollectionSharingModal.tsx b/components/ModalContent/EditCollectionSharingModal.tsx
index 9a73d5b..21aae18 100644
--- a/components/ModalContent/EditCollectionSharingModal.tsx
+++ b/components/ModalContent/EditCollectionSharingModal.tsx
@@ -65,6 +65,7 @@ export default function EditCollectionSharingModal({
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
+ archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
diff --git a/components/ModalContent/NewLinkModal.tsx b/components/ModalContent/NewLinkModal.tsx
index 46c9ffe..ff610ab 100644
--- a/components/ModalContent/NewLinkModal.tsx
+++ b/components/ModalContent/NewLinkModal.tsx
@@ -29,6 +29,7 @@ export default function NewLinkModal({ onClose }: Props) {
image: "",
pdf: "",
readable: "",
+ singlefile: "",
textContent: "",
collection: {
name: "",
diff --git a/components/ModalContent/PreservedFormatsModal.tsx b/components/ModalContent/PreservedFormatsModal.tsx
index 5bc181d..c52c492 100644
--- a/components/ModalContent/PreservedFormatsModal.tsx
+++ b/components/ModalContent/PreservedFormatsModal.tsx
@@ -12,6 +12,7 @@ import { useSession } from "next-auth/react";
import {
pdfAvailable,
readabilityAvailable,
+ singlefileAvailable,
screenshotAvailable,
} from "@/lib/shared/getArchiveValidity";
import PreservedFormatRow from "@/components/PreserverdFormatRow";
@@ -42,6 +43,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
+ archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
@@ -59,6 +61,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
username: account.username as string,
image: account.image as string,
archiveAsScreenshot: account.archiveAsScreenshot as boolean,
+ archiveAsSinglefile: account.archiveAsSinglefile as boolean,
archiveAsPDF: account.archiveAsPDF as boolean,
});
}
@@ -73,6 +76,9 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
(collectionOwner.archiveAsScreenshot === true
? link.pdf && link.pdf !== "pending"
: true) &&
+ (collectionOwner.archiveAsSinglefile === true
+ ? link.singlefile && link.singlefile !== "pending"
+ : true) &&
(collectionOwner.archiveAsPDF === true
? link.pdf && link.pdf !== "pending"
: true) &&
@@ -109,7 +115,7 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
clearInterval(interval);
}
};
- }, [link?.image, link?.pdf, link?.readable]);
+ }, [link?.image, link?.pdf, link?.readable, link?.singlefile]);
const updateArchive = async () => {
const load = toast.loading("Sending request...");
@@ -140,7 +146,8 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
{isReady() &&
(screenshotAvailable(link) ||
pdfAvailable(link) ||
- readabilityAvailable(link)) ? (
+ readabilityAvailable(link) ||
+ singlefileAvailable(link)) ? (
The following formats are available for this link:
@@ -183,6 +190,16 @@ export default function PreservedFormatsModal({ onClose, activeLink }: Props) {
activeLink={link}
/>
) : undefined}
+
+ {singlefileAvailable(link) ? (
+
+ ) : undefined}
>
) : (
diff --git a/components/PreserverdFormatRow.tsx b/components/PreserverdFormatRow.tsx
index 1d3df85..b50036a 100644
--- a/components/PreserverdFormatRow.tsx
+++ b/components/PreserverdFormatRow.tsx
@@ -1,10 +1,6 @@
import React, { useEffect, useState } from "react";
import useLinkStore from "@/store/links";
-import {
- ArchivedFormat,
- LinkIncludingShortenedCollectionAndTags,
-} from "@/types/global";
-import toast from "react-hot-toast";
+import { ArchivedFormat, LinkIncludingShortenedCollectionAndTags } from "@/types/global";
import Link from "next/link";
import { useRouter } from "next/router";
import { useSession } from "next-auth/react";
@@ -61,7 +57,7 @@ export default function PreservedFormatRow({
clearInterval(interval);
}
};
- }, [link?.image, link?.pdf, link?.readable]);
+ }, [link?.image, link?.pdf, link?.readable, link?.singlefile]);
const handleDownload = () => {
const path = `/api/v1/archives/${link?.id}?format=${format}`;
@@ -69,10 +65,10 @@ export default function PreservedFormatRow({
.then((response) => {
if (response.ok) {
// Create a temporary link and click it to trigger the download
- const link = document.createElement("a");
- link.href = path;
- link.download = format === ArchivedFormat.pdf ? "PDF" : "Screenshot";
- link.click();
+ const anchorElement = document.createElement("a");
+ anchorElement.href = path;
+ anchorElement.download = format === ArchivedFormat.singlefile ? (link.name ?? 'index') : format === ArchivedFormat.pdf ? "PDF" : "Screenshot";
+ anchorElement.click();
} else {
console.error("Failed to download file");
}
diff --git a/components/ReadableView.tsx b/components/ReadableView.tsx
index f8eb3b6..7ea4e24 100644
--- a/components/ReadableView.tsx
+++ b/components/ReadableView.tsx
@@ -65,9 +65,11 @@ export default function ReadableView({ link }: Props) {
(link?.image === "pending" ||
link?.pdf === "pending" ||
link?.readable === "pending" ||
+ link?.singlefile === "pending" ||
!link?.image ||
!link?.pdf ||
- !link?.readable)
+ !link?.readable ||
+ !link?.singlefile)
) {
interval = setInterval(() => getLink(link.id as number), 5000);
} else {
@@ -81,7 +83,7 @@ export default function ReadableView({ link }: Props) {
clearInterval(interval);
}
};
- }, [link?.image, link?.pdf, link?.readable]);
+ }, [link?.image, link?.pdf, link?.readable, link?.singlefile]);
const rgbToHex = (r: number, g: number, b: number): string =>
"#" +
diff --git a/docker-compose.yml b/docker-compose.yml
index 63a8740..b809ce4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -19,3 +19,6 @@ services:
- ./data:/data/data
depends_on:
- postgres
+ singlefile:
+ image: rutkai/single-file-web:latest
+ container_name: singlefile
diff --git a/lib/api/archiveHandler.ts b/lib/api/archiveHandler.ts
index 08a35b5..343157d 100644
--- a/lib/api/archiveHandler.ts
+++ b/lib/api/archiveHandler.ts
@@ -9,6 +9,9 @@ import { Collection, Link, User } from "@prisma/client";
import validateUrlSize from "./validateUrlSize";
import removeFile from "./storage/removeFile";
import Jimp from "jimp";
+import { execSync } from "child_process";
+import axios from "axios";
+import { Agent } from "http";
import createFolder from "./storage/createFolder";
type LinksAndCollectionAndOwner = Link & {
@@ -93,6 +96,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
readable: !link.readable?.startsWith("archive")
? "pending"
: undefined,
+ singlefile: !link.singlefile?.startsWith("archive")
+ ? "pending"
+ : undefined,
preview: !link.readable?.startsWith("archive")
? "pending"
: undefined,
@@ -113,19 +119,63 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
const content = await page.content();
- // TODO single file
- // const session = await page.context().newCDPSession(page);
- // const doc = await session.send("Page.captureSnapshot", {
- // format: "mhtml",
- // });
- // const saveDocLocally = (doc: any) => {
- // console.log(doc);
- // return createFile({
- // data: doc,
- // filePath: `archives/${targetLink.collectionId}/${link.id}.mhtml`,
- // });
- // };
- // saveDocLocally(doc.data);
+ // Singlefile
+ if (
+   user.archiveAsSinglefile &&
+   !link.singlefile?.startsWith("archive")
+ ) {
+   // Capture the page as one self-contained HTML file. The CLI command takes
+   // precedence over the HTTP API when both environment variables are set.
+   const command = process.env.SINGLEFILE_ARCHIVE_COMMAND;
+   const httpApi = process.env.SINGLEFILE_ARCHIVE_HTTP_API;
+   const filePath = `archives/${targetLink.collectionId}/${link.id}.html`;
+   if (command) {
+     if (command.includes("{{URL}}")) {
+       try {
+         // link.url is user-supplied and execSync runs this string in a shell:
+         // percent-encode whitespace and the characters that can end a quoted
+         // argument or start an expansion (" ' ` $ \). NOTE(review): assumes the
+         // template quotes {{URL}} as in .env.sample; unquoted use stays unsafe.
+         const shellSafeUrl = link.url.replace(/["'`$\\\s]/g, (ch) =>
+           ch === "'" ? "%27" : encodeURIComponent(ch)
+         );
+         // Replacer function: the URL is inserted literally, never parsed for "$" patterns.
+         const html = execSync(command.replace("{{URL}}", () => shellSafeUrl), {
+           timeout: 60000,
+           maxBuffer: 1024 * 1024 * 100,
+         });
+         await createFile({ data: html, filePath });
+       } catch (err) {
+         console.error("Error running SINGLEFILE_ARCHIVE_COMMAND:", err);
+       }
+     } else {
+       console.error("Invalid SINGLEFILE_ARCHIVE_COMMAND. Missing {{URL}}");
+     }
+   } else if (httpApi) {
+     try {
+       const html = await axios.post(
+         httpApi,
+         { url: link.url },
+         {
+           headers: {
+             "Content-Type": "application/x-www-form-urlencoded",
+           },
+           httpAgent: new Agent({ keepAlive: false }),
+         }
+       );
+       await createFile({ data: html.data, filePath });
+     } catch (err) {
+       console.error(
+         "Error fetching Singlefile using SINGLEFILE_ARCHIVE_HTTP_API:",
+         err
+       );
+     }
+   } else {
+     console.error(
+       "No SINGLEFILE_ARCHIVE_COMMAND or SINGLEFILE_ARCHIVE_HTTP_API defined."
+     );
+   }
+ }
// Readability
const window = new JSDOM("").window;
@@ -284,6 +334,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
image: user.archiveAsScreenshot
? `archives/${linkExists.collectionId}/${link.id}.png`
: undefined,
+ singlefile: user.archiveAsSinglefile
+ ? `archives/${linkExists.collectionId}/${link.id}.html`
+ : undefined,
pdf: user.archiveAsPDF
? `archives/${linkExists.collectionId}/${link.id}.pdf`
: undefined,
@@ -314,6 +367,9 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
image: !finalLink.image?.startsWith("archives")
? "unavailable"
: undefined,
+ singlefile: !finalLink.singlefile?.startsWith("archives")
+ ? "unavailable"
+ : undefined,
pdf: !finalLink.pdf?.startsWith("archives")
? "unavailable"
: undefined,
@@ -324,6 +380,7 @@ export default async function archiveHandler(link: LinksAndCollectionAndOwner) {
});
else {
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.png` });
+ removeFile({ filePath: `archives/${link.collectionId}/${link.id}.html` });
removeFile({ filePath: `archives/${link.collectionId}/${link.id}.pdf` });
removeFile({
filePath: `archives/${link.collectionId}/${link.id}_readability.json`,
diff --git a/lib/api/controllers/links/bulk/deleteLinksById.ts b/lib/api/controllers/links/bulk/deleteLinksById.ts
index 466db98..85b8bcd 100644
--- a/lib/api/controllers/links/bulk/deleteLinksById.ts
+++ b/lib/api/controllers/links/bulk/deleteLinksById.ts
@@ -52,6 +52,9 @@ export default async function deleteLinksById(
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
});
+ removeFile({
+ filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
+ });
}
return { response: deletedLinks, status: 200 };
diff --git a/lib/api/controllers/links/linkId/deleteLinkById.ts b/lib/api/controllers/links/linkId/deleteLinkById.ts
index db68ee7..80b4c85 100644
--- a/lib/api/controllers/links/linkId/deleteLinkById.ts
+++ b/lib/api/controllers/links/linkId/deleteLinkById.ts
@@ -30,6 +30,9 @@ export default async function deleteLink(userId: number, linkId: number) {
removeFile({
filePath: `archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
});
+ removeFile({
+ filePath: `archives/${collectionIsAccessible?.id}/${linkId}.html`,
+ });
return { response: deleteLink, status: 200 };
}
diff --git a/lib/api/controllers/links/linkId/updateLinkById.ts b/lib/api/controllers/links/linkId/updateLinkById.ts
index e6f7f0d..854d72d 100644
--- a/lib/api/controllers/links/linkId/updateLinkById.ts
+++ b/lib/api/controllers/links/linkId/updateLinkById.ts
@@ -160,6 +160,11 @@ export default async function updateLinkById(
`archives/${collectionIsAccessible?.id}/${linkId}_readability.json`,
`archives/${data.collection.id}/${linkId}_readability.json`
);
+
+ await moveFile(
+ `archives/${collectionIsAccessible?.id}/${linkId}.html`,
+ `archives/${data.collection.id}/${linkId}.html`
+ );
}
return { response: updatedLink, status: 200 };
diff --git a/lib/api/controllers/public/users/getPublicUser.ts b/lib/api/controllers/public/users/getPublicUser.ts
index 04c7994..1816051 100644
--- a/lib/api/controllers/public/users/getPublicUser.ts
+++ b/lib/api/controllers/public/users/getPublicUser.ts
@@ -75,6 +75,7 @@ export default async function getPublicUser(
username: lessSensitiveInfo.username,
image: lessSensitiveInfo.image,
archiveAsScreenshot: lessSensitiveInfo.archiveAsScreenshot,
+ archiveAsSinglefile: lessSensitiveInfo.archiveAsSinglefile,
archiveAsPDF: lessSensitiveInfo.archiveAsPDF,
};
diff --git a/lib/api/controllers/users/userId/updateUserById.ts b/lib/api/controllers/users/userId/updateUserById.ts
index f2b5e91..60c4734 100644
--- a/lib/api/controllers/users/userId/updateUserById.ts
+++ b/lib/api/controllers/users/userId/updateUserById.ts
@@ -187,6 +187,7 @@ export default async function updateUserById(
(value, index, self) => self.indexOf(value) === index
),
archiveAsScreenshot: data.archiveAsScreenshot,
+ archiveAsSinglefile: data.archiveAsSinglefile,
archiveAsPDF: data.archiveAsPDF,
archiveAsWaybackMachine: data.archiveAsWaybackMachine,
linksRouteTo: data.linksRouteTo,
diff --git a/lib/api/storage/readFile.ts b/lib/api/storage/readFile.ts
index 350726d..fc9d2e7 100644
--- a/lib/api/storage/readFile.ts
+++ b/lib/api/storage/readFile.ts
@@ -10,6 +10,7 @@ import util from "util";
type ReturnContentTypes =
| "text/plain"
+ | "text/html"
| "image/jpeg"
| "image/png"
| "application/pdf"
@@ -61,6 +62,8 @@ export default async function readFile(filePath: string) {
contentType = "image/png";
} else if (filePath.endsWith("_readability.json")) {
contentType = "application/json";
+ } else if (filePath.endsWith(".html")) {
+ contentType = "text/html";
} else {
// if (filePath.endsWith(".jpg"))
contentType = "image/jpeg";
@@ -88,6 +91,8 @@ export default async function readFile(filePath: string) {
contentType = "image/png";
} else if (filePath.endsWith("_readability.json")) {
contentType = "application/json";
+ } else if (filePath.endsWith(".html")) {
+ contentType = "text/html";
} else {
// if (filePath.endsWith(".jpg"))
contentType = "image/jpeg";
diff --git a/lib/client/generateLinkHref.ts b/lib/client/generateLinkHref.ts
index 47c1888..fcac1db 100644
--- a/lib/client/generateLinkHref.ts
+++ b/lib/client/generateLinkHref.ts
@@ -7,6 +7,7 @@ import { LinksRouteTo } from "@prisma/client";
import {
pdfAvailable,
readabilityAvailable,
+ singlefileAvailable,
screenshotAvailable,
} from "../shared/getArchiveValidity";
@@ -27,6 +28,10 @@ export const generateLinkHref = (
if (!readabilityAvailable(link)) return link.url || "";
return `/preserved/${link?.id}?format=${ArchivedFormat.readability}`;
+ case LinksRouteTo.SINGLEFILE:
+ if (!singlefileAvailable(link)) return link.url || "";
+
+ return `/preserved/${link?.id}?format=${ArchivedFormat.singlefile}`;
case LinksRouteTo.SCREENSHOT:
if (!screenshotAvailable(link)) return link.url || "";
diff --git a/lib/shared/getArchiveValidity.ts b/lib/shared/getArchiveValidity.ts
index 0da5504..9f19c35 100644
--- a/lib/shared/getArchiveValidity.ts
+++ b/lib/shared/getArchiveValidity.ts
@@ -28,6 +28,17 @@ export function readabilityAvailable(
);
}
+/**
+ * Tells whether a single-file HTML archive can be served for `link`.
+ *
+ * The result is truthy only when `link.singlefile` is set and is neither of
+ * the placeholder values "pending" or "unavailable". Because the checks are
+ * chained with `&&`, a falsy `link` or `link.singlefile` is returned as-is
+ * rather than being coerced to `false`.
+ */
+export function singlefileAvailable(
+ link: LinkIncludingShortenedCollectionAndTags
+) {
+ return (
+ link &&
+ link.singlefile &&
+ link.singlefile !== "pending" &&
+ link.singlefile !== "unavailable"
+ );
+}
+
export function previewAvailable(link: any) {
return (
link &&
diff --git a/pages/api/v1/archives/[linkId].ts b/pages/api/v1/archives/[linkId].ts
index b13e690..e3a3d79 100644
--- a/pages/api/v1/archives/[linkId].ts
+++ b/pages/api/v1/archives/[linkId].ts
@@ -27,6 +27,7 @@ export default async function Index(req: NextApiRequest, res: NextApiResponse) {
else if (format === ArchivedFormat.jpeg) suffix = ".jpeg";
else if (format === ArchivedFormat.pdf) suffix = ".pdf";
else if (format === ArchivedFormat.readability) suffix = "_readability.json";
+ else if (format === ArchivedFormat.singlefile) suffix = ".html";
//@ts-ignore
if (!linkId || !suffix)
diff --git a/pages/api/v1/links/[id]/archive/index.ts b/pages/api/v1/links/[id]/archive/index.ts
index 4693fac..e4d7db0 100644
--- a/pages/api/v1/links/[id]/archive/index.ts
+++ b/pages/api/v1/links/[id]/archive/index.ts
@@ -76,6 +76,7 @@ const deleteArchivedFiles = async (link: Link & { collection: Collection }) => {
image: null,
pdf: null,
readable: null,
+ singlefile: null,
preview: null,
},
});
@@ -89,6 +90,9 @@ const deleteArchivedFiles = async (link: Link & { collection: Collection }) => {
await removeFile({
filePath: `archives/${link.collection.id}/${link.id}_readability.json`,
});
+ await removeFile({
+ filePath: `archives/${link.collection.id}/${link.id}.html`,
+ });
await removeFile({
filePath: `archives/preview/${link.collection.id}/${link.id}.png`,
});
diff --git a/pages/collections/[id].tsx b/pages/collections/[id].tsx
index 92456a7..53562c2 100644
--- a/pages/collections/[id].tsx
+++ b/pages/collections/[id].tsx
@@ -61,6 +61,7 @@ export default function Index() {
username: "",
image: "",
archiveAsScreenshot: undefined as unknown as boolean,
+ archiveAsSinglefile: undefined as unknown as boolean,
archiveAsPDF: undefined as unknown as boolean,
});
@@ -78,6 +79,7 @@ export default function Index() {
username: account.username as string,
image: account.image as string,
archiveAsScreenshot: account.archiveAsScreenshot as boolean,
+ archiveAsSinglefile: account.archiveAsSinglefile as boolean,
archiveAsPDF: account.archiveAsPDF as boolean,
});
}
diff --git a/pages/preserved/[id].tsx b/pages/preserved/[id].tsx
index 84e614a..f1159d4 100644
--- a/pages/preserved/[id].tsx
+++ b/pages/preserved/[id].tsx
@@ -36,6 +36,12 @@ export default function Index() {
{link && Number(router.query.format) === ArchivedFormat.readability && (
)}
+ {link && Number(router.query.format) === ArchivedFormat.singlefile && (
+
+ )}
{link && Number(router.query.format) === ArchivedFormat.pdf && (