el.xwx.moe/lib/api/controllers/migration/importFromHTMLFile.ts

293 lines
7.6 KiB
TypeScript
Raw Normal View History

import { prisma } from "@/lib/api/db";
import createFolder from "@/lib/api/storage/createFolder";
import { JSDOM } from "jsdom";
import { parse, Node, Element, TextNode } from "himalaya";
2024-10-26 12:44:52 -05:00
import { hasPassedLimit } from "../../verifyCapacity";
2023-12-19 16:20:09 -06:00
2023-10-16 17:27:04 -05:00
/**
 * Imports bookmarks from a bookmarks HTML export into a user's account.
 *
 * The raw HTML is loaded with JSDOM, cleaned up, re-serialized and parsed
 * with himalaya into a plain node tree, which is then walked to create
 * collections and links.
 *
 * @param userId - Id of the user that will own the imported data.
 * @param rawData - Raw contents of the uploaded HTML file.
 * @returns `{ response, status }`: status 400 when `hasPassedLimit` reports
 *   that the import would exceed the user's limit, otherwise status 200.
 */
export default async function importFromHTMLFile(
  userId: number,
  rawData: string
) {
  const dom = new JSDOM(rawData);
  const document = dom.window.document;

  // Remove bad tags: every <meta> and <p> is replaced by its own inner HTML,
  // i.e. the tag is dropped while whatever it contained is kept in place.
  document.querySelectorAll("meta").forEach((e) => (e.outerHTML = e.innerHTML));
  document.querySelectorAll("META").forEach((e) => (e.outerHTML = e.innerHTML));
  document.querySelectorAll("P").forEach((e) => (e.outerHTML = e.innerHTML));

  // Each <a> is one candidate link; the count is checked before anything is
  // written.
  const bookmarks = document.querySelectorAll("A");
  const totalImports = bookmarks.length;

  const hasTooManyLinks = await hasPassedLimit(userId, totalImports);

  if (hasTooManyLinks) {
    return {
      response: `Your subscription have reached the maximum number of links allowed.`,
      status: 400,
    };
  }

  const jsonData = parse(document.documentElement.outerHTML);
  const processedArray = processNodes(jsonData);

  // Sequential on purpose: each subtree is fully imported before the next
  // one starts. (No per-item logging: the nodes contain the user's bookmarks.)
  for (const item of processedArray) {
    await processBookmarks(userId, item);
  }

  return { response: "Success.", status: 200 };
}
/**
 * Recursively walks a himalaya node tree and persists what it finds.
 *
 * - A `<dt>` that contains an `<h3>` becomes a collection (created or reused
 *   via `createCollection`); its descendants are imported into it.
 * - An `<a>` with an `href` becomes a link in the current collection, or in a
 *   collection named "Imports" when there is no current collection.
 * - Any other element is only traversed.
 *
 * @param userId - Owner of everything that gets created.
 * @param data - Node to process; non-element nodes are ignored.
 * @param parentCollectionId - Collection that links found directly under
 *   `data` are added to, if any.
 */
async function processBookmarks(
  userId: number,
  data: Node,
  parentCollectionId?: number
) {
  if (data.type !== "element") return;

  // Case-insensitive attribute lookup, shared by href / tags / add_date.
  const attributeValue = (element: Element, name: string) =>
    element.attributes.find((e) => e.key.toLowerCase() === name)?.value;

  for (const item of data.children) {
    if (item.type === "element" && item.tagName === "dt") {
      // process collection or sub-collection
      let collectionId: number | undefined;

      const collectionName = item.children.find(
        (e) => e.type === "element" && e.tagName === "h3"
      ) as Element | undefined;

      if (collectionName) {
        // Only the first child of the <h3> is read as the title. A missing,
        // empty or whitespace-only title falls back to a placeholder name.
        const collectionNameContent = (
          collectionName.children[0] as TextNode | undefined
        )?.content?.trim();

        collectionId = await createCollection(
          userId,
          collectionNameContent || "Untitled Collection",
          parentCollectionId
        );
      }

      await processBookmarks(
        userId,
        item,
        collectionId || parentCollectionId
      );
    } else if (item.type === "element" && item.tagName === "a") {
      // process link
      const linkUrl = attributeValue(item, "href");

      const linkName = (
        item.children.find((e) => e.type === "text") as TextNode | undefined
      )?.content;

      const linkTags = attributeValue(item, "tags")?.split(",");

      // set date if available; add_date is multiplied by 1000 before being
      // handed to Date, i.e. it is treated as seconds.
      const linkDateValue = attributeValue(item, "add_date");
      let linkDate = linkDateValue
        ? new Date(Number(linkDateValue) * 1000)
        : undefined;

      // A non-numeric or out-of-range add_date produces an Invalid Date,
      // which would throw on toISOString() later. Import without a date.
      if (linkDate && Number.isNaN(linkDate.getTime())) {
        linkDate = undefined;
      }

      // The description is read from a <dd> child of the <a>, which is where
      // processNodes() moves it.
      const linkDesc =
        (
          (
            item.children.find(
              (e) => e.type === "element" && e.tagName === "dd"
            ) as Element | undefined
          )?.children[0] as TextNode | undefined
        )?.content || "";

      if (linkUrl) {
        // Links that are not inside any folder go to a collection named
        // "Imports".
        const targetCollectionId =
          parentCollectionId || (await createCollection(userId, "Imports"));

        await createLink(
          userId,
          linkUrl,
          targetCollectionId,
          linkName,
          linkDesc,
          linkTags,
          linkDate
        );
      }

      await processBookmarks(userId, item, parentCollectionId);
    } else {
      // process anything else
      await processBookmarks(userId, item, parentCollectionId);
    }
  }
}
2024-02-14 09:35:59 -06:00
/**
 * Looks up a collection by owner, name and parent, creating it when no match
 * is found, and returns its id.
 *
 * The name is trimmed and cut to 254 characters before it is used for both
 * the lookup and the insert. A folder `archives/<id>` is requested through
 * `createFolder` for every newly created collection.
 *
 * NOTE(review): when `parentId` is undefined it is passed to the `where`
 * filter as-is; Prisma presumably treats that as "no filter on parentId",
 * so a same-named collection at any depth may be reused — confirm.
 *
 * @param userId - Owner (and creator) of the collection.
 * @param collectionName - Desired collection name.
 * @param parentId - Optional id of the parent collection.
 * @returns Id of the existing or newly created collection.
 */
const createCollection = async (
  userId: number,
  collectionName: string,
  parentId?: number
) => {
  const normalizedName = collectionName.trim().slice(0, 254);
  const connectTo = (id: number) => ({ connect: { id } });

  const existing = await prisma.collection.findFirst({
    where: {
      parentId,
      name: normalizedName,
      ownerId: userId,
    },
  });

  if (!existing) {
    const created = await prisma.collection.create({
      data: {
        name: normalizedName,
        // Only attach a parent relation when a parent id was supplied.
        parent: parentId ? connectTo(parentId) : undefined,
        owner: connectTo(userId),
        createdBy: connectTo(userId),
      },
    });

    createFolder({ filePath: `archives/${created.id}` });

    return created.id;
  }

  return existing.id;
};
/**
 * Creates a single link row for an imported bookmark.
 *
 * Inputs are normalized first: the URL, name and description are trimmed and
 * cut to 254 characters, tags are trimmed and cut to 49 characters. Nothing
 * is created when the (truncated) URL cannot be parsed by `new URL()`.
 *
 * @param userId - Creator of the link and owner of any tag that gets created.
 * @param url - Bookmark URL.
 * @param collectionId - Collection the link is added to.
 * @param name - Optional link title; stored as "" when missing.
 * @param description - Optional link description.
 * @param tags - Optional tag names; blank entries and duplicates are ignored.
 * @param importDate - Optional original creation date; ignored when it is an
 *   Invalid Date.
 */
const createLink = async (
  userId: number,
  url: string,
  collectionId: number,
  name?: string,
  description?: string,
  tags?: string[],
  importDate?: Date
) => {
  url = url.trim().slice(0, 254);

  // Skip bookmarks whose URL is not parseable instead of failing the import.
  try {
    new URL(url);
  } catch {
    return;
  }

  // Normalize tags, then drop blanks (e.g. from "a,,b" or ",a") and
  // duplicates. De-duplication happens after truncation so two long tags
  // that collapse to the same 49 characters are only sent once.
  const tagNames = Array.from(
    new Set(
      (tags ?? [])
        .map((tag) => tag.trim().slice(0, 49))
        .filter((tag) => tag.length > 0)
    )
  );

  name = name?.trim().slice(0, 254);
  description = description?.trim().slice(0, 254);

  // An Invalid Date cannot be serialized (toISOString() throws RangeError),
  // so import the link without a date rather than aborting.
  if (importDate && Number.isNaN(importDate.getTime())) {
    importDate = undefined;
  }

  await prisma.link.create({
    data: {
      name: name || "",
      url,
      description,
      collectionId,
      createdById: userId,
      tags:
        tagNames.length > 0
          ? {
              // Reuse the user's existing tag with that name, or create it.
              connectOrCreate: tagNames.map((tag) => ({
                where: {
                  name_ownerId: {
                    name: tag,
                    ownerId: userId,
                  },
                },
                create: {
                  name: tag,
                  owner: {
                    connect: {
                      id: userId,
                    },
                  },
                },
              })),
            }
          : undefined,
      importDate,
    },
  });
};
2024-03-27 02:20:00 -05:00
/**
 * Rewrites a himalaya node tree in place so that every `<dd>` that directly
 * follows a `<dt>` inside a `<dl>` becomes the last child of that `<dt>`'s
 * `<a>` element (and is removed from the `<dl>`).
 *
 * This lets the link importer read a bookmark's description from the `<a>`
 * node itself. Every `<dl>` in the tree is handled, including lists nested
 * inside a `<dt>` of an outer list.
 *
 * @param nodes - Root nodes of the parsed document; mutated in place.
 * @returns The same `nodes` array.
 */
function processNodes(nodes: Node[]): Node[] {
  // Moves each <dd> that immediately follows a <dt> under that <dt>'s <a>.
  const attachDescriptions = (dlNode: Element): void => {
    const siblings = dlNode.children;

    // `siblings.length` is re-read on every pass because entries are removed
    // while iterating. After removing the <dd> at i + 1, the element that
    // used to be at i + 2 slides into i + 1 and is visited next, so no index
    // adjustment is needed.
    for (let i = 0; i < siblings.length; i++) {
      const current = siblings[i];
      if (!current || current.type !== "element" || current.tagName !== "dt") {
        continue;
      }

      const nextSibling = siblings[i + 1];
      if (
        !nextSibling ||
        nextSibling.type !== "element" ||
        nextSibling.tagName !== "dd"
      ) {
        continue;
      }

      const aElement = current.children.find(
        (el) => el.type === "element" && el.tagName === "a"
      );

      if (aElement && aElement.type === "element") {
        // Add the 'dd' element as a child of the 'a' element
        aElement.children.push(nextSibling);
        // Remove the 'dd' from the parent 'dl' to avoid duplicate processing
        siblings.splice(i + 1, 1);
      }
    }
  };

  const visit = (node: Node): void => {
    if (node.type !== "element") return;

    if (node.tagName === "dl") {
      attachDescriptions(node);
    }

    // Always keep descending, even below a <dl>: nested lists sit inside a
    // <dt> of the outer list and need the same treatment.
    (node.children ?? []).forEach((child) => visit(child));
  };

  nodes.forEach((node) => visit(node));

  return nodes;
}