diff --git a/lib/api/controllers/migration/importFromHTMLFile.ts b/lib/api/controllers/migration/importFromHTMLFile.ts index 55bbf57..c557133 100644 --- a/lib/api/controllers/migration/importFromHTMLFile.ts +++ b/lib/api/controllers/migration/importFromHTMLFile.ts @@ -1,6 +1,7 @@ import { prisma } from "@/lib/api/db"; import createFolder from "@/lib/api/storage/createFolder"; import { JSDOM } from "jsdom"; +import { parse, Node, Element, TextNode } from "himalaya"; const MAX_LINKS_PER_USER = Number(process.env.MAX_LINKS_PER_USER) || 30000; @@ -11,6 +12,11 @@ export default async function importFromHTMLFile( const dom = new JSDOM(rawData); const document = dom.window.document; + // remove bad tags + document.querySelectorAll("meta").forEach((e) => (e.outerHTML = e.innerHTML)); + document.querySelectorAll("META").forEach((e) => (e.outerHTML = e.innerHTML)); + document.querySelectorAll("P").forEach((e) => (e.outerHTML = e.innerHTML)); + const bookmarks = document.querySelectorAll("A"); const totalImports = bookmarks.length; @@ -28,153 +34,161 @@ export default async function importFromHTMLFile( status: 400, }; - const folders = document.querySelectorAll("H3"); - let unorganizedCollectionId: number | null = null; + const jsonData = parse(document.documentElement.outerHTML); - if (folders.length === 0) { - const unorganizedCollection = await prisma.collection.findFirst({ - where: { - name: "Imported", - ownerId: userId, - }, - }); + // console.log(jsonData); - if (!unorganizedCollection) { - const newUnorganizedCollection = await prisma.collection.create({ - data: { - name: "Imported", - description: - "Automatically created collection for imported bookmarks.", - ownerId: userId, - }, - }); - unorganizedCollectionId = newUnorganizedCollection.id; - } else { - unorganizedCollectionId = unorganizedCollection.id; - } - - createFolder({ filePath: `archives/${unorganizedCollectionId}` }); + for (const item of jsonData) { + console.log(item); + await processBookmarks(userId, item as Element); } - await prisma - .$transaction( - async () => { - if (unorganizedCollectionId) { - // @ts-ignore - for (const bookmark of bookmarks) { - createBookmark(userId, bookmark, unorganizedCollectionId); - } - } else { - // @ts-ignore - for (const folder of folders) { - await createCollectionAndBookmarks( - userId, - folder, - folder.nextElementSibling, - null - ); - } - } - }, - { timeout: 30000 } - ) - .catch((err) => console.log(err)); - return { response: "Success.", status: 200 }; } -const createCollectionAndBookmarks = async ( +async function processBookmarks( userId: number, - folder: any, - folderContent: any, - parentId: number | null + data: Node, + parentCollectionId?: number +) { + if (data.type === "element") { + for (const item of data.children) { + if (item.type === "element" && item.tagName === "dt") { + let collectionId; + const collectionName = item.children.find( + (e) => e.type === "element" && e.tagName === "h3" + ) as Element; + + console.log("collection:", item); + console.log("collectionName:", collectionName); + + // This is a collection or sub-collection + if (collectionName) { + collectionId = await createCollection( + userId, + (collectionName.children[0] as TextNode).content, + parentCollectionId + ); + } + await processBookmarks( + userId, + item, + collectionId || parentCollectionId + ); + } else if (item.type === "element" && item.tagName === "a") { + // This is a link + + // get link href + const linkUrl = item?.attributes.find((e) => e.key === "href")?.value; + + // get link name + const linkName = ( + item?.children.find((e) => e.type === "text") as TextNode + )?.content; + + // get link tags + const linkTags = item?.attributes + .find((e) => e.key === "tags") + ?.value.split(","); + + console.log("link:", item); + + if (linkUrl && parentCollectionId) { + await createLink( + userId, + linkUrl, + parentCollectionId, + linkName, + "", + linkTags + ); + } else if (linkUrl) { + // create a collection named "Imported Bookmarks" and add the link to it + const collectionId = await createCollection(userId, "Imports"); + + await createLink( + userId, + linkUrl, + collectionId, + linkName, + "", + linkTags + ); + } + + await processBookmarks(userId, item, parentCollectionId); + } else { + // This could be anything else + await processBookmarks(userId, item, parentCollectionId); + } + + // Add more conditions as necessary based on your JSON structure + } + } +} + +const createCollection = async ( + userId: number, + collectionName: string, + parentId?: number ) => { const findCollection = await prisma.collection.findFirst({ where: { - name: folder.textContent.trim(), + parentId, + name: collectionName, ownerId: userId, }, }); - const checkIfCollectionExists = findCollection; - let collectionId = findCollection?.id; + if (findCollection) { + return findCollection.id; + } - if (!checkIfCollectionExists || !collectionId) { - const newCollection = await prisma.collection.create({ - data: { - name: folder.textContent.trim(), - description: "", - color: "#0ea5e9", - isPublic: false, - ownerId: userId, - parentId + const collectionId = await prisma.collection.create({ + data: { + name: collectionName, + parent: parentId + ? { + connect: { + id: parentId, + }, + } + : undefined, + owner: { + connect: { + id: userId, + }, }, - }); - - createFolder({ filePath: `archives/${newCollection.id}` }); - - collectionId = newCollection.id; - } - - createFolder({ filePath: `archives/${collectionId}` }); - - const bookmarks = folderContent.querySelectorAll("A"); - for (const bookmark of bookmarks) { - createBookmark(userId, bookmark, collectionId); - } - - const subfolders = folderContent.querySelectorAll("H3"); - for (const subfolder of subfolders) { - await createCollectionAndBookmarks(userId, subfolder, subfolder.nextElementSibling, collectionId); - } -}; - -const createBookmark = async ( - userId: number, - bookmark: any, - collectionId: number -) => { - // Move up to the parent node (
) and then find the next sibling - let parentDT = bookmark.parentNode; - let nextSibling = parentDT ? parentDT.nextSibling : null; - let description = ""; - - // Loop through siblings to skip any potential text nodes or whitespace - while (nextSibling && nextSibling.nodeType !== 1) { - nextSibling = nextSibling.nextSibling; - } - - // Check if the next sibling element is a
tag and use its content as the description - if (nextSibling && nextSibling.tagName === "DD") { - description = nextSibling.textContent.trim(); - } - - const linkName = bookmark.textContent.trim(); - const linkURL = bookmark.getAttribute("HREF"); - - const existingLink = await prisma.link.findFirst({ - where: { - url: linkURL, - collectionId }, }); - // Create the link only if it doesn't already exist - if (!existingLink) { - await prisma.link.create({ - data: { - name: linkName, - url: linkURL, - tags: bookmark.getAttribute("TAGS") + createFolder({ filePath: `archives/${collectionId.id}` }); + + return collectionId.id; +}; + +const createLink = async ( + userId: number, + url: string, + collectionId: number, + name?: string, + description?: string, + tags?: string[] +) => { + await prisma.link.create({ + data: { + name: name || "", + url, + description, + collectionId, + tags: + tags && tags[0] ? { - connectOrCreate: bookmark - .getAttribute("TAGS") - .split(",") - .map((tag: string) => - tag - ? { + connectOrCreate: tags.map((tag: string) => { + return ( + { where: { - data: { + name_ownerId: { name: tag.trim(), ownerId: userId, }, @@ -187,14 +201,11 @@ const createBookmark = async ( }, }, }, - } - : undefined - ), - } + } || undefined + ); + }), + } : undefined, - description, - collectionId, - }, - }); - } -}; \ No newline at end of file + }, + }); +}; diff --git a/package.json b/package.json index 8e741fc..7199f79 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "eslint-config-next": "13.4.9", "formidable": "^3.5.1", "framer-motion": "^10.16.4", + "himalaya": "^1.1.0", "jimp": "^0.22.10", "jsdom": "^22.1.0", "lottie-web": "^5.12.2", diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 8cfa7b3..036f658 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -165,4 +165,3 @@ model AccessToken { createdAt DateTime @default(now()) updatedAt DateTime @default(now()) @updatedAt } - diff --git a/types/himalaya.d.ts b/types/himalaya.d.ts new file mode 100644 index 0000000..e2bd5e0 --- /dev/null +++ b/types/himalaya.d.ts @@ -0,0 +1,22 @@ +declare module "himalaya" { + export interface Attribute { + key: string; + value: string; + } + + export interface TextNode { + type: "text"; + content: string; + } + + export type Node = TextNode | Element; + + export interface Element { + type: "element"; + tagName: string; + attributes: Attribute[]; + children: Node[]; + } + + export function parse(html: string): Node[]; +} diff --git a/yarn.lock b/yarn.lock index 4ef5d2e..7f3b8b8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3713,6 +3713,11 @@ hexoid@^1.0.0: resolved "https://registry.yarnpkg.com/hexoid/-/hexoid-1.0.0.tgz#ad10c6573fb907de23d9ec63a711267d9dc9bc18" integrity sha512-QFLV0taWQOZtvIRIAdBChesmogZrtuXvVWsFHZTk2SU+anspqZ2vMnoLg7IE1+Uk16N19APic1BuF8bC8c2m5g== +himalaya@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/himalaya/-/himalaya-1.1.0.tgz#31724ae9d35714cd7c6f4be94888953f3604606a" + integrity sha512-LLase1dHCRMel68/HZTFft0N0wti0epHr3nNY7ynpLbyZpmrKMQ8YIpiOV77TM97cNpC8Wb2n6f66IRggwdWPw== + hoist-non-react-statics@^3.3.1: version "3.3.2" resolved "https://registry.yarnpkg.com/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz#ece0acaf71d62c2969c2ec59feff42a4b1a85b45"