diff --git a/lib/api/controllers/migration/importFromHTMLFile.ts b/lib/api/controllers/migration/importFromHTMLFile.ts index a2fae27..4398600 100644 --- a/lib/api/controllers/migration/importFromHTMLFile.ts +++ b/lib/api/controllers/migration/importFromHTMLFile.ts @@ -1,6 +1,7 @@ import { prisma } from "@/lib/api/db"; import createFolder from "@/lib/api/storage/createFolder"; import { JSDOM } from "jsdom"; +import { parse, Node, Element, TextNode } from "himalaya"; const MAX_LINKS_PER_USER = Number(process.env.MAX_LINKS_PER_USER) || 30000; @@ -11,6 +12,11 @@ export default async function importFromHTMLFile( const dom = new JSDOM(rawData); const document = dom.window.document; + // remove bad tags + document.querySelectorAll("meta").forEach((e) => (e.outerHTML = e.innerHTML)); + document.querySelectorAll("META").forEach((e) => (e.outerHTML = e.innerHTML)); + document.querySelectorAll("P").forEach((e) => (e.outerHTML = e.innerHTML)); + const bookmarks = document.querySelectorAll("A"); const totalImports = bookmarks.length; @@ -28,94 +34,165 @@ export default async function importFromHTMLFile( status: 400, }; - const folders = document.querySelectorAll("H3"); + const jsonData = parse(document.documentElement.outerHTML); - await prisma - .$transaction( - async () => { - // @ts-ignore - for (const folder of folders) { - const findCollection = await prisma.user.findUnique({ - where: { - id: userId, - }, - select: { - collections: { - where: { - name: folder.textContent.trim(), - }, - }, - }, - }); - - const checkIfCollectionExists = findCollection?.collections[0]; - - let collectionId = findCollection?.collections[0]?.id; - - if (!checkIfCollectionExists || !collectionId) { - const newCollection = await prisma.collection.create({ - data: { - name: folder.textContent.trim(), - description: "", - color: "#0ea5e9", - isPublic: false, - ownerId: userId, - }, - }); - - createFolder({ filePath: `archives/${newCollection.id}` }); - - collectionId = newCollection.id; - } - - createFolder({ filePath: `archives/${collectionId}` }); - - const bookmarks = folder.nextElementSibling.querySelectorAll("A"); - for (const bookmark of bookmarks) { - await prisma.link.create({ - data: { - name: bookmark.textContent.trim(), - url: bookmark.getAttribute("HREF"), - tags: bookmark.getAttribute("TAGS") - ? { - connectOrCreate: bookmark - .getAttribute("TAGS") - .split(",") - .map((tag: string) => - tag - ? { - where: { - name_ownerId: { - name: tag.trim(), - ownerId: userId, - }, - }, - create: { - name: tag.trim(), - owner: { - connect: { - id: userId, - }, - }, - }, - } - : undefined - ), - } - : undefined, - description: bookmark.getAttribute("DESCRIPTION") - ? bookmark.getAttribute("DESCRIPTION") - : "", - collectionId: collectionId, - createdAt: new Date(), - }, - }); - } - } - }, - { timeout: 30000 } - ) - .catch((err) => console.log(err)); + for (const item of jsonData) { + console.log(item); + await processBookmarks(userId, item as Element); + } return { response: "Success.", status: 200 }; } + +async function processBookmarks( + userId: number, + data: Node, + parentCollectionId?: number +) { + if (data.type === "element") { + for (const item of data.children) { + if (item.type === "element" && item.tagName === "dt") { + // process collection or sub-collection + + let collectionId; + const collectionName = item.children.find( + (e) => e.type === "element" && e.tagName === "h3" + ) as Element; + + if (collectionName) { + collectionId = await createCollection( + userId, + (collectionName.children[0] as TextNode).content, + parentCollectionId + ); + } + await processBookmarks( + userId, + item, + collectionId || parentCollectionId + ); + } else if (item.type === "element" && item.tagName === "a") { + // process link + + const linkUrl = item?.attributes.find((e) => e.key === "href")?.value; + const linkName = ( + item?.children.find((e) => e.type === "text") as TextNode + )?.content; + const linkTags = item?.attributes + .find((e) => e.key === "tags") + ?.value.split(","); + + if (linkUrl && parentCollectionId) { + await createLink( + userId, + linkUrl, + parentCollectionId, + linkName, + "", + linkTags + ); + } else if (linkUrl) { + // create a collection named "Imported Bookmarks" and add the link to it + const collectionId = await createCollection(userId, "Imports"); + + await createLink( + userId, + linkUrl, + collectionId, + linkName, + "", + linkTags + ); + } + + await processBookmarks(userId, item, parentCollectionId); + } else { + // process anything else + await processBookmarks(userId, item, parentCollectionId); + } + } + } +} + +const createCollection = async ( + userId: number, + collectionName: string, + parentId?: number +) => { + const findCollection = await prisma.collection.findFirst({ + where: { + parentId, + name: collectionName, + ownerId: userId, + }, + }); + + if (findCollection) { + return findCollection.id; + } + + const collectionId = await prisma.collection.create({ + data: { + name: collectionName, + parent: parentId + ? { + connect: { + id: parentId, + }, + } + : undefined, + owner: { + connect: { + id: userId, + }, + }, + }, + }); + + createFolder({ filePath: `archives/${collectionId.id}` }); + + return collectionId.id; +}; + +const createLink = async ( + userId: number, + url: string, + collectionId: number, + name?: string, + description?: string, + tags?: string[] +) => { + await prisma.link.create({ + data: { + name: name || "", + url, + description, + collectionId, + tags: + tags && tags[0] + ? { + connectOrCreate: tags.map((tag: string) => { + return ( + { + where: { + name_ownerId: { + name: tag.trim(), + ownerId: userId, + }, + }, + create: { + name: tag.trim(), + owner: { + connect: { + id: userId, + }, + }, + }, + } || undefined + ); + }), + } + : undefined, + }, + }); +}; diff --git a/package.json b/package.json index 8e741fc..7199f79 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "eslint-config-next": "13.4.9", "formidable": "^3.5.1", "framer-motion": "^10.16.4", + "himalaya": "^1.1.0", "jimp": "^0.22.10", "jsdom": "^22.1.0", "lottie-web": "^5.12.2", diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 8cfa7b3..036f658 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -165,4 +165,3 @@ model AccessToken { createdAt DateTime @default(now()) updatedAt DateTime @default(now()) @updatedAt } - diff --git a/types/himalaya.d.ts b/types/himalaya.d.ts new file mode 100644 index 0000000..e2bd5e0 --- /dev/null +++ b/types/himalaya.d.ts @@ -0,0 +1,22 @@ +declare module "himalaya" { + export interface Attribute { + key: string; + value: string; + } + + export interface TextNode { + type: "text"; + content: string; + } + + export type Node = TextNode | Element; + + export interface Element { + type: "element"; + tagName: string; + attributes: Attribute[]; + children: Node[]; + } + + export function parse(html: string): Node[]; +} diff --git a/yarn.lock b/yarn.lock index 4ef5d2e..7f3b8b8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3713,6 +3713,11 @@ hexoid@^1.0.0: resolved "https://registry.yarnpkg.com/hexoid/-/hexoid-1.0.0.tgz#ad10c6573fb907de23d9ec63a711267d9dc9bc18" integrity sha512-QFLV0taWQOZtvIRIAdBChesmogZrtuXvVWsFHZTk2SU+anspqZ2vMnoLg7IE1+Uk16N19APic1BuF8bC8c2m5g== +himalaya@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/himalaya/-/himalaya-1.1.0.tgz#31724ae9d35714cd7c6f4be94888953f3604606a" + integrity sha512-LLase1dHCRMel68/HZTFft0N0wti0epHr3nNY7ynpLbyZpmrKMQ8YIpiOV77TM97cNpC8Wb2n6f66IRggwdWPw== + hoist-non-react-statics@^3.3.1: version "3.3.2" resolved "https://registry.yarnpkg.com/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz#ece0acaf71d62c2969c2ec59feff42a4b1a85b45"