From d62c9724b7f4cb728cd5b5496fdcc0eba8330772 Mon Sep 17 00:00:00 2001 From: MohamedBassem Date: Sat, 21 Sep 2024 16:56:42 +0000 Subject: [PATCH] feature(web): Preserve title, tags and createdAt when importing a netscape html. Fixes #401 --- .../dashboard/settings/ImportExport.tsx | 133 ++++++++++++++---- apps/web/lib/netscapeBookmarkParser.ts | 35 +++-- apps/web/package.json | 1 + docs/docs/10-import.md | 6 +- packages/shared/types/bookmarks.ts | 1 + packages/trpc/routers/bookmarks.ts | 1 + pnpm-lock.yaml | 57 ++++++++ 7 files changed, 189 insertions(+), 45 deletions(-) diff --git a/apps/web/components/dashboard/settings/ImportExport.tsx b/apps/web/components/dashboard/settings/ImportExport.tsx index 75de14ac..dcc3c8e8 100644 --- a/apps/web/components/dashboard/settings/ImportExport.tsx +++ b/apps/web/components/dashboard/settings/ImportExport.tsx @@ -1,14 +1,18 @@ "use client"; -import assert from "assert"; import { useRouter } from "next/navigation"; import FilePickerButton from "@/components/ui/file-picker-button"; import { toast } from "@/components/ui/use-toast"; import { parseNetscapeBookmarkFile } from "@/lib/netscapeBookmarkParser"; import { useMutation } from "@tanstack/react-query"; +import { TRPCClientError } from "@trpc/client"; import { Upload } from "lucide-react"; -import { useCreateBookmarkWithPostHook } from "@hoarder/shared-react/hooks/bookmarks"; +import { + useCreateBookmarkWithPostHook, + useUpdateBookmark, + useUpdateBookmarkTags, +} from "@hoarder/shared-react/hooks/bookmarks"; import { useAddBookmarkToList, useCreateBookmarkList, @@ -17,29 +21,112 @@ import { BookmarkTypes } from "@hoarder/shared/types/bookmarks"; export function Import() { const router = useRouter(); - const { mutateAsync: createBookmark } = useCreateBookmarkWithPostHook(); + const { mutateAsync: createBookmark } = useCreateBookmarkWithPostHook(); + const { mutateAsync: updateBookmark } = useUpdateBookmark(); const { mutateAsync: createList } = useCreateBookmarkList(); const { mutateAsync: addToList } = useAddBookmarkToList(); + const { mutateAsync: updateTags } = useUpdateBookmarkTags(); + + const { mutateAsync: parseAndCreateBookmark } = useMutation({ + mutationFn: async (toImport: { + bookmark: { + title: string; + url: string | undefined; + tags: string[]; + addDate?: number; + }; + listId: string; + }) => { + const bookmark = toImport.bookmark; + if (bookmark.url === undefined) { + throw new Error("URL is undefined"); + } + const url = new URL(bookmark.url); + const created = await createBookmark({ + type: BookmarkTypes.LINK, + url: url.toString(), + }); + + await Promise.all([ + // Update title and createdAt if they're set + bookmark.title.length > 0 || bookmark.addDate + ? updateBookmark({ + bookmarkId: created.id, + title: bookmark.title, + createdAt: bookmark.addDate + ? new Date(bookmark.addDate * 1000) + : undefined, + }) + : undefined, + + // Add to import list + addToList({ + bookmarkId: created.id, + listId: toImport.listId, + }).catch((e) => { + if ( + e instanceof TRPCClientError && + e.message.includes("already in the list") + ) { + /* empty */ + } else { + throw e; + } + }), + + // Update tags + updateTags({ + bookmarkId: created.id, + attach: bookmark.tags.map((t) => ({ tagName: t })), + detach: [], + }), + ]); + return created; + }, + }); const { mutateAsync: runUploadBookmarkFile } = useMutation({ mutationFn: async (file: File) => { return await parseNetscapeBookmarkFile(file); }, onSuccess: async (resp) => { - const results = await Promise.allSettled( - resp.map((url) => - createBookmark({ type: BookmarkTypes.LINK, url: url.toString() }), - ), - ); - - const failed = results.filter((r) => r.status == "rejected"); - const successes = results.filter( - (r) => r.status == "fulfilled" && !r.value.alreadyExists, - ); - const alreadyExisted = results.filter( - (r) => r.status == "fulfilled" && r.value.alreadyExists, - ); + const importList = await createList({ + name: `Imported Bookmarks`, + icon: "⬆️", + }); + + let done = 0; + const { id, update } = toast({ + description: `Processed 0 bookmarks of ${resp.length}`, + variant: "default", + }); + + const successes = []; + const failed = []; + const alreadyExisted = []; + // Do the imports one by one + for (const parsedBookmark of resp) { + try { + const result = await parseAndCreateBookmark({ + bookmark: parsedBookmark, + listId: importList.id, + }); + if (result.alreadyExists) { + alreadyExisted.push(parsedBookmark); + } else { + successes.push(parsedBookmark); + } + } catch (e) { + failed.push(parsedBookmark); + } + + update({ + id, + description: `Processed ${done + 1} bookmarks of ${resp.length}`, + }); + done++; + } if (successes.length > 0 || alreadyExisted.length > 0) { toast({ @@ -55,20 +142,6 @@ export function Import() { }); } - const importList = await createList({ - name: `Imported Bookmarks`, - icon: "⬆️", - }); - - if (successes.length > 0) { - await Promise.allSettled( - successes.map((r) => { - assert(r.status == "fulfilled"); - addToList({ bookmarkId: r.value.id, listId: importList.id }); - }), - ); - } - router.push(`/dashboard/lists/${importList.id}`); }, onError: (error) => { diff --git a/apps/web/lib/netscapeBookmarkParser.ts b/apps/web/lib/netscapeBookmarkParser.ts index ac5f3ec2..196c0525 100644 --- a/apps/web/lib/netscapeBookmarkParser.ts +++ b/apps/web/lib/netscapeBookmarkParser.ts @@ -1,20 +1,31 @@ -function extractUrls(html: string): string[] { - const regex = /]*?\s+)?href="(http[^"]*)"/gi; - let match; - const urls = []; - - while ((match = regex.exec(html)) !== null) { - urls.push(match[1]); - } - - return urls; -} +// Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9 +import * as cheerio from "cheerio"; export async function parseNetscapeBookmarkFile(file: File) { const textContent = await file.text(); + if (!textContent.startsWith("")) { throw Error("The uploaded html file does not seem to be a bookmark file"); } - return extractUrls(textContent).map((url) => new URL(url)); + const $ = cheerio.load(textContent); + + return $("a") + .map(function (_index, a) { + const $a = $(a); + const addDate = $a.attr("add_date"); + let tags: string[] = []; + try { + tags = $a.attr("tags")?.split(",") ?? []; + } catch (e) { + /* empty */ + } + return { + title: $a.text(), + url: $a.attr("href"), + tags: tags, + addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate), + }; + }) + .get(); } diff --git a/apps/web/package.json b/apps/web/package.json index 5542bb2a..491ad46d 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -43,6 +43,7 @@ "@trpc/react-query": "11.0.0-next-beta.308", "@trpc/server": "11.0.0-next-beta.308", "better-sqlite3": "^9.4.3", + "cheerio": "^1.0.0", "class-variance-authority": "^0.7.0", "clsx": "^2.1.0", "dayjs": "^1.11.10", diff --git a/docs/docs/10-import.md b/docs/docs/10-import.md index 14c59034..bf3a6829 100644 --- a/docs/docs/10-import.md +++ b/docs/docs/10-import.md @@ -2,9 +2,9 @@ ## Import using the WebUI -Hoarder supports importing bookmarks using the Netscape HTML Format. +Hoarder supports importing bookmarks using the Netscape HTML Format. Titles, tags and addition date will be preserved during the import. An automatically created list will contain all the imported bookmarks. -Simply open the WebUI of your Hoarder instance and drag and drop the bookmarks file into the UI. +To import the bookmark file, go to the settings and click "Import Bookmarks from HTML file". :::info All the URLs in the bookmarks file will be added automatically, you will not be able to pick and choose which bookmarks to import! @@ -42,4 +42,4 @@ cat all_links.txt | xargs -I{} hoarder --api-key --server-addr book - Click on the three dots on the top right corner and choose `Export bookmarks` - This will download an html file with all of your bookmarks. -You can use this file to import the bookmarks using the UI or CLI method described above \ No newline at end of file +You can use this file to import the bookmarks using the UI or CLI method described above diff --git a/packages/shared/types/bookmarks.ts b/packages/shared/types/bookmarks.ts index e0118fe7..beefbfb9 100644 --- a/packages/shared/types/bookmarks.ts +++ b/packages/shared/types/bookmarks.ts @@ -133,6 +133,7 @@ export const zUpdateBookmarksRequestSchema = z.object({ favourited: z.boolean().optional(), note: z.string().optional(), title: z.string().max(MAX_TITLE_LENGTH).nullish(), + createdAt: z.date().optional(), }); export type ZUpdateBookmarksRequest = z.infer< typeof zUpdateBookmarksRequestSchema diff --git a/packages/trpc/routers/bookmarks.ts b/packages/trpc/routers/bookmarks.ts index e685d5c2..eb189def 100644 --- a/packages/trpc/routers/bookmarks.ts +++ b/packages/trpc/routers/bookmarks.ts @@ -339,6 +339,7 @@ export const bookmarksAppRouter = router({ archived: input.archived, favourited: input.favourited, note: input.note, + createdAt: input.createdAt, }) .where( and( diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5c072a61..915e81c7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -541,6 +541,9 @@ importers: better-sqlite3: specifier: ^9.4.3 version: 9.4.3 + cheerio: + specifier: ^1.0.0 + version: 1.0.0 class-variance-authority: specifier: ^0.7.0 version: 0.7.0 @@ -5273,6 +5276,10 @@ packages: cheerio-select@2.1.0: resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==} + cheerio@1.0.0: + resolution: {integrity: sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==} + engines: {node: '>=18.17'} + cheerio@1.0.0-rc.12: resolution: {integrity: sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==} engines: {node: '>= 6'} @@ -6215,6 +6222,9 @@ packages: resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==} engines: {node: '>= 0.8'} + encoding-sniffer@0.2.0: + resolution: {integrity: sha512-ju7Wq1kg04I3HtiYIOrUrdfdDvkyO9s5XM8QAj/bN61Yo/Vb4vgJxy5vi4Yxk01gWHbrofpPtpxM8bKger9jhg==} + encoding@0.1.13: resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==} @@ -7448,6 +7458,9 @@ packages: htmlparser2@8.0.2: resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==} + htmlparser2@9.1.0: + resolution: {integrity: sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==} + http-cache-semantics@4.1.1: resolution: {integrity: sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==} @@ -9510,6 +9523,9 @@ packages: parse5-htmlparser2-tree-adapter@7.0.0: resolution: {integrity: sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==} + parse5-parser-stream@7.1.2: + resolution: {integrity: sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==} + parse5@7.1.2: resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==} @@ -11791,6 +11807,10 @@ packages: undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} + undici@6.19.8: + resolution: {integrity: sha512-U8uCCl2x9TK3WANvmBavymRzxbfFYG+tAu+fgx3zxQy3qdagQqBLwJVrdyO1TBfUXvfKveMKJZhpvUYoOjM+4g==} + engines: {node: '>=18.17'} + unicode-canonical-property-names-ecmascript@2.0.0: resolution: {integrity: sha512-yY5PpDlfVIU5+y/BSCxAJRBIS1Zc2dDG3Ujq+sR0U+JjUevW2JhocOF+soROYDSaAezOzOKuyyixhD6mBknSmQ==} engines: {node: '>=4'} @@ -19734,6 +19754,21 @@ snapshots: domhandler: 5.0.3 domutils: 3.1.0 + cheerio@1.0.0: + dependencies: + cheerio-select: 2.1.0 + dom-serializer: 2.0.0 + domhandler: 5.0.3 + domutils: 3.1.0 + encoding-sniffer: 0.2.0 + htmlparser2: 9.1.0 + parse5: 7.1.2 + parse5-htmlparser2-tree-adapter: 7.0.0 + parse5-parser-stream: 7.1.2 + undici: 6.19.8 + whatwg-mimetype: 4.0.0 + dev: false + cheerio@1.0.0-rc.12: dependencies: cheerio-select: 2.1.0 @@ -20798,6 +20833,12 @@ snapshots: encodeurl@1.0.2: dev: false + encoding-sniffer@0.2.0: + dependencies: + iconv-lite: 0.6.3 + whatwg-encoding: 3.1.1 + dev: false + encoding@0.1.13: dependencies: iconv-lite: 0.6.3 @@ -22746,6 +22787,14 @@ snapshots: domutils: 3.1.0 entities: 4.5.0 + htmlparser2@9.1.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.1.0 + entities: 4.5.0 + dev: false + http-cache-semantics@4.1.1: dev: false @@ -25678,6 +25727,11 @@ snapshots: domhandler: 5.0.3 parse5: 7.1.2 + parse5-parser-stream@7.1.2: + dependencies: + parse5: 7.1.2 + dev: false + parse5@7.1.2: dependencies: entities: 4.5.0 @@ -28618,6 +28672,9 @@ snapshots: undici-types@5.26.5: {} + undici@6.19.8: + dev: false + unicode-canonical-property-names-ecmascript@2.0.0: dev: false