Refactor App into TS modules; add tsc typecheck

This commit is contained in:
2026-02-07 20:30:56 +01:00
parent 11cfade6dc
commit 684a4dd0b8
22 changed files with 3254 additions and 1664 deletions
+186
View File
@@ -0,0 +1,186 @@
import JSZip from "jszip";
import type { BookMetadata, EpubParseResult } from "../types";
/** The attributes of one OPF manifest `<item>` that the parser needs. */
interface ManifestItem {
  /** Value of the `id` attribute, or "" when the item has none. */
  id: string;
  /** Value of the `href` attribute, exactly as written in the OPF. */
  href: string;
  /** Lower-cased `media-type` attribute, or "" when absent. */
  mediaType: string;
  /** Lower-cased, whitespace-separated tokens of the `properties` attribute. */
  properties: string[];
}

/** Matches one `name="value"` or `name='value'` pair inside a tag. */
const XML_TAG_ATTRIBUTE = /([^\s=<>\/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;

/** Matches a named, decimal or hexadecimal character reference. */
const ENTITY_PATTERN = /&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi;

// A Map (not an object literal) so that names such as "constructor" cannot
// resolve to inherited Object.prototype members.
const NAMED_ENTITIES = new Map<string, string>([
  ["nbsp", " "],
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["mdash", "\u2014"],
  ["ndash", "\u2013"],
  ["hellip", "\u2026"],
  ["lsquo", "\u2018"],
  ["rsquo", "\u2019"],
  ["ldquo", "\u201C"],
  ["rdquo", "\u201D"],
]);

/** Image MIME types keyed by lower-cased file extension. */
const IMAGE_MIME_BY_EXTENSION = new Map<string, string>([
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["png", "image/png"],
  ["gif", "image/gif"],
  ["webp", "image/webp"],
  ["svg", "image/svg+xml"],
]);

/** Collect a tag's attributes into a map keyed by lower-cased attribute name. */
function readTagAttributes(tag: string): Map<string, string> {
  const attributes = new Map<string, string>();
  for (const match of tag.matchAll(XML_TAG_ATTRIBUTE)) {
    const name = match[1];
    if (name) attributes.set(name.toLowerCase(), match[2] ?? match[3] ?? "");
  }
  return attributes;
}

/**
 * Return the attribute maps of every opening `<tagName …>` tag in `xml`, in
 * document order. An optional namespace prefix (e.g. `opf:item`) is accepted.
 * `tagName` must be a plain element name; it is not regex-escaped.
 */
function findTags(xml: string, tagName: string): Map<string, string>[] {
  // The lookahead keeps "item" from matching "itemref" and "meta" from
  // matching "metadata".
  const pattern = new RegExp(
    `<(?:[\\w.-]+:)?${tagName}(?=[\\s/>])[^>]*>`,
    "gi",
  );
  return [...xml.matchAll(pattern)].map((m) => readTagAttributes(m[0]));
}

/** Decode character references in a single pass so nothing is decoded twice. */
function decodeEntities(text: string): string {
  return text.replace(ENTITY_PATTERN, (entity: string, body: string) => {
    if (!body.startsWith("#")) return NAMED_ENTITIES.get(body) ?? entity;
    const isHex = body.charAt(1).toLowerCase() === "x";
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // fromCodePoint (unlike fromCharCode) handles characters above U+FFFF.
    return codePoint > 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : entity;
  });
}

/** Reduce an (X)HTML document to whitespace-normalised plain text. */
function htmlToText(markup: string): string {
  const withoutTags = markup
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, " ");
  // Entities are decoded only after tags are stripped, so an escaped "&lt;b&gt;"
  // survives as literal text instead of being removed as a tag.
  return decodeEntities(withoutTags).replace(/\s+/g, " ").trim();
}

/** Read the text of the first `<dc:title>` / `<dc:creator>` element, if any. */
function readDublinCoreText(
  opf: string,
  element: "title" | "creator",
): string | null {
  const match = opf.match(
    new RegExp(`<dc:${element}[^>]*>([^<]+)</dc:${element}>`, "i"),
  );
  const raw = match?.[1];
  return raw === undefined ? null : decodeEntities(raw).trim();
}

/** Parse every manifest `<item>` that has an `href`, in document order. */
function readManifest(opf: string): ManifestItem[] {
  const items: ManifestItem[] = [];
  for (const attrs of findTags(opf, "item")) {
    const href = attrs.get("href");
    if (!href) continue;
    items.push({
      id: attrs.get("id") ?? "",
      href,
      mediaType: (attrs.get("media-type") ?? "").toLowerCase(),
      properties: (attrs.get("properties") ?? "")
        .toLowerCase()
        .split(/\s+/)
        .filter(Boolean),
    });
  }
  return items;
}

/**
 * Pick the manifest item that represents the cover image, trying in order:
 *  1. an item whose `properties` include `cover-image`;
 *  2. the item referenced by `<meta name="cover" content="ID">`;
 *  3. an image item whose id contains "cover".
 * Each step falls through to the next when it finds nothing usable.
 */
function findCoverItem(
  opf: string,
  items: ManifestItem[],
): ManifestItem | null {
  const flagged = items.find((item) => item.properties.includes("cover-image"));
  if (flagged) return flagged;

  for (const meta of findTags(opf, "meta")) {
    if (meta.get("name")?.toLowerCase() !== "cover") continue;
    const coverId = meta.get("content")?.toLowerCase();
    if (!coverId) continue;
    // Plain string comparison: the id is never interpolated into a RegExp.
    const referenced = items.find((item) => item.id.toLowerCase() === coverId);
    // Skip a reference to a declared non-image (e.g. an XHTML cover page);
    // it could not be shown as an image data URL.
    if (
      referenced &&
      (referenced.mediaType === "" || referenced.mediaType.startsWith("image/"))
    ) {
      return referenced;
    }
  }

  return (
    items.find(
      (item) =>
        item.id.toLowerCase().includes("cover") &&
        item.mediaType.startsWith("image/"),
    ) ?? null
  );
}

/** Choose the MIME type for the cover: file extension, then manifest, then JPEG. */
function coverMimeType(item: ManifestItem): string {
  const extension = /\.([a-z0-9]+)(?:[?#].*)?$/i.exec(item.href)?.[1];
  const fromExtension = extension
    ? IMAGE_MIME_BY_EXTENSION.get(extension.toLowerCase())
    : undefined;
  if (fromExtension) return fromExtension;
  return item.mediaType.startsWith("image/") ? item.mediaType : "image/jpeg";
}

/**
 * List the zip entry names an OPF `href` may refer to, most literal first.
 * The first candidate is the href joined to `baseDir` verbatim; the second
 * (when different) drops any fragment/query, percent-decodes the href and
 * resolves "." and ".." segments.
 */
function candidateZipPaths(baseDir: string, href: string): string[] {
  const join = (path: string): string =>
    path.startsWith("/") ? path.slice(1) : baseDir + path;
  const candidates = [join(href)];

  const withoutSuffix = href.replace(/[?#].*$/, "");
  let decoded = withoutSuffix;
  try {
    decoded = decodeURIComponent(withoutSuffix);
  } catch {
    // Malformed percent-escape: keep the undecoded href as the best guess.
  }

  const segments: string[] = [];
  for (const segment of join(decoded).split("/")) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") segments.pop();
    else segments.push(segment);
  }
  const normalized = segments.join("/");
  if (normalized && !candidates.includes(normalized)) {
    candidates.push(normalized);
  }
  return candidates;
}

/**
 * Parse an EPUB file and extract its plain text and basic metadata.
 *
 * The archive is opened with JSZip, the OPF package document is located via
 * `META-INF/container.xml`, and title, author and cover are read from it. The
 * cover, when found, is embedded as a base64 `data:` URL. Text is taken from
 * the XHTML documents listed in the spine, in reading order; if the spine
 * yields nothing, every `.xhtml` / `.html` / `.htm` entry in the archive is
 * used instead.
 *
 * @param file - The EPUB file to read.
 * @returns The concatenated text of the book and its metadata.
 * @throws Error if `container.xml` is missing, it names no rootfile, or the
 *   OPF document cannot be read.
 */
export async function parseEpub(file: File): Promise<EpubParseResult> {
  const zip = await JSZip.loadAsync(file);

  // container.xml points at the OPF package document.
  const containerXml = await zip.file("META-INF/container.xml")?.async("text");
  if (!containerXml) throw new Error("Invalid EPUB: missing container.xml");

  const rootfileMatch = containerXml.match(/rootfile[^>]*full-path="([^"]+)"/);
  const opfPath = rootfileMatch?.[1];
  if (!opfPath) throw new Error("Invalid EPUB: cannot find rootfile");
  // Manifest hrefs are relative to the directory that holds the OPF.
  const opfDir = opfPath.substring(0, opfPath.lastIndexOf("/") + 1);

  const opfContent = await zip.file(opfPath)?.async("text");
  if (!opfContent) throw new Error("Invalid EPUB: cannot read OPF");

  const manifestItems = readManifest(opfContent);

  const metadata: BookMetadata = {
    title: readDublinCoreText(opfContent, "title"),
    author: readDublinCoreText(opfContent, "creator"),
    cover: null,
  };

  // Embed the cover as a data URL so it can be persisted with the book.
  const coverItem = findCoverItem(opfContent, manifestItems);
  if (coverItem) {
    const coverFile = candidateZipPaths(opfDir, coverItem.href)
      .map((path) => zip.file(path))
      .find(Boolean);
    if (coverFile) {
      const coverBase64 = await coverFile.async("base64");
      metadata.cover = `data:${coverMimeType(coverItem)};base64,${coverBase64}`;
    }
  }

  // Map manifest ids to hrefs for XHTML content documents only.
  const xhtmlHrefById = new Map<string, string>();
  for (const item of manifestItems) {
    if (item.id && item.mediaType === "application/xhtml+xml") {
      xhtmlHrefById.set(item.id, item.href);
    }
  }

  // Each entry holds the candidate zip paths of one content document.
  const contentCandidates: string[][] = [];
  for (const itemref of findTags(opfContent, "itemref")) {
    const href = xhtmlHrefById.get(itemref.get("idref") ?? "");
    if (href) contentCandidates.push(candidateZipPaths(opfDir, href));
  }

  // If spine parsing failed, fall back to every (X)HTML entry in the archive.
  // These names are already relative to the zip root, so they must not be
  // prefixed with opfDir.
  if (contentCandidates.length === 0) {
    for (const entryName of Object.keys(zip.files)) {
      if (
        entryName.endsWith(".xhtml") ||
        entryName.endsWith(".html") ||
        entryName.endsWith(".htm")
      ) {
        contentCandidates.push([entryName]);
      }
    }
  }

  // Promise.all keeps the results in reading order.
  const chapterTexts = await Promise.all(
    contentCandidates.map(async (paths) => {
      const entry = paths.map((path) => zip.file(path)).find(Boolean);
      return entry ? htmlToText(await entry.async("text")) : "";
    }),
  );

  return { text: chapterTexts.filter(Boolean).join(" "), metadata };
}
/**
 * Look a book up on the Open Library search API and return the top hit.
 *
 * The non-empty values of `title` and `author` are joined with a space and
 * sent as a single free-text query limited to one result.
 *
 * @param title - Book title to search for, or null if unknown.
 * @param author - Author name to search for, or null if unknown.
 * @returns Title, first author and medium-size cover URL of the first result
 *   (each null when the result lacks it), or null when both inputs are empty,
 *   the response is not OK, there are no results, or the request fails.
 *   Failures are logged with `console.error` rather than thrown.
 */
export async function fetchMetadataFromOpenLibrary(
  title: string | null,
  author: string | null,
): Promise<BookMetadata | null> {
  const searchTerms = [title, author].filter(Boolean);
  if (searchTerms.length === 0) {
    return null;
  }

  try {
    const endpoint = `https://openlibrary.org/search.json?q=${encodeURIComponent(searchTerms.join(" "))}&limit=1&fields=title,author_name,cover_i`;
    const res = await fetch(endpoint);
    if (!res.ok) {
      return null;
    }

    const payload: unknown = await res.json();
    const hits = (payload as { docs?: Array<Record<string, unknown>> }).docs;
    if (!hits || hits.length === 0) {
      return null;
    }

    // Only the first (best-ranked) hit is used.
    const topHit = hits[0] as {
      title?: string;
      author_name?: string[];
      cover_i?: number;
    };

    const foundTitle = topHit.title || null;
    const foundAuthor = topHit.author_name?.[0] || null;
    let coverUrl: string | null = null;
    if (topHit.cover_i) {
      coverUrl = `https://covers.openlibrary.org/b/id/${topHit.cover_i}-M.jpg`;
    }

    return { title: foundTitle, author: foundAuthor, cover: coverUrl };
  } catch (e) {
    console.error("Failed to fetch from Open Library:", e);
    return null;
  }
}