diff --git a/scripts/parse-facebook-cookies.ts b/scripts/parse-facebook-cookies.ts index 9800a36..cddb37f 100644 --- a/scripts/parse-facebook-cookies.ts +++ b/scripts/parse-facebook-cookies.ts @@ -12,131 +12,134 @@ * bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json */ -import { parseFacebookCookieString } from '../src/facebook'; +import { parseFacebookCookieString } from "../src/facebook"; interface Cookie { - name: string; - value: string; - domain: string; - path: string; - secure?: boolean; - httpOnly?: boolean; - sameSite?: "strict" | "lax" | "none" | "unspecified"; - expirationDate?: number; - storeId?: string; + name: string; + value: string; + domain: string; + path: string; + secure?: boolean; + httpOnly?: boolean; + sameSite?: "strict" | "lax" | "none" | "unspecified"; + expirationDate?: number; + storeId?: string; } function parseFacebookCookieStringCLI(cookieString: string): Cookie[] { - if (!cookieString || !cookieString.trim()) { - console.error('❌ Error: Empty or invalid cookie string provided'); - process.exit(1); - } + if (!cookieString || !cookieString.trim()) { + console.error("❌ Error: Empty or invalid cookie string provided"); + process.exit(1); + } - const cookies = parseFacebookCookieString(cookieString); + const cookies = parseFacebookCookieString(cookieString); - if (cookies.length === 0) { - console.error('❌ Error: No valid cookies found in input string'); - console.error('Expected format: "name1=value1; name2=value2;"'); - process.exit(1); - } + if (cookies.length === 0) { + console.error("❌ Error: No valid cookies found in input string"); + console.error('Expected format: "name1=value1; name2=value2;"'); + process.exit(1); + } - return cookies; + return cookies; } async function main() { - const args = process.argv.slice(2); + const args = process.argv.slice(2); - if (args.length === 0 && process.stdin.isTTY === false) { - // Read from stdin - let input = ''; - for await (const chunk of process.stdin) { - input += chunk; - } - input = input.trim(); + if (args.length === 0 && process.stdin.isTTY === false) { + // Read from stdin + let input = ""; + for await (const chunk of process.stdin) { + input += chunk; + } + input = input.trim(); - if (!input) { - console.error('❌ Error: No input provided via stdin'); - process.exit(1); - } + if (!input) { + console.error("❌ Error: No input provided via stdin"); + process.exit(1); + } - const cookies = parseFacebookCookieStringCLI(input); - await writeOutput(cookies, './cookies/facebook.json'); - return; - } + const cookies = parseFacebookCookieStringCLI(input); + await writeOutput(cookies, "./cookies/facebook.json"); + return; + } - let cookieString = ''; - let outputPath = './cookies/facebook.json'; - let inputPath = ''; + let cookieString = ""; + let outputPath = "./cookies/facebook.json"; + let inputPath = ""; - // Parse command line arguments - for (let i = 0; i < args.length; i++) { - const arg = args[i]; + // Parse command line arguments + for (let i = 0; i < args.length; i++) { + const arg = args[i]; - if (arg === '--input' || arg === '-i') { - inputPath = args[i + 1]; - i++; // Skip next arg - } else if (arg === '--output' || arg === '-o') { - outputPath = args[i + 1]; - i++; // Skip next arg - } else if (arg === '--help' || arg === '-h') { - showHelp(); - return; - } else if (!arg.startsWith('-')) { - // Assume this is the cookie string - cookieString = arg; - } else { - console.error(`❌ Unknown option: ${arg}`); - showHelp(); - process.exit(1); - } - } + if (arg === "--input" || arg === "-i") { + inputPath = args[i + 1]; + i++; // Skip next arg + } else if (arg === "--output" || arg === "-o") { + outputPath = args[i + 1]; + i++; // Skip next arg + } else if (arg === "--help" || arg === "-h") { + showHelp(); + return; + } else if (!arg.startsWith("-")) { + // Assume this is the cookie string + cookieString = arg; + } else { + console.error(`❌ Unknown option: ${arg}`); + showHelp(); + process.exit(1); + } + } - // Read from file if specified - if (inputPath) { - try { - const file = Bun.file(inputPath); - if (!(await file.exists())) { - console.error(`❌ Error: Input file not found: ${inputPath}`); - process.exit(1); - } - cookieString = await file.text(); - } catch (error) { - console.error(`❌ Error reading input file: ${error}`); - process.exit(1); - } - } + // Read from file if specified + if (inputPath) { + try { + const file = Bun.file(inputPath); + if (!(await file.exists())) { + console.error(`❌ Error: Input file not found: ${inputPath}`); + process.exit(1); + } + cookieString = await file.text(); + } catch (error) { + console.error(`❌ Error reading input file: ${error}`); + process.exit(1); + } + } - if (!cookieString.trim()) { - console.error('❌ Error: No cookie string provided'); - console.error('Provide cookie string as argument, --input file, or via stdin'); - showHelp(); - process.exit(1); - } + if (!cookieString.trim()) { + console.error("❌ Error: No cookie string provided"); + console.error( + "Provide cookie string as argument, --input file, or via stdin", + ); + showHelp(); + process.exit(1); + } - const cookies = parseFacebookCookieStringCLI(cookieString); - await writeOutput(cookies, outputPath); + const cookies = parseFacebookCookieStringCLI(cookieString); + await writeOutput(cookies, outputPath); } async function writeOutput(cookies: Cookie[], outputPath: string) { - try { - await Bun.write(outputPath, JSON.stringify(cookies, null, 2)); - console.log(`✅ Successfully parsed ${cookies.length} Facebook cookies`); - console.log(`📁 Saved to: ${outputPath}`); + try { + await Bun.write(outputPath, JSON.stringify(cookies, null, 2)); + console.log(`✅ Successfully parsed ${cookies.length} Facebook cookies`); + console.log(`📁 Saved to: ${outputPath}`); - // Show summary of parsed cookies - console.log('\n📋 Parsed cookies:'); - for (const cookie of cookies) { - console.log(` • ${cookie.name}: ${cookie.value.substring(0, 20)}${cookie.value.length > 20 ? '...' : ''}`); - } - - } catch (error) { - console.error(`❌ Error writing to output file: ${error}`); - process.exit(1); - } + // Show summary of parsed cookies + console.log("\n📋 Parsed cookies:"); + for (const cookie of cookies) { + console.log( + ` • ${cookie.name}: ${cookie.value.substring(0, 20)}${cookie.value.length > 20 ? "..." : ""}`, + ); + } + } catch (error) { + console.error(`❌ Error writing to output file: ${error}`); + process.exit(1); + } } function showHelp() { - console.log(` + console.log(` Facebook Cookie Parser CLI Parses Facebook cookie strings into JSON format for the marketplace scraper. @@ -173,8 +176,8 @@ OUTPUT: // Run the CLI if (import.meta.main) { - main().catch(error => { - console.error(`❌ Unexpected error: ${error}`); - process.exit(1); - }); -} \ No newline at end of file + main().catch((error) => { + console.error(`❌ Unexpected error: ${error}`); + process.exit(1); + }); +} diff --git a/src/ebay.ts b/src/ebay.ts index 0fc9320..9c9979e 100644 --- a/src/ebay.ts +++ b/src/ebay.ts @@ -1,97 +1,103 @@ +import cliProgress from "cli-progress"; /* eslint-disable @typescript-eslint/no-explicit-any */ import { parseHTML } from "linkedom"; -import cliProgress from "cli-progress"; // ----------------------------- Types ----------------------------- type HTMLString = string; type ListingDetails = { - url: string; - title: string; - description?: string; - listingPrice?: { - amountFormatted: string; - cents?: number; - currency?: string; - }; - listingType?: string; - listingStatus?: string; - creationDate?: string; - endDate?: string; - numberOfViews?: number; - address?: string | null; + url: string; + title: string; + description?: string; + listingPrice?: { + amountFormatted: string; + cents?: number; + currency?: string; + }; + listingType?: string; + listingStatus?: string; + creationDate?: string; + endDate?: string; + numberOfViews?: number; + address?: string | null; }; // ----------------------------- Utilities ----------------------------- function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null; + return typeof value === "object" && value !== null; } async function delay(ms: number): Promise { - await new Promise((resolve) => setTimeout(resolve, ms)); + await new Promise((resolve) => setTimeout(resolve, ms)); } /** * Turns cents to localized currency string. */ function formatCentsToCurrency( - num: number | string | undefined, - locale = "en-US", + num: number | string | undefined, + locale = "en-US", ): string { - if (num == null) return ""; - const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; - if (Number.isNaN(cents)) return ""; - const dollars = cents / 100; - const formatter = new Intl.NumberFormat(locale, { - minimumFractionDigits: 2, - maximumFractionDigits: 2, - useGrouping: true, - }); - return formatter.format(dollars); + if (num == null) return ""; + const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; + if (Number.isNaN(cents)) return ""; + const dollars = cents / 100; + const formatter = new Intl.NumberFormat(locale, { + minimumFractionDigits: 2, + maximumFractionDigits: 2, + useGrouping: true, + }); + return formatter.format(dollars); } /** * Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents */ -function parseEbayPrice(priceText: string): { cents: number; currency: string } | null { - if (!priceText || typeof priceText !== 'string') return null; +function parseEbayPrice( + priceText: string, +): { cents: number; currency: string } | null { + if (!priceText || typeof priceText !== "string") return null; - // Clean up the price text and extract currency and amount - const cleaned = priceText.trim(); + // Clean up the price text and extract currency and amount + const cleaned = priceText.trim(); - // Find all numbers in the string (including decimals) - const numberMatches = cleaned.match(/[\d,]+\.?\d*/); - if (!numberMatches) return null; + // Find all numbers in the string (including decimals) + const numberMatches = cleaned.match(/[\d,]+\.?\d*/); + if (!numberMatches) return null; - const amountStr = numberMatches[0].replace(/,/g, ''); - const dollars = parseFloat(amountStr); - if (isNaN(dollars)) return null; + const amountStr = numberMatches[0].replace(/,/g, ""); + const dollars = Number.parseFloat(amountStr); + if (Number.isNaN(dollars)) return null; - const cents = Math.round(dollars * 100); + const cents = Math.round(dollars * 100); - // Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc. - let currency = 'USD'; // Default + // Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc. + let currency = "USD"; // Default - if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) { - currency = 'CAD'; - } else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) { - currency = 'USD'; - } + if ( + cleaned.toUpperCase().includes("CAD") || + cleaned.includes("CA$") || + cleaned.includes("C $") + ) { + currency = "CAD"; + } else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) { + currency = "USD"; + } - return { cents, currency }; + return { cents, currency }; } class HttpError extends Error { - constructor( - message: string, - public readonly status: number, - public readonly url: string, - ) { - super(message); - this.name = "HttpError"; - } + constructor( + message: string, + public readonly status: number, + public readonly url: string, + ) { + super(message); + this.name = "HttpError"; + } } // ----------------------------- HTTP Client ----------------------------- @@ -102,69 +108,71 @@ class HttpError extends Error { - Respects X-RateLimit-Reset when present (seconds) */ async function fetchHtml( - url: string, - DELAY_MS: number, - opts?: { - maxRetries?: number; - retryBaseMs?: number; - onRateInfo?: (remaining: string | null, reset: string | null) => void; - }, + url: string, + DELAY_MS: number, + opts?: { + maxRetries?: number; + retryBaseMs?: number; + onRateInfo?: (remaining: string | null, reset: string | null) => void; + }, ): Promise { - const maxRetries = opts?.maxRetries ?? 3; - const retryBaseMs = opts?.retryBaseMs ?? 500; + const maxRetries = opts?.maxRetries ?? 3; + const retryBaseMs = opts?.retryBaseMs ?? 500; - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const res = await fetch(url, { - method: "GET", - headers: { - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-CA,en-US;q=0.9,en;q=0.8", - "cache-control": "no-cache", - "upgrade-insecure-requests": "1", - "user-agent": - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", - }, - }); + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const res = await fetch(url, { + method: "GET", + headers: { + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-CA,en-US;q=0.9,en;q=0.8", + "cache-control": "no-cache", + "upgrade-insecure-requests": "1", + "user-agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", + }, + }); - const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); - const rateLimitReset = res.headers.get("X-RateLimit-Reset"); - opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); + const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); + const rateLimitReset = res.headers.get("X-RateLimit-Reset"); + opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); - if (!res.ok) { - // Respect 429 reset if provided - if (res.status === 429) { - const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN; - const waitMs = Number.isFinite(resetSeconds) - ? Math.max(0, resetSeconds * 1000) - : (attempt + 1) * retryBaseMs; - await delay(waitMs); - continue; - } - // Retry on 5xx - if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { - await delay((attempt + 1) * retryBaseMs); - continue; - } - throw new HttpError( - `Request failed with status ${res.status}`, - res.status, - url, - ); - } + if (!res.ok) { + // Respect 429 reset if provided + if (res.status === 429) { + const resetSeconds = rateLimitReset + ? Number(rateLimitReset) + : Number.NaN; + const waitMs = Number.isFinite(resetSeconds) + ? Math.max(0, resetSeconds * 1000) + : (attempt + 1) * retryBaseMs; + await delay(waitMs); + continue; + } + // Retry on 5xx + if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { + await delay((attempt + 1) * retryBaseMs); + continue; + } + throw new HttpError( + `Request failed with status ${res.status}`, + res.status, + url, + ); + } - const html = await res.text(); - // Respect per-request delay to keep at or under REQUESTS_PER_SECOND - await delay(DELAY_MS); - return html; - } catch (err) { - if (attempt >= maxRetries) throw err; - await delay((attempt + 1) * retryBaseMs); - } - } + const html = await res.text(); + // Respect per-request delay to keep at or under REQUESTS_PER_SECOND + await delay(DELAY_MS); + return html; + } catch (err) { + if (attempt >= maxRetries) throw err; + await delay((attempt + 1) * retryBaseMs); + } + } - throw new Error("Exhausted retries without response"); + throw new Error("Exhausted retries without response"); } // ----------------------------- Parsing ----------------------------- @@ -173,273 +181,321 @@ async function fetchHtml( Parse eBay search page HTML and extract listings using DOM selectors */ function parseEbayListings( - htmlString: HTMLString, - keywords: string[], - exclusions: string[], - strictMode: boolean + htmlString: HTMLString, + keywords: string[], + exclusions: string[], + strictMode: boolean, ): ListingDetails[] { - const { document } = parseHTML(htmlString); - const results: ListingDetails[] = []; + const { document } = parseHTML(htmlString); + const results: ListingDetails[] = []; - // Find all listing links by looking for eBay item URLs (/itm/) - const linkElements = document.querySelectorAll('a[href*="itm/"]'); + // Find all listing links by looking for eBay item URLs (/itm/) + const linkElements = document.querySelectorAll('a[href*="itm/"]'); + for (const linkElement of linkElements) { + try { + // Get href attribute + let href = linkElement.getAttribute("href"); + if (!href) continue; - for (const linkElement of linkElements) { - try { - // Get href attribute - let href = linkElement.getAttribute('href'); - if (!href) continue; + // Make href absolute + if (!href.startsWith("http")) { + href = href.startsWith("//") + ? `https:${href}` + : `https://www.ebay.com${href}`; + } - // Make href absolute - if (!href.startsWith('http')) { - href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`; - } + // Find the container - go up several levels to find the item container + // Modern eBay uses complex nested structures + let container = linkElement.parentElement?.parentElement?.parentElement; + if (!container) { + // Try a different level + container = linkElement.parentElement?.parentElement; + } + if (!container) continue; - // Find the container - go up several levels to find the item container - // Modern eBay uses complex nested structures - let container = linkElement.parentElement?.parentElement?.parentElement; - if (!container) { - // Try a different level - container = linkElement.parentElement?.parentElement; - } - if (!container) continue; + // Extract title - look for heading or title-related elements near the link + // Modern eBay often uses h3, span, or div with text content near the link + let titleElement = container.querySelector( + 'h3, [role="heading"], .s-item__title span', + ); - // Extract title - look for heading or title-related elements near the link - // Modern eBay often uses h3, span, or div with text content near the link - let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span'); + // If no direct title element, try finding text content around the link + if (!titleElement) { + // Look for spans or divs with text near this link + const nearbySpans = container.querySelectorAll("span, div"); + for (const span of nearbySpans) { + const text = span.textContent?.trim(); + if ( + text && + text.length > 10 && + text.length < 200 && + !text.includes("$") && + !text.includes("item") + ) { + titleElement = span; + break; + } + } + } - // If no direct title element, try finding text content around the link - if (!titleElement) { - // Look for spans or divs with text near this link - const nearbySpans = container.querySelectorAll('span, div'); - for (const span of nearbySpans) { - const text = span.textContent?.trim(); - if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) { - titleElement = span; - break; - } - } - } + let title = titleElement?.textContent?.trim(); - let title = titleElement?.textContent?.trim(); + // Clean up eBay UI strings that get included in titles + if (title) { + // Remove common eBay UI strings that appear at the end of titles + const uiStrings = [ + "Opens in a new window", + "Opens in a new tab", + "Opens in a new window or tab", + "opens in a new window", + "opens in a new tab", + "opens in a new window or tab", + ]; - // Clean up eBay UI strings that get included in titles - if (title) { - // Remove common eBay UI strings that appear at the end of titles - const uiStrings = [ - 'Opens in a new window', - 'Opens in a new tab', - 'Opens in a new window or tab', - 'opens in a new window', - 'opens in a new tab', - 'opens in a new window or tab' - ]; + for (const uiString of uiStrings) { + const uiIndex = title.indexOf(uiString); + if (uiIndex !== -1) { + title = title.substring(0, uiIndex).trim(); + break; // Only remove one UI string per title + } + } - for (const uiString of uiStrings) { - const uiIndex = title.indexOf(uiString); - if (uiIndex !== -1) { - title = title.substring(0, uiIndex).trim(); - break; // Only remove one UI string per title - } - } + // If the title became empty or too short after cleaning, skip this item + if (title.length < 10) { + continue; + } + } - // If the title became empty or too short after cleaning, skip this item - if (title.length < 10) { - continue; - } - } + if (!title) continue; - if (!title) continue; + // Skip irrelevant eBay ads + if (title === "Shop on eBay" || title.length < 3) continue; - // Skip irrelevant eBay ads - if (title === "Shop on eBay" || title.length < 3) continue; + // Extract price - look for eBay's price classes, preferring sale/discount prices + let priceElement = container.querySelector( + '[class*="s-item__price"], .s-item__price, [class*="price"]', + ); - // Extract price - look for eBay's price classes, preferring sale/discount prices - let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]'); + // If no direct price class, look for spans containing $ (but not titles) + if (!priceElement) { + const spansAndElements = container.querySelectorAll( + "span, div, b, em, strong", + ); + for (const el of spansAndElements) { + const text = el.textContent?.trim(); + // Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words + if ( + text?.includes("$") && + text.length < 100 && + !text.includes("laptop") && + !text.includes("computer") && + !text.includes("intel") && + !text.includes("core") && + !text.includes("ram") && + !text.includes("ssd") && + !/\d{4}/.test(text) && // Avoid years like "2024" + !text.includes('"') // Avoid measurements + ) { + priceElement = el; + break; + } + } + } - // If no direct price class, look for spans containing $ (but not titles) - if (!priceElement) { - const spansAndElements = container.querySelectorAll('span, div, b, em, strong'); - for (const el of spansAndElements) { - const text = el.textContent?.trim(); - // Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words - if (text && text.includes('$') && text.length < 100 && - !text.includes('laptop') && !text.includes('computer') && !text.includes('intel') && - !text.includes('core') && !text.includes('ram') && !text.includes('ssd') && - ! /\d{4}/.test(text) && // Avoid years like "2024" - !text.includes('"') // Avoid measurements - ) { - priceElement = el; - break; - } - } - } + // For discounted items, eBay shows both original and sale price + // Prefer sale/current price over original/strikethrough price + if (priceElement) { + // Check if this element or its parent contains multiple price elements + const priceContainer = + priceElement.closest('[class*="s-item__price"]') || + priceElement.parentElement; - // For discounted items, eBay shows both original and sale price - // Prefer sale/current price over original/strikethrough price - if (priceElement) { - // Check if this element or its parent contains multiple price elements - const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement; + if (priceContainer) { + // Look for all price elements within this container, including strikethrough prices + const allPriceElements = priceContainer.querySelectorAll( + '[class*="s-item__price"], span, b, em, strong, s, del, strike', + ); - if (priceContainer) { - // Look for all price elements within this container, including strikethrough prices - const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike'); + // Filter to only elements that actually contain prices (not labels) + const actualPrices: HTMLElement[] = []; + for (const el of allPriceElements) { + const text = el.textContent?.trim(); + if ( + text && + /^\s*[\$£€¥]/u.test(text) && + text.length < 50 && + !/\d{4}/.test(text) + ) { + actualPrices.push(el); + } + } - // Filter to only elements that actually contain prices (not labels) - const actualPrices: HTMLElement[] = []; - for (const el of allPriceElements) { - const text = el.textContent?.trim(); - if (text && /^\s*[\$£€¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) { - actualPrices.push(el); - } - } + // Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices) + if (actualPrices.length > 1) { + // First, look for prices that are NOT struck through + const nonStrikethroughPrices = actualPrices.filter((el) => { + const tagName = el.tagName.toLowerCase(); + const styles = + el.classList.contains("s-strikethrough") || + el.classList.contains("u-flStrike") || + el.closest("s, del, strike"); + return ( + tagName !== "s" && + tagName !== "del" && + tagName !== "strike" && + !styles + ); + }); - // Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices) - if (actualPrices.length > 1) { - // First, look for prices that are NOT struck through - const nonStrikethroughPrices = actualPrices.filter(el => { - const tagName = el.tagName.toLowerCase(); - const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') || - el.closest('s, del, strike'); - return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles; - }); + if (nonStrikethroughPrices.length > 0) { + // Use the first non-strikethrough price (sale price) + priceElement = nonStrikethroughPrices[0]; + } else { + // Fallback: use the last price (likely the most current) + const lastPrice = actualPrices[actualPrices.length - 1]; + priceElement = lastPrice; + } + } + } + } - if (nonStrikethroughPrices.length > 0) { - // Use the first non-strikethrough price (sale price) - priceElement = nonStrikethroughPrices[0]; - } else { - // Fallback: use the last price (likely the most current) - const lastPrice = actualPrices[actualPrices.length - 1]; - priceElement = lastPrice; - } - } - } - } + const priceText = priceElement?.textContent?.trim(); - let priceText = priceElement?.textContent?.trim(); + if (!priceText) continue; - if (!priceText) continue; + // Parse price into cents and currency + const priceInfo = parseEbayPrice(priceText); + if (!priceInfo) continue; - // Parse price into cents and currency - const priceInfo = parseEbayPrice(priceText); - if (!priceInfo) continue; + // Apply exclusion filters + if ( + exclusions.some((exclusion) => + title.toLowerCase().includes(exclusion.toLowerCase()), + ) + ) { + continue; + } - // Apply exclusion filters - if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) { - continue; - } + // Apply strict mode filter (title must contain at least one keyword) + if ( + strictMode && + !keywords.some((keyword) => + title?.toLowerCase().includes(keyword.toLowerCase()), + ) + ) { + continue; + } - // Apply strict mode filter (title must contain at least one keyword) - if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) { - continue; - } + const listing: ListingDetails = { + url: href, + title, + listingPrice: { + amountFormatted: priceText, + cents: priceInfo.cents, + currency: priceInfo.currency, + }, + listingType: "OFFER", // eBay listings are typically offers + listingStatus: "ACTIVE", + address: null, // eBay doesn't typically show detailed addresses in search results + }; - const listing: ListingDetails = { - url: href, - title, - listingPrice: { - amountFormatted: priceText, - cents: priceInfo.cents, - currency: priceInfo.currency, - }, - listingType: "OFFER", // eBay listings are typically offers - listingStatus: "ACTIVE", - address: null, // eBay doesn't typically show detailed addresses in search results - }; + results.push(listing); + } catch (err) { + console.warn(`Error parsing eBay listing: ${err}`); + } + } - results.push(listing); - } catch (err) { - console.warn(`Error parsing eBay listing: ${err}`); - continue; - } - } - - return results; + return results; } // ----------------------------- Main ----------------------------- export default async function fetchEbayItems( - SEARCH_QUERY: string, - REQUESTS_PER_SECOND = 1, - opts: { - minPrice?: number; - maxPrice?: number; - strictMode?: boolean; - exclusions?: string[]; - keywords?: string[]; - } = {}, + SEARCH_QUERY: string, + REQUESTS_PER_SECOND = 1, + opts: { + minPrice?: number; + maxPrice?: number; + strictMode?: boolean; + exclusions?: string[]; + keywords?: string[]; + } = {}, ) { - const { - minPrice = 0, - maxPrice = Number.MAX_SAFE_INTEGER, - strictMode = false, - exclusions = [], - keywords = [SEARCH_QUERY] // Default to search query if no keywords provided - } = opts; + const { + minPrice = 0, + maxPrice = Number.MAX_SAFE_INTEGER, + strictMode = false, + exclusions = [], + keywords = [SEARCH_QUERY], // Default to search query if no keywords provided + } = opts; - // Build eBay search URL - use Canadian site and tracking parameters like real browser - const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}^&_sacat=0^&_from=R40^&_trksid=p4432023.m570.l1313`; + // Build eBay search URL - use Canadian site and tracking parameters like real browser + const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}^&_sacat=0^&_from=R40^&_trksid=p4432023.m570.l1313`; - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); - console.log(`Fetching eBay search: ${searchUrl}`); + console.log(`Fetching eBay search: ${searchUrl}`); - try { - // Use custom headers modeled after real browser requests to bypass bot detection - const headers: Record = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Accept-Encoding': 'gzip, deflate, br', - 'Referer': 'https://www.ebay.ca/', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'same-origin', - 'Sec-Fetch-User': '?1', - 'Priority': 'u=0, i' - }; + try { + // Use custom headers modeled after real browser requests to bypass bot detection + const headers: Record = { + "User-Agent": + "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0", + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + Referer: "https://www.ebay.ca/", + Connection: "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "same-origin", + "Sec-Fetch-User": "?1", + Priority: "u=0, i", + }; - const res = await fetch(searchUrl, { - method: "GET", - headers, - }); + const res = await fetch(searchUrl, { + method: "GET", + headers, + }); - if (!res.ok) { - throw new HttpError( - `Request failed with status ${res.status}`, - res.status, - searchUrl, - ); - } + if (!res.ok) { + throw new HttpError( + `Request failed with status ${res.status}`, + res.status, + searchUrl, + ); + } - const searchHtml = await res.text(); - // Respect per-request delay to keep at or under REQUESTS_PER_SECOND - await delay(DELAY_MS); + const searchHtml = await res.text(); + // Respect per-request delay to keep at or under REQUESTS_PER_SECOND + await delay(DELAY_MS); - console.log(`\nParsing eBay listings...`); + console.log("\nParsing eBay listings..."); - const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode); + const listings = parseEbayListings( + searchHtml, + keywords, + exclusions, + strictMode, + ); - // Filter by price range (additional safety check) - const filteredListings = listings.filter(listing => { - const cents = listing.listingPrice?.cents; - return cents && cents >= minPrice && cents <= maxPrice; - }); + // Filter by price range (additional safety check) + const filteredListings = listings.filter((listing) => { + const cents = listing.listingPrice?.cents; + return cents && cents >= minPrice && cents <= maxPrice; + }); - console.log(`Parsed ${filteredListings.length} eBay listings.`); - return filteredListings; - - } catch (err) { - if (err instanceof HttpError) { - console.error( - `Failed to fetch eBay search (${err.status}): ${err.message}`, - ); - return []; - } - throw err; - } -} \ No newline at end of file + console.log(`Parsed ${filteredListings.length} eBay listings.`); + return filteredListings; + } catch (err) { + if (err instanceof HttpError) { + console.error( + `Failed to fetch eBay search (${err.status}): ${err.message}`, + ); + return []; + } + throw err; + } +} diff --git a/src/facebook.ts b/src/facebook.ts index ffe1e28..11a7058 100644 --- a/src/facebook.ts +++ b/src/facebook.ts @@ -1,6 +1,6 @@ +import cliProgress from "cli-progress"; /* eslint-disable @typescript-eslint/no-explicit-any */ import { parseHTML } from "linkedom"; -import cliProgress from "cli-progress"; /** * Facebook Marketplace Scraper @@ -15,367 +15,370 @@ import cliProgress from "cli-progress"; type HTMLString = string; interface Cookie { - name: string; - value: string; - domain: string; - path: string; - secure?: boolean; - httpOnly?: boolean; - sameSite?: "strict" | "lax" | "none" | "unspecified"; - session?: boolean; - expirationDate?: number; - partitionKey?: Record; - storeId?: string; + name: string; + value: string; + domain: string; + path: string; + secure?: boolean; + httpOnly?: boolean; + sameSite?: "strict" | "lax" | "none" | "unspecified"; + session?: boolean; + expirationDate?: number; + partitionKey?: Record; + storeId?: string; } interface FacebookAdNode { - node: { - listing: { - id: string; - marketplace_listing_title?: string; - listing_price?: { - amount?: string | number; - currency?: string; - }; - location?: { - reverse_geocode?: { - city_page?: { - display_name?: string; - }; - }; - }; - creation_time?: number; - [k: string]: unknown; - }; - [k: string]: unknown; - }; + node: { + listing: { + id: string; + marketplace_listing_title?: string; + listing_price?: { + amount?: string | number; + currency?: string; + }; + location?: { + reverse_geocode?: { + city_page?: { + display_name?: string; + }; + }; + }; + creation_time?: number; + [k: string]: unknown; + }; + [k: string]: unknown; + }; } interface FacebookEdge { - node: FacebookAdNode["node"]; - [k: string]: unknown; + node: FacebookAdNode["node"]; + [k: string]: unknown; } interface FacebookMarketplaceSearch { - feed_units?: { - edges?: FacebookEdge[]; - }; - [k: string]: unknown; + feed_units?: { + edges?: FacebookEdge[]; + }; + [k: string]: unknown; } interface FacebookRequireData { - require?: [number, number, number, FacebookMarketplaceSearch, number][]; - [k: string]: unknown; + require?: [number, number, number, FacebookMarketplaceSearch, number][]; + [k: string]: unknown; } interface FacebookMarketplaceItem { - // Basic identification - id: string; - __typename: "GroupCommerceProductItem"; + // Basic identification + id: string; + __typename: "GroupCommerceProductItem"; - // Listing content - marketplace_listing_title: string; - redacted_description?: { - text: string; - }; - custom_title?: string; + // Listing content + marketplace_listing_title: string; + redacted_description?: { + text: string; + }; + custom_title?: string; - // Pricing - formatted_price?: { - text: string; - }; - listing_price?: { - amount: string; - currency: string; - amount_with_offset: string; - }; + // Pricing + formatted_price?: { + text: string; + }; + listing_price?: { + amount: string; + currency: string; + amount_with_offset: string; + }; - // Location - location_text?: { - text: string; - }; - location?: { - latitude: number; - longitude: number; - reverse_geocode_detailed?: { - country_alpha_two: string; - postal_code_trimmed: string; - }; - }; + // Location + location_text?: { + text: string; + }; + location?: { + latitude: number; + longitude: number; + reverse_geocode_detailed?: { + country_alpha_two: string; + postal_code_trimmed: string; + }; + }; - // Status flags - is_live?: boolean; - is_sold?: boolean; - is_pending?: boolean; - is_hidden?: boolean; - is_draft?: boolean; + // Status flags + is_live?: boolean; + is_sold?: boolean; + is_pending?: boolean; + is_hidden?: boolean; + is_draft?: boolean; - // Timing - creation_time?: number; + // Timing + creation_time?: number; - // Seller information - marketplace_listing_seller?: { - __typename: "User"; - id: string; - name: string; - profile_picture?: { - uri: string; - }; - join_time?: number; - }; + // Seller information + marketplace_listing_seller?: { + __typename: "User"; + id: string; + name: string; + profile_picture?: { + uri: string; + }; + join_time?: number; + }; - // Vehicle-specific fields (for automotive listings) - vehicle_make_display_name?: string; - vehicle_model_display_name?: string; - vehicle_odometer_data?: { - unit: "KILOMETERS" | "MILES"; - value: number; - }; - vehicle_transmission_type?: "AUTOMATIC" | "MANUAL"; - vehicle_exterior_color?: string; - vehicle_interior_color?: string; - vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR"; - vehicle_fuel_type?: string; - vehicle_trim_display_name?: string; + // Vehicle-specific fields (for automotive listings) + vehicle_make_display_name?: string; + vehicle_model_display_name?: string; + vehicle_odometer_data?: { + unit: "KILOMETERS" | "MILES"; + value: number; + }; + vehicle_transmission_type?: "AUTOMATIC" | "MANUAL"; + vehicle_exterior_color?: string; + vehicle_interior_color?: string; + vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR"; + vehicle_fuel_type?: string; + vehicle_trim_display_name?: string; - // Category and commerce - marketplace_listing_category_id?: string; - condition?: string; + // Category and commerce + marketplace_listing_category_id?: string; + condition?: string; - // Commerce features - delivery_types?: string[]; - is_shipping_offered?: boolean; - is_buy_now_enabled?: boolean; - can_buyer_make_checkout_offer?: boolean; + // Commerce features + delivery_types?: string[]; + is_shipping_offered?: boolean; + is_buy_now_enabled?: boolean; + can_buyer_make_checkout_offer?: boolean; - // Communication - messaging_enabled?: boolean; - first_message_suggested_value?: string; + // Communication + messaging_enabled?: boolean; + first_message_suggested_value?: string; - // Metadata - logging_id?: string; - reportable_ent_id?: string; + // Metadata + logging_id?: string; + reportable_ent_id?: string; - // Related listings (for part-out sellers) - marketplace_listing_sets?: { - edges: Array<{ - node: { - canonical_listing: { - id: string; - marketplace_listing_title: string; - is_live: boolean; - is_sold: boolean; - formatted_price: { text: string }; - }; - }; - }>; - }; + // Related listings (for part-out sellers) + marketplace_listing_sets?: { + edges: Array<{ + node: { + canonical_listing: { + id: string; + marketplace_listing_title: string; + is_live: boolean; + is_sold: boolean; + formatted_price: { text: string }; + }; + }; + }>; + }; - [k: string]: unknown; + [k: string]: unknown; } type ListingDetails = { - url: string; - title: string; - description?: string; - listingPrice?: { - amountFormatted: string; - cents?: number; - currency?: string; - }; - listingType?: string; - listingStatus?: string; - creationDate?: string; - endDate?: string; - numberOfViews?: number; - address?: string | null; - // Facebook-specific fields - imageUrl?: string; - videoUrl?: string; - seller?: { - name?: string; - id?: string; - }; - categoryId?: string; - deliveryTypes?: string[]; + url: string; + title: string; + description?: string; + listingPrice?: { + amountFormatted: string; + cents?: number; + currency?: string; + }; + listingType?: string; + listingStatus?: string; + creationDate?: string; + endDate?: string; + numberOfViews?: number; + address?: string | null; + // Facebook-specific fields + imageUrl?: string; + videoUrl?: string; + seller?: { + name?: string; + id?: string; + }; + categoryId?: string; + deliveryTypes?: string[]; }; // ----------------------------- Utilities ----------------------------- function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null; + return typeof value === "object" && value !== null; } async function delay(ms: number): Promise { - await new Promise((resolve) => setTimeout(resolve, ms)); + await new Promise((resolve) => setTimeout(resolve, ms)); } /** * Load Facebook cookies from file or string */ -async function loadFacebookCookies(cookiesSource?: string, cookiePath = './cookies/facebook.json'): Promise { - // First try to load from provided string parameter - if (cookiesSource) { - try { - const cookies = JSON.parse(cookiesSource); - if (Array.isArray(cookies)) { - return cookies.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string", - ); - } - } catch (e) { - throw new Error(`Invalid cookies JSON provided: ${e}`); - } - } +async function loadFacebookCookies( + cookiesSource?: string, + cookiePath = "./cookies/facebook.json", +): Promise { + // First try to load from provided string parameter + if (cookiesSource) { + try { + const cookies = JSON.parse(cookiesSource); + if (Array.isArray(cookies)) { + return cookies.filter( + (cookie): cookie is Cookie => + cookie && + typeof cookie.name === "string" && + typeof cookie.value === "string", + ); + } + } catch (e) { + throw new Error(`Invalid cookies JSON provided: ${e}`); + } + } - // Try to load from specified path - try { - const cookiesPath = cookiePath; - const file = Bun.file(cookiesPath); - if (await file.exists()) { - const content = await file.text(); - const cookies = JSON.parse(content); - if (Array.isArray(cookies)) { - return cookies.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string", - ); - } - } - } catch (e) { - console.warn(`Could not load cookies from ./cookies/facebook.json: ${e}`); - } + // Try to load from specified path + try { + const cookiesPath = cookiePath; + const file = Bun.file(cookiesPath); + if (await file.exists()) { + const content = await file.text(); + const cookies = JSON.parse(content); + if (Array.isArray(cookies)) { + return cookies.filter( + (cookie): cookie is Cookie => + cookie && + typeof cookie.name === "string" && + typeof cookie.value === "string", + ); + } + } + } catch (e) { + console.warn(`Could not load cookies from ./cookies/facebook.json: ${e}`); + } - return []; + return []; } /** * Parse Facebook cookie string into Cookie array format */ function parseFacebookCookieString(cookieString: string): Cookie[] { - if (!cookieString || !cookieString.trim()) { - return []; - } + if (!cookieString || !cookieString.trim()) { + return []; + } - return cookieString - .split(';') - .map(pair => pair.trim()) - .filter(pair => pair.includes('=')) - .map(pair => { - const [name, value] = pair.split('=', 2); - const trimmedName = name.trim(); - const trimmedValue = value.trim(); + return cookieString + .split(";") + .map((pair) => pair.trim()) + .filter((pair) => pair.includes("=")) + .map((pair) => { + const [name, value] = pair.split("=", 2); + const trimmedName = name.trim(); + const trimmedValue = value.trim(); - // Skip empty names or values - if (!trimmedName || !trimmedValue) { - return null; - } + // Skip empty names or values + if (!trimmedName || !trimmedValue) { + return null; + } - return { - name: trimmedName, - value: decodeURIComponent(trimmedValue), - domain: '.facebook.com', - path: '/', - secure: true, - httpOnly: false, - sameSite: 'lax' as const, - expirationDate: undefined, // Session cookies - }; - }) - .filter((cookie): cookie is Cookie => cookie !== null); + return { + name: trimmedName, + value: decodeURIComponent(trimmedValue), + domain: ".facebook.com", + path: "/", + secure: true, + httpOnly: false, + sameSite: "lax" as const, + expirationDate: undefined, // Session cookies + }; + }) + .filter((cookie): cookie is Cookie => cookie !== null); } /** * Ensure Facebook cookies are available, parsing from env var if needed */ -async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Promise { +async function ensureFacebookCookies( + cookiePath = "./cookies/facebook.json", +): Promise { + // First try to load existing cookies + try { + const existing = await loadFacebookCookies(undefined, cookiePath); + if (existing.length > 0) { + return existing; + } + } catch (error) { + // File doesn't exist or is invalid, continue to check env var + } - // First try to load existing cookies - try { - const existing = await loadFacebookCookies(undefined, cookiePath); - if (existing.length > 0) { - return existing; - } - } catch (error) { - // File doesn't exist or is invalid, continue to check env var - } + // Try to parse from environment variable + const cookieString = process.env.FACEBOOK_COOKIE; + if (!cookieString || !cookieString.trim()) { + throw new Error( + "No valid Facebook cookies found. Either:\n" + + " 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" + + " 2. Create ./cookies/facebook.json manually with cookie array", + ); + } - // Try to parse from environment variable - const cookieString = process.env.FACEBOOK_COOKIE; - if (!cookieString || !cookieString.trim()) { - throw new Error( - 'No valid Facebook cookies found. Either:\n' + - ' 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n' + - ' 2. Create ./cookies/facebook.json manually with cookie array' - ); - } + // Parse the cookie string + const cookies = parseFacebookCookieString(cookieString); + if (cookies.length === 0) { + throw new Error( + "FACEBOOK_COOKIE environment variable contains no valid cookies. " + + 'Expected format: "name1=value1; name2=value2;"', + ); + } - // Parse the cookie string - const cookies = parseFacebookCookieString(cookieString); - if (cookies.length === 0) { - throw new Error( - 'FACEBOOK_COOKIE environment variable contains no valid cookies. ' + - 'Expected format: "name1=value1; name2=value2;"' - ); - } + // Save to file for future use + try { + await Bun.write(cookiePath, JSON.stringify(cookies, null, 2)); + console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`); + } catch (error) { + console.warn(`! Could not save cookies to ${cookiePath}: ${error}`); + // Continue anyway, we have the cookies in memory + } - // Save to file for future use - try { - await Bun.write(cookiePath, JSON.stringify(cookies, null, 2)); - console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`); - } catch (error) { - console.warn(`⚠️ Could not save cookies to ${cookiePath}: ${error}`); - // Continue anyway, we have the cookies in memory - } - - return cookies; + return cookies; } /** * Format cookies array into Cookie header string */ function formatCookiesForHeader(cookies: Cookie[], domain: string): string { - const validCookies = cookies - .filter((cookie) => { - // Check if cookie applies to this domain - if (cookie.domain.startsWith(".")) { - // Domain cookie (applies to subdomains) - return ( - domain.endsWith(cookie.domain.slice(1)) || - domain === cookie.domain.slice(1) - ); - } else { - // Host-only cookie - return cookie.domain === domain; - } - }) - .filter((cookie) => { - // Check expiration - if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) { - return false; // Expired - } - return true; - }); + const validCookies = cookies + .filter((cookie) => { + // Check if cookie applies to this domain + if (cookie.domain.startsWith(".")) { + // Domain cookie (applies to subdomains) + return ( + domain.endsWith(cookie.domain.slice(1)) || + domain === cookie.domain.slice(1) + ); + } + // Host-only cookie + return cookie.domain === domain; + }) + .filter((cookie) => { + // Check expiration + if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) { + return false; // Expired + } + return true; + }); - return validCookies - .map((cookie) => `${cookie.name}=${cookie.value}`) - .join("; "); + return validCookies + .map((cookie) => `${cookie.name}=${cookie.value}`) + .join("; "); } class HttpError extends Error { - constructor( - message: string, - public readonly status: number, - public readonly url: string, - ) { - super(message); - this.name = "HttpError"; - } + constructor( + message: string, + public readonly status: number, + public readonly url: string, + ) { + super(message); + this.name = "HttpError"; + } } // ----------------------------- HTTP Client ----------------------------- @@ -387,91 +390,93 @@ class HttpError extends Error { - Supports custom cookies for Facebook authentication */ async function fetchHtml( - url: string, - DELAY_MS: number, - opts?: { - maxRetries?: number; - retryBaseMs?: number; - onRateInfo?: (remaining: string | null, reset: string | null) => void; - cookies?: string; - }, + url: string, + DELAY_MS: number, + opts?: { + maxRetries?: number; + retryBaseMs?: number; + onRateInfo?: (remaining: string | null, reset: string | null) => void; + cookies?: string; + }, ): Promise { - const maxRetries = opts?.maxRetries ?? 3; - const retryBaseMs = opts?.retryBaseMs ?? 500; + const maxRetries = opts?.maxRetries ?? 3; + const retryBaseMs = opts?.retryBaseMs ?? 500; - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const headers: Record = { - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", - "accept-encoding": "gzip, deflate, br", - "cache-control": "no-cache", - "upgrade-insecure-requests": "1", - "sec-fetch-dest": "document", - "sec-fetch-mode": "navigate", - "sec-fetch-site": "none", - "sec-fetch-user": "?1", - "user-agent": - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - }; + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const headers: Record = { + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", + "accept-encoding": "gzip, deflate, br", + "cache-control": "no-cache", + "upgrade-insecure-requests": "1", + "sec-fetch-dest": "document", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "sec-fetch-user": "?1", + "user-agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + }; - // Add cookies if provided - if (opts?.cookies) { - headers["cookie"] = opts.cookies; - } + // Add cookies if provided + if (opts?.cookies) { + headers.cookie = opts.cookies; + } - const res = await fetch(url, { - method: "GET", - headers, - }); + const res = await fetch(url, { + method: "GET", + headers, + }); - const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); - const rateLimitReset = res.headers.get("X-RateLimit-Reset"); - opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); + const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); + const rateLimitReset = res.headers.get("X-RateLimit-Reset"); + opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); - if (!res.ok) { - // Respect 429 reset if provided - if (res.status === 429) { - const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN; - const waitMs = Number.isFinite(resetSeconds) - ? Math.max(0, resetSeconds * 1000) - : (attempt + 1) * retryBaseMs; - await delay(waitMs); - continue; - } - // For Facebook, 400 often means authentication required - // Don't retry 4xx client errors except 429 - if (res.status >= 400 && res.status < 500 && res.status !== 429) { - throw new HttpError( - `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`, - res.status, - url, - ); - } - // Retry on 5xx - if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { - await delay((attempt + 1) * retryBaseMs); - continue; - } - throw new HttpError( - `Request failed with status ${res.status}`, - res.status, - url, - ); - } + if (!res.ok) { + // Respect 429 reset if provided + if (res.status === 429) { + const resetSeconds = rateLimitReset + ? Number(rateLimitReset) + : Number.NaN; + const waitMs = Number.isFinite(resetSeconds) + ? Math.max(0, resetSeconds * 1000) + : (attempt + 1) * retryBaseMs; + await delay(waitMs); + continue; + } + // For Facebook, 400 often means authentication required + // Don't retry 4xx client errors except 429 + if (res.status >= 400 && res.status < 500 && res.status !== 429) { + throw new HttpError( + `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`, + res.status, + url, + ); + } + // Retry on 5xx + if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { + await delay((attempt + 1) * retryBaseMs); + continue; + } + throw new HttpError( + `Request failed with status ${res.status}`, + res.status, + url, + ); + } - const html = await res.text(); - // Respect per-request delay to keep at or under REQUESTS_PER_SECOND - await delay(DELAY_MS); - return html; - } catch (err) { - if (attempt >= maxRetries) throw err; - await delay((attempt + 1) * retryBaseMs); - } - } + const html = await res.text(); + // Respect per-request delay to keep at or under REQUESTS_PER_SECOND + await delay(DELAY_MS); + return html; + } catch (err) { + if (attempt >= maxRetries) throw err; + await delay((attempt + 1) * retryBaseMs); + } + } - throw new Error("Exhausted retries without response"); + throw new Error("Exhausted retries without response"); } // ----------------------------- Parsing ----------------------------- @@ -480,702 +485,780 @@ async function fetchHtml( Extract marketplace search data from Facebook page script tags */ function extractFacebookMarketplaceData( - htmlString: HTMLString, + htmlString: HTMLString, ): FacebookAdNode[] | null { - const { document } = parseHTML(htmlString); - const scripts = document.querySelectorAll("script"); + const { document } = parseHTML(htmlString); + const scripts = document.querySelectorAll("script"); - let marketplaceData: FacebookMarketplaceSearch | null = null; + let marketplaceData: FacebookMarketplaceSearch | null = null; - // Find the script containing the require data with marketplace_search - for (const script of Array.from(scripts) as HTMLScriptElement[]) { - const scriptText = script.textContent; - if (!scriptText) continue; + // Find the script containing the require data with marketplace_search + for (const script of Array.from(scripts) as HTMLScriptElement[]) { + const scriptText = script.textContent; + if (!scriptText) continue; - try { - const parsed = JSON.parse(scriptText); + try { + const parsed = JSON.parse(scriptText); - // First check if this is the direct data structure (like in examples) - if (parsed.require && Array.isArray(parsed.require)) { - // Try multiple navigation paths to find marketplace_search - const paths = [ - // Original path from example - () => parsed.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'], - // Alternative path structure - () => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search, - // Another variation - () => parsed.require[0][3][0]['__bbox']['result']['data']['marketplace_search'], - // Direct access for some responses - () => { - for (const item of parsed.require) { - if (item && item.length >= 4 && item[3]) { - const bbox = item[3]?.['__bbox']?.result?.data?.marketplace_search; - if (bbox) return bbox; - } - } - return null; - } - ]; + // First check if this is the direct data structure (like in examples) + if (parsed.require && Array.isArray(parsed.require)) { + // Try multiple navigation paths to find marketplace_search + const paths = [ + // Original path from example + () => + parsed.require[0][3][0].__bbox.require[0][3][1].__bbox.result.data + .marketplace_search, + // Alternative path structure + () => + parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search, + // Another variation + () => parsed.require[0][3][0].__bbox.result.data.marketplace_search, + // Direct access for some responses + () => { + for (const item of parsed.require) { + if (item && item.length >= 4 && item[3]) { + const bbox = item[3]?.__bbox?.result?.data?.marketplace_search; + if (bbox) return bbox; + } + } + return null; + }, + ]; - for (const getData of paths) { - try { - const result = getData(); - if (result && isRecord(result) && result.feed_units?.edges?.length > 0) { - marketplaceData = result as FacebookMarketplaceSearch; - break; - } - } catch { - continue; - } - } + for (const getData of paths) { + try { + const result = getData(); + if ( + result && + isRecord(result) && + result.feed_units?.edges?.length > 0 + ) { + marketplaceData = result as FacebookMarketplaceSearch; + break; + } + } catch {} + } - if (marketplaceData) break; - } + if (marketplaceData) break; + } - // Also check for direct marketplace_search in the parsed data - if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) { - const searchData = parsed.marketplace_search as FacebookMarketplaceSearch; - if (searchData.feed_units?.edges?.length > 0) { - marketplaceData = searchData; - break; - } - } - } catch { - // Ignore parsing errors for other scripts - } - } + // Also check for direct marketplace_search in the parsed data + if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) { + const searchData = + parsed.marketplace_search as FacebookMarketplaceSearch; + if (searchData.feed_units?.edges?.length > 0) { + marketplaceData = searchData; + break; + } + } + } catch { + // Ignore parsing errors for other scripts + } + } - if (!marketplaceData?.feed_units?.edges?.length) { - console.warn("No marketplace data found in HTML response"); - return null; - } + if (!marketplaceData?.feed_units?.edges?.length) { + console.warn("No marketplace data found in HTML response"); + return null; + } - console.log(`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`); - return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node })); + console.log( + `Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`, + ); + return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node })); } /** * Monitor API extraction success/failure for detecting changes */ -let extractionStats = { - totalExtractions: 0, - successfulExtractions: 0, - failedExtractions: 0, - lastApiChangeDetected: null as Date | null, +const extractionStats = { + totalExtractions: 0, + successfulExtractions: 0, + failedExtractions: 0, + lastApiChangeDetected: null as Date | null, }; /** * Log extraction metrics for monitoring API stability */ function logExtractionMetrics(success: boolean, itemId?: string) { - extractionStats.totalExtractions++; - if (success) { - extractionStats.successfulExtractions++; - } else { - extractionStats.failedExtractions++; - } + extractionStats.totalExtractions++; + if (success) { + extractionStats.successfulExtractions++; + } else { + extractionStats.failedExtractions++; + } - // Log warning if extraction success rate drops below 80% - const successRate = extractionStats.successfulExtractions / extractionStats.totalExtractions; - if (extractionStats.totalExtractions > 10 && successRate < 0.8 && !extractionStats.lastApiChangeDetected) { - console.warn("⚠️ Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes."); - extractionStats.lastApiChangeDetected = new Date(); - } + // Log warning if extraction success rate drops below 80% + const successRate = + extractionStats.successfulExtractions / extractionStats.totalExtractions; + if ( + extractionStats.totalExtractions > 10 && + successRate < 0.8 && + !extractionStats.lastApiChangeDetected + ) { + console.warn( + "! Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.", + ); + extractionStats.lastApiChangeDetected = new Date(); + } - if (success) { - console.log(`📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`); - } else { - console.warn(`❌ Facebook API extraction failed for item ${itemId || 'unknown'}`); - } + if (success) { + console.log( + `📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`, + ); + } else { + console.warn( + `❌ Facebook API extraction failed for item ${itemId || "unknown"}`, + ); + } } /** * Turns cents to localized currency string. */ function formatCentsToCurrency( - num: number | string | undefined, - locale = "en-US", + num: number | string | undefined, + locale = "en-US", ): string { - if (num == null) return ""; - const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; - if (Number.isNaN(cents)) return ""; - const dollars = cents / 100; - const formatter = new Intl.NumberFormat(locale, { - style: 'currency', - currency: 'USD', - minimumFractionDigits: 2, - maximumFractionDigits: 2, - useGrouping: true, - }); - return formatter.format(dollars); + if (num == null) return ""; + const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; + if (Number.isNaN(cents)) return ""; + const dollars = cents / 100; + const formatter = new Intl.NumberFormat(locale, { + style: "currency", + currency: "USD", + minimumFractionDigits: 2, + maximumFractionDigits: 2, + useGrouping: true, + }); + return formatter.format(dollars); } /** Extract marketplace item details from Facebook item page HTML Updated for 2026 Facebook Marketplace API structure with multiple extraction paths */ -function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceItem | null { - const { document } = parseHTML(htmlString); - const scripts = document.querySelectorAll("script"); +function extractFacebookItemData( + htmlString: HTMLString, +): FacebookMarketplaceItem | null { + const { document } = parseHTML(htmlString); + const scripts = document.querySelectorAll("script"); - for (const script of scripts) { - const scriptText = script.textContent; - if (!scriptText) continue; + for (const script of scripts) { + const scriptText = script.textContent; + if (!scriptText) continue; - try { - const parsed = JSON.parse(scriptText); + try { + const parsed = JSON.parse(scriptText); - // Check for the 2026 require structure with marketplace product details - if (parsed.require && Array.isArray(parsed.require)) { - // Try multiple extraction paths discovered from reverse engineering - const extractionPaths = [ - // Path 1: Primary path from current API structure - () => parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target, - // Path 2: Alternative path with nested require - () => parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target, - // Path 3: Variation without the [0] index - () => parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target, - // Path 4-5: Additional fallback paths for edge cases - () => parsed.require[0][3][1]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target, - () => parsed.require[0][3][2]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target, - ]; + // Check for the 2026 require structure with marketplace product details + if (parsed.require && Array.isArray(parsed.require)) { + // Try multiple extraction paths discovered from reverse engineering + const extractionPaths = [ + // Path 1: Primary path from current API structure + () => + parsed.require[0][3].__bbox.result.data.viewer + .marketplace_product_details_page.target, + // Path 2: Alternative path with nested require + () => + parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data + .viewer.marketplace_product_details_page.target, + // Path 3: Variation without the [0] index + () => + parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data + .viewer.marketplace_product_details_page.target, + // Path 4-5: Additional fallback paths for edge cases + () => + parsed.require[0][3][1]?.__bbox?.result?.data?.viewer + ?.marketplace_product_details_page?.target, + () => + parsed.require[0][3][2]?.__bbox?.result?.data?.viewer + ?.marketplace_product_details_page?.target, + ]; - let pathIndex = 0; - for (const getPath of extractionPaths) { - try { - const targetData = getPath(); - if (targetData && typeof targetData === 'object' && - targetData.id && targetData.marketplace_listing_title && - targetData.__typename === 'GroupCommerceProductItem') { - console.log(`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`); - return targetData as FacebookMarketplaceItem; - } - } catch { - // Path not found or invalid, try next path - } - pathIndex++; - } + let pathIndex = 0; + for (const getPath of extractionPaths) { + try { + const targetData = getPath(); + if ( + targetData && + typeof targetData === "object" && + targetData.id && + targetData.marketplace_listing_title && + targetData.__typename === "GroupCommerceProductItem" + ) { + console.log( + `Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`, + ); + return targetData as FacebookMarketplaceItem; + } + } catch { + // Path not found or invalid, try next path + } + pathIndex++; + } - // Fallback: Search recursively for marketplace data in the parsed structure - const findMarketplaceData = (obj: unknown, depth = 0, maxDepth = 10): FacebookMarketplaceItem | null => { - if (depth > maxDepth) return null; // Prevent infinite recursion - if (isRecord(obj)) { - // Check if this object matches the expected marketplace item structure - if (obj.marketplace_listing_title && obj.id && - obj.__typename === 'GroupCommerceProductItem' && - obj.redacted_description) { - return obj as FacebookMarketplaceItem; - } - // Recursively search nested objects and arrays - for (const key in obj) { - const value = obj[key]; - if (isRecord(value) || Array.isArray(value)) { - const result = findMarketplaceData(value, depth + 1, maxDepth); - if (result) return result; - } - } - } else if (Array.isArray(obj)) { - // Search through arrays - for (const item of obj) { - const result = findMarketplaceData(item, depth + 1, maxDepth); - if (result) return result; - } - } - return null; - }; + // Fallback: Search recursively for marketplace data in the parsed structure + const findMarketplaceData = ( + obj: unknown, + depth = 0, + maxDepth = 10, + ): FacebookMarketplaceItem | null => { + if (depth > maxDepth) return null; // Prevent infinite recursion + if (isRecord(obj)) { + // Check if this object matches the expected marketplace item structure + if ( + obj.marketplace_listing_title && + obj.id && + obj.__typename === "GroupCommerceProductItem" && + obj.redacted_description + ) { + return obj as FacebookMarketplaceItem; + } + // Recursively search nested objects and arrays + for (const key in obj) { + const value = obj[key]; + if (isRecord(value) || Array.isArray(value)) { + const result = findMarketplaceData(value, depth + 1, maxDepth); + if (result) return result; + } + } + } else if (Array.isArray(obj)) { + // Search through arrays + for (const item of obj) { + const result = findMarketplaceData(item, depth + 1, maxDepth); + if (result) return result; + } + } + return null; + }; - // Search through the entire require structure - const recursiveResult = findMarketplaceData(parsed.require); - if (recursiveResult) { - console.log('Successfully extracted Facebook item data using recursive search'); - return recursiveResult; - } + // Search through the entire require structure + const recursiveResult = findMarketplaceData(parsed.require); + if (recursiveResult) { + console.log( + "Successfully extracted Facebook item data using recursive search", + ); + return recursiveResult; + } - // Additional search in other potential locations - if (parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target) { - const bboxData = parsed.__bbox.result.data.viewer.marketplace_product_details_page.target; - if (bboxData && typeof bboxData === 'object' && - bboxData.id && bboxData.marketplace_listing_title && - bboxData.__typename === 'GroupCommerceProductItem') { - console.log('Successfully extracted Facebook item data from __bbox structure'); - return bboxData as FacebookMarketplaceItem; - } - } - } - } catch (error) { - // Log parsing errors for debugging but continue to next script - console.debug(`Failed to parse script for Facebook item data: ${error}`); - } - } + // Additional search in other potential locations + if ( + parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page + ?.target + ) { + const bboxData = + parsed.__bbox.result.data.viewer.marketplace_product_details_page + .target; + if ( + bboxData && + typeof bboxData === "object" && + bboxData.id && + bboxData.marketplace_listing_title && + bboxData.__typename === "GroupCommerceProductItem" + ) { + console.log( + "Successfully extracted Facebook item data from __bbox structure", + ); + return bboxData as FacebookMarketplaceItem; + } + } + } + } catch (error) { + // Log parsing errors for debugging but continue to next script + console.debug(`Failed to parse script for Facebook item data: ${error}`); + } + } - return null; + return null; } /** Parse Facebook marketplace search results into ListingDetails[] */ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] { - const results: ListingDetails[] = []; + const results: ListingDetails[] = []; - for (const adJson of ads) { - try { - const listing = adJson.node.listing; - const title = listing.marketplace_listing_title; - const priceObj = listing.listing_price; + for (const adJson of ads) { + try { + const listing = adJson.node.listing; + const title = listing.marketplace_listing_title; + const priceObj = listing.listing_price; - if (!title || !priceObj) continue; + if (!title || !priceObj) continue; - const id = listing.id; - const url = `https://www.facebook.com/marketplace/item/${id}`; + const id = listing.id; + const url = `https://www.facebook.com/marketplace/item/${id}`; - // Facebook stores price in different fields: - // - amount_with_offset_in_currency: Facebook's internal price encoding (not cents) - // - amount: dollars (like "1.00") - // - formatted_amount: human-readable price (like "CA$1") - let cents: number; - if (priceObj.amount != null) { - const dollars = typeof priceObj.amount === 'string' - ? Number.parseFloat(priceObj.amount) - : priceObj.amount; - cents = Math.round(dollars * 100); - } else if (priceObj.amount_with_offset_in_currency != null) { - // Fallback: try to extract cents from amount_with_offset_in_currency - // This appears to use some exchange rate/multiplier format - const encodedAmount = Number(priceObj.amount_with_offset_in_currency); - if (!Number.isNaN(encodedAmount) && encodedAmount > 0) { - // Estimate roughly - this field doesn't contain real cents - // Use formatted_amount to get the actual dollar amount - if (priceObj.formatted_amount) { - const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/); - if (match) { - const dollars = Number.parseFloat(match[0].replace(',', '')); - if (!Number.isNaN(dollars)) { - cents = Math.round(dollars * 100); - } else { - cents = encodedAmount; // fallback - } - } else { - cents = encodedAmount; // fallback - } - } else { - cents = encodedAmount; // fallback - } - } else { - continue; // Invalid price - } - } else { - continue; // No price available - } + // Facebook stores price in different fields: + // - amount_with_offset_in_currency: Facebook's internal price encoding (not cents) + // - amount: dollars (like "1.00") + // - formatted_amount: human-readable price (like "CA$1") + let cents: number; + if (priceObj.amount != null) { + const dollars = + typeof priceObj.amount === "string" + ? Number.parseFloat(priceObj.amount) + : priceObj.amount; + cents = Math.round(dollars * 100); + } else if (priceObj.amount_with_offset_in_currency != null) { + // Fallback: try to extract cents from amount_with_offset_in_currency + // This appears to use some exchange rate/multiplier format + const encodedAmount = Number(priceObj.amount_with_offset_in_currency); + if (!Number.isNaN(encodedAmount) && encodedAmount > 0) { + // Estimate roughly - this field doesn't contain real cents + // Use formatted_amount to get the actual dollar amount + if (priceObj.formatted_amount) { + const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/); + if (match) { + const dollars = Number.parseFloat(match[0].replace(",", "")); + if (!Number.isNaN(dollars)) { + cents = Math.round(dollars * 100); + } else { + cents = encodedAmount; // fallback + } + } else { + cents = encodedAmount; // fallback + } + } else { + cents = encodedAmount; // fallback + } + } else { + continue; // Invalid price + } + } else { + continue; // No price available + } - if (!Number.isFinite(cents) || cents <= 0) continue; + if (!Number.isFinite(cents) || cents <= 0) continue; - // Extract address from location data if available - const cityName = - listing.location?.reverse_geocode?.city_page?.display_name; - const address = cityName || null; + // Extract address from location data if available + const cityName = + listing.location?.reverse_geocode?.city_page?.display_name; + const address = cityName || null; - // Determine listing status from Facebook flags - let listingStatus: string | undefined = undefined; - if (listing.is_sold) { - listingStatus = "SOLD"; - } else if (listing.is_pending) { - listingStatus = "PENDING"; - } else if (listing.is_live) { - listingStatus = "ACTIVE"; - } else if (listing.is_hidden) { - listingStatus = "HIDDEN"; - } + // Determine listing status from Facebook flags + let listingStatus: string | undefined = undefined; + if (listing.is_sold) { + listingStatus = "SOLD"; + } else if (listing.is_pending) { + listingStatus = "PENDING"; + } else if (listing.is_live) { + listingStatus = "ACTIVE"; + } else if (listing.is_hidden) { + listingStatus = "HIDDEN"; + } - // Format creation date if available - const creationDate = listing.creation_time - ? new Date(listing.creation_time * 1000).toISOString() - : undefined; + // Format creation date if available + const creationDate = listing.creation_time + ? new Date(listing.creation_time * 1000).toISOString() + : undefined; - // Extract image and video URLs - const imageUrl = listing.primary_listing_photo?.image?.uri; - const videoUrl = listing.listing_video ? `https://www.facebook.com/${listing.listing_video.id}/` : undefined; + // Extract image and video URLs + const imageUrl = listing.primary_listing_photo?.image?.uri; + const videoUrl = listing.listing_video + ? `https://www.facebook.com/${listing.listing_video.id}/` + : undefined; - // Extract seller information - const seller = listing.marketplace_listing_seller ? { - name: listing.marketplace_listing_seller.name, - id: listing.marketplace_listing_seller.id - } : undefined; + // Extract seller information + const seller = listing.marketplace_listing_seller + ? { + name: listing.marketplace_listing_seller.name, + id: listing.marketplace_listing_seller.id, + } + : undefined; - const listingDetails: ListingDetails = { - url, - title, - listingPrice: { - amountFormatted: priceObj.formatted_amount || formatCentsToCurrency(cents), - cents, - currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD - }, - address, - creationDate, - listingType: "item", // Default type for marketplace listings - listingStatus, - categoryId: listing.marketplace_listing_category_id, - imageUrl, - videoUrl, - seller, - deliveryTypes: listing.delivery_types, - }; + const listingDetails: ListingDetails = { + url, + title, + listingPrice: { + amountFormatted: + priceObj.formatted_amount || formatCentsToCurrency(cents), + cents, + currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD + }, + address, + creationDate, + listingType: "item", // Default type for marketplace listings + listingStatus, + categoryId: listing.marketplace_listing_category_id, + imageUrl, + videoUrl, + seller, + deliveryTypes: listing.delivery_types, + }; - results.push(listingDetails); - } catch { - // Skip malformed ads - continue; - } - } + results.push(listingDetails); + } catch {} + } - return results; + return results; } /** Parse Facebook marketplace item details into ListingDetails format Updated for 2026 GroupCommerceProductItem structure */ -function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null { - try { - const title = item.marketplace_listing_title || item.custom_title; - if (!title) return null; +function parseFacebookItem( + item: FacebookMarketplaceItem, +): ListingDetails | null { + try { + const title = item.marketplace_listing_title || item.custom_title; + if (!title) return null; - const url = `https://www.facebook.com/marketplace/item/${item.id}`; + const url = `https://www.facebook.com/marketplace/item/${item.id}`; - // Extract price information - let cents = 0; - let currency = "CAD"; // Default - let amountFormatted = item.formatted_price?.text || "FREE"; + // Extract price information + let cents = 0; + let currency = "CAD"; // Default + let amountFormatted = item.formatted_price?.text || "FREE"; - if (item.listing_price) { - currency = item.listing_price.currency || "CAD"; - if (item.listing_price.amount && item.listing_price.amount !== "0.00") { - const amount = parseFloat(item.listing_price.amount); - if (!isNaN(amount)) { - cents = Math.round(amount * 100); - amountFormatted = item.formatted_price?.text || formatCentsToCurrency(cents); - } - } - } + if (item.listing_price) { + currency = item.listing_price.currency || "CAD"; + if (item.listing_price.amount && item.listing_price.amount !== "0.00") { + const amount = Number.parseFloat(item.listing_price.amount); + if (!Number.isNaN(amount)) { + cents = Math.round(amount * 100); + amountFormatted = + item.formatted_price?.text || formatCentsToCurrency(cents); + } + } + } - // Extract description - const description = item.redacted_description?.text; + // Extract description + const description = item.redacted_description?.text; - // Extract location - const address = item.location_text?.text || null; + // Extract location + const address = item.location_text?.text || null; - // Extract seller information - const seller = item.marketplace_listing_seller ? { - name: item.marketplace_listing_seller.name, - id: item.marketplace_listing_seller.id - } : undefined; + // Extract seller information + const seller = item.marketplace_listing_seller + ? { + name: item.marketplace_listing_seller.name, + id: item.marketplace_listing_seller.id, + } + : undefined; - // Determine listing status - let listingStatus: string | undefined; - if (item.is_sold) { - listingStatus = "SOLD"; - } else if (item.is_pending) { - listingStatus = "PENDING"; - } else if (item.is_live) { - listingStatus = "ACTIVE"; - } else if (item.is_hidden) { - listingStatus = "HIDDEN"; - } + // Determine listing status + let listingStatus: string | undefined; + if (item.is_sold) { + listingStatus = "SOLD"; + } else if (item.is_pending) { + listingStatus = "PENDING"; + } else if (item.is_live) { + listingStatus = "ACTIVE"; + } else if (item.is_hidden) { + listingStatus = "HIDDEN"; + } - // Format creation date - const creationDate = item.creation_time - ? new Date(item.creation_time * 1000).toISOString() - : undefined; + // Format creation date + const creationDate = item.creation_time + ? new Date(item.creation_time * 1000).toISOString() + : undefined; - // Determine listing type based on category or vehicle data - let listingType = "item"; - if (item.vehicle_make_display_name || item.vehicle_odometer_data) { - listingType = "vehicle"; - } else if (item.marketplace_listing_category_id) { - // Could map category IDs to types, but keeping simple for now - listingType = "item"; - } + // Determine listing type based on category or vehicle data + let listingType = "item"; + if (item.vehicle_make_display_name || item.vehicle_odometer_data) { + listingType = "vehicle"; + } else if (item.marketplace_listing_category_id) { + // Could map category IDs to types, but keeping simple for now + listingType = "item"; + } - const listingDetails: ListingDetails = { - url, - title, - description, - listingPrice: { - amountFormatted, - cents, - currency, - }, - address, - creationDate, - listingType, - listingStatus, - categoryId: item.marketplace_listing_category_id, - seller, - deliveryTypes: item.delivery_types, - }; + const listingDetails: ListingDetails = { + url, + title, + description, + listingPrice: { + amountFormatted, + cents, + currency, + }, + address, + creationDate, + listingType, + listingStatus, + categoryId: item.marketplace_listing_category_id, + seller, + deliveryTypes: item.delivery_types, + }; - return listingDetails; - } catch (error) { - console.warn(`Failed to parse Facebook item ${item.id}:`, error); - return null; - } + return listingDetails; + } catch (error) { + console.warn(`Failed to parse Facebook item ${item.id}:`, error); + return null; + } } // ----------------------------- Exports for Testing ----------------------------- // Export internal functions for comprehensive testing export { - extractFacebookItemData, - extractFacebookMarketplaceData, - parseFacebookItem, - parseFacebookAds, - formatCentsToCurrency, - loadFacebookCookies, - formatCookiesForHeader, - parseFacebookCookieString, - ensureFacebookCookies, + extractFacebookItemData, + extractFacebookMarketplaceData, + parseFacebookItem, + parseFacebookAds, + formatCentsToCurrency, + loadFacebookCookies, + formatCookiesForHeader, + parseFacebookCookieString, + ensureFacebookCookies, }; // ----------------------------- Main ----------------------------- export default async function fetchFacebookItems( - SEARCH_QUERY: string, - REQUESTS_PER_SECOND = 1, - LOCATION = "toronto", - MAX_ITEMS = 25, - cookiesSource?: string, - cookiePath?: string, + SEARCH_QUERY: string, + REQUESTS_PER_SECOND = 1, + LOCATION = "toronto", + MAX_ITEMS = 25, + cookiesSource?: string, + cookiePath?: string, ) { - // Load Facebook cookies - required for Facebook Marketplace access - let cookies: Cookie[]; - if (cookiesSource) { - // Use provided cookie source (backward compatibility) - cookies = await loadFacebookCookies(cookiesSource); - } else { - // Auto-load from file or parse from env var - cookies = await ensureFacebookCookies(cookiePath); - } + // Load Facebook cookies - required for Facebook Marketplace access + let cookies: Cookie[]; + if (cookiesSource) { + // Use provided cookie source (backward compatibility) + cookies = await loadFacebookCookies(cookiesSource); + } else { + // Auto-load from file or parse from env var + cookies = await ensureFacebookCookies(cookiePath); + } - if (cookies.length === 0) { - throw new Error( - "Facebook cookies are required for marketplace access. " + - "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", - ); - } + if (cookies.length === 0) { + throw new Error( + "Facebook cookies are required for marketplace access. " + + "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", + ); + } - // Format cookies for HTTP header - const domain = "www.facebook.com"; - const cookiesHeader = formatCookiesForHeader(cookies, domain); - if (!cookiesHeader) { - throw new Error( - "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", - ); - } + // Format cookies for HTTP header + const domain = "www.facebook.com"; + const cookiesHeader = formatCookiesForHeader(cookies, domain); + if (!cookiesHeader) { + throw new Error( + "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", + ); + } - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); - // Encode search query for URL - const encodedQuery = encodeURIComponent(SEARCH_QUERY); + // Encode search query for URL + const encodedQuery = encodeURIComponent(SEARCH_QUERY); - // Facebook marketplace URL structure - const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; + // Facebook marketplace URL structure + const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; - console.log(`Fetching Facebook marketplace: ${searchUrl}`); - console.log(`Using ${cookies.length} cookies for authentication`); + console.log(`Fetching Facebook marketplace: ${searchUrl}`); + console.log(`Using ${cookies.length} cookies for authentication`); - let searchHtml: string; - try { - searchHtml = await fetchHtml(searchUrl, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - "\n" + - `Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, - ); - } - }, - cookies: cookiesHeader, - }); - } catch (err) { - if (err instanceof HttpError) { - console.warn( - `\nFacebook marketplace access failed (${err.status}): ${err.message}`, - ); - if (err.status === 400 || err.status === 401 || err.status === 403) { - console.warn( - "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", - ); - } - return []; - } - throw err; - } + let searchHtml: string; + try { + searchHtml = await fetchHtml(searchUrl, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + cookies: cookiesHeader, + }); + } catch (err) { + if (err instanceof HttpError) { + console.warn( + `\nFacebook marketplace access failed (${err.status}): ${err.message}`, + ); + if (err.status === 400 || err.status === 401 || err.status === 403) { + console.warn( + "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", + ); + } + return []; + } + throw err; + } - const ads = extractFacebookMarketplaceData(searchHtml); - if (!ads || ads.length === 0) { - console.warn("No ads parsed from Facebook marketplace page."); - return []; - } + const ads = extractFacebookMarketplaceData(searchHtml); + if (!ads || ads.length === 0) { + console.warn("No ads parsed from Facebook marketplace page."); + return []; + } - console.log(`\nFound ${ads.length} raw ads. Processing...`); + console.log(`\nFound ${ads.length} raw ads. Processing...`); - const progressBar = new cliProgress.SingleBar( - {}, - cliProgress.Presets.shades_classic, - ); - const totalProgress = ads.length; - let currentProgress = 0; - progressBar.start(totalProgress, currentProgress); + const progressBar = new cliProgress.SingleBar( + {}, + cliProgress.Presets.shades_classic, + ); + const totalProgress = ads.length; + const currentProgress = 0; + progressBar.start(totalProgress, currentProgress); - const items = parseFacebookAds(ads); + const items = parseFacebookAds(ads); - // Filter to only priced items (already done in parseFacebookAds) - const pricedItems = items.filter( - (item) => item.listingPrice?.cents && item.listingPrice.cents > 0, - ); + // Filter to only priced items (already done in parseFacebookAds) + const pricedItems = items.filter( + (item) => item.listingPrice?.cents && item.listingPrice.cents > 0, + ); - progressBar.update(totalProgress); - progressBar.stop(); + progressBar.update(totalProgress); + progressBar.stop(); - console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`); - return pricedItems.slice(0, MAX_ITEMS); // Limit results + console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`); + return pricedItems.slice(0, MAX_ITEMS); // Limit results } /** * Fetch individual Facebook marketplace item details with enhanced error handling */ export async function fetchFacebookItem( - itemId: string, - cookiesSource?: string, - cookiePath?: string, + itemId: string, + cookiesSource?: string, + cookiePath?: string, ): Promise { - // Load Facebook cookies - required for Facebook Marketplace access - let cookies: Cookie[]; - if (cookiesSource) { - // Use provided cookie source (backward compatibility) - cookies = await loadFacebookCookies(cookiesSource); - } else { - // Auto-load from file or parse from env var - cookies = await ensureFacebookCookies(cookiePath); - } + // Load Facebook cookies - required for Facebook Marketplace access + let cookies: Cookie[]; + if (cookiesSource) { + // Use provided cookie source (backward compatibility) + cookies = await loadFacebookCookies(cookiesSource); + } else { + // Auto-load from file or parse from env var + cookies = await ensureFacebookCookies(cookiePath); + } - if (cookies.length === 0) { - throw new Error( - "Facebook cookies are required for marketplace access. " + - "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", - ); - } + if (cookies.length === 0) { + throw new Error( + "Facebook cookies are required for marketplace access. " + + "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", + ); + } - // Format cookies for HTTP header - const domain = "www.facebook.com"; - const cookiesHeader = formatCookiesForHeader(cookies, domain); - if (!cookiesHeader) { - throw new Error( - "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", - ); - } + // Format cookies for HTTP header + const domain = "www.facebook.com"; + const cookiesHeader = formatCookiesForHeader(cookies, domain); + if (!cookiesHeader) { + throw new Error( + "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", + ); + } - const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; + const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; - console.log(`Fetching Facebook marketplace item: ${itemUrl}`); + console.log(`Fetching Facebook marketplace item: ${itemUrl}`); - let itemHtml: string; - try { - itemHtml = await fetchHtml(itemUrl, 1000, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - "\n" + - `Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, - ); - } - }, - cookies: cookiesHeader, - }); - } catch (err) { - if (err instanceof HttpError) { - console.warn( - `\nFacebook marketplace item access failed (${err.status}): ${err.message}`, - ); + let itemHtml: string; + try { + itemHtml = await fetchHtml(itemUrl, 1000, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + cookies: cookiesHeader, + }); + } catch (err) { + if (err instanceof HttpError) { + console.warn( + `\nFacebook marketplace item access failed (${err.status}): ${err.message}`, + ); - // Enhanced error handling based on status codes - switch (err.status) { - case 400: - case 401: - case 403: - console.warn( - "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", - ); - console.warn("Try logging out and back into Facebook, then export fresh cookies."); - break; - case 404: - console.warn( - "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.", - ); - break; - case 429: - console.warn( - "Rate limited: Too many requests. Facebook is blocking access temporarily.", - ); - break; - case 500: - case 502: - case 503: - console.warn( - "Facebook server error: Marketplace may be temporarily unavailable.", - ); - break; - default: - console.warn(`Unexpected error status: ${err.status}`); - } - return null; - } - throw err; - } + // Enhanced error handling based on status codes + switch (err.status) { + case 400: + case 401: + case 403: + console.warn( + "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", + ); + console.warn( + "Try logging out and back into Facebook, then export fresh cookies.", + ); + break; + case 404: + console.warn( + "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.", + ); + break; + case 429: + console.warn( + "Rate limited: Too many requests. Facebook is blocking access temporarily.", + ); + break; + case 500: + case 502: + case 503: + console.warn( + "Facebook server error: Marketplace may be temporarily unavailable.", + ); + break; + default: + console.warn(`Unexpected error status: ${err.status}`); + } + return null; + } + throw err; + } - const itemData = extractFacebookItemData(itemHtml); - if (!itemData) { - logExtractionMetrics(false, itemId); - // Enhanced checking for specific failure scenarios - if (itemHtml.includes("This listing is no longer available") || - itemHtml.includes("listing has been removed") || - itemHtml.includes("This item has been sold")) { - console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`); - return null; - } + const itemData = extractFacebookItemData(itemHtml); + if (!itemData) { + logExtractionMetrics(false, itemId); + // Enhanced checking for specific failure scenarios + if ( + itemHtml.includes("This listing is no longer available") || + itemHtml.includes("listing has been removed") || + itemHtml.includes("This item has been sold") + ) { + console.warn( + `Item ${itemId} appears to be sold or removed from marketplace.`, + ); + return null; + } - if (itemHtml.includes("log in to Facebook") || - itemHtml.includes("You must log in") || - itemHtml.includes("authentication required")) { - console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`); - return null; - } + if ( + itemHtml.includes("log in to Facebook") || + itemHtml.includes("You must log in") || + itemHtml.includes("authentication required") + ) { + console.warn( + `Authentication failed for item ${itemId}. Cookies may be expired.`, + ); + return null; + } - console.warn(`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`); - console.warn(" - The listing was removed or sold"); - console.warn(" - Authentication issues"); - console.warn(" - Facebook changed their API structure"); - console.warn(" - Network or parsing issues"); - return null; - } + console.warn( + `No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`, + ); + console.warn(" - The listing was removed or sold"); + console.warn(" - Authentication issues"); + console.warn(" - Facebook changed their API structure"); + console.warn(" - Network or parsing issues"); + return null; + } - logExtractionMetrics(true, itemId); - console.log(`Successfully extracted data for item ${itemId}`); + logExtractionMetrics(true, itemId); + console.log(`Successfully extracted data for item ${itemId}`); - const parsedItem = parseFacebookItem(itemData); - if (!parsedItem) { - console.warn(`Failed to parse item ${itemId}: Invalid data structure`); - return null; - } + const parsedItem = parseFacebookItem(itemData); + if (!parsedItem) { + console.warn(`Failed to parse item ${itemId}: Invalid data structure`); + return null; + } - // Check for sold/removed status in the parsed data with proper precedence - if (itemData.is_sold) { - console.warn(`Item ${itemId} is marked as sold in the marketplace.`); - // Still return the data but mark it as sold - parsedItem.listingStatus = "SOLD"; - } else if (!itemData.is_live) { - console.warn(`Item ${itemId} is not live/active in the marketplace.`); - parsedItem.listingStatus = itemData.is_hidden ? "HIDDEN" : - itemData.is_pending ? "PENDING" : "INACTIVE"; - } + // Check for sold/removed status in the parsed data with proper precedence + if (itemData.is_sold) { + console.warn(`Item ${itemId} is marked as sold in the marketplace.`); + // Still return the data but mark it as sold + parsedItem.listingStatus = "SOLD"; + } else if (!itemData.is_live) { + console.warn(`Item ${itemId} is not live/active in the marketplace.`); + parsedItem.listingStatus = itemData.is_hidden + ? "HIDDEN" + : itemData.is_pending + ? "PENDING" + : "INACTIVE"; + } - return parsedItem; + return parsedItem; } diff --git a/src/index.ts b/src/index.ts index 10dd0dc..5d8680e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,187 +1,215 @@ -import fetchKijijiItems from "@/kijiji"; -import fetchFacebookItems from "@/facebook"; import fetchEbayItems from "@/ebay"; +import fetchFacebookItems from "@/facebook"; +import fetchKijijiItems from "@/kijiji"; const PORT = process.env.PORT || 4005; const server = Bun.serve({ - port: PORT, - idleTimeout: 0, - routes: { - // Static routes - "/api/status": new Response("OK"), + port: PORT, + idleTimeout: 0, + routes: { + // Static routes + "/api/status": new Response("OK"), - // Dynamic routes - "/api/kijiji": async (req: Request) => { - const reqUrl = new URL(req.url); + // Dynamic routes + "/api/kijiji": async (req: Request) => { + const reqUrl = new URL(req.url); - const SEARCH_QUERY = - req.headers.get("query") || reqUrl.searchParams.get("q") || null; - if (!SEARCH_QUERY) - return Response.json( - { - message: - "Request didn't have 'query' header or 'q' search parameter!", - }, - { status: 400 }, - ); + const SEARCH_QUERY = + req.headers.get("query") || reqUrl.searchParams.get("q") || null; + if (!SEARCH_QUERY) + return Response.json( + { + message: + "Request didn't have 'query' header or 'q' search parameter!", + }, + { status: 400 }, + ); - // Parse optional parameters with enhanced defaults - const location = reqUrl.searchParams.get("location"); - const category = reqUrl.searchParams.get("category"); - const maxPagesParam = reqUrl.searchParams.get("maxPages"); - const maxPages = maxPagesParam - ? Number.parseInt(maxPagesParam, 10) - : 5; // Default: 5 pages - const sortBy = reqUrl.searchParams.get("sortBy") as 'relevancy' | 'date' | 'price' | 'distance' | undefined; - const sortOrder = reqUrl.searchParams.get("sortOrder") as 'asc' | 'desc' | undefined; + // Parse optional parameters with enhanced defaults + const location = reqUrl.searchParams.get("location"); + const category = reqUrl.searchParams.get("category"); + const maxPagesParam = reqUrl.searchParams.get("maxPages"); + const maxPages = maxPagesParam ? Number.parseInt(maxPagesParam, 10) : 5; // Default: 5 pages + const sortBy = reqUrl.searchParams.get("sortBy") as + | "relevancy" + | "date" + | "price" + | "distance" + | undefined; + const sortOrder = reqUrl.searchParams.get("sortOrder") as + | "asc" + | "desc" + | undefined; - // Build search options - const locationValue = location ? (/^\d+$/.test(location) ? Number(location) : location) : 1700272; - const categoryValue = category ? (/^\d+$/.test(category) ? Number(category) : category) : 0; + // Build search options + const locationValue = location + ? /^\d+$/.test(location) + ? Number(location) + : location + : 1700272; + const categoryValue = category + ? /^\d+$/.test(category) + ? Number(category) + : category + : 0; - const searchOptions: import("@/kijiji").SearchOptions = { - location: locationValue, - category: categoryValue, - keywords: SEARCH_QUERY, - sortBy: sortBy || 'relevancy', - sortOrder: sortOrder || 'desc', - maxPages, - }; + const searchOptions: import("@/kijiji").SearchOptions = { + location: locationValue, + category: categoryValue, + keywords: SEARCH_QUERY, + sortBy: sortBy || "relevancy", + sortOrder: sortOrder || "desc", + maxPages, + }; - // Build listing fetch options with enhanced defaults - const listingOptions: import("@/kijiji").ListingFetchOptions = { - includeImages: true, // Always include full image arrays - sellerDataDepth: 'detailed', // Default: detailed seller info - includeClientSideData: false, // GraphQL reviews disabled by default - }; + // Build listing fetch options with enhanced defaults + const listingOptions: import("@/kijiji").ListingFetchOptions = { + includeImages: true, // Always include full image arrays + sellerDataDepth: "detailed", // Default: detailed seller info + includeClientSideData: false, // GraphQL reviews disabled by default + }; - try { - const items = await fetchKijijiItems(SEARCH_QUERY, 1, undefined, searchOptions, listingOptions); - if (!items || items.length === 0) - return Response.json( - { message: "Search didn't return any results!" }, - { status: 404 }, - ); - return Response.json(items, { status: 200 }); - } catch (error) { - console.error("Kijiji scraping error:", error); - const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; - return Response.json( - { - message: `Scraping failed: ${errorMessage}`, - query: SEARCH_QUERY, - options: { searchOptions, listingOptions } - }, - { status: 500 }, - ); - } - }, + try { + const items = await fetchKijijiItems( + SEARCH_QUERY, + 1, + undefined, + searchOptions, + listingOptions, + ); + if (!items || items.length === 0) + return Response.json( + { message: "Search didn't return any results!" }, + { status: 404 }, + ); + return Response.json(items, { status: 200 }); + } catch (error) { + console.error("Kijiji scraping error:", error); + const errorMessage = + error instanceof Error ? error.message : "Unknown error occurred"; + return Response.json( + { + message: `Scraping failed: ${errorMessage}`, + query: SEARCH_QUERY, + options: { searchOptions, listingOptions }, + }, + { status: 500 }, + ); + } + }, - "/api/facebook": async (req: Request) => { - const reqUrl = new URL(req.url); + "/api/facebook": async (req: Request) => { + const reqUrl = new URL(req.url); - const SEARCH_QUERY = - req.headers.get("query") || reqUrl.searchParams.get("q") || null; - if (!SEARCH_QUERY) - return Response.json( - { - message: - "Request didn't have 'query' header or 'q' search parameter!", - }, - { status: 400 }, - ); + const SEARCH_QUERY = + req.headers.get("query") || reqUrl.searchParams.get("q") || null; + if (!SEARCH_QUERY) + return Response.json( + { + message: + "Request didn't have 'query' header or 'q' search parameter!", + }, + { status: 400 }, + ); - const LOCATION = reqUrl.searchParams.get("location") || "toronto"; - const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined; + const LOCATION = reqUrl.searchParams.get("location") || "toronto"; + const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined; - try { - const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE, "./cookies/facebook.json"); - if (!items || items.length === 0) - return Response.json( - { message: "Search didn't return any results!" }, - { status: 404 }, - ); - return Response.json(items, { status: 200 }); - } catch (error) { - console.error("Facebook scraping error:", error); - const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; - return Response.json( - { message: errorMessage }, - { status: 400 }, - ); - } - }, + try { + const items = await fetchFacebookItems( + SEARCH_QUERY, + 5, + LOCATION, + 25, + COOKIES_SOURCE, + "./cookies/facebook.json", + ); + if (!items || items.length === 0) + return Response.json( + { message: "Search didn't return any results!" }, + { status: 404 }, + ); + return Response.json(items, { status: 200 }); + } catch (error) { + console.error("Facebook scraping error:", error); + const errorMessage = + error instanceof Error ? error.message : "Unknown error occurred"; + return Response.json({ message: errorMessage }, { status: 400 }); + } + }, - "/api/ebay": async (req: Request) => { - const reqUrl = new URL(req.url); + "/api/ebay": async (req: Request) => { + const reqUrl = new URL(req.url); - const SEARCH_QUERY = - req.headers.get("query") || reqUrl.searchParams.get("q") || null; - if (!SEARCH_QUERY) - return Response.json( - { - message: - "Request didn't have 'query' header or 'q' search parameter!", - }, - { status: 400 }, - ); + const SEARCH_QUERY = + req.headers.get("query") || reqUrl.searchParams.get("q") || null; + if (!SEARCH_QUERY) + return Response.json( + { + message: + "Request didn't have 'query' header or 'q' search parameter!", + }, + { status: 400 }, + ); - // Parse optional parameters with defaults - const minPriceParam = reqUrl.searchParams.get("minPrice"); - const minPrice = minPriceParam - ? Number.parseInt(minPriceParam, 10) - : undefined; - const maxPriceParam = reqUrl.searchParams.get("maxPrice"); - const maxPrice = maxPriceParam - ? Number.parseInt(maxPriceParam, 10) - : undefined; - const strictMode = reqUrl.searchParams.get("strictMode") === "true"; - const exclusionsParam = reqUrl.searchParams.get("exclusions"); - const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : []; - const keywordsParam = reqUrl.searchParams.get("keywords"); - const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY]; + // Parse optional parameters with defaults + const minPriceParam = reqUrl.searchParams.get("minPrice"); + const minPrice = minPriceParam + ? Number.parseInt(minPriceParam, 10) + : undefined; + const maxPriceParam = reqUrl.searchParams.get("maxPrice"); + const maxPrice = maxPriceParam + ? Number.parseInt(maxPriceParam, 10) + : undefined; + const strictMode = reqUrl.searchParams.get("strictMode") === "true"; + const exclusionsParam = reqUrl.searchParams.get("exclusions"); + const exclusions = exclusionsParam + ? exclusionsParam.split(",").map((s) => s.trim()) + : []; + const keywordsParam = reqUrl.searchParams.get("keywords"); + const keywords = keywordsParam + ? keywordsParam.split(",").map((s) => s.trim()) + : [SEARCH_QUERY]; - try { - const items = await fetchEbayItems(SEARCH_QUERY, 5, { - minPrice, - maxPrice, - strictMode, - exclusions, - keywords, - }); - if (!items || items.length === 0) - return Response.json( - { message: "Search didn't return any results!" }, - { status: 404 }, - ); - return Response.json(items, { status: 200 }); - } catch (error) { - console.error("eBay scraping error:", error); - const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; - return Response.json( - { message: errorMessage }, - { status: 400 }, - ); - } - }, + try { + const items = await fetchEbayItems(SEARCH_QUERY, 5, { + minPrice, + maxPrice, + strictMode, + exclusions, + keywords, + }); + if (!items || items.length === 0) + return Response.json( + { message: "Search didn't return any results!" }, + { status: 404 }, + ); + return Response.json(items, { status: 200 }); + } catch (error) { + console.error("eBay scraping error:", error); + const errorMessage = + error instanceof Error ? error.message : "Unknown error occurred"; + return Response.json({ message: errorMessage }, { status: 400 }); + } + }, - // Wildcard route for all routes that start with "/api/" and aren't otherwise matched - "/api/*": Response.json({ message: "Not found" }, { status: 404 }), + // Wildcard route for all routes that start with "/api/" and aren't otherwise matched + "/api/*": Response.json({ message: "Not found" }, { status: 404 }), - // // Serve a file by buffering it in memory - // "/favicon.ico": new Response(await Bun.file("./favicon.ico").bytes(), { - // headers: { - // "Content-Type": "image/x-icon", - // }, - // }), - }, + // // Serve a file by buffering it in memory + // "/favicon.ico": new Response(await Bun.file("./favicon.ico").bytes(), { + // headers: { + // "Content-Type": "image/x-icon", + // }, + // }), + }, - // (optional) fallback for unmatched routes: - // Required if Bun's version < 1.2.3 - fetch(req: Request) { - return new Response("Not Found", { status: 404 }); - }, + // (optional) fallback for unmatched routes: + // Required if Bun's version < 1.2.3 + fetch(req: Request) { + return new Response("Not Found", { status: 404 }); + }, }); console.log(`Serving on ${server.hostname}:${server.port}`); diff --git a/src/kijiji.ts b/src/kijiji.ts index 4379bcc..caf1340 100644 --- a/src/kijiji.ts +++ b/src/kijiji.ts @@ -1,7 +1,7 @@ +import cliProgress from "cli-progress"; /* eslint-disable @typescript-eslint/no-explicit-any */ import { parseHTML } from "linkedom"; import unidecode from "unidecode"; -import cliProgress from "cli-progress"; // const unidecode = require("unidecode"); @@ -10,171 +10,171 @@ import cliProgress from "cli-progress"; type HTMLString = string; type SearchListing = { - name: string; - listingLink: string; + name: string; + listingLink: string; }; type ApolloRecord = Record; interface ApolloSearchItem { - url?: string; - title?: string; - [k: string]: unknown; + url?: string; + title?: string; + [k: string]: unknown; } interface ApolloListingRoot { - url?: string; - title?: string; - description?: string; - price?: { amount?: number | string; currency?: string; type?: string }; - type?: string; - status?: string; - activationDate?: string; - endDate?: string; - metrics?: { views?: number | string }; - location?: { - address?: string | null; - id?: number; - name?: string; - coordinates?: { latitude: number; longitude: number }; - }; - imageUrls?: string[]; - imageCount?: number; - categoryId?: number; - adSource?: string; - flags?: { topAd?: boolean; priceDrop?: boolean }; - posterInfo?: { posterId?: string; rating?: number }; - attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>; - [k: string]: unknown; + url?: string; + title?: string; + description?: string; + price?: { amount?: number | string; currency?: string; type?: string }; + type?: string; + status?: string; + activationDate?: string; + endDate?: string; + metrics?: { views?: number | string }; + location?: { + address?: string | null; + id?: number; + name?: string; + coordinates?: { latitude: number; longitude: number }; + }; + imageUrls?: string[]; + imageCount?: number; + categoryId?: number; + adSource?: string; + flags?: { topAd?: boolean; priceDrop?: boolean }; + posterInfo?: { posterId?: string; rating?: number }; + attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>; + [k: string]: unknown; } // Keep existing interface for backward compatibility type ListingDetails = { - url: string; - title: string; - description?: string; - listingPrice?: { - amountFormatted: string; - cents?: number; - currency?: string; - }; - listingType?: string; - listingStatus?: string; - creationDate?: string; - endDate?: string; - numberOfViews?: number; - address?: string | null; + url: string; + title: string; + description?: string; + listingPrice?: { + amountFormatted: string; + cents?: number; + currency?: string; + }; + listingType?: string; + listingStatus?: string; + creationDate?: string; + endDate?: string; + numberOfViews?: number; + address?: string | null; }; // New comprehensive interface for detailed listings interface DetailedListing extends ListingDetails { - images: string[]; - categoryId: number; - adSource: string; - flags: { - topAd: boolean; - priceDrop: boolean; - }; - attributes: Record; - location: { - id: number; - name: string; - coordinates?: { - latitude: number; - longitude: number; - }; - }; - sellerInfo?: { - posterId: string; - rating?: number; - accountType?: string; - memberSince?: string; - reviewCount?: number; - reviewScore?: number; - }; + images: string[]; + categoryId: number; + adSource: string; + flags: { + topAd: boolean; + priceDrop: boolean; + }; + attributes: Record; + location: { + id: number; + name: string; + coordinates?: { + latitude: number; + longitude: number; + }; + }; + sellerInfo?: { + posterId: string; + rating?: number; + accountType?: string; + memberSince?: string; + reviewCount?: number; + reviewScore?: number; + }; } // Configuration interfaces interface SearchOptions { - location?: number | string; // Location ID or name - category?: number | string; // Category ID or name - keywords?: string; - sortBy?: 'relevancy' | 'date' | 'price' | 'distance'; - sortOrder?: 'desc' | 'asc'; - maxPages?: number; // Default: 5 - priceMin?: number; - priceMax?: number; + location?: number | string; // Location ID or name + category?: number | string; // Category ID or name + keywords?: string; + sortBy?: "relevancy" | "date" | "price" | "distance"; + sortOrder?: "desc" | "asc"; + maxPages?: number; // Default: 5 + priceMin?: number; + priceMax?: number; } interface ListingFetchOptions { - includeImages?: boolean; // Default: true - sellerDataDepth?: 'basic' | 'detailed' | 'full'; // Default: 'detailed' - includeClientSideData?: boolean; // Default: false + includeImages?: boolean; // Default: true + sellerDataDepth?: "basic" | "detailed" | "full"; // Default: 'detailed' + includeClientSideData?: boolean; // Default: false } // ----------------------------- Constants & Mappings ----------------------------- // Location mappings from KIJIJI.md const LOCATION_MAPPINGS: Record = { - 'canada': 0, - 'ontario': 9004, - 'toronto': 1700273, - 'gta': 1700272, - 'oshawa': 1700275, - 'quebec': 9001, - 'nova scotia': 9002, - 'alberta': 9003, - 'new brunswick': 9005, - 'manitoba': 9006, - 'british columbia': 9007, - 'newfoundland': 9008, - 'saskatchewan': 9009, - 'territories': 9010, - 'pei': 9011, - 'prince edward island': 9011, + canada: 0, + ontario: 9004, + toronto: 1700273, + gta: 1700272, + oshawa: 1700275, + quebec: 9001, + "nova scotia": 9002, + alberta: 9003, + "new brunswick": 9005, + manitoba: 9006, + "british columbia": 9007, + newfoundland: 9008, + saskatchewan: 9009, + territories: 9010, + pei: 9011, + "prince edward island": 9011, }; // Category mappings from KIJIJI.md (Buy & Sell main categories) const CATEGORY_MAPPINGS: Record = { - 'all': 0, - 'buy-sell': 10, - 'arts-collectibles': 12, - 'audio': 767, - 'baby-items': 253, - 'bags-luggage': 931, - 'bikes': 644, - 'books': 109, - 'cameras': 103, - 'cds': 104, - 'clothing': 274, - 'computers': 16, - 'computer-accessories': 128, - 'electronics': 29659001, - 'free-stuff': 17220001, - 'furniture': 235, - 'garage-sales': 638, - 'health-special-needs': 140, - 'hobbies-crafts': 139, - 'home-appliances': 107, - 'home-indoor': 717, - 'home-outdoor': 727, - 'jewellery': 133, - 'musical-instruments': 17, - 'phones': 132, - 'sporting-goods': 111, - 'tools': 110, - 'toys-games': 108, - 'tvs-video': 15093001, - 'video-games': 141, - 'other': 26, + all: 0, + "buy-sell": 10, + "arts-collectibles": 12, + audio: 767, + "baby-items": 253, + "bags-luggage": 931, + bikes: 644, + books: 109, + cameras: 103, + cds: 104, + clothing: 274, + computers: 16, + "computer-accessories": 128, + electronics: 29659001, + "free-stuff": 17220001, + furniture: 235, + "garage-sales": 638, + "health-special-needs": 140, + "hobbies-crafts": 139, + "home-appliances": 107, + "home-indoor": 717, + "home-outdoor": 727, + jewellery: 133, + "musical-instruments": 17, + phones: 132, + "sporting-goods": 111, + tools: 110, + "toys-games": 108, + "tvs-video": 15093001, + "video-games": 141, + other: 26, }; // Sort parameter mappings const SORT_MAPPINGS: Record = { - 'relevancy': 'MATCH', - 'date': 'DATE', - 'price': 'PRICE', - 'distance': 'DISTANCE', + relevancy: "MATCH", + date: "DATE", + price: "PRICE", + distance: "DISTANCE", }; // ----------------------------- Exports for Testing ----------------------------- @@ -193,157 +193,160 @@ const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]); * Resolve location ID from name or return numeric ID */ function resolveLocationId(location?: number | string): number { - if (typeof location === 'number') return location; - if (typeof location === 'string') { - const normalized = location.toLowerCase().replace(/\s+/g, '-'); - return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) - } - return 0; // Default to Canada + if (typeof location === "number") return location; + if (typeof location === "string") { + const normalized = location.toLowerCase().replace(/\s+/g, "-"); + return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) + } + return 0; // Default to Canada } /** * Resolve category ID from name or return numeric ID */ function resolveCategoryId(category?: number | string): number { - if (typeof category === 'number') return category; - if (typeof category === 'string') { - const normalized = category.toLowerCase().replace(/\s+/g, '-'); - return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories - } - return 0; // Default to all categories + if (typeof category === "number") return category; + if (typeof category === "string") { + const normalized = category.toLowerCase().replace(/\s+/g, "-"); + return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories + } + return 0; // Default to all categories } /** * Build search URL with enhanced parameters */ function buildSearchUrl( - keywords: string, - options: SearchOptions & { page?: number }, - BASE_URL = "https://www.kijiji.ca" + keywords: string, + options: SearchOptions & { page?: number }, + BASE_URL = "https://www.kijiji.ca", ): string { - const locationId = resolveLocationId(options.location); - const categoryId = resolveCategoryId(options.category); + const locationId = resolveLocationId(options.location); + const categoryId = resolveCategoryId(options.category); - const categorySlug = categoryId === 0 ? 'buy-sell' : 'buy-sell'; // Could be enhanced - const locationSlug = locationId === 0 ? 'canada' : 'canada'; // Could be enhanced + const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell"; // Could be enhanced + const locationSlug = locationId === 0 ? "canada" : "canada"; // Could be enhanced - let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; + let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; - const sortParam = options.sortBy ? `&sort=${SORT_MAPPINGS[options.sortBy]}` : ''; - const sortOrder = options.sortOrder === 'asc' ? 'ASC' : 'DESC'; - const pageParam = options.page && options.page > 1 ? `&page=${options.page}` : ''; + const sortParam = options.sortBy + ? `&sort=${SORT_MAPPINGS[options.sortBy]}` + : ""; + const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC"; + const pageParam = + options.page && options.page > 1 ? `&page=${options.page}` : ""; - url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; + url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; - return url; + return url; } /** * Slugifies a string for search */ export function slugify(input: string): string { - const s = unidecode(input).toLowerCase(); - const out: string[] = []; - let lastHyphen = false; + const s = unidecode(input).toLowerCase(); + const out: string[] = []; + let lastHyphen = false; - for (let i = 0; i < s.length; i++) { - const ch = s[i]; - if (!ch) continue; - const code = ch.charCodeAt(0); + for (let i = 0; i < s.length; i++) { + const ch = s[i]; + if (!ch) continue; + const code = ch.charCodeAt(0); - // a-z or 0-9 - if ((code >= 97 && code <= 122) || (code >= 48 && code <= 57)) { - out.push(ch); - lastHyphen = false; - } else if (SEPS.has(ch)) { - if (!lastHyphen) { - out.push("-"); - lastHyphen = true; - } - } - // else drop character - } - return out.join(""); + // a-z or 0-9 + if ((code >= 97 && code <= 122) || (code >= 48 && code <= 57)) { + out.push(ch); + lastHyphen = false; + } else if (SEPS.has(ch)) { + if (!lastHyphen) { + out.push("-"); + lastHyphen = true; + } + } + // else drop character + } + return out.join(""); } /** * Turns cents to localized currency string. */ export function formatCentsToCurrency( - num: number | string | undefined, - locale = "en-US", + num: number | string | undefined, + locale = "en-US", ): string { - if (num == null) return ""; - const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; - if (Number.isNaN(cents)) return ""; - const dollars = cents / 100; - const formatter = new Intl.NumberFormat(locale, { - style: 'currency', - currency: 'USD', - minimumFractionDigits: 2, - maximumFractionDigits: 2, - }); - return formatter.format(dollars); + if (num == null) return ""; + const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; + if (Number.isNaN(cents)) return ""; + const dollars = cents / 100; + const formatter = new Intl.NumberFormat(locale, { + style: "currency", + currency: "USD", + minimumFractionDigits: 2, + maximumFractionDigits: 2, + }); + return formatter.format(dollars); } function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); + return typeof value === "object" && value !== null && !Array.isArray(value); } async function delay(ms: number): Promise { - await new Promise((resolve) => setTimeout(resolve, ms)); + await new Promise((resolve) => setTimeout(resolve, ms)); } // ----------------------------- Error Classes ----------------------------- class HttpError extends Error { - constructor( - message: string, - public readonly status: number, - public readonly url: string, - ) { - super(message); - this.name = "HttpError"; - } + constructor( + message: string, + public readonly status: number, + public readonly url: string, + ) { + super(message); + this.name = "HttpError"; + } } class NetworkError extends Error { - constructor( - message: string, - public readonly url: string, - public readonly cause?: Error, - ) { - super(message); - this.name = "NetworkError"; - } + constructor( + message: string, + public readonly url: string, + public readonly cause?: Error, + ) { + super(message); + this.name = "NetworkError"; + } } class ParseError extends Error { - constructor( - message: string, - public readonly data?: unknown, - ) { - super(message); - this.name = "ParseError"; - } + constructor( + message: string, + public readonly data?: unknown, + ) { + super(message); + this.name = "ParseError"; + } } class RateLimitError extends Error { - constructor( - message: string, - public readonly url: string, - public readonly resetTime?: number, - ) { - super(message); - this.name = "RateLimitError"; - } + constructor( + message: string, + public readonly url: string, + public readonly resetTime?: number, + ) { + super(message); + this.name = "RateLimitError"; + } } class ValidationError extends Error { - constructor(message: string) { - super(message); - this.name = "ValidationError"; - } + constructor(message: string) { + super(message); + this.name = "ValidationError"; + } } // ----------------------------- HTTP Client ----------------------------- @@ -355,117 +358,118 @@ class ValidationError extends Error { - Exponential backoff with jitter */ async function fetchHtml( - url: string, - DELAY_MS: number, - opts?: { - maxRetries?: number; - retryBaseMs?: number; - onRateInfo?: (remaining: string | null, reset: string | null) => void; - }, + url: string, + DELAY_MS: number, + opts?: { + maxRetries?: number; + retryBaseMs?: number; + onRateInfo?: (remaining: string | null, reset: string | null) => void; + }, ): Promise { - const maxRetries = opts?.maxRetries ?? 3; - const retryBaseMs = opts?.retryBaseMs ?? 1000; + const maxRetries = opts?.maxRetries ?? 3; + const retryBaseMs = opts?.retryBaseMs ?? 1000; - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 30000); // 30s timeout + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30000); // 30s timeout - const res = await fetch(url, { - method: "GET", - headers: { - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", - "cache-control": "no-cache", - "upgrade-insecure-requests": "1", - "user-agent": - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", - }, - signal: controller.signal, - }); + const res = await fetch(url, { + method: "GET", + headers: { + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", + "cache-control": "no-cache", + "upgrade-insecure-requests": "1", + "user-agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", + }, + signal: controller.signal, + }); - clearTimeout(timeoutId); + clearTimeout(timeoutId); - const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); - const rateLimitReset = res.headers.get("X-RateLimit-Reset"); - opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); + const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); + const rateLimitReset = res.headers.get("X-RateLimit-Reset"); + opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); - if (!res.ok) { - // Handle rate limiting - if (res.status === 429) { - const resetSeconds = rateLimitReset ? Number(rateLimitReset) : Number.NaN; - const waitMs = Number.isFinite(resetSeconds) - ? Math.max(0, resetSeconds * 1000) - : calculateBackoffDelay(attempt, retryBaseMs); + if (!res.ok) { + // Handle rate limiting + if (res.status === 429) { + const resetSeconds = rateLimitReset + ? Number(rateLimitReset) + : Number.NaN; + const waitMs = Number.isFinite(resetSeconds) + ? Math.max(0, resetSeconds * 1000) + : calculateBackoffDelay(attempt, retryBaseMs); - if (attempt < maxRetries) { - await delay(waitMs); - continue; - } - throw new RateLimitError( - `Rate limit exceeded for ${url}`, - url, - resetSeconds, - ); - } + if (attempt < maxRetries) { + await delay(waitMs); + continue; + } + throw new RateLimitError( + `Rate limit exceeded for ${url}`, + url, + resetSeconds, + ); + } - // Retry on server errors - if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { - await delay(calculateBackoffDelay(attempt, retryBaseMs)); - continue; - } + // Retry on server errors + if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { + await delay(calculateBackoffDelay(attempt, retryBaseMs)); + continue; + } - throw new HttpError( - `Request failed with status ${res.status}`, - res.status, - url, - ); - } + throw new HttpError( + `Request failed with status ${res.status}`, + res.status, + url, + ); + } - const html = await res.text(); + const html = await res.text(); - // Respect per-request delay to maintain rate limiting - await delay(DELAY_MS); - return html; + // Respect per-request delay to maintain rate limiting + await delay(DELAY_MS); + return html; + } catch (err) { + // Handle different error types + if (err instanceof RateLimitError || err instanceof HttpError) { + throw err; // Re-throw known errors + } - } catch (err) { - // Handle different error types - if (err instanceof RateLimitError || err instanceof HttpError) { - throw err; // Re-throw known errors - } + if (err instanceof Error && err.name === "AbortError") { + if (attempt < maxRetries) { + await delay(calculateBackoffDelay(attempt, retryBaseMs)); + continue; + } + throw new NetworkError(`Request timeout for ${url}`, url, err); + } - if (err instanceof Error && err.name === 'AbortError') { - if (attempt < maxRetries) { - await delay(calculateBackoffDelay(attempt, retryBaseMs)); - continue; - } - throw new NetworkError(`Request timeout for ${url}`, url, err); - } + // Network or other errors + if (attempt < maxRetries) { + await delay(calculateBackoffDelay(attempt, retryBaseMs)); + continue; + } + throw new NetworkError( + `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, + url, + err instanceof Error ? err : undefined, + ); + } + } - // Network or other errors - if (attempt < maxRetries) { - await delay(calculateBackoffDelay(attempt, retryBaseMs)); - continue; - } - throw new NetworkError( - `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, - url, - err instanceof Error ? err : undefined - ); - } - } - - throw new NetworkError(`Exhausted retries without response for ${url}`, url); + throw new NetworkError(`Exhausted retries without response for ${url}`, url); } /** * Calculate exponential backoff delay with jitter */ function calculateBackoffDelay(attempt: number, baseMs: number): number { - const exponentialDelay = baseMs * (2 ** attempt); - const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter - return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds + const exponentialDelay = baseMs * 2 ** attempt; + const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter + return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds } // ----------------------------- GraphQL Client ----------------------------- @@ -474,72 +478,75 @@ function calculateBackoffDelay(attempt: number, baseMs: number): number { * Fetch additional data via GraphQL API */ async function fetchGraphQLData( - query: string, - variables: Record, - BASE_URL = "https://www.kijiji.ca" + query: string, + variables: Record, + BASE_URL = "https://www.kijiji.ca", ): Promise { - const endpoint = `${BASE_URL}/anvil/api`; + const endpoint = `${BASE_URL}/anvil/api`; - try { - const response = await fetch(endpoint, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'apollo-require-preflight': 'true', - }, - body: JSON.stringify({ - query, - variables, - }), - }); + try { + const response = await fetch(endpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + "apollo-require-preflight": "true", + }, + body: JSON.stringify({ + query, + variables, + }), + }); - if (!response.ok) { - throw new HttpError( - `GraphQL request failed with status ${response.status}`, - response.status, - endpoint - ); - } + if (!response.ok) { + throw new HttpError( + `GraphQL request failed with status ${response.status}`, + response.status, + endpoint, + ); + } - const result = await response.json(); + const result = await response.json(); - if (result.errors) { - throw new ParseError(`GraphQL errors: ${JSON.stringify(result.errors)}`, result.errors); - } + if (result.errors) { + throw new ParseError( + `GraphQL errors: ${JSON.stringify(result.errors)}`, + result.errors, + ); + } - return result.data; - } catch (err) { - if (err instanceof HttpError || err instanceof ParseError) { - throw err; - } - throw new NetworkError( - `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, - endpoint, - err instanceof Error ? err : undefined - ); - } + return result.data; + } catch (err) { + if (err instanceof HttpError || err instanceof ParseError) { + throw err; + } + throw new NetworkError( + `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, + endpoint, + err instanceof Error ? err : undefined, + ); + } } // GraphQL response interfaces interface GraphQLReviewResponse { - user?: { - reviewSummary?: { - count?: number; - score?: number; - }; - }; + user?: { + reviewSummary?: { + count?: number; + score?: number; + }; + }; } interface GraphQLProfileResponse { - user?: { - memberSince?: string; - accountType?: string; - }; + user?: { + memberSince?: string; + accountType?: string; + }; } // GraphQL queries from KIJIJI.md const GRAPHQL_QUERIES = { - getReviewSummary: ` + getReviewSummary: ` query GetReviewSummary($userId: String!) { user(id: $userId) { reviewSummary { @@ -551,7 +558,7 @@ const GRAPHQL_QUERIES = { } } `, - getProfileMetrics: ` + getProfileMetrics: ` query GetProfileMetrics($profileId: String!) { user(id: $profileId) { memberSince @@ -566,29 +573,45 @@ const GRAPHQL_QUERIES = { * Fetch additional seller data via GraphQL */ async function fetchSellerDetails( - posterId: string, - BASE_URL = "https://www.kijiji.ca" -): Promise<{ reviewCount?: number; reviewScore?: number; memberSince?: string; accountType?: string }> { - try { - const [reviewData, profileData] = await Promise.all([ - fetchGraphQLData(GRAPHQL_QUERIES.getReviewSummary, { userId: posterId }, BASE_URL), - fetchGraphQLData(GRAPHQL_QUERIES.getProfileMetrics, { profileId: posterId }, BASE_URL), - ]); + posterId: string, + BASE_URL = "https://www.kijiji.ca", +): Promise<{ + reviewCount?: number; + reviewScore?: number; + memberSince?: string; + accountType?: string; +}> { + try { + const [reviewData, profileData] = await Promise.all([ + fetchGraphQLData( + GRAPHQL_QUERIES.getReviewSummary, + { userId: posterId }, + BASE_URL, + ), + fetchGraphQLData( + GRAPHQL_QUERIES.getProfileMetrics, + { profileId: posterId }, + BASE_URL, + ), + ]); - const reviewResponse = reviewData as GraphQLReviewResponse; - const profileResponse = profileData as GraphQLProfileResponse; + const reviewResponse = reviewData as GraphQLReviewResponse; + const profileResponse = profileData as GraphQLProfileResponse; - return { - reviewCount: reviewResponse?.user?.reviewSummary?.count, - reviewScore: reviewResponse?.user?.reviewSummary?.score, - memberSince: profileResponse?.user?.memberSince, - accountType: profileResponse?.user?.accountType, - }; - } catch (err) { - // Silently fail for GraphQL errors - not critical for basic functionality - console.warn(`Failed to fetch seller details for ${posterId}:`, err instanceof Error ? err.message : String(err)); - return {}; - } + return { + reviewCount: reviewResponse?.user?.reviewSummary?.count, + reviewScore: reviewResponse?.user?.reviewSummary?.score, + memberSince: profileResponse?.user?.memberSince, + accountType: profileResponse?.user?.accountType, + }; + } catch (err) { + // Silently fail for GraphQL errors - not critical for basic functionality + console.warn( + `Failed to fetch seller details for ${posterId}:`, + err instanceof Error ? err.message : String(err), + ); + return {}; + } } // ----------------------------- Parsing ----------------------------- @@ -597,17 +620,17 @@ async function fetchSellerDetails( Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML. */ function extractApolloState(htmlString: HTMLString): ApolloRecord | null { - const { document } = parseHTML(htmlString); - const nextData = document.getElementById("__NEXT_DATA__"); - if (!nextData || !nextData.textContent) return null; + const { document } = parseHTML(htmlString); + const nextData = document.getElementById("__NEXT_DATA__"); + if (!nextData || !nextData.textContent) return null; - try { - const jsonData = JSON.parse(nextData.textContent); - const apollo = jsonData?.props?.pageProps?.__APOLLO_STATE__; - return isRecord(apollo) ? apollo : null; - } catch { - return null; - } + try { + const jsonData = JSON.parse(nextData.textContent); + const apollo = jsonData?.props?.pageProps?.__APOLLO_STATE__; + return isRecord(apollo) ? apollo : null; + } catch { + return null; + } } /** @@ -615,339 +638,378 @@ function extractApolloState(htmlString: HTMLString): ApolloRecord | null { Filters keys likely to be listing entities and ensures url/title exist. */ function parseSearch( - htmlString: HTMLString, - BASE_URL: string, + htmlString: HTMLString, + BASE_URL: string, ): SearchListing[] { - const apolloState = extractApolloState(htmlString); - if (!apolloState) return []; + const apolloState = extractApolloState(htmlString); + if (!apolloState) return []; - const results: SearchListing[] = []; - for (const [key, value] of Object.entries(apolloState)) { - // Heuristic: Kijiji listing keys usually contain "Listing" - if (!key.includes("Listing")) continue; - if (!isRecord(value)) continue; + const results: SearchListing[] = []; + for (const [key, value] of Object.entries(apolloState)) { + // Heuristic: Kijiji listing keys usually contain "Listing" + if (!key.includes("Listing")) continue; + if (!isRecord(value)) continue; - const item = value as ApolloSearchItem; - if (typeof item.url === "string" && typeof item.title === "string") { - results.push({ - listingLink: item.url.startsWith("http") - ? item.url - : `${BASE_URL}${item.url}`, - name: item.title, - }); - } - } - return results; + const item = value as ApolloSearchItem; + if (typeof item.url === "string" && typeof item.title === "string") { + results.push({ + listingLink: item.url.startsWith("http") + ? item.url + : `${BASE_URL}${item.url}`, + name: item.title, + }); + } + } + return results; } /** Parse a listing page into a typed object. */ function parseListing( - htmlString: HTMLString, - BASE_URL: string, + htmlString: HTMLString, + BASE_URL: string, ): ListingDetails | null { - const apolloState = extractApolloState(htmlString); - if (!apolloState) return null; + const apolloState = extractApolloState(htmlString); + if (!apolloState) return null; - // Find the listing root key - const listingKey = Object.keys(apolloState).find((k) => - k.includes("Listing"), - ); - if (!listingKey) return null; + // Find the listing root key + const listingKey = Object.keys(apolloState).find((k) => + k.includes("Listing"), + ); + if (!listingKey) return null; - const root = apolloState[listingKey]; - if (!isRecord(root)) return null; + const root = apolloState[listingKey]; + if (!isRecord(root)) return null; - const { - url, - title, - description, - price, - type, - status, - activationDate, - endDate, - metrics, - location, - } = root as ApolloListingRoot; + const { + url, + title, + description, + price, + type, + status, + activationDate, + endDate, + metrics, + location, + } = root as ApolloListingRoot; - const cents = price?.amount != null ? Number(price.amount) : undefined; - const amountFormatted = formatCentsToCurrency(cents); + const cents = price?.amount != null ? Number(price.amount) : undefined; + const amountFormatted = formatCentsToCurrency(cents); - const numberOfViews = - metrics?.views != null ? Number(metrics.views) : undefined; + const numberOfViews = + metrics?.views != null ? Number(metrics.views) : undefined; - const listingUrl = - typeof url === "string" - ? url.startsWith("http") - ? url - : `${BASE_URL}${url}` - : ""; + const listingUrl = + typeof url === "string" + ? url.startsWith("http") + ? url + : `${BASE_URL}${url}` + : ""; - if (!listingUrl || !title) return null; + if (!listingUrl || !title) return null; - return { - url: listingUrl, - title, - description, - listingPrice: amountFormatted - ? { - amountFormatted, - cents: cents !== undefined && Number.isFinite(cents) ? cents : undefined, - currency: price?.currency, - } - : undefined, - listingType: type, - listingStatus: status, - creationDate: activationDate, - endDate, - numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined, - address: location?.address ?? null, - }; + return { + url: listingUrl, + title, + description, + listingPrice: amountFormatted + ? { + amountFormatted, + cents: + cents !== undefined && Number.isFinite(cents) ? cents : undefined, + currency: price?.currency, + } + : undefined, + listingType: type, + listingStatus: status, + creationDate: activationDate, + endDate, + numberOfViews: + numberOfViews !== undefined && Number.isFinite(numberOfViews) + ? numberOfViews + : undefined, + address: location?.address ?? null, + }; } /** * Parse a listing page into a detailed object with all available fields */ async function parseDetailedListing( - htmlString: HTMLString, - BASE_URL: string, - options: ListingFetchOptions = {} + htmlString: HTMLString, + BASE_URL: string, + options: ListingFetchOptions = {}, ): Promise { - const apolloState = extractApolloState(htmlString); - if (!apolloState) return null; + const apolloState = extractApolloState(htmlString); + if (!apolloState) return null; - // Find the listing root key - const listingKey = Object.keys(apolloState).find((k) => - k.includes("Listing"), - ); - if (!listingKey) return null; + // Find the listing root key + const listingKey = Object.keys(apolloState).find((k) => + k.includes("Listing"), + ); + if (!listingKey) return null; - const root = apolloState[listingKey]; - if (!isRecord(root)) return null; + const root = apolloState[listingKey]; + if (!isRecord(root)) return null; - const { - url, - title, - description, - price, - type, - status, - activationDate, - endDate, - metrics, - location, - imageUrls, - imageCount, - categoryId, - adSource, - flags, - posterInfo, - attributes, - } = root as ApolloListingRoot; + const { + url, + title, + description, + price, + type, + status, + activationDate, + endDate, + metrics, + location, + imageUrls, + imageCount, + categoryId, + adSource, + flags, + posterInfo, + attributes, + } = root as ApolloListingRoot; - const cents = price?.amount != null ? Number(price.amount) : undefined; - const amountFormatted = formatCentsToCurrency(cents); + const cents = price?.amount != null ? Number(price.amount) : undefined; + const amountFormatted = formatCentsToCurrency(cents); - const numberOfViews = - metrics?.views != null ? Number(metrics.views) : undefined; + const numberOfViews = + metrics?.views != null ? Number(metrics.views) : undefined; - const listingUrl = - typeof url === "string" - ? url.startsWith("http") - ? url - : `${BASE_URL}${url}` - : ""; + const listingUrl = + typeof url === "string" + ? url.startsWith("http") + ? url + : `${BASE_URL}${url}` + : ""; - if (!listingUrl || !title) return null; + if (!listingUrl || !title) return null; - // Only include fixed-price listings - if (!amountFormatted || cents === undefined) return null; + // Only include fixed-price listings + if (!amountFormatted || cents === undefined) return null; - // Extract images if requested - const images = options.includeImages !== false && Array.isArray(imageUrls) - ? imageUrls.filter((url): url is string => typeof url === 'string') - : []; + // Extract images if requested + const images = + options.includeImages !== false && Array.isArray(imageUrls) + ? imageUrls.filter((url): url is string => typeof url === "string") + : []; - // Extract attributes as key-value pairs - const attributeMap: Record = {}; - if (Array.isArray(attributes)) { - for (const attr of attributes) { - if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) { - attributeMap[attr.canonicalName] = attr.canonicalValues; - } - } - } + // Extract attributes as key-value pairs + const attributeMap: Record = {}; + if (Array.isArray(attributes)) { + for (const attr of attributes) { + if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) { + attributeMap[attr.canonicalName] = attr.canonicalValues; + } + } + } - // Extract seller info based on depth setting - let sellerInfo: DetailedListing['sellerInfo']; - const depth = options.sellerDataDepth ?? 'detailed'; + // Extract seller info based on depth setting + let sellerInfo: DetailedListing["sellerInfo"]; + const depth = options.sellerDataDepth ?? "detailed"; - if (posterInfo?.posterId) { - sellerInfo = { - posterId: posterInfo.posterId, - rating: typeof posterInfo.rating === 'number' ? posterInfo.rating : undefined, - }; + if (posterInfo?.posterId) { + sellerInfo = { + posterId: posterInfo.posterId, + rating: + typeof posterInfo.rating === "number" ? posterInfo.rating : undefined, + }; - // Add more detailed info if requested and client-side data is enabled - if ((depth === 'detailed' || depth === 'full') && options.includeClientSideData) { - try { - const additionalData = await fetchSellerDetails(posterInfo.posterId, BASE_URL); - sellerInfo = { - ...sellerInfo, - ...additionalData, - }; - } catch (err) { - // Silently fail - GraphQL data is optional - console.warn(`Failed to fetch additional seller data for ${posterInfo.posterId}`); - } - } - } + // Add more detailed info if requested and client-side data is enabled + if ( + (depth === "detailed" || depth === "full") && + options.includeClientSideData + ) { + try { + const additionalData = await fetchSellerDetails( + posterInfo.posterId, + BASE_URL, + ); + sellerInfo = { + ...sellerInfo, + ...additionalData, + }; + } catch (err) { + // Silently fail - GraphQL data is optional + console.warn( + `Failed to fetch additional seller data for ${posterInfo.posterId}`, + ); + } + } + } - return { - url: listingUrl, - title, - description, - listingPrice: { - amountFormatted, - cents, - currency: price?.currency, - }, - listingType: type, - listingStatus: status, - creationDate: activationDate, - endDate, - numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined, - address: location?.address ?? null, - images, - categoryId: typeof categoryId === 'number' ? categoryId : 0, - adSource: typeof adSource === 'string' ? adSource : 'UNKNOWN', - flags: { - topAd: flags?.topAd === true, - priceDrop: flags?.priceDrop === true, - }, - attributes: attributeMap, - location: { - id: typeof location?.id === 'number' ? location.id : 0, - name: typeof location?.name === 'string' ? location.name : 'Unknown', - coordinates: location?.coordinates ? { - latitude: location.coordinates.latitude, - longitude: location.coordinates.longitude, - } : undefined, - }, - sellerInfo, - }; + return { + url: listingUrl, + title, + description, + listingPrice: { + amountFormatted, + cents, + currency: price?.currency, + }, + listingType: type, + listingStatus: status, + creationDate: activationDate, + endDate, + numberOfViews: + numberOfViews !== undefined && Number.isFinite(numberOfViews) + ? numberOfViews + : undefined, + address: location?.address ?? null, + images, + categoryId: typeof categoryId === "number" ? categoryId : 0, + adSource: typeof adSource === "string" ? adSource : "UNKNOWN", + flags: { + topAd: flags?.topAd === true, + priceDrop: flags?.priceDrop === true, + }, + attributes: attributeMap, + location: { + id: typeof location?.id === "number" ? location.id : 0, + name: typeof location?.name === "string" ? location.name : "Unknown", + coordinates: location?.coordinates + ? { + latitude: location.coordinates.latitude, + longitude: location.coordinates.longitude, + } + : undefined, + }, + sellerInfo, + }; } // ----------------------------- Main ----------------------------- export default async function fetchKijijiItems( - SEARCH_QUERY: string, - REQUESTS_PER_SECOND = 1, - BASE_URL = "https://www.kijiji.ca", - searchOptions: SearchOptions = {}, - listingOptions: ListingFetchOptions = {}, + SEARCH_QUERY: string, + REQUESTS_PER_SECOND = 1, + BASE_URL = "https://www.kijiji.ca", + searchOptions: SearchOptions = {}, + listingOptions: ListingFetchOptions = {}, ) { - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); - // Set defaults for configuration - const finalSearchOptions: Required = { - location: searchOptions.location ?? 1700272, // Default to GTA - category: searchOptions.category ?? 0, // Default to all categories - keywords: searchOptions.keywords ?? SEARCH_QUERY, - sortBy: searchOptions.sortBy ?? 'relevancy', - sortOrder: searchOptions.sortOrder ?? 'desc', - maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages - priceMin: searchOptions.priceMin, - priceMax: searchOptions.priceMax, - }; + // Set defaults for configuration + const finalSearchOptions: Required = { + location: searchOptions.location ?? 1700272, // Default to GTA + category: searchOptions.category ?? 0, // Default to all categories + keywords: searchOptions.keywords ?? SEARCH_QUERY, + sortBy: searchOptions.sortBy ?? "relevancy", + sortOrder: searchOptions.sortOrder ?? "desc", + maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages + priceMin: searchOptions.priceMin, + priceMax: searchOptions.priceMax, + }; - const finalListingOptions: Required = { - includeImages: listingOptions.includeImages ?? true, - sellerDataDepth: listingOptions.sellerDataDepth ?? 'detailed', - includeClientSideData: listingOptions.includeClientSideData ?? false, - }; + const finalListingOptions: Required = { + includeImages: listingOptions.includeImages ?? true, + sellerDataDepth: listingOptions.sellerDataDepth ?? "detailed", + includeClientSideData: listingOptions.includeClientSideData ?? false, + }; - const allListings: DetailedListing[] = []; - const seenUrls = new Set(); + const allListings: DetailedListing[] = []; + const seenUrls = new Set(); - // Fetch multiple pages - for (let page = 1; page <= finalSearchOptions.maxPages; page++) { - const searchUrl = buildSearchUrl(finalSearchOptions.keywords, { - ...finalSearchOptions, - // Add page parameter for pagination - ...(page > 1 && { page }), - }, BASE_URL); + // Fetch multiple pages + for (let page = 1; page <= finalSearchOptions.maxPages; page++) { + const searchUrl = buildSearchUrl( + finalSearchOptions.keywords, + { + ...finalSearchOptions, + // Add page parameter for pagination + ...(page > 1 && { page }), + }, + BASE_URL, + ); - console.log(`Fetching search page ${page}: ${searchUrl}`); - const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log(`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`); - } - }, - }); + console.log(`Fetching search page ${page}: ${searchUrl}`); + const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + }); - const searchResults = parseSearch(searchHtml, BASE_URL); - if (searchResults.length === 0) { - console.log(`No more results found on page ${page}. Stopping pagination.`); - break; - } + const searchResults = parseSearch(searchHtml, BASE_URL); + if (searchResults.length === 0) { + console.log( + `No more results found on page ${page}. Stopping pagination.`, + ); + break; + } - // Deduplicate links across pages - const newListingLinks = searchResults - .map((r) => r.listingLink) - .filter((link) => !seenUrls.has(link)); + // Deduplicate links across pages + const newListingLinks = searchResults + .map((r) => r.listingLink) + .filter((link) => !seenUrls.has(link)); - for (const link of newListingLinks) { - seenUrls.add(link); - } + for (const link of newListingLinks) { + seenUrls.add(link); + } - console.log(`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`); + console.log( + `\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`, + ); - // Fetch details for this page's listings - const progressBar = new cliProgress.SingleBar( - {}, - cliProgress.Presets.shades_classic, - ); - const totalProgress = newListingLinks.length; - let currentProgress = 0; - progressBar.start(totalProgress, currentProgress); + // Fetch details for this page's listings + const progressBar = new cliProgress.SingleBar( + {}, + cliProgress.Presets.shades_classic, + ); + const totalProgress = newListingLinks.length; + let currentProgress = 0; + progressBar.start(totalProgress, currentProgress); - for (const link of newListingLinks) { - try { - const html = await fetchHtml(link, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log(`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`); - } - }, - }); - const parsed = await parseDetailedListing(html, BASE_URL, finalListingOptions); - if (parsed) { - allListings.push(parsed); - } - } catch (err) { - if (err instanceof HttpError) { - console.error(`\nFailed to fetch ${link}\n - ${err.status} ${err.message}`); - } else { - console.error(`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`); - } - } finally { - currentProgress++; - progressBar.update(currentProgress); - } - } + for (const link of newListingLinks) { + try { + const html = await fetchHtml(link, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + }); + const parsed = await parseDetailedListing( + html, + BASE_URL, + finalListingOptions, + ); + if (parsed) { + allListings.push(parsed); + } + } catch (err) { + if (err instanceof HttpError) { + console.error( + `\nFailed to fetch ${link}\n - ${err.status} ${err.message}`, + ); + } else { + console.error( + `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`, + ); + } + } finally { + currentProgress++; + progressBar.update(currentProgress); + } + } - progressBar.stop(); + progressBar.stop(); - // If we got fewer results than expected (40 per page), we've reached the end - if (searchResults.length < 40) { - break; - } - } + // If we got fewer results than expected (40 per page), we've reached the end + if (searchResults.length < 40) { + break; + } + } - console.log(`\nParsed ${allListings.length} detailed listings.`); - return allListings; + console.log(`\nParsed ${allListings.length} detailed listings.`); + return allListings; } diff --git a/test/facebook-core.test.ts b/test/facebook-core.test.ts index ea3b5a1..4103fb2 100644 --- a/test/facebook-core.test.ts +++ b/test/facebook-core.test.ts @@ -1,748 +1,834 @@ -import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"; +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { - fetchFacebookItem, - extractFacebookItemData, - extractFacebookMarketplaceData, - parseFacebookItem, - parseFacebookAds, - formatCentsToCurrency, - loadFacebookCookies, - formatCookiesForHeader, - parseFacebookCookieString, + extractFacebookItemData, + extractFacebookMarketplaceData, + fetchFacebookItem, + formatCentsToCurrency, + formatCookiesForHeader, + loadFacebookCookies, + parseFacebookAds, + parseFacebookCookieString, + parseFacebookItem, } from "../src/facebook"; // Mock fetch globally const originalFetch = global.fetch; describe("Facebook Marketplace Scraper Core Tests", () => { - beforeEach(() => { - global.fetch = mock(() => { - throw new Error("fetch should be mocked in individual tests"); - }); - }); - - afterEach(() => { - global.fetch = originalFetch; - }); - - describe("Cookie Parsing", () => { - describe("parseFacebookCookieString", () => { - test("should parse valid cookie string", () => { - const cookieString = 'c_user=123456789; xs=abcdef123456; fr=xyz789'; - const result = parseFacebookCookieString(cookieString); - - expect(result).toHaveLength(3); - expect(result[0]).toEqual({ - name: 'c_user', - value: '123456789', - domain: '.facebook.com', - path: '/', - secure: true, - httpOnly: false, - sameSite: 'lax', - expirationDate: undefined - }); - expect(result[1]).toEqual({ - name: 'xs', - value: 'abcdef123456', - domain: '.facebook.com', - path: '/', - secure: true, - httpOnly: false, - sameSite: 'lax', - expirationDate: undefined - }); - }); - - test("should handle URL-encoded values", () => { - const cookieString = 'c_user=123%2B456; xs=abc%3Ddef'; - const result = parseFacebookCookieString(cookieString); - - expect(result[0].value).toBe('123+456'); - expect(result[1].value).toBe('abc=def'); - }); - - test("should filter out malformed cookies", () => { - const cookieString = 'c_user=123; invalid; xs=abc; =empty'; - const result = parseFacebookCookieString(cookieString); - - expect(result).toHaveLength(2); - expect(result.map(c => c.name)).toEqual(['c_user', 'xs']); - }); - - test("should handle empty input", () => { - expect(parseFacebookCookieString('')).toEqual([]); - expect(parseFacebookCookieString(' ')).toEqual([]); - }); - - test("should handle extra whitespace", () => { - const cookieString = ' c_user = 123 ; xs=abc '; - const result = parseFacebookCookieString(cookieString); - - expect(result).toHaveLength(2); - expect(result[0].name).toBe('c_user'); - expect(result[0].value).toBe('123'); - expect(result[1].name).toBe('xs'); - expect(result[1].value).toBe('abc'); - }); - }); - }); - - describe("Facebook Item Fetching", () => { - describe("fetchFacebookItem", () => { - const mockCookies = JSON.stringify([ - { name: "c_user", value: "12345", domain: ".facebook.com" }, - { name: "xs", value: "abc123", domain: ".facebook.com" } - ]); - - test("should handle authentication errors", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: false, - status: 401, - text: () => Promise.resolve("Authentication required"), - headers: { - get: () => null - } - }) - ); - - const result = await fetchFacebookItem("123", mockCookies); - expect(result).toBeNull(); - }); - - test("should handle item not found", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: false, - status: 404, - text: () => Promise.resolve("Not found"), - headers: { - get: () => null - } - }) - ); - - const result = await fetchFacebookItem("nonexistent", mockCookies); - expect(result).toBeNull(); - }); - - test("should handle rate limiting", async () => { - let attempts = 0; - global.fetch = mock(() => { - attempts++; - if (attempts === 1) { - return Promise.resolve({ - ok: false, - status: 429, - headers: { - get: (header: string) => { - if (header === "X-RateLimit-Reset") return "1"; - return null; - } - }, - text: () => Promise.resolve("Rate limited") - }); - } - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - viewer: { - marketplace_product_details_page: { - target: { - id: "123", - __typename: "GroupCommerceProductItem", - marketplace_listing_title: "Test Item", - is_live: true - } - } - } - } - } - } - }] - ] - }; - return Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }); - }); - - const result = await fetchFacebookItem("123", mockCookies); - expect(attempts).toBe(2); - // Should eventually succeed after retry - }); - - test("should handle sold items", async () => { - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - viewer: { - marketplace_product_details_page: { - target: { - id: "456", - __typename: "GroupCommerceProductItem", - marketplace_listing_title: "Sold Item", - is_sold: true, - is_live: false - } - } - } - } - } - } - }] - ] - }; - - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); - - const result = await fetchFacebookItem("456", mockCookies); - expect(result?.listingStatus).toBe("SOLD"); - }); - - test("should handle missing authentication cookies", async () => { - // Use a test-specific cookie file that doesn't exist - const testCookiePath = './cookies/facebook-test.json'; - - // Test with no cookies available (test file doesn't exist) - await expect(fetchFacebookItem("123", undefined, testCookiePath)).rejects.toThrow( - "No valid Facebook cookies found" - ); - }); - - test("should handle successful item extraction", async () => { - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - viewer: { - marketplace_product_details_page: { - target: { - id: "789", - __typename: "GroupCommerceProductItem", - marketplace_listing_title: "Working Item", - formatted_price: { text: "$299.00" }, - listing_price: { amount: "299.00", currency: "CAD" }, - is_live: true, - creation_time: 1640995200 - } - } - } - } - } - } - }] - ] - }; - - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); - - const result = await fetchFacebookItem("789", mockCookies); - expect(result).not.toBeNull(); - expect(result?.title).toBe("Working Item"); - expect(result?.listingPrice?.amountFormatted).toBe("$299.00"); - expect(result?.listingStatus).toBe("ACTIVE"); - }); - - test("should handle server errors", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: false, - status: 500, - text: () => Promise.resolve("Internal Server Error"), - headers: { - get: () => null - } - }) - ); - - const result = await fetchFacebookItem("error", mockCookies); - expect(result).toBeNull(); - }); - }); - }); - - describe("Data Extraction", () => { - describe("extractFacebookItemData", () => { - test("should extract item data from standard require structure", () => { - const mockItemData = { - id: "123456", - __typename: "GroupCommerceProductItem", - marketplace_listing_title: "Test Item", - formatted_price: { text: "$100.00" }, - listing_price: { amount: "100.00", currency: "CAD" }, - is_live: true - }; - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - viewer: { - marketplace_product_details_page: { - target: mockItemData - } - } - } - } - } - }] - ] - }; - const html = ``; - - const result = extractFacebookItemData(html); - expect(result).not.toBeNull(); - expect(result?.id).toBe("123456"); - expect(result?.marketplace_listing_title).toBe("Test Item"); - }); - - test("should handle missing item data", () => { - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - viewer: { - marketplace_product_details_page: {} - } - } - } - } - }] - ] - }; - const html = ``; - - const result = extractFacebookItemData(html); - expect(result).toBeNull(); - }); - - test("should handle malformed HTML", () => { - const result = extractFacebookItemData("Invalid HTML"); - expect(result).toBeNull(); - }); - - test("should handle invalid JSON in script tags", () => { - const html = ''; - const result = extractFacebookItemData(html); - expect(result).toBeNull(); - }); - - test("should extract item with vehicle data", () => { - const mockVehicleItem = { - id: "789", - __typename: "GroupCommerceProductItem", - marketplace_listing_title: "2006 Honda Civic", - formatted_price: { text: "$5,000" }, - listing_price: { amount: "5000.00", currency: "CAD" }, - vehicle_make_display_name: "Honda", - vehicle_model_display_name: "Civic", - vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 }, - vehicle_transmission_type: "AUTOMATIC", - is_live: true - }; - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - viewer: { - marketplace_product_details_page: { - target: mockVehicleItem - } - } - } - } - } - }] - ] - }; - const html = ``; - - const result = extractFacebookItemData(html); - expect(result).not.toBeNull(); - expect(result?.vehicle_make_display_name).toBe("Honda"); - expect(result?.vehicle_odometer_data?.value).toBe(150000); - }); - }); - - describe("extractFacebookMarketplaceData", () => { - test("should extract search results from marketplace data", () => { - const mockMarketplaceData = { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "Item 1", - listing_price: { amount: "10.00", currency: "CAD" } - } - } - }, - { - node: { - listing: { - id: "2", - marketplace_listing_title: "Item 2", - listing_price: { amount: "20.00", currency: "CAD" } - } - } - } - ] - } - }; - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: mockMarketplaceData - } - } - } - }] - ] - }; - const html = ``; - - const result = extractFacebookMarketplaceData(html); - expect(result).not.toBeNull(); - expect(result).toHaveLength(2); - expect(result?.[0].node.listing.marketplace_listing_title).toBe("Item 1"); - }); - - test("should handle empty search results", () => { - const mockData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { edges: [] } - } - } - } - } - }] - ] - }; - const html = ``; - - const result = extractFacebookMarketplaceData(html); - expect(result).toBeNull(); - }); - }); - }); - - describe("Data Parsing", () => { - describe("parseFacebookItem", () => { - test("should parse complete item with all fields", () => { - const item = { - id: "123456", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "iPhone 13 Pro", - redacted_description: { text: "Excellent condition" }, - formatted_price: { text: "$800.00" }, - listing_price: { amount: "800.00", currency: "CAD" }, - location_text: { text: "Toronto, ON" }, - is_live: true, - creation_time: 1640995200, - marketplace_listing_seller: { - id: "seller1", - name: "John Doe" - }, - delivery_types: ["IN_PERSON"] - }; - - const result = parseFacebookItem(item); - expect(result).not.toBeNull(); - expect(result?.title).toBe("iPhone 13 Pro"); - expect(result?.description).toBe("Excellent condition"); - expect(result?.listingPrice?.amountFormatted).toBe("$800.00"); - expect(result?.listingPrice?.cents).toBe(80000); - expect(result?.listingPrice?.currency).toBe("CAD"); - expect(result?.address).toBe("Toronto, ON"); - expect(result?.listingStatus).toBe("ACTIVE"); - expect(result?.seller?.name).toBe("John Doe"); - expect(result?.deliveryTypes).toEqual(["IN_PERSON"]); - }); - - test("should parse FREE items", () => { - const item = { - id: "789", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "Free Sofa", - formatted_price: { text: "FREE" }, - listing_price: { amount: "0.00", currency: "CAD" }, - is_live: true - }; - - const result = parseFacebookItem(item); - expect(result).not.toBeNull(); - expect(result?.title).toBe("Free Sofa"); - expect(result?.listingPrice?.amountFormatted).toBe("FREE"); - expect(result?.listingPrice?.cents).toBe(0); - }); - - test("should handle missing optional fields", () => { - const item = { - id: "456", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "Minimal Item" - }; - - const result = parseFacebookItem(item); - expect(result).not.toBeNull(); - expect(result?.title).toBe("Minimal Item"); - expect(result?.description).toBeUndefined(); - expect(result?.seller).toBeUndefined(); - }); - - test("should identify vehicle listings", () => { - const vehicleItem = { - id: "999", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "2012 Mazda 3", - formatted_price: { text: "$8,000" }, - listing_price: { amount: "8000.00", currency: "CAD" }, - vehicle_make_display_name: "Mazda", - vehicle_model_display_name: "3", - is_live: true - }; - - const result = parseFacebookItem(vehicleItem); - expect(result?.listingType).toBe("vehicle"); - }); - - test("should handle different listing statuses", () => { - const soldItem = { - id: "111", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "Sold Item", - is_sold: true, - is_live: false - }; - - const pendingItem = { - id: "222", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "Pending Item", - is_pending: true, - is_live: true - }; - - const hiddenItem = { - id: "333", - __typename: "GroupCommerceProductItem" as const, - marketplace_listing_title: "Hidden Item", - is_hidden: true, - is_live: false - }; - - expect(parseFacebookItem(soldItem)?.listingStatus).toBe("SOLD"); - expect(parseFacebookItem(pendingItem)?.listingStatus).toBe("PENDING"); - expect(parseFacebookItem(hiddenItem)?.listingStatus).toBe("HIDDEN"); - }); - - test("should return null for items without title", () => { - const invalidItem = { - id: "invalid", - __typename: "GroupCommerceProductItem" as const, - is_live: true - }; - - const result = parseFacebookItem(invalidItem); - expect(result).toBeNull(); - }); - }); - - describe("parseFacebookAds", () => { - test("should parse search result ads", () => { - const ads = [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "Ad 1", - listing_price: { amount: "50.00", formatted_amount: "$50.00", currency: "CAD" }, - location: { reverse_geocode: { city_page: { display_name: "Toronto" } } }, - creation_time: 1640995200, - is_live: true - } - } - }, - { - node: { - listing: { - id: "2", - marketplace_listing_title: "Ad 2", - listing_price: { amount: "75.00", formatted_amount: "$75.00", currency: "CAD" }, - location: { reverse_geocode: { city_page: { display_name: "Ottawa" } } }, - creation_time: 1640995300, - is_live: true - } - } - } - ]; - - const results = parseFacebookAds(ads); - expect(results).toHaveLength(2); - expect(results[0].title).toBe("Ad 1"); - expect(results[0].listingPrice?.cents).toBe(5000); - expect(results[0].address).toBe("Toronto"); - expect(results[1].title).toBe("Ad 2"); - expect(results[1].address).toBe("Ottawa"); - }); - - test("should filter out ads without price", () => { - const ads = [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "With Price", - listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" }, - is_live: true - } - } - }, - { - node: { - listing: { - id: "2", - marketplace_listing_title: "No Price", - is_live: true - } - } - } - ]; - - const results = parseFacebookAds(ads); - expect(results).toHaveLength(1); - expect(results[0].title).toBe("With Price"); - }); - - test("should handle malformed ads gracefully", () => { - const ads = [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "Valid Ad", - listing_price: { amount: "50.00", formatted_amount: "$50.00", currency: "CAD" }, - is_live: true - } - } - }, - { - node: { - // Missing listing - } - } as { node: { listing?: unknown } } - ]; - - const results = parseFacebookAds(ads); - expect(results).toHaveLength(1); - expect(results[0].title).toBe("Valid Ad"); - }); - }); - }); - - describe("Utility Functions", () => { - describe("formatCentsToCurrency", () => { - test("should format cents to currency string", () => { - expect(formatCentsToCurrency(100)).toBe("$1.00"); - expect(formatCentsToCurrency(1000)).toBe("$10.00"); - expect(formatCentsToCurrency(9999)).toBe("$99.99"); - expect(formatCentsToCurrency(123456)).toBe("$1,234.56"); - }); - - test("should handle string inputs", () => { - expect(formatCentsToCurrency("100")).toBe("$1.00"); - expect(formatCentsToCurrency("1000")).toBe("$10.00"); - }); - - test("should handle zero", () => { - expect(formatCentsToCurrency(0)).toBe("$0.00"); - }); - - test("should handle null and undefined", () => { - expect(formatCentsToCurrency(null)).toBe(""); - expect(formatCentsToCurrency(undefined)).toBe(""); - }); - - test("should handle invalid inputs", () => { - expect(formatCentsToCurrency("invalid")).toBe(""); - expect(formatCentsToCurrency(Number.NaN)).toBe(""); - }); - }); - - describe("formatCookiesForHeader", () => { - const mockCookies = [ - { name: "c_user", value: "123456", domain: ".facebook.com", path: "/" }, - { name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" }, - { name: "session_id", value: "xyz", domain: "other.com", path: "/" } - ]; - - test("should format cookies for header string", () => { - const result = formatCookiesForHeader(mockCookies, "www.facebook.com"); - expect(result).toBe("c_user=123456; xs=abcdef"); - }); - - test("should filter expired cookies", () => { - const cookiesWithExpiration = [ - ...mockCookies, - { name: "expired", value: "old", domain: ".facebook.com", path: "/", expirationDate: Date.now() / 1000 - 1000 } - ]; - const result = formatCookiesForHeader(cookiesWithExpiration, "www.facebook.com"); - expect(result).not.toContain("expired"); - }); - - test("should handle no matching cookies", () => { - const result = formatCookiesForHeader(mockCookies, "www.google.com"); - expect(result).toBe(""); - }); - - test("should handle empty cookie array", () => { - const result = formatCookiesForHeader([], "www.facebook.com"); - expect(result).toBe(""); - }); - }); - }); -}); \ No newline at end of file + beforeEach(() => { + global.fetch = mock(() => { + throw new Error("fetch should be mocked in individual tests"); + }); + }); + + afterEach(() => { + global.fetch = originalFetch; + }); + + describe("Cookie Parsing", () => { + describe("parseFacebookCookieString", () => { + test("should parse valid cookie string", () => { + const cookieString = "c_user=123456789; xs=abcdef123456; fr=xyz789"; + const result = parseFacebookCookieString(cookieString); + + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ + name: "c_user", + value: "123456789", + domain: ".facebook.com", + path: "/", + secure: true, + httpOnly: false, + sameSite: "lax", + expirationDate: undefined, + }); + expect(result[1]).toEqual({ + name: "xs", + value: "abcdef123456", + domain: ".facebook.com", + path: "/", + secure: true, + httpOnly: false, + sameSite: "lax", + expirationDate: undefined, + }); + }); + + test("should handle URL-encoded values", () => { + const cookieString = "c_user=123%2B456; xs=abc%3Ddef"; + const result = parseFacebookCookieString(cookieString); + + expect(result[0].value).toBe("123+456"); + expect(result[1].value).toBe("abc=def"); + }); + + test("should filter out malformed cookies", () => { + const cookieString = "c_user=123; invalid; xs=abc; =empty"; + const result = parseFacebookCookieString(cookieString); + + expect(result).toHaveLength(2); + expect(result.map((c) => c.name)).toEqual(["c_user", "xs"]); + }); + + test("should handle empty input", () => { + expect(parseFacebookCookieString("")).toEqual([]); + expect(parseFacebookCookieString(" ")).toEqual([]); + }); + + test("should handle extra whitespace", () => { + const cookieString = " c_user = 123 ; xs=abc "; + const result = parseFacebookCookieString(cookieString); + + expect(result).toHaveLength(2); + expect(result[0].name).toBe("c_user"); + expect(result[0].value).toBe("123"); + expect(result[1].name).toBe("xs"); + expect(result[1].value).toBe("abc"); + }); + }); + }); + + describe("Facebook Item Fetching", () => { + describe("fetchFacebookItem", () => { + const mockCookies = JSON.stringify([ + { name: "c_user", value: "12345", domain: ".facebook.com" }, + { name: "xs", value: "abc123", domain: ".facebook.com" }, + ]); + + test("should handle authentication errors", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: false, + status: 401, + text: () => Promise.resolve("Authentication required"), + headers: { + get: () => null, + }, + }), + ); + + const result = await fetchFacebookItem("123", mockCookies); + expect(result).toBeNull(); + }); + + test("should handle item not found", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: false, + status: 404, + text: () => Promise.resolve("Not found"), + headers: { + get: () => null, + }, + }), + ); + + const result = await fetchFacebookItem("nonexistent", mockCookies); + expect(result).toBeNull(); + }); + + test("should handle rate limiting", async () => { + let attempts = 0; + global.fetch = mock(() => { + attempts++; + if (attempts === 1) { + return Promise.resolve({ + ok: false, + status: 429, + headers: { + get: (header: string) => { + if (header === "X-RateLimit-Reset") return "1"; + return null; + }, + }, + text: () => Promise.resolve("Rate limited"), + }); + } + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + viewer: { + marketplace_product_details_page: { + target: { + id: "123", + __typename: "GroupCommerceProductItem", + marketplace_listing_title: "Test Item", + is_live: true, + }, + }, + }, + }, + }, + }, + }, + ], + ], + }; + return Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }); + }); + + const result = await fetchFacebookItem("123", mockCookies); + expect(attempts).toBe(2); + // Should eventually succeed after retry + }); + + test("should handle sold items", async () => { + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + viewer: { + marketplace_product_details_page: { + target: { + id: "456", + __typename: "GroupCommerceProductItem", + marketplace_listing_title: "Sold Item", + is_sold: true, + is_live: false, + }, + }, + }, + }, + }, + }, + }, + ], + ], + }; + + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); + + const result = await fetchFacebookItem("456", mockCookies); + expect(result?.listingStatus).toBe("SOLD"); + }); + + test("should handle missing authentication cookies", async () => { + // Use a test-specific cookie file that doesn't exist + const testCookiePath = "./cookies/facebook-test.json"; + + // Test with no cookies available (test file doesn't exist) + await expect( + fetchFacebookItem("123", undefined, testCookiePath), + ).rejects.toThrow("No valid Facebook cookies found"); + }); + + test("should handle successful item extraction", async () => { + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + viewer: { + marketplace_product_details_page: { + target: { + id: "789", + __typename: "GroupCommerceProductItem", + marketplace_listing_title: "Working Item", + formatted_price: { text: "$299.00" }, + listing_price: { + amount: "299.00", + currency: "CAD", + }, + is_live: true, + creation_time: 1640995200, + }, + }, + }, + }, + }, + }, + }, + ], + ], + }; + + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); + + const result = await fetchFacebookItem("789", mockCookies); + expect(result).not.toBeNull(); + expect(result?.title).toBe("Working Item"); + expect(result?.listingPrice?.amountFormatted).toBe("$299.00"); + expect(result?.listingStatus).toBe("ACTIVE"); + }); + + test("should handle server errors", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: false, + status: 500, + text: () => Promise.resolve("Internal Server Error"), + headers: { + get: () => null, + }, + }), + ); + + const result = await fetchFacebookItem("error", mockCookies); + expect(result).toBeNull(); + }); + }); + }); + + describe("Data Extraction", () => { + describe("extractFacebookItemData", () => { + test("should extract item data from standard require structure", () => { + const mockItemData = { + id: "123456", + __typename: "GroupCommerceProductItem", + marketplace_listing_title: "Test Item", + formatted_price: { text: "$100.00" }, + listing_price: { amount: "100.00", currency: "CAD" }, + is_live: true, + }; + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + viewer: { + marketplace_product_details_page: { + target: mockItemData, + }, + }, + }, + }, + }, + }, + ], + ], + }; + const html = ``; + + const result = extractFacebookItemData(html); + expect(result).not.toBeNull(); + expect(result?.id).toBe("123456"); + expect(result?.marketplace_listing_title).toBe("Test Item"); + }); + + test("should handle missing item data", () => { + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + viewer: { + marketplace_product_details_page: {}, + }, + }, + }, + }, + }, + ], + ], + }; + const html = ``; + + const result = extractFacebookItemData(html); + expect(result).toBeNull(); + }); + + test("should handle malformed HTML", () => { + const result = extractFacebookItemData( + "Invalid HTML", + ); + expect(result).toBeNull(); + }); + + test("should handle invalid JSON in script tags", () => { + const html = + ""; + const result = extractFacebookItemData(html); + expect(result).toBeNull(); + }); + + test("should extract item with vehicle data", () => { + const mockVehicleItem = { + id: "789", + __typename: "GroupCommerceProductItem", + marketplace_listing_title: "2006 Honda Civic", + formatted_price: { text: "$5,000" }, + listing_price: { amount: "5000.00", currency: "CAD" }, + vehicle_make_display_name: "Honda", + vehicle_model_display_name: "Civic", + vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 }, + vehicle_transmission_type: "AUTOMATIC", + is_live: true, + }; + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + viewer: { + marketplace_product_details_page: { + target: mockVehicleItem, + }, + }, + }, + }, + }, + }, + ], + ], + }; + const html = ``; + + const result = extractFacebookItemData(html); + expect(result).not.toBeNull(); + expect(result?.vehicle_make_display_name).toBe("Honda"); + expect(result?.vehicle_odometer_data?.value).toBe(150000); + }); + }); + + describe("extractFacebookMarketplaceData", () => { + test("should extract search results from marketplace data", () => { + const mockMarketplaceData = { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "Item 1", + listing_price: { amount: "10.00", currency: "CAD" }, + }, + }, + }, + { + node: { + listing: { + id: "2", + marketplace_listing_title: "Item 2", + listing_price: { amount: "20.00", currency: "CAD" }, + }, + }, + }, + ], + }, + }; + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: mockMarketplaceData, + }, + }, + }, + }, + ], + ], + }; + const html = ``; + + const result = extractFacebookMarketplaceData(html); + expect(result).not.toBeNull(); + expect(result).toHaveLength(2); + expect(result?.[0].node.listing.marketplace_listing_title).toBe( + "Item 1", + ); + }); + + test("should handle empty search results", () => { + const mockData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { edges: [] }, + }, + }, + }, + }, + }, + ], + ], + }; + const html = ``; + + const result = extractFacebookMarketplaceData(html); + expect(result).toBeNull(); + }); + }); + }); + + describe("Data Parsing", () => { + describe("parseFacebookItem", () => { + test("should parse complete item with all fields", () => { + const item = { + id: "123456", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "iPhone 13 Pro", + redacted_description: { text: "Excellent condition" }, + formatted_price: { text: "$800.00" }, + listing_price: { amount: "800.00", currency: "CAD" }, + location_text: { text: "Toronto, ON" }, + is_live: true, + creation_time: 1640995200, + marketplace_listing_seller: { + id: "seller1", + name: "John Doe", + }, + delivery_types: ["IN_PERSON"], + }; + + const result = parseFacebookItem(item); + expect(result).not.toBeNull(); + expect(result?.title).toBe("iPhone 13 Pro"); + expect(result?.description).toBe("Excellent condition"); + expect(result?.listingPrice?.amountFormatted).toBe("$800.00"); + expect(result?.listingPrice?.cents).toBe(80000); + expect(result?.listingPrice?.currency).toBe("CAD"); + expect(result?.address).toBe("Toronto, ON"); + expect(result?.listingStatus).toBe("ACTIVE"); + expect(result?.seller?.name).toBe("John Doe"); + expect(result?.deliveryTypes).toEqual(["IN_PERSON"]); + }); + + test("should parse FREE items", () => { + const item = { + id: "789", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "Free Sofa", + formatted_price: { text: "FREE" }, + listing_price: { amount: "0.00", currency: "CAD" }, + is_live: true, + }; + + const result = parseFacebookItem(item); + expect(result).not.toBeNull(); + expect(result?.title).toBe("Free Sofa"); + expect(result?.listingPrice?.amountFormatted).toBe("FREE"); + expect(result?.listingPrice?.cents).toBe(0); + }); + + test("should handle missing optional fields", () => { + const item = { + id: "456", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "Minimal Item", + }; + + const result = parseFacebookItem(item); + expect(result).not.toBeNull(); + expect(result?.title).toBe("Minimal Item"); + expect(result?.description).toBeUndefined(); + expect(result?.seller).toBeUndefined(); + }); + + test("should identify vehicle listings", () => { + const vehicleItem = { + id: "999", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "2012 Mazda 3", + formatted_price: { text: "$8,000" }, + listing_price: { amount: "8000.00", currency: "CAD" }, + vehicle_make_display_name: "Mazda", + vehicle_model_display_name: "3", + is_live: true, + }; + + const result = parseFacebookItem(vehicleItem); + expect(result?.listingType).toBe("vehicle"); + }); + + test("should handle different listing statuses", () => { + const soldItem = { + id: "111", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "Sold Item", + is_sold: true, + is_live: false, + }; + + const pendingItem = { + id: "222", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "Pending Item", + is_pending: true, + is_live: true, + }; + + const hiddenItem = { + id: "333", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "Hidden Item", + is_hidden: true, + is_live: false, + }; + + expect(parseFacebookItem(soldItem)?.listingStatus).toBe("SOLD"); + expect(parseFacebookItem(pendingItem)?.listingStatus).toBe("PENDING"); + expect(parseFacebookItem(hiddenItem)?.listingStatus).toBe("HIDDEN"); + }); + + test("should return null for items without title", () => { + const invalidItem = { + id: "invalid", + __typename: "GroupCommerceProductItem" as const, + is_live: true, + }; + + const result = parseFacebookItem(invalidItem); + expect(result).toBeNull(); + }); + }); + + describe("parseFacebookAds", () => { + test("should parse search result ads", () => { + const ads = [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "Ad 1", + listing_price: { + amount: "50.00", + formatted_amount: "$50.00", + currency: "CAD", + }, + location: { + reverse_geocode: { city_page: { display_name: "Toronto" } }, + }, + creation_time: 1640995200, + is_live: true, + }, + }, + }, + { + node: { + listing: { + id: "2", + marketplace_listing_title: "Ad 2", + listing_price: { + amount: "75.00", + formatted_amount: "$75.00", + currency: "CAD", + }, + location: { + reverse_geocode: { city_page: { display_name: "Ottawa" } }, + }, + creation_time: 1640995300, + is_live: true, + }, + }, + }, + ]; + + const results = parseFacebookAds(ads); + expect(results).toHaveLength(2); + expect(results[0].title).toBe("Ad 1"); + expect(results[0].listingPrice?.cents).toBe(5000); + expect(results[0].address).toBe("Toronto"); + expect(results[1].title).toBe("Ad 2"); + expect(results[1].address).toBe("Ottawa"); + }); + + test("should filter out ads without price", () => { + const ads = [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "With Price", + listing_price: { + amount: "100.00", + formatted_amount: "$100.00", + currency: "CAD", + }, + is_live: true, + }, + }, + }, + { + node: { + listing: { + id: "2", + marketplace_listing_title: "No Price", + is_live: true, + }, + }, + }, + ]; + + const results = parseFacebookAds(ads); + expect(results).toHaveLength(1); + expect(results[0].title).toBe("With Price"); + }); + + test("should handle malformed ads gracefully", () => { + const ads = [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "Valid Ad", + listing_price: { + amount: "50.00", + formatted_amount: "$50.00", + currency: "CAD", + }, + is_live: true, + }, + }, + }, + { + node: { + // Missing listing + }, + } as { node: { listing?: unknown } }, + ]; + + const results = parseFacebookAds(ads); + expect(results).toHaveLength(1); + expect(results[0].title).toBe("Valid Ad"); + }); + }); + }); + + describe("Utility Functions", () => { + describe("formatCentsToCurrency", () => { + test("should format cents to currency string", () => { + expect(formatCentsToCurrency(100)).toBe("$1.00"); + expect(formatCentsToCurrency(1000)).toBe("$10.00"); + expect(formatCentsToCurrency(9999)).toBe("$99.99"); + expect(formatCentsToCurrency(123456)).toBe("$1,234.56"); + }); + + test("should handle string inputs", () => { + expect(formatCentsToCurrency("100")).toBe("$1.00"); + expect(formatCentsToCurrency("1000")).toBe("$10.00"); + }); + + test("should handle zero", () => { + expect(formatCentsToCurrency(0)).toBe("$0.00"); + }); + + test("should handle null and undefined", () => { + expect(formatCentsToCurrency(null)).toBe(""); + expect(formatCentsToCurrency(undefined)).toBe(""); + }); + + test("should handle invalid inputs", () => { + expect(formatCentsToCurrency("invalid")).toBe(""); + expect(formatCentsToCurrency(Number.NaN)).toBe(""); + }); + }); + + describe("formatCookiesForHeader", () => { + const mockCookies = [ + { name: "c_user", value: "123456", domain: ".facebook.com", path: "/" }, + { name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" }, + { name: "session_id", value: "xyz", domain: "other.com", path: "/" }, + ]; + + test("should format cookies for header string", () => { + const result = formatCookiesForHeader(mockCookies, "www.facebook.com"); + expect(result).toBe("c_user=123456; xs=abcdef"); + }); + + test("should filter expired cookies", () => { + const cookiesWithExpiration = [ + ...mockCookies, + { + name: "expired", + value: "old", + domain: ".facebook.com", + path: "/", + expirationDate: Date.now() / 1000 - 1000, + }, + ]; + const result = formatCookiesForHeader( + cookiesWithExpiration, + "www.facebook.com", + ); + expect(result).not.toContain("expired"); + }); + + test("should handle no matching cookies", () => { + const result = formatCookiesForHeader(mockCookies, "www.google.com"); + expect(result).toBe(""); + }); + + test("should handle empty cookie array", () => { + const result = formatCookiesForHeader([], "www.facebook.com"); + expect(result).toBe(""); + }); + }); + }); +}); diff --git a/test/facebook-integration.test.ts b/test/facebook-integration.test.ts index b89ba65..d86d7a8 100644 --- a/test/facebook-integration.test.ts +++ b/test/facebook-integration.test.ts @@ -1,517 +1,712 @@ -import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"; +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import fetchFacebookItems, { fetchFacebookItem } from "../src/facebook"; // Mock fetch globally const originalFetch = global.fetch; describe("Facebook Marketplace Scraper Integration Tests", () => { - beforeEach(() => { - global.fetch = mock(() => { - throw new Error("fetch should be mocked in individual tests"); - }); - }); + beforeEach(() => { + global.fetch = mock(() => { + throw new Error("fetch should be mocked in individual tests"); + }); + }); - afterEach(() => { - global.fetch = originalFetch; - }); + afterEach(() => { + global.fetch = originalFetch; + }); - describe("Main Search Function", () => { - const mockCookies = JSON.stringify([ - { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, - { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } - ]); + describe("Main Search Function", () => { + const mockCookies = JSON.stringify([ + { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, + { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }, + ]); - test("should successfully fetch search results", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "iPhone 13 Pro", - listing_price: { amount: "800.00", formatted_amount: "$800.00", currency: "CAD" }, - location: { reverse_geocode: { city_page: { display_name: "Toronto" } } }, - creation_time: 1640995200, - is_live: true - } - } - }, - { - node: { - listing: { - id: "2", - marketplace_listing_title: "Samsung Galaxy", - listing_price: { amount: "600.00", formatted_amount: "$600.00", currency: "CAD" }, - location: { reverse_geocode: { city_page: { display_name: "Mississauga" } } }, - creation_time: 1640995300, - is_live: true - } - } - } - ] - } - } - } - } - } - }] - ] - }; + test("should successfully fetch search results", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "iPhone 13 Pro", + listing_price: { + amount: "800.00", + formatted_amount: "$800.00", + currency: "CAD", + }, + location: { + reverse_geocode: { + city_page: { display_name: "Toronto" }, + }, + }, + creation_time: 1640995200, + is_live: true, + }, + }, + }, + { + node: { + listing: { + id: "2", + marketplace_listing_title: "Samsung Galaxy", + listing_price: { + amount: "600.00", + formatted_amount: "$600.00", + currency: "CAD", + }, + location: { + reverse_geocode: { + city_page: { display_name: "Mississauga" }, + }, + }, + creation_time: 1640995300, + is_live: true, + }, + }, + }, + ], + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("iPhone", 1, "toronto", 25, mockCookies); - expect(results).toHaveLength(2); - expect(results[0].title).toBe("iPhone 13 Pro"); - expect(results[1].title).toBe("Samsung Galaxy"); - }); + const results = await fetchFacebookItems( + "iPhone", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toHaveLength(2); + expect(results[0].title).toBe("iPhone 13 Pro"); + expect(results[1].title).toBe("Samsung Galaxy"); + }); - test("should filter out items without price", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "With Price", - listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" }, - is_live: true - } - } - }, - { - node: { - listing: { - id: "2", - marketplace_listing_title: "No Price", - is_live: true - } - } - } - ] - } - } - } - } - } - }] - ] - }; + test("should filter out items without price", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "With Price", + listing_price: { + amount: "100.00", + formatted_amount: "$100.00", + currency: "CAD", + }, + is_live: true, + }, + }, + }, + { + node: { + listing: { + id: "2", + marketplace_listing_title: "No Price", + is_live: true, + }, + }, + }, + ], + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); - expect(results).toHaveLength(1); - expect(results[0].title).toBe("With Price"); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toHaveLength(1); + expect(results[0].title).toBe("With Price"); + }); - test("should respect MAX_ITEMS parameter", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: Array.from({ length: 10 }, (_, i) => ({ - node: { - listing: { - id: String(i), - marketplace_listing_title: `Item ${i}`, - listing_price: { amount: `${(i + 1) * 10}.00`, formatted_amount: `$${(i + 1) * 10}.00`, currency: "CAD" }, - is_live: true - } - } - })) - } - } - } - } - } - }] - ] - }; + test("should respect MAX_ITEMS parameter", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: Array.from({ length: 10 }, (_, i) => ({ + node: { + listing: { + id: String(i), + marketplace_listing_title: `Item ${i}`, + listing_price: { + amount: `${(i + 1) * 10}.00`, + formatted_amount: `$${(i + 1) * 10}.00`, + currency: "CAD", + }, + is_live: true, + }, + }, + })), + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("test", 1, "toronto", 5, mockCookies); - expect(results).toHaveLength(5); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 5, + mockCookies, + ); + expect(results).toHaveLength(5); + }); - test("should return empty array for no results", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [] - } - } - } - } - } - }] - ] - }; + test("should return empty array for no results", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [], + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("nonexistent query", 1, "toronto", 25, mockCookies); - expect(results).toEqual([]); - }); + const results = await fetchFacebookItems( + "nonexistent query", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toEqual([]); + }); - test("should handle authentication errors gracefully", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: false, - status: 401, - text: () => Promise.resolve("Unauthorized"), - headers: { - get: () => null - } - }) - ); + test("should handle authentication errors gracefully", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: false, + status: 401, + text: () => Promise.resolve("Unauthorized"), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); - expect(results).toEqual([]); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toEqual([]); + }); - test("should handle network errors", async () => { - global.fetch = mock(() => Promise.reject(new Error("Network error"))); + test("should handle network errors", async () => { + global.fetch = mock(() => Promise.reject(new Error("Network error"))); - await expect(fetchFacebookItems("test", 1, "toronto", 25, mockCookies)).rejects.toThrow("Network error"); - }); + await expect( + fetchFacebookItems("test", 1, "toronto", 25, mockCookies), + ).rejects.toThrow("Network error"); + }); - test("should handle rate limiting with retry", async () => { - let attempts = 0; - global.fetch = mock(() => { - attempts++; - if (attempts === 1) { - return Promise.resolve({ - ok: false, - status: 429, - headers: { - get: (header: string) => { - if (header === "X-RateLimit-Reset") return "1"; - return null; - } - }, - text: () => Promise.resolve("Rate limited") - }); - } - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "Item 1", - listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" }, - is_live: true - } - } - } - ] - } - } - } - } - } - }] - ] - }; - return Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }); - }); + test("should handle rate limiting with retry", async () => { + let attempts = 0; + global.fetch = mock(() => { + attempts++; + if (attempts === 1) { + return Promise.resolve({ + ok: false, + status: 429, + headers: { + get: (header: string) => { + if (header === "X-RateLimit-Reset") return "1"; + return null; + }, + }, + text: () => Promise.resolve("Rate limited"), + }); + } + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "Item 1", + listing_price: { + amount: "100.00", + formatted_amount: "$100.00", + currency: "CAD", + }, + is_live: true, + }, + }, + }, + ], + }, + }, + }, + }, + }, + }, + ], + ], + }; + return Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }); + }); - const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); - expect(attempts).toBe(2); - expect(results).toHaveLength(1); - }); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 25, + mockCookies, + ); + expect(attempts).toBe(2); + expect(results).toHaveLength(1); + }); + }); - describe("Vehicle Listing Integration", () => { - const mockCookies = JSON.stringify([ - { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, - { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } - ]); + describe("Vehicle Listing Integration", () => { + const mockCookies = JSON.stringify([ + { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, + { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }, + ]); - test("should correctly identify and parse vehicle listings", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "2006 Honda Civic", - listing_price: { amount: "8000.00", formatted_amount: "$8,000.00", currency: "CAD" }, - is_live: true - } - } - }, - { - node: { - listing: { - id: "2", - marketplace_listing_title: "iPhone 13", - listing_price: { amount: "800.00", formatted_amount: "$800.00", currency: "CAD" }, - is_live: true - } - } - } - ] - } - } - } - } - } - }] - ] - }; + test("should correctly identify and parse vehicle listings", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "2006 Honda Civic", + listing_price: { + amount: "8000.00", + formatted_amount: "$8,000.00", + currency: "CAD", + }, + is_live: true, + }, + }, + }, + { + node: { + listing: { + id: "2", + marketplace_listing_title: "iPhone 13", + listing_price: { + amount: "800.00", + formatted_amount: "$800.00", + currency: "CAD", + }, + is_live: true, + }, + }, + }, + ], + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("cars", 1, "toronto", 25, mockCookies); - expect(results).toHaveLength(2); - // Both should be classified as "item" type in search results (vehicle detection is for item details) - expect(results[0].title).toBe("2006 Honda Civic"); - expect(results[1].title).toBe("iPhone 13"); - }); - }); + const results = await fetchFacebookItems( + "cars", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toHaveLength(2); + // Both should be classified as "item" type in search results (vehicle detection is for item details) + expect(results[0].title).toBe("2006 Honda Civic"); + expect(results[1].title).toBe("iPhone 13"); + }); + }); - describe("Different Categories", () => { - const mockCookies = JSON.stringify([ - { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, - { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } - ]); + describe("Different Categories", () => { + const mockCookies = JSON.stringify([ + { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, + { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }, + ]); - test("should handle electronics listings", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "Nintendo Switch", - listing_price: { amount: "250.00", formatted_amount: "$250.00", currency: "CAD" }, - location: { reverse_geocode: { city_page: { display_name: "Toronto" } } }, - marketplace_listing_category_id: "479353692612078", - condition: "USED", - is_live: true - } - } - } - ] - } - } - } - } - } - }] - ] - }; + test("should handle electronics listings", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "Nintendo Switch", + listing_price: { + amount: "250.00", + formatted_amount: "$250.00", + currency: "CAD", + }, + location: { + reverse_geocode: { + city_page: { display_name: "Toronto" }, + }, + }, + marketplace_listing_category_id: + "479353692612078", + condition: "USED", + is_live: true, + }, + }, + }, + ], + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("nintendo switch", 1, "toronto", 25, mockCookies); - expect(results).toHaveLength(1); - expect(results[0].title).toBe("Nintendo Switch"); - expect(results[0].categoryId).toBe("479353692612078"); - }); + const results = await fetchFacebookItems( + "nintendo switch", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toHaveLength(1); + expect(results[0].title).toBe("Nintendo Switch"); + expect(results[0].categoryId).toBe("479353692612078"); + }); - test("should handle home goods/furniture listings", async () => { - const mockSearchData = { - require: [ - [null, null, null, { - __bbox: { - result: { - data: { - marketplace_search: { - feed_units: { - edges: [ - { - node: { - listing: { - id: "1", - marketplace_listing_title: "Dining Table", - listing_price: { amount: "150.00", formatted_amount: "$150.00", currency: "CAD" }, - location: { reverse_geocode: { city_page: { display_name: "Mississauga" } } }, - marketplace_listing_category_id: "1569171756675761", - condition: "USED", - is_live: true - } - } - } - ] - } - } - } - } - } - }] - ] - }; + test("should handle home goods/furniture listings", async () => { + const mockSearchData = { + require: [ + [ + null, + null, + null, + { + __bbox: { + result: { + data: { + marketplace_search: { + feed_units: { + edges: [ + { + node: { + listing: { + id: "1", + marketplace_listing_title: "Dining Table", + listing_price: { + amount: "150.00", + formatted_amount: "$150.00", + currency: "CAD", + }, + location: { + reverse_geocode: { + city_page: { display_name: "Mississauga" }, + }, + }, + marketplace_listing_category_id: + "1569171756675761", + condition: "USED", + is_live: true, + }, + }, + }, + ], + }, + }, + }, + }, + }, + }, + ], + ], + }; - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve(``), - headers: { - get: () => null - } - }) - ); + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + ``, + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("table", 1, "toronto", 25, mockCookies); - expect(results).toHaveLength(1); - expect(results[0].title).toBe("Dining Table"); - expect(results[0].categoryId).toBe("1569171756675761"); - }); - }); + const results = await fetchFacebookItems( + "table", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toHaveLength(1); + expect(results[0].title).toBe("Dining Table"); + expect(results[0].categoryId).toBe("1569171756675761"); + }); + }); - describe("Error Scenarios", () => { - const mockCookies = JSON.stringify([ - { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, - { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } - ]); + describe("Error Scenarios", () => { + const mockCookies = JSON.stringify([ + { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, + { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }, + ]); - test("should handle malformed HTML responses", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: true, - text: () => Promise.resolve("Invalid HTML without JSON data"), - headers: { - get: () => null - } - }) - ); + test("should handle malformed HTML responses", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + "Invalid HTML without JSON data", + ), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); - expect(results).toEqual([]); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toEqual([]); + }); - test("should handle 404 errors gracefully", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: false, - status: 404, - text: () => Promise.resolve("Not found"), - headers: { - get: () => null - } - }) - ); + test("should handle 404 errors gracefully", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: false, + status: 404, + text: () => Promise.resolve("Not found"), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); - expect(results).toEqual([]); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toEqual([]); + }); - test("should handle 500 errors gracefully", async () => { - global.fetch = mock(() => - Promise.resolve({ - ok: false, - status: 500, - text: () => Promise.resolve("Internal Server Error"), - headers: { - get: () => null - } - }) - ); + test("should handle 500 errors gracefully", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: false, + status: 500, + text: () => Promise.resolve("Internal Server Error"), + headers: { + get: () => null, + }, + }), + ); - const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); - expect(results).toEqual([]); - }); - }); + const results = await fetchFacebookItems( + "test", + 1, + "toronto", + 25, + mockCookies, + ); + expect(results).toEqual([]); + }); + }); }); diff --git a/test/kijiji-core.test.ts b/test/kijiji-core.test.ts index 62a8f5b..e56580d 100644 --- a/test/kijiji-core.test.ts +++ b/test/kijiji-core.test.ts @@ -1,162 +1,166 @@ -import { describe, test, expect } from "bun:test"; +import { describe, expect, test } from "bun:test"; import { - resolveLocationId, - resolveCategoryId, - buildSearchUrl, - HttpError, - NetworkError, - ParseError, - RateLimitError, - ValidationError + HttpError, + NetworkError, + ParseError, + RateLimitError, + ValidationError, + buildSearchUrl, + resolveCategoryId, + resolveLocationId, } from "../src/kijiji"; describe("Location and Category Resolution", () => { - describe("resolveLocationId", () => { - test("should return numeric IDs as-is", () => { - expect(resolveLocationId(1700272)).toBe(1700272); - expect(resolveLocationId(0)).toBe(0); - }); + describe("resolveLocationId", () => { + test("should return numeric IDs as-is", () => { + expect(resolveLocationId(1700272)).toBe(1700272); + expect(resolveLocationId(0)).toBe(0); + }); - test("should resolve string location names", () => { - expect(resolveLocationId("canada")).toBe(0); - expect(resolveLocationId("ontario")).toBe(9004); - expect(resolveLocationId("toronto")).toBe(1700273); - expect(resolveLocationId("gta")).toBe(1700272); - }); + test("should resolve string location names", () => { + expect(resolveLocationId("canada")).toBe(0); + expect(resolveLocationId("ontario")).toBe(9004); + expect(resolveLocationId("toronto")).toBe(1700273); + expect(resolveLocationId("gta")).toBe(1700272); + }); - test("should handle case insensitive matching", () => { - expect(resolveLocationId("Canada")).toBe(0); - expect(resolveLocationId("ONTARIO")).toBe(9004); - }); + test("should handle case insensitive matching", () => { + expect(resolveLocationId("Canada")).toBe(0); + expect(resolveLocationId("ONTARIO")).toBe(9004); + }); - test("should default to Canada for unknown locations", () => { - expect(resolveLocationId("unknown")).toBe(0); - expect(resolveLocationId("")).toBe(0); - }); + test("should default to Canada for unknown locations", () => { + expect(resolveLocationId("unknown")).toBe(0); + expect(resolveLocationId("")).toBe(0); + }); - test("should handle undefined input", () => { - expect(resolveLocationId(undefined)).toBe(0); - }); - }); + test("should handle undefined input", () => { + expect(resolveLocationId(undefined)).toBe(0); + }); + }); - describe("resolveCategoryId", () => { - test("should return numeric IDs as-is", () => { - expect(resolveCategoryId(132)).toBe(132); - expect(resolveCategoryId(0)).toBe(0); - }); + describe("resolveCategoryId", () => { + test("should return numeric IDs as-is", () => { + expect(resolveCategoryId(132)).toBe(132); + expect(resolveCategoryId(0)).toBe(0); + }); - test("should resolve string category names", () => { - expect(resolveCategoryId("all")).toBe(0); - expect(resolveCategoryId("phones")).toBe(132); - expect(resolveCategoryId("electronics")).toBe(29659001); - expect(resolveCategoryId("buy-sell")).toBe(10); - }); + test("should resolve string category names", () => { + expect(resolveCategoryId("all")).toBe(0); + expect(resolveCategoryId("phones")).toBe(132); + expect(resolveCategoryId("electronics")).toBe(29659001); + expect(resolveCategoryId("buy-sell")).toBe(10); + }); - test("should handle case insensitive matching", () => { - expect(resolveCategoryId("All")).toBe(0); - expect(resolveCategoryId("PHONES")).toBe(132); - }); + test("should handle case insensitive matching", () => { + expect(resolveCategoryId("All")).toBe(0); + expect(resolveCategoryId("PHONES")).toBe(132); + }); - test("should default to all categories for unknown categories", () => { - expect(resolveCategoryId("unknown")).toBe(0); - expect(resolveCategoryId("")).toBe(0); - }); + test("should default to all categories for unknown categories", () => { + expect(resolveCategoryId("unknown")).toBe(0); + expect(resolveCategoryId("")).toBe(0); + }); - test("should handle undefined input", () => { - expect(resolveCategoryId(undefined)).toBe(0); - }); - }); + test("should handle undefined input", () => { + expect(resolveCategoryId(undefined)).toBe(0); + }); + }); }); describe("URL Construction", () => { - describe("buildSearchUrl", () => { - test("should build basic search URL", () => { - const url = buildSearchUrl("iphone", { - location: 1700272, - category: 132, - sortBy: 'relevancy', - sortOrder: 'desc', - }); + describe("buildSearchUrl", () => { + test("should build basic search URL", () => { + const url = buildSearchUrl("iphone", { + location: 1700272, + category: 132, + sortBy: "relevancy", + sortOrder: "desc", + }); - expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272"); - expect(url).toContain("sort=relevancyDesc"); - expect(url).toContain("order=DESC"); - }); + expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272"); + expect(url).toContain("sort=relevancyDesc"); + expect(url).toContain("order=DESC"); + }); - test("should handle pagination", () => { - const url = buildSearchUrl("iphone", { - location: 1700272, - category: 132, - page: 2, - }); + test("should handle pagination", () => { + const url = buildSearchUrl("iphone", { + location: 1700272, + category: 132, + page: 2, + }); - expect(url).toContain("&page=2"); - }); + expect(url).toContain("&page=2"); + }); - test("should handle different sort options", () => { - const dateUrl = buildSearchUrl("iphone", { - sortBy: 'date', - sortOrder: 'asc', - }); - expect(dateUrl).toContain("sort=DATE"); - expect(dateUrl).toContain("order=ASC"); + test("should handle different sort options", () => { + const dateUrl = buildSearchUrl("iphone", { + sortBy: "date", + sortOrder: "asc", + }); + expect(dateUrl).toContain("sort=DATE"); + expect(dateUrl).toContain("order=ASC"); - const priceUrl = buildSearchUrl("iphone", { - sortBy: 'price', - sortOrder: 'desc', - }); - expect(priceUrl).toContain("sort=PRICE"); - expect(priceUrl).toContain("order=DESC"); - }); + const priceUrl = buildSearchUrl("iphone", { + sortBy: "price", + sortOrder: "desc", + }); + expect(priceUrl).toContain("sort=PRICE"); + expect(priceUrl).toContain("order=DESC"); + }); - test("should handle string location/category inputs", () => { - const url = buildSearchUrl("iphone", { - location: "toronto", - category: "phones", - }); + test("should handle string location/category inputs", () => { + const url = buildSearchUrl("iphone", { + location: "toronto", + category: "phones", + }); - expect(url).toContain("k0c132l1700273"); // phones + toronto - }); - }); + expect(url).toContain("k0c132l1700273"); // phones + toronto + }); + }); }); describe("Error Classes", () => { - test("HttpError should store status and URL", () => { - const error = new HttpError("Not found", 404, "https://example.com"); - expect(error.message).toBe("Not found"); - expect(error.status).toBe(404); - expect(error.url).toBe("https://example.com"); - expect(error.name).toBe("HttpError"); - }); + test("HttpError should store status and URL", () => { + const error = new HttpError("Not found", 404, "https://example.com"); + expect(error.message).toBe("Not found"); + expect(error.status).toBe(404); + expect(error.url).toBe("https://example.com"); + expect(error.name).toBe("HttpError"); + }); - test("NetworkError should store URL and cause", () => { - const cause = new Error("Connection failed"); - const error = new NetworkError("Network error", "https://example.com", cause); - expect(error.message).toBe("Network error"); - expect(error.url).toBe("https://example.com"); - expect(error.cause).toBe(cause); - expect(error.name).toBe("NetworkError"); - }); + test("NetworkError should store URL and cause", () => { + const cause = new Error("Connection failed"); + const error = new NetworkError( + "Network error", + "https://example.com", + cause, + ); + expect(error.message).toBe("Network error"); + expect(error.url).toBe("https://example.com"); + expect(error.cause).toBe(cause); + expect(error.name).toBe("NetworkError"); + }); - test("ParseError should store data", () => { - const data = { invalid: "json" }; - const error = new ParseError("Invalid JSON", data); - expect(error.message).toBe("Invalid JSON"); - expect(error.data).toBe(data); - expect(error.name).toBe("ParseError"); - }); + test("ParseError should store data", () => { + const data = { invalid: "json" }; + const error = new ParseError("Invalid JSON", data); + expect(error.message).toBe("Invalid JSON"); + expect(error.data).toBe(data); + expect(error.name).toBe("ParseError"); + }); - test("RateLimitError should store URL and reset time", () => { - const error = new RateLimitError("Rate limited", "https://example.com", 60); - expect(error.message).toBe("Rate limited"); - expect(error.url).toBe("https://example.com"); - expect(error.resetTime).toBe(60); - expect(error.name).toBe("RateLimitError"); - }); + test("RateLimitError should store URL and reset time", () => { + const error = new RateLimitError("Rate limited", "https://example.com", 60); + expect(error.message).toBe("Rate limited"); + expect(error.url).toBe("https://example.com"); + expect(error.resetTime).toBe(60); + expect(error.name).toBe("RateLimitError"); + }); - test("ValidationError should work without field", () => { - const error = new ValidationError("Invalid value"); - expect(error.message).toBe("Invalid value"); - expect(error.name).toBe("ValidationError"); - }); -}); \ No newline at end of file + test("ValidationError should work without field", () => { + const error = new ValidationError("Invalid value"); + expect(error.message).toBe("Invalid value"); + expect(error.name).toBe("ValidationError"); + }); +}); diff --git a/test/kijiji-integration.test.ts b/test/kijiji-integration.test.ts index 32a2704..af1323b 100644 --- a/test/kijiji-integration.test.ts +++ b/test/kijiji-integration.test.ts @@ -1,337 +1,363 @@ -import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"; -import { extractApolloState, parseSearch, parseDetailedListing } from "../src/kijiji"; +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; +import { + extractApolloState, + parseDetailedListing, + parseSearch, +} from "../src/kijiji"; // Mock fetch globally const originalFetch = global.fetch; describe("HTML Parsing Integration", () => { - beforeEach(() => { - // Mock fetch for all tests - global.fetch = mock(() => { - throw new Error("fetch should be mocked in individual tests"); - }); - }); + beforeEach(() => { + // Mock fetch for all tests + global.fetch = mock(() => { + throw new Error("fetch should be mocked in individual tests"); + }); + }); - afterEach(() => { - global.fetch = originalFetch; - }); + afterEach(() => { + global.fetch = originalFetch; + }); - describe("extractApolloState", () => { - test("should extract Apollo state from valid HTML", () => { - const mockHtml = ''; + describe("extractApolloState", () => { + test("should extract Apollo state from valid HTML", () => { + const mockHtml = + ''; - const result = extractApolloState(mockHtml); - expect(result).toEqual({ - ROOT_QUERY: { test: "value" } - }); - }); + const result = extractApolloState(mockHtml); + expect(result).toEqual({ + ROOT_QUERY: { test: "value" }, + }); + }); - test("should return null for HTML without Apollo state", () => { - const mockHtml = 'No data here'; - const result = extractApolloState(mockHtml); - expect(result).toBeNull(); - }); + test("should return null for HTML without Apollo state", () => { + const mockHtml = "No data here"; + const result = extractApolloState(mockHtml); + expect(result).toBeNull(); + }); - test("should return null for malformed JSON", () => { - const mockHtml = ''; + test("should return null for malformed JSON", () => { + const mockHtml = + ''; - const result = extractApolloState(mockHtml); - expect(result).toBeNull(); - }); + const result = extractApolloState(mockHtml); + expect(result).toBeNull(); + }); - test("should handle missing __NEXT_DATA__ element", () => { - const mockHtml = '
Content
'; - const result = extractApolloState(mockHtml); - expect(result).toBeNull(); - }); - }); + test("should handle missing __NEXT_DATA__ element", () => { + const mockHtml = "
Content
"; + const result = extractApolloState(mockHtml); + expect(result).toBeNull(); + }); + }); - describe("parseSearch", () => { - test("should parse search results from HTML", () => { - const mockHtml = ` + describe("parseSearch", () => { + test("should parse search results from HTML", () => { + const mockHtml = ` `; - const results = parseSearch(mockHtml, "https://www.kijiji.ca"); - expect(results).toHaveLength(2); - expect(results[0]).toEqual({ - name: "iPhone 13 Pro", - listingLink: "https://www.kijiji.ca/v-iphone/k0l0" - }); - expect(results[1]).toEqual({ - name: "Samsung Galaxy", - listingLink: "https://www.kijiji.ca/v-samsung/k0l0" - }); - }); + const results = parseSearch(mockHtml, "https://www.kijiji.ca"); + expect(results).toHaveLength(2); + expect(results[0]).toEqual({ + name: "iPhone 13 Pro", + listingLink: "https://www.kijiji.ca/v-iphone/k0l0", + }); + expect(results[1]).toEqual({ + name: "Samsung Galaxy", + listingLink: "https://www.kijiji.ca/v-samsung/k0l0", + }); + }); - test("should handle absolute URLs", () => { - const mockHtml = ` + test("should handle absolute URLs", () => { + const mockHtml = ` `; - const results = parseSearch(mockHtml, "https://www.kijiji.ca"); - expect(results[0].listingLink).toBe("https://www.kijiji.ca/v-iphone/k0l0"); - }); + const results = parseSearch(mockHtml, "https://www.kijiji.ca"); + expect(results[0].listingLink).toBe( + "https://www.kijiji.ca/v-iphone/k0l0", + ); + }); - test("should filter out invalid listings", () => { - const mockHtml = ` + test("should filter out invalid listings", () => { + const mockHtml = ` `; - const results = parseSearch(mockHtml, "https://www.kijiji.ca"); - expect(results).toHaveLength(1); - expect(results[0].name).toBe("iPhone 13 Pro"); - }); + const results = parseSearch(mockHtml, "https://www.kijiji.ca"); + expect(results).toHaveLength(1); + expect(results[0].name).toBe("iPhone 13 Pro"); + }); - test("should return empty array for invalid HTML", () => { - const results = parseSearch("Invalid", "https://www.kijiji.ca"); - expect(results).toEqual([]); - }); - }); + test("should return empty array for invalid HTML", () => { + const results = parseSearch( + "Invalid", + "https://www.kijiji.ca", + ); + expect(results).toEqual([]); + }); + }); - describe("parseDetailedListing", () => { - test("should parse detailed listing with all fields", async () => { - const mockHtml = ` + describe("parseDetailedListing", () => { + test("should parse detailed listing with all fields", async () => { + const mockHtml = ` `; - const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca"); - expect(result).toEqual({ - url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0", - title: "iPhone 13 Pro 256GB", - description: "Excellent condition iPhone 13 Pro", - listingPrice: { - amountFormatted: "$800.00", - cents: 80000, - currency: "CAD" - }, - listingType: "OFFER", - listingStatus: "ACTIVE", - creationDate: "2024-01-15T10:00:00.000Z", - endDate: "2025-01-15T10:00:00.000Z", - numberOfViews: 150, - address: "Toronto, ON", - images: [ - "https://media.kijiji.ca/api/v1/image1.jpg", - "https://media.kijiji.ca/api/v1/image2.jpg" - ], - categoryId: 132, - adSource: "ORGANIC", - flags: { - topAd: false, - priceDrop: true - }, - attributes: { - forsaleby: ["ownr"], - phonecarrier: ["unlocked"] - }, - location: { - id: 1700273, - name: "Toronto", - coordinates: { - latitude: 43.6532, - longitude: -79.3832 - } - }, - sellerInfo: { - posterId: "user123", - rating: 4.8 - } - }); - }); + const result = await parseDetailedListing( + mockHtml, + "https://www.kijiji.ca", + ); + expect(result).toEqual({ + url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0", + title: "iPhone 13 Pro 256GB", + description: "Excellent condition iPhone 13 Pro", + listingPrice: { + amountFormatted: "$800.00", + cents: 80000, + currency: "CAD", + }, + listingType: "OFFER", + listingStatus: "ACTIVE", + creationDate: "2024-01-15T10:00:00.000Z", + endDate: "2025-01-15T10:00:00.000Z", + numberOfViews: 150, + address: "Toronto, ON", + images: [ + "https://media.kijiji.ca/api/v1/image1.jpg", + "https://media.kijiji.ca/api/v1/image2.jpg", + ], + categoryId: 132, + adSource: "ORGANIC", + flags: { + topAd: false, + priceDrop: true, + }, + attributes: { + forsaleby: ["ownr"], + phonecarrier: ["unlocked"], + }, + location: { + id: 1700273, + name: "Toronto", + coordinates: { + latitude: 43.6532, + longitude: -79.3832, + }, + }, + sellerInfo: { + posterId: "user123", + rating: 4.8, + }, + }); + }); - test("should return null for contact-based pricing", async () => { - const mockHtml = ` + test("should return null for contact-based pricing", async () => { + const mockHtml = ` `; - const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca"); - expect(result).toBeNull(); - }); + const result = await parseDetailedListing( + mockHtml, + "https://www.kijiji.ca", + ); + expect(result).toBeNull(); + }); - test("should handle missing optional fields", async () => { - const mockHtml = ` + test("should handle missing optional fields", async () => { + const mockHtml = ` `; - const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca"); - expect(result).toEqual({ - url: "https://www.kijiji.ca/v-iphone/k0l0", - title: "iPhone 13", - description: undefined, - listingPrice: { - amountFormatted: "$500.00", - cents: 50000, - currency: undefined - }, - listingType: undefined, - listingStatus: undefined, - creationDate: undefined, - endDate: undefined, - numberOfViews: undefined, - address: null, - images: [], - categoryId: 0, - adSource: "UNKNOWN", - flags: { - topAd: false, - priceDrop: false - }, - attributes: {}, - location: { - id: 0, - name: "Unknown", - coordinates: undefined - }, - sellerInfo: undefined - }); - }); - }); -}); \ No newline at end of file + const result = await parseDetailedListing( + mockHtml, + "https://www.kijiji.ca", + ); + expect(result).toEqual({ + url: "https://www.kijiji.ca/v-iphone/k0l0", + title: "iPhone 13", + description: undefined, + listingPrice: { + amountFormatted: "$500.00", + cents: 50000, + currency: undefined, + }, + listingType: undefined, + listingStatus: undefined, + creationDate: undefined, + endDate: undefined, + numberOfViews: undefined, + address: null, + images: [], + categoryId: 0, + adSource: "UNKNOWN", + flags: { + topAd: false, + priceDrop: false, + }, + attributes: {}, + location: { + id: 0, + name: "Unknown", + coordinates: undefined, + }, + sellerInfo: undefined, + }); + }); + }); +}); diff --git a/test/kijiji-utils.test.ts b/test/kijiji-utils.test.ts index 0a77713..0c5bd9d 100644 --- a/test/kijiji-utils.test.ts +++ b/test/kijiji-utils.test.ts @@ -1,54 +1,54 @@ -import { describe, test, expect, beforeEach, afterEach } from "bun:test"; -import { slugify, formatCentsToCurrency } from "../src/kijiji"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { formatCentsToCurrency, slugify } from "../src/kijiji"; describe("Utility Functions", () => { - describe("slugify", () => { - test("should convert basic strings to slugs", () => { - expect(slugify("Hello World")).toBe("hello-world"); - expect(slugify("iPhone 13 Pro")).toBe("iphone-13-pro"); - }); + describe("slugify", () => { + test("should convert basic strings to slugs", () => { + expect(slugify("Hello World")).toBe("hello-world"); + expect(slugify("iPhone 13 Pro")).toBe("iphone-13-pro"); + }); - test("should handle special characters", () => { - expect(slugify("Café & Restaurant")).toBe("cafe-restaurant"); - expect(slugify("100% New")).toBe("100-new"); - }); + test("should handle special characters", () => { + expect(slugify("Café & Restaurant")).toBe("cafe-restaurant"); + expect(slugify("100% New")).toBe("100-new"); + }); - test("should handle empty and edge cases", () => { - expect(slugify("")).toBe(""); - expect(slugify(" ")).toBe("-"); - expect(slugify("---")).toBe("-"); - }); + test("should handle empty and edge cases", () => { + expect(slugify("")).toBe(""); + expect(slugify(" ")).toBe("-"); + expect(slugify("---")).toBe("-"); + }); - test("should preserve numbers and valid characters", () => { - expect(slugify("iPhone 13")).toBe("iphone-13"); - expect(slugify("item123")).toBe("item123"); - }); - }); + test("should preserve numbers and valid characters", () => { + expect(slugify("iPhone 13")).toBe("iphone-13"); + expect(slugify("item123")).toBe("item123"); + }); + }); - describe("formatCentsToCurrency", () => { - test("should format valid cent values", () => { - expect(formatCentsToCurrency(100)).toBe("$1.00"); - expect(formatCentsToCurrency(1999)).toBe("$19.99"); - expect(formatCentsToCurrency(0)).toBe("$0.00"); - }); + describe("formatCentsToCurrency", () => { + test("should format valid cent values", () => { + expect(formatCentsToCurrency(100)).toBe("$1.00"); + expect(formatCentsToCurrency(1999)).toBe("$19.99"); + expect(formatCentsToCurrency(0)).toBe("$0.00"); + }); - test("should handle string inputs", () => { - expect(formatCentsToCurrency("100")).toBe("$1.00"); - expect(formatCentsToCurrency("1999")).toBe("$19.99"); - }); + test("should handle string inputs", () => { + expect(formatCentsToCurrency("100")).toBe("$1.00"); + expect(formatCentsToCurrency("1999")).toBe("$19.99"); + }); - test("should handle null/undefined inputs", () => { - expect(formatCentsToCurrency(null)).toBe(""); - expect(formatCentsToCurrency(undefined)).toBe(""); - }); + test("should handle null/undefined inputs", () => { + expect(formatCentsToCurrency(null)).toBe(""); + expect(formatCentsToCurrency(undefined)).toBe(""); + }); - test("should handle invalid inputs", () => { - expect(formatCentsToCurrency("invalid")).toBe(""); - expect(formatCentsToCurrency(Number.NaN)).toBe(""); - }); + test("should handle invalid inputs", () => { + expect(formatCentsToCurrency("invalid")).toBe(""); + expect(formatCentsToCurrency(Number.NaN)).toBe(""); + }); - test("should use en-US locale formatting", () => { - expect(formatCentsToCurrency(123456)).toBe("$1,234.56"); - }); - }); -}); \ No newline at end of file + test("should use en-US locale formatting", () => { + expect(formatCentsToCurrency(123456)).toBe("$1,234.56"); + }); + }); +}); diff --git a/test/setup.ts b/test/setup.ts index c197378..810403e 100644 --- a/test/setup.ts +++ b/test/setup.ts @@ -5,8 +5,10 @@ import { expect } from "bun:test"; // This file is loaded before any tests run due to bunfig.toml preload // Mock fetch globally for tests -global.fetch = global.fetch || (() => { - throw new Error('fetch is not available in test environment'); -}); +global.fetch = + global.fetch || + (() => { + throw new Error("fetch is not available in test environment"); + }); -// Add any global test utilities here \ No newline at end of file +// Add any global test utilities here diff --git a/tsconfig.json b/tsconfig.json index 2aeb3d1..1a8932f 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -7,25 +7,21 @@ "moduleDetection": "force", "jsx": "react-jsx", "allowJs": true, - // Bundler mode "moduleResolution": "bundler", "allowImportingTsExtensions": true, "verbatimModuleSyntax": true, "noEmit": true, - // Best practices "strict": true, "skipLibCheck": true, "noFallthroughCasesInSwitch": true, "noUncheckedIndexedAccess": true, "noImplicitAny": true, - // Some stricter flags (disabled by default) "noUnusedLocals": false, "noUnusedParameters": false, "noPropertyAccessFromIndexSignature": false, - "paths": { "@/*": ["./src/*"] }