chore: biome lint

Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
This commit is contained in:
2026-01-22 22:34:05 -05:00
parent 3919ec0727
commit 6ab9c4c3a5
12 changed files with 4426 additions and 3885 deletions

View File

@@ -12,7 +12,7 @@
* bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json * bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json
*/ */
import { parseFacebookCookieString } from '../src/facebook'; import { parseFacebookCookieString } from "../src/facebook";
interface Cookie { interface Cookie {
name: string; name: string;
@@ -28,14 +28,14 @@ interface Cookie {
function parseFacebookCookieStringCLI(cookieString: string): Cookie[] { function parseFacebookCookieStringCLI(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) { if (!cookieString || !cookieString.trim()) {
console.error('❌ Error: Empty or invalid cookie string provided'); console.error("❌ Error: Empty or invalid cookie string provided");
process.exit(1); process.exit(1);
} }
const cookies = parseFacebookCookieString(cookieString); const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) { if (cookies.length === 0) {
console.error('❌ Error: No valid cookies found in input string'); console.error("❌ Error: No valid cookies found in input string");
console.error('Expected format: "name1=value1; name2=value2;"'); console.error('Expected format: "name1=value1; name2=value2;"');
process.exit(1); process.exit(1);
} }
@@ -48,40 +48,40 @@ async function main() {
if (args.length === 0 && process.stdin.isTTY === false) { if (args.length === 0 && process.stdin.isTTY === false) {
// Read from stdin // Read from stdin
let input = ''; let input = "";
for await (const chunk of process.stdin) { for await (const chunk of process.stdin) {
input += chunk; input += chunk;
} }
input = input.trim(); input = input.trim();
if (!input) { if (!input) {
console.error('❌ Error: No input provided via stdin'); console.error("❌ Error: No input provided via stdin");
process.exit(1); process.exit(1);
} }
const cookies = parseFacebookCookieStringCLI(input); const cookies = parseFacebookCookieStringCLI(input);
await writeOutput(cookies, './cookies/facebook.json'); await writeOutput(cookies, "./cookies/facebook.json");
return; return;
} }
let cookieString = ''; let cookieString = "";
let outputPath = './cookies/facebook.json'; let outputPath = "./cookies/facebook.json";
let inputPath = ''; let inputPath = "";
// Parse command line arguments // Parse command line arguments
for (let i = 0; i < args.length; i++) { for (let i = 0; i < args.length; i++) {
const arg = args[i]; const arg = args[i];
if (arg === '--input' || arg === '-i') { if (arg === "--input" || arg === "-i") {
inputPath = args[i + 1]; inputPath = args[i + 1];
i++; // Skip next arg i++; // Skip next arg
} else if (arg === '--output' || arg === '-o') { } else if (arg === "--output" || arg === "-o") {
outputPath = args[i + 1]; outputPath = args[i + 1];
i++; // Skip next arg i++; // Skip next arg
} else if (arg === '--help' || arg === '-h') { } else if (arg === "--help" || arg === "-h") {
showHelp(); showHelp();
return; return;
} else if (!arg.startsWith('-')) { } else if (!arg.startsWith("-")) {
// Assume this is the cookie string // Assume this is the cookie string
cookieString = arg; cookieString = arg;
} else { } else {
@@ -107,8 +107,10 @@ async function main() {
} }
if (!cookieString.trim()) { if (!cookieString.trim()) {
console.error('❌ Error: No cookie string provided'); console.error("❌ Error: No cookie string provided");
console.error('Provide cookie string as argument, --input file, or via stdin'); console.error(
"Provide cookie string as argument, --input file, or via stdin",
);
showHelp(); showHelp();
process.exit(1); process.exit(1);
} }
@@ -124,11 +126,12 @@ async function writeOutput(cookies: Cookie[], outputPath: string) {
console.log(`📁 Saved to: ${outputPath}`); console.log(`📁 Saved to: ${outputPath}`);
// Show summary of parsed cookies // Show summary of parsed cookies
console.log('\n📋 Parsed cookies:'); console.log("\n📋 Parsed cookies:");
for (const cookie of cookies) { for (const cookie of cookies) {
console.log(`${cookie.name}: ${cookie.value.substring(0, 20)}${cookie.value.length > 20 ? '...' : ''}`); console.log(
`${cookie.name}: ${cookie.value.substring(0, 20)}${cookie.value.length > 20 ? "..." : ""}`,
);
} }
} catch (error) { } catch (error) {
console.error(`❌ Error writing to output file: ${error}`); console.error(`❌ Error writing to output file: ${error}`);
process.exit(1); process.exit(1);
@@ -173,7 +176,7 @@ OUTPUT:
// Run the CLI // Run the CLI
if (import.meta.main) { if (import.meta.main) {
main().catch(error => { main().catch((error) => {
console.error(`❌ Unexpected error: ${error}`); console.error(`❌ Unexpected error: ${error}`);
process.exit(1); process.exit(1);
}); });

View File

@@ -1,6 +1,6 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
// ----------------------------- Types ----------------------------- // ----------------------------- Types -----------------------------
@@ -55,8 +55,10 @@ function formatCentsToCurrency(
/** /**
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents * Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
*/ */
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null { function parseEbayPrice(
if (!priceText || typeof priceText !== 'string') return null; priceText: string,
): { cents: number; currency: string } | null {
if (!priceText || typeof priceText !== "string") return null;
// Clean up the price text and extract currency and amount // Clean up the price text and extract currency and amount
const cleaned = priceText.trim(); const cleaned = priceText.trim();
@@ -65,19 +67,23 @@ function parseEbayPrice(priceText: string): { cents: number; currency: string }
const numberMatches = cleaned.match(/[\d,]+\.?\d*/); const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
if (!numberMatches) return null; if (!numberMatches) return null;
const amountStr = numberMatches[0].replace(/,/g, ''); const amountStr = numberMatches[0].replace(/,/g, "");
const dollars = parseFloat(amountStr); const dollars = Number.parseFloat(amountStr);
if (isNaN(dollars)) return null; if (Number.isNaN(dollars)) return null;
const cents = Math.round(dollars * 100); const cents = Math.round(dollars * 100);
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc. // Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = 'USD'; // Default let currency = "USD"; // Default
if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) { if (
currency = 'CAD'; cleaned.toUpperCase().includes("CAD") ||
} else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) { cleaned.includes("CA$") ||
currency = 'USD'; cleaned.includes("C $")
) {
currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
currency = "USD";
} }
return { cents, currency }; return { cents, currency };
@@ -135,7 +141,9 @@ async function fetchHtml(
if (!res.ok) { if (!res.ok) {
// Respect 429 reset if provided // Respect 429 reset if provided
if (res.status === 429) { if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN; const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000) ? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs; : (attempt + 1) * retryBaseMs;
@@ -176,7 +184,7 @@ function parseEbayListings(
htmlString: HTMLString, htmlString: HTMLString,
keywords: string[], keywords: string[],
exclusions: string[], exclusions: string[],
strictMode: boolean strictMode: boolean,
): ListingDetails[] { ): ListingDetails[] {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const results: ListingDetails[] = []; const results: ListingDetails[] = [];
@@ -184,16 +192,17 @@ function parseEbayListings(
// Find all listing links by looking for eBay item URLs (/itm/) // Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]'); const linkElements = document.querySelectorAll('a[href*="itm/"]');
for (const linkElement of linkElements) { for (const linkElement of linkElements) {
try { try {
// Get href attribute // Get href attribute
let href = linkElement.getAttribute('href'); let href = linkElement.getAttribute("href");
if (!href) continue; if (!href) continue;
// Make href absolute // Make href absolute
if (!href.startsWith('http')) { if (!href.startsWith("http")) {
href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`; href = href.startsWith("//")
? `https:${href}`
: `https://www.ebay.com${href}`;
} }
// Find the container - go up several levels to find the item container // Find the container - go up several levels to find the item container
@@ -207,15 +216,23 @@ function parseEbayListings(
// Extract title - look for heading or title-related elements near the link // Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link // Modern eBay often uses h3, span, or div with text content near the link
let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span'); let titleElement = container.querySelector(
'h3, [role="heading"], .s-item__title span',
);
// If no direct title element, try finding text content around the link // If no direct title element, try finding text content around the link
if (!titleElement) { if (!titleElement) {
// Look for spans or divs with text near this link // Look for spans or divs with text near this link
const nearbySpans = container.querySelectorAll('span, div'); const nearbySpans = container.querySelectorAll("span, div");
for (const span of nearbySpans) { for (const span of nearbySpans) {
const text = span.textContent?.trim(); const text = span.textContent?.trim();
if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) { if (
text &&
text.length > 10 &&
text.length < 200 &&
!text.includes("$") &&
!text.includes("item")
) {
titleElement = span; titleElement = span;
break; break;
} }
@@ -228,12 +245,12 @@ function parseEbayListings(
if (title) { if (title) {
// Remove common eBay UI strings that appear at the end of titles // Remove common eBay UI strings that appear at the end of titles
const uiStrings = [ const uiStrings = [
'Opens in a new window', "Opens in a new window",
'Opens in a new tab', "Opens in a new tab",
'Opens in a new window or tab', "Opens in a new window or tab",
'opens in a new window', "opens in a new window",
'opens in a new tab', "opens in a new tab",
'opens in a new window or tab' "opens in a new window or tab",
]; ];
for (const uiString of uiStrings) { for (const uiString of uiStrings) {
@@ -256,17 +273,27 @@ function parseEbayListings(
if (title === "Shop on eBay" || title.length < 3) continue; if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices // Extract price - look for eBay's price classes, preferring sale/discount prices
let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]'); let priceElement = container.querySelector(
'[class*="s-item__price"], .s-item__price, [class*="price"]',
);
// If no direct price class, look for spans containing $ (but not titles) // If no direct price class, look for spans containing $ (but not titles)
if (!priceElement) { if (!priceElement) {
const spansAndElements = container.querySelectorAll('span, div, b, em, strong'); const spansAndElements = container.querySelectorAll(
"span, div, b, em, strong",
);
for (const el of spansAndElements) { for (const el of spansAndElements) {
const text = el.textContent?.trim(); const text = el.textContent?.trim();
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words // Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if (text && text.includes('$') && text.length < 100 && if (
!text.includes('laptop') && !text.includes('computer') && !text.includes('intel') && text?.includes("$") &&
!text.includes('core') && !text.includes('ram') && !text.includes('ssd') && text.length < 100 &&
!text.includes("laptop") &&
!text.includes("computer") &&
!text.includes("intel") &&
!text.includes("core") &&
!text.includes("ram") &&
!text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024" !/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements !text.includes('"') // Avoid measurements
) { ) {
@@ -280,17 +307,26 @@ function parseEbayListings(
// Prefer sale/current price over original/strikethrough price // Prefer sale/current price over original/strikethrough price
if (priceElement) { if (priceElement) {
// Check if this element or its parent contains multiple price elements // Check if this element or its parent contains multiple price elements
const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement; const priceContainer =
priceElement.closest('[class*="s-item__price"]') ||
priceElement.parentElement;
if (priceContainer) { if (priceContainer) {
// Look for all price elements within this container, including strikethrough prices // Look for all price elements within this container, including strikethrough prices
const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike'); const allPriceElements = priceContainer.querySelectorAll(
'[class*="s-item__price"], span, b, em, strong, s, del, strike',
);
// Filter to only elements that actually contain prices (not labels) // Filter to only elements that actually contain prices (not labels)
const actualPrices: HTMLElement[] = []; const actualPrices: HTMLElement[] = [];
for (const el of allPriceElements) { for (const el of allPriceElements) {
const text = el.textContent?.trim(); const text = el.textContent?.trim();
if (text && /^\s*[\$£¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) { if (
text &&
/^\s*[\$£¥]/u.test(text) &&
text.length < 50 &&
!/\d{4}/.test(text)
) {
actualPrices.push(el); actualPrices.push(el);
} }
} }
@@ -298,11 +334,18 @@ function parseEbayListings(
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices) // Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
if (actualPrices.length > 1) { if (actualPrices.length > 1) {
// First, look for prices that are NOT struck through // First, look for prices that are NOT struck through
const nonStrikethroughPrices = actualPrices.filter(el => { const nonStrikethroughPrices = actualPrices.filter((el) => {
const tagName = el.tagName.toLowerCase(); const tagName = el.tagName.toLowerCase();
const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') || const styles =
el.closest('s, del, strike'); el.classList.contains("s-strikethrough") ||
return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles; el.classList.contains("u-flStrike") ||
el.closest("s, del, strike");
return (
tagName !== "s" &&
tagName !== "del" &&
tagName !== "strike" &&
!styles
);
}); });
if (nonStrikethroughPrices.length > 0) { if (nonStrikethroughPrices.length > 0) {
@@ -317,7 +360,7 @@ function parseEbayListings(
} }
} }
let priceText = priceElement?.textContent?.trim(); const priceText = priceElement?.textContent?.trim();
if (!priceText) continue; if (!priceText) continue;
@@ -326,12 +369,21 @@ function parseEbayListings(
if (!priceInfo) continue; if (!priceInfo) continue;
// Apply exclusion filters // Apply exclusion filters
if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) { if (
exclusions.some((exclusion) =>
title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
continue; continue;
} }
// Apply strict mode filter (title must contain at least one keyword) // Apply strict mode filter (title must contain at least one keyword)
if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) { if (
strictMode &&
!keywords.some((keyword) =>
title?.toLowerCase().includes(keyword.toLowerCase()),
)
) {
continue; continue;
} }
@@ -351,7 +403,6 @@ function parseEbayListings(
results.push(listing); results.push(listing);
} catch (err) { } catch (err) {
console.warn(`Error parsing eBay listing: ${err}`); console.warn(`Error parsing eBay listing: ${err}`);
continue;
} }
} }
@@ -376,7 +427,7 @@ export default async function fetchEbayItems(
maxPrice = Number.MAX_SAFE_INTEGER, maxPrice = Number.MAX_SAFE_INTEGER,
strictMode = false, strictMode = false,
exclusions = [], exclusions = [],
keywords = [SEARCH_QUERY] // Default to search query if no keywords provided keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
} = opts; } = opts;
// Build eBay search URL - use Canadian site and tracking parameters like real browser // Build eBay search URL - use Canadian site and tracking parameters like real browser
@@ -389,18 +440,19 @@ export default async function fetchEbayItems(
try { try {
// Use custom headers modeled after real browser requests to bypass bot detection // Use custom headers modeled after real browser requests to bypass bot detection
const headers: Record<string, string> = { const headers: Record<string, string> = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0', "User-Agent":
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
'Accept-Language': 'en-US,en;q=0.5', Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
'Accept-Encoding': 'gzip, deflate, br', "Accept-Language": "en-US,en;q=0.5",
'Referer': 'https://www.ebay.ca/', "Accept-Encoding": "gzip, deflate, br",
'Connection': 'keep-alive', Referer: "https://www.ebay.ca/",
'Upgrade-Insecure-Requests': '1', Connection: "keep-alive",
'Sec-Fetch-Dest': 'document', "Upgrade-Insecure-Requests": "1",
'Sec-Fetch-Mode': 'navigate', "Sec-Fetch-Dest": "document",
'Sec-Fetch-Site': 'same-origin', "Sec-Fetch-Mode": "navigate",
'Sec-Fetch-User': '?1', "Sec-Fetch-Site": "same-origin",
'Priority': 'u=0, i' "Sec-Fetch-User": "?1",
Priority: "u=0, i",
}; };
const res = await fetch(searchUrl, { const res = await fetch(searchUrl, {
@@ -420,19 +472,23 @@ export default async function fetchEbayItems(
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS); await delay(DELAY_MS);
console.log(`\nParsing eBay listings...`); console.log("\nParsing eBay listings...");
const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode); const listings = parseEbayListings(
searchHtml,
keywords,
exclusions,
strictMode,
);
// Filter by price range (additional safety check) // Filter by price range (additional safety check)
const filteredListings = listings.filter(listing => { const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents; const cents = listing.listingPrice?.cents;
return cents && cents >= minPrice && cents <= maxPrice; return cents && cents >= minPrice && cents <= maxPrice;
}); });
console.log(`Parsed ${filteredListings.length} eBay listings.`); console.log(`Parsed ${filteredListings.length} eBay listings.`);
return filteredListings; return filteredListings;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.error( console.error(

View File

@@ -1,6 +1,6 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
/** /**
* Facebook Marketplace Scraper * Facebook Marketplace Scraper
@@ -213,7 +213,10 @@ async function delay(ms: number): Promise<void> {
/** /**
* Load Facebook cookies from file or string * Load Facebook cookies from file or string
*/ */
async function loadFacebookCookies(cookiesSource?: string, cookiePath = './cookies/facebook.json'): Promise<Cookie[]> { async function loadFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load from provided string parameter // First try to load from provided string parameter
if (cookiesSource) { if (cookiesSource) {
try { try {
@@ -263,11 +266,11 @@ function parseFacebookCookieString(cookieString: string): Cookie[] {
} }
return cookieString return cookieString
.split(';') .split(";")
.map(pair => pair.trim()) .map((pair) => pair.trim())
.filter(pair => pair.includes('=')) .filter((pair) => pair.includes("="))
.map(pair => { .map((pair) => {
const [name, value] = pair.split('=', 2); const [name, value] = pair.split("=", 2);
const trimmedName = name.trim(); const trimmedName = name.trim();
const trimmedValue = value.trim(); const trimmedValue = value.trim();
@@ -279,11 +282,11 @@ function parseFacebookCookieString(cookieString: string): Cookie[] {
return { return {
name: trimmedName, name: trimmedName,
value: decodeURIComponent(trimmedValue), value: decodeURIComponent(trimmedValue),
domain: '.facebook.com', domain: ".facebook.com",
path: '/', path: "/",
secure: true, secure: true,
httpOnly: false, httpOnly: false,
sameSite: 'lax' as const, sameSite: "lax" as const,
expirationDate: undefined, // Session cookies expirationDate: undefined, // Session cookies
}; };
}) })
@@ -293,8 +296,9 @@ function parseFacebookCookieString(cookieString: string): Cookie[] {
/** /**
* Ensure Facebook cookies are available, parsing from env var if needed * Ensure Facebook cookies are available, parsing from env var if needed
*/ */
async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Promise<Cookie[]> { async function ensureFacebookCookies(
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load existing cookies // First try to load existing cookies
try { try {
const existing = await loadFacebookCookies(undefined, cookiePath); const existing = await loadFacebookCookies(undefined, cookiePath);
@@ -309,9 +313,9 @@ async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Pr
const cookieString = process.env.FACEBOOK_COOKIE; const cookieString = process.env.FACEBOOK_COOKIE;
if (!cookieString || !cookieString.trim()) { if (!cookieString || !cookieString.trim()) {
throw new Error( throw new Error(
'No valid Facebook cookies found. Either:\n' + "No valid Facebook cookies found. Either:\n" +
' 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n' + " 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
' 2. Create ./cookies/facebook.json manually with cookie array' " 2. Create ./cookies/facebook.json manually with cookie array",
); );
} }
@@ -319,8 +323,8 @@ async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Pr
const cookies = parseFacebookCookieString(cookieString); const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) { if (cookies.length === 0) {
throw new Error( throw new Error(
'FACEBOOK_COOKIE environment variable contains no valid cookies. ' + "FACEBOOK_COOKIE environment variable contains no valid cookies. " +
'Expected format: "name1=value1; name2=value2;"' 'Expected format: "name1=value1; name2=value2;"',
); );
} }
@@ -329,7 +333,7 @@ async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Pr
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2)); await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`); console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
} catch (error) { } catch (error) {
console.warn(`⚠️ Could not save cookies to ${cookiePath}: ${error}`); console.warn(`! Could not save cookies to ${cookiePath}: ${error}`);
// Continue anyway, we have the cookies in memory // Continue anyway, we have the cookies in memory
} }
@@ -349,10 +353,9 @@ function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
domain.endsWith(cookie.domain.slice(1)) || domain.endsWith(cookie.domain.slice(1)) ||
domain === cookie.domain.slice(1) domain === cookie.domain.slice(1)
); );
} else { }
// Host-only cookie // Host-only cookie
return cookie.domain === domain; return cookie.domain === domain;
}
}) })
.filter((cookie) => { .filter((cookie) => {
// Check expiration // Check expiration
@@ -418,7 +421,7 @@ async function fetchHtml(
// Add cookies if provided // Add cookies if provided
if (opts?.cookies) { if (opts?.cookies) {
headers["cookie"] = opts.cookies; headers.cookie = opts.cookies;
} }
const res = await fetch(url, { const res = await fetch(url, {
@@ -433,7 +436,9 @@ async function fetchHtml(
if (!res.ok) { if (!res.ok) {
// Respect 429 reset if provided // Respect 429 reset if provided
if (res.status === 429) { if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN; const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000) ? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs; : (attempt + 1) * retryBaseMs;
@@ -500,33 +505,38 @@ function extractFacebookMarketplaceData(
// Try multiple navigation paths to find marketplace_search // Try multiple navigation paths to find marketplace_search
const paths = [ const paths = [
// Original path from example // Original path from example
() => parsed.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'], () =>
parsed.require[0][3][0].__bbox.require[0][3][1].__bbox.result.data
.marketplace_search,
// Alternative path structure // Alternative path structure
() => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search, () =>
parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
// Another variation // Another variation
() => parsed.require[0][3][0]['__bbox']['result']['data']['marketplace_search'], () => parsed.require[0][3][0].__bbox.result.data.marketplace_search,
// Direct access for some responses // Direct access for some responses
() => { () => {
for (const item of parsed.require) { for (const item of parsed.require) {
if (item && item.length >= 4 && item[3]) { if (item && item.length >= 4 && item[3]) {
const bbox = item[3]?.['__bbox']?.result?.data?.marketplace_search; const bbox = item[3]?.__bbox?.result?.data?.marketplace_search;
if (bbox) return bbox; if (bbox) return bbox;
} }
} }
return null; return null;
} },
]; ];
for (const getData of paths) { for (const getData of paths) {
try { try {
const result = getData(); const result = getData();
if (result && isRecord(result) && result.feed_units?.edges?.length > 0) { if (
result &&
isRecord(result) &&
result.feed_units?.edges?.length > 0
) {
marketplaceData = result as FacebookMarketplaceSearch; marketplaceData = result as FacebookMarketplaceSearch;
break; break;
} }
} catch { } catch {}
continue;
}
} }
if (marketplaceData) break; if (marketplaceData) break;
@@ -534,7 +544,8 @@ function extractFacebookMarketplaceData(
// Also check for direct marketplace_search in the parsed data // Also check for direct marketplace_search in the parsed data
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) { if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
const searchData = parsed.marketplace_search as FacebookMarketplaceSearch; const searchData =
parsed.marketplace_search as FacebookMarketplaceSearch;
if (searchData.feed_units?.edges?.length > 0) { if (searchData.feed_units?.edges?.length > 0) {
marketplaceData = searchData; marketplaceData = searchData;
break; break;
@@ -550,14 +561,16 @@ function extractFacebookMarketplaceData(
return null; return null;
} }
console.log(`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`); console.log(
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`,
);
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node })); return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
} }
/** /**
* Monitor API extraction success/failure for detecting changes * Monitor API extraction success/failure for detecting changes
*/ */
let extractionStats = { const extractionStats = {
totalExtractions: 0, totalExtractions: 0,
successfulExtractions: 0, successfulExtractions: 0,
failedExtractions: 0, failedExtractions: 0,
@@ -576,16 +589,27 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
} }
// Log warning if extraction success rate drops below 80% // Log warning if extraction success rate drops below 80%
const successRate = extractionStats.successfulExtractions / extractionStats.totalExtractions; const successRate =
if (extractionStats.totalExtractions > 10 && successRate < 0.8 && !extractionStats.lastApiChangeDetected) { extractionStats.successfulExtractions / extractionStats.totalExtractions;
console.warn("⚠️ Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes."); if (
extractionStats.totalExtractions > 10 &&
successRate < 0.8 &&
!extractionStats.lastApiChangeDetected
) {
console.warn(
"! Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
);
extractionStats.lastApiChangeDetected = new Date(); extractionStats.lastApiChangeDetected = new Date();
} }
if (success) { if (success) {
console.log(`📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`); console.log(
`📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`,
);
} else { } else {
console.warn(`❌ Facebook API extraction failed for item ${itemId || 'unknown'}`); console.warn(
`❌ Facebook API extraction failed for item ${itemId || "unknown"}`,
);
} }
} }
@@ -601,8 +625,8 @@ function formatCentsToCurrency(
if (Number.isNaN(cents)) return ""; if (Number.isNaN(cents)) return "";
const dollars = cents / 100; const dollars = cents / 100;
const formatter = new Intl.NumberFormat(locale, { const formatter = new Intl.NumberFormat(locale, {
style: 'currency', style: "currency",
currency: 'USD', currency: "USD",
minimumFractionDigits: 2, minimumFractionDigits: 2,
maximumFractionDigits: 2, maximumFractionDigits: 2,
useGrouping: true, useGrouping: true,
@@ -614,7 +638,9 @@ function formatCentsToCurrency(
Extract marketplace item details from Facebook item page HTML Extract marketplace item details from Facebook item page HTML
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
*/ */
function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceItem | null { function extractFacebookItemData(
htmlString: HTMLString,
): FacebookMarketplaceItem | null {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const scripts = document.querySelectorAll("script"); const scripts = document.querySelectorAll("script");
@@ -630,24 +656,40 @@ function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceIte
// Try multiple extraction paths discovered from reverse engineering // Try multiple extraction paths discovered from reverse engineering
const extractionPaths = [ const extractionPaths = [
// Path 1: Primary path from current API structure // Path 1: Primary path from current API structure
() => parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target, () =>
parsed.require[0][3].__bbox.result.data.viewer
.marketplace_product_details_page.target,
// Path 2: Alternative path with nested require // Path 2: Alternative path with nested require
() => parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target, () =>
parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data
.viewer.marketplace_product_details_page.target,
// Path 3: Variation without the [0] index // Path 3: Variation without the [0] index
() => parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target, () =>
parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data
.viewer.marketplace_product_details_page.target,
// Path 4-5: Additional fallback paths for edge cases // Path 4-5: Additional fallback paths for edge cases
() => parsed.require[0][3][1]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target, () =>
() => parsed.require[0][3][2]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target, parsed.require[0][3][1]?.__bbox?.result?.data?.viewer
?.marketplace_product_details_page?.target,
() =>
parsed.require[0][3][2]?.__bbox?.result?.data?.viewer
?.marketplace_product_details_page?.target,
]; ];
let pathIndex = 0; let pathIndex = 0;
for (const getPath of extractionPaths) { for (const getPath of extractionPaths) {
try { try {
const targetData = getPath(); const targetData = getPath();
if (targetData && typeof targetData === 'object' && if (
targetData.id && targetData.marketplace_listing_title && targetData &&
targetData.__typename === 'GroupCommerceProductItem') { typeof targetData === "object" &&
console.log(`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`); targetData.id &&
targetData.marketplace_listing_title &&
targetData.__typename === "GroupCommerceProductItem"
) {
console.log(
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
);
return targetData as FacebookMarketplaceItem; return targetData as FacebookMarketplaceItem;
} }
} catch { } catch {
@@ -657,13 +699,20 @@ function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceIte
} }
// Fallback: Search recursively for marketplace data in the parsed structure // Fallback: Search recursively for marketplace data in the parsed structure
const findMarketplaceData = (obj: unknown, depth = 0, maxDepth = 10): FacebookMarketplaceItem | null => { const findMarketplaceData = (
obj: unknown,
depth = 0,
maxDepth = 10,
): FacebookMarketplaceItem | null => {
if (depth > maxDepth) return null; // Prevent infinite recursion if (depth > maxDepth) return null; // Prevent infinite recursion
if (isRecord(obj)) { if (isRecord(obj)) {
// Check if this object matches the expected marketplace item structure // Check if this object matches the expected marketplace item structure
if (obj.marketplace_listing_title && obj.id && if (
obj.__typename === 'GroupCommerceProductItem' && obj.marketplace_listing_title &&
obj.redacted_description) { obj.id &&
obj.__typename === "GroupCommerceProductItem" &&
obj.redacted_description
) {
return obj as FacebookMarketplaceItem; return obj as FacebookMarketplaceItem;
} }
// Recursively search nested objects and arrays // Recursively search nested objects and arrays
@@ -687,17 +736,30 @@ function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceIte
// Search through the entire require structure // Search through the entire require structure
const recursiveResult = findMarketplaceData(parsed.require); const recursiveResult = findMarketplaceData(parsed.require);
if (recursiveResult) { if (recursiveResult) {
console.log('Successfully extracted Facebook item data using recursive search'); console.log(
"Successfully extracted Facebook item data using recursive search",
);
return recursiveResult; return recursiveResult;
} }
// Additional search in other potential locations // Additional search in other potential locations
if (parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target) { if (
const bboxData = parsed.__bbox.result.data.viewer.marketplace_product_details_page.target; parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page
if (bboxData && typeof bboxData === 'object' && ?.target
bboxData.id && bboxData.marketplace_listing_title && ) {
bboxData.__typename === 'GroupCommerceProductItem') { const bboxData =
console.log('Successfully extracted Facebook item data from __bbox structure'); parsed.__bbox.result.data.viewer.marketplace_product_details_page
.target;
if (
bboxData &&
typeof bboxData === "object" &&
bboxData.id &&
bboxData.marketplace_listing_title &&
bboxData.__typename === "GroupCommerceProductItem"
) {
console.log(
"Successfully extracted Facebook item data from __bbox structure",
);
return bboxData as FacebookMarketplaceItem; return bboxData as FacebookMarketplaceItem;
} }
} }
@@ -734,7 +796,8 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
// - formatted_amount: human-readable price (like "CA$1") // - formatted_amount: human-readable price (like "CA$1")
let cents: number; let cents: number;
if (priceObj.amount != null) { if (priceObj.amount != null) {
const dollars = typeof priceObj.amount === 'string' const dollars =
typeof priceObj.amount === "string"
? Number.parseFloat(priceObj.amount) ? Number.parseFloat(priceObj.amount)
: priceObj.amount; : priceObj.amount;
cents = Math.round(dollars * 100); cents = Math.round(dollars * 100);
@@ -748,7 +811,7 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
if (priceObj.formatted_amount) { if (priceObj.formatted_amount) {
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/); const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) { if (match) {
const dollars = Number.parseFloat(match[0].replace(',', '')); const dollars = Number.parseFloat(match[0].replace(",", ""));
if (!Number.isNaN(dollars)) { if (!Number.isNaN(dollars)) {
cents = Math.round(dollars * 100); cents = Math.round(dollars * 100);
} else { } else {
@@ -793,19 +856,24 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
// Extract image and video URLs // Extract image and video URLs
const imageUrl = listing.primary_listing_photo?.image?.uri; const imageUrl = listing.primary_listing_photo?.image?.uri;
const videoUrl = listing.listing_video ? `https://www.facebook.com/${listing.listing_video.id}/` : undefined; const videoUrl = listing.listing_video
? `https://www.facebook.com/${listing.listing_video.id}/`
: undefined;
// Extract seller information // Extract seller information
const seller = listing.marketplace_listing_seller ? { const seller = listing.marketplace_listing_seller
? {
name: listing.marketplace_listing_seller.name, name: listing.marketplace_listing_seller.name,
id: listing.marketplace_listing_seller.id id: listing.marketplace_listing_seller.id,
} : undefined; }
: undefined;
const listingDetails: ListingDetails = { const listingDetails: ListingDetails = {
url, url,
title, title,
listingPrice: { listingPrice: {
amountFormatted: priceObj.formatted_amount || formatCentsToCurrency(cents), amountFormatted:
priceObj.formatted_amount || formatCentsToCurrency(cents),
cents, cents,
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
}, },
@@ -821,10 +889,7 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
}; };
results.push(listingDetails); results.push(listingDetails);
} catch { } catch {}
// Skip malformed ads
continue;
}
} }
return results; return results;
@@ -834,7 +899,9 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
Parse Facebook marketplace item details into ListingDetails format Parse Facebook marketplace item details into ListingDetails format
Updated for 2026 GroupCommerceProductItem structure Updated for 2026 GroupCommerceProductItem structure
*/ */
function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null { function parseFacebookItem(
item: FacebookMarketplaceItem,
): ListingDetails | null {
try { try {
const title = item.marketplace_listing_title || item.custom_title; const title = item.marketplace_listing_title || item.custom_title;
if (!title) return null; if (!title) return null;
@@ -849,10 +916,11 @@ function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null
if (item.listing_price) { if (item.listing_price) {
currency = item.listing_price.currency || "CAD"; currency = item.listing_price.currency || "CAD";
if (item.listing_price.amount && item.listing_price.amount !== "0.00") { if (item.listing_price.amount && item.listing_price.amount !== "0.00") {
const amount = parseFloat(item.listing_price.amount); const amount = Number.parseFloat(item.listing_price.amount);
if (!isNaN(amount)) { if (!Number.isNaN(amount)) {
cents = Math.round(amount * 100); cents = Math.round(amount * 100);
amountFormatted = item.formatted_price?.text || formatCentsToCurrency(cents); amountFormatted =
item.formatted_price?.text || formatCentsToCurrency(cents);
} }
} }
} }
@@ -864,10 +932,12 @@ function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null
const address = item.location_text?.text || null; const address = item.location_text?.text || null;
// Extract seller information // Extract seller information
const seller = item.marketplace_listing_seller ? { const seller = item.marketplace_listing_seller
? {
name: item.marketplace_listing_seller.name, name: item.marketplace_listing_seller.name,
id: item.marketplace_listing_seller.id id: item.marketplace_listing_seller.id,
} : undefined; }
: undefined;
// Determine listing status // Determine listing status
let listingStatus: string | undefined; let listingStatus: string | undefined;
@@ -987,8 +1057,7 @@ export default async function fetchFacebookItems(
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( console.log(
"\n" + `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
`Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
@@ -1022,7 +1091,7 @@ export default async function fetchFacebookItems(
cliProgress.Presets.shades_classic, cliProgress.Presets.shades_classic,
); );
const totalProgress = ads.length; const totalProgress = ads.length;
let currentProgress = 0; const currentProgress = 0;
progressBar.start(totalProgress, currentProgress); progressBar.start(totalProgress, currentProgress);
const items = parseFacebookAds(ads); const items = parseFacebookAds(ads);
@@ -1083,8 +1152,7 @@ export async function fetchFacebookItem(
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( console.log(
"\n" + `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
`Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
@@ -1104,7 +1172,9 @@ export async function fetchFacebookItem(
console.warn( console.warn(
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
); );
console.warn("Try logging out and back into Facebook, then export fresh cookies."); console.warn(
"Try logging out and back into Facebook, then export fresh cookies.",
);
break; break;
case 404: case 404:
console.warn( console.warn(
@@ -1135,21 +1205,31 @@ export async function fetchFacebookItem(
if (!itemData) { if (!itemData) {
logExtractionMetrics(false, itemId); logExtractionMetrics(false, itemId);
// Enhanced checking for specific failure scenarios // Enhanced checking for specific failure scenarios
if (itemHtml.includes("This listing is no longer available") || if (
itemHtml.includes("This listing is no longer available") ||
itemHtml.includes("listing has been removed") || itemHtml.includes("listing has been removed") ||
itemHtml.includes("This item has been sold")) { itemHtml.includes("This item has been sold")
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`); ) {
console.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null; return null;
} }
if (itemHtml.includes("log in to Facebook") || if (
itemHtml.includes("log in to Facebook") ||
itemHtml.includes("You must log in") || itemHtml.includes("You must log in") ||
itemHtml.includes("authentication required")) { itemHtml.includes("authentication required")
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`); ) {
console.warn(
`Authentication failed for item ${itemId}. Cookies may be expired.`,
);
return null; return null;
} }
console.warn(`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`); console.warn(
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
);
console.warn(" - The listing was removed or sold"); console.warn(" - The listing was removed or sold");
console.warn(" - Authentication issues"); console.warn(" - Authentication issues");
console.warn(" - Facebook changed their API structure"); console.warn(" - Facebook changed their API structure");
@@ -1173,8 +1253,11 @@ export async function fetchFacebookItem(
parsedItem.listingStatus = "SOLD"; parsedItem.listingStatus = "SOLD";
} else if (!itemData.is_live) { } else if (!itemData.is_live) {
console.warn(`Item ${itemId} is not live/active in the marketplace.`); console.warn(`Item ${itemId} is not live/active in the marketplace.`);
parsedItem.listingStatus = itemData.is_hidden ? "HIDDEN" : parsedItem.listingStatus = itemData.is_hidden
itemData.is_pending ? "PENDING" : "INACTIVE"; ? "HIDDEN"
: itemData.is_pending
? "PENDING"
: "INACTIVE";
} }
return parsedItem; return parsedItem;

View File

@@ -1,6 +1,6 @@
import fetchKijijiItems from "@/kijiji";
import fetchFacebookItems from "@/facebook";
import fetchEbayItems from "@/ebay"; import fetchEbayItems from "@/ebay";
import fetchFacebookItems from "@/facebook";
import fetchKijijiItems from "@/kijiji";
const PORT = process.env.PORT || 4005; const PORT = process.env.PORT || 4005;
@@ -30,34 +30,54 @@ const server = Bun.serve({
const location = reqUrl.searchParams.get("location"); const location = reqUrl.searchParams.get("location");
const category = reqUrl.searchParams.get("category"); const category = reqUrl.searchParams.get("category");
const maxPagesParam = reqUrl.searchParams.get("maxPages"); const maxPagesParam = reqUrl.searchParams.get("maxPages");
const maxPages = maxPagesParam const maxPages = maxPagesParam ? Number.parseInt(maxPagesParam, 10) : 5; // Default: 5 pages
? Number.parseInt(maxPagesParam, 10) const sortBy = reqUrl.searchParams.get("sortBy") as
: 5; // Default: 5 pages | "relevancy"
const sortBy = reqUrl.searchParams.get("sortBy") as 'relevancy' | 'date' | 'price' | 'distance' | undefined; | "date"
const sortOrder = reqUrl.searchParams.get("sortOrder") as 'asc' | 'desc' | undefined; | "price"
| "distance"
| undefined;
const sortOrder = reqUrl.searchParams.get("sortOrder") as
| "asc"
| "desc"
| undefined;
// Build search options // Build search options
const locationValue = location ? (/^\d+$/.test(location) ? Number(location) : location) : 1700272; const locationValue = location
const categoryValue = category ? (/^\d+$/.test(category) ? Number(category) : category) : 0; ? /^\d+$/.test(location)
? Number(location)
: location
: 1700272;
const categoryValue = category
? /^\d+$/.test(category)
? Number(category)
: category
: 0;
const searchOptions: import("@/kijiji").SearchOptions = { const searchOptions: import("@/kijiji").SearchOptions = {
location: locationValue, location: locationValue,
category: categoryValue, category: categoryValue,
keywords: SEARCH_QUERY, keywords: SEARCH_QUERY,
sortBy: sortBy || 'relevancy', sortBy: sortBy || "relevancy",
sortOrder: sortOrder || 'desc', sortOrder: sortOrder || "desc",
maxPages, maxPages,
}; };
// Build listing fetch options with enhanced defaults // Build listing fetch options with enhanced defaults
const listingOptions: import("@/kijiji").ListingFetchOptions = { const listingOptions: import("@/kijiji").ListingFetchOptions = {
includeImages: true, // Always include full image arrays includeImages: true, // Always include full image arrays
sellerDataDepth: 'detailed', // Default: detailed seller info sellerDataDepth: "detailed", // Default: detailed seller info
includeClientSideData: false, // GraphQL reviews disabled by default includeClientSideData: false, // GraphQL reviews disabled by default
}; };
try { try {
const items = await fetchKijijiItems(SEARCH_QUERY, 1, undefined, searchOptions, listingOptions); const items = await fetchKijijiItems(
SEARCH_QUERY,
1,
undefined,
searchOptions,
listingOptions,
);
if (!items || items.length === 0) if (!items || items.length === 0)
return Response.json( return Response.json(
{ message: "Search didn't return any results!" }, { message: "Search didn't return any results!" },
@@ -66,12 +86,13 @@ const server = Bun.serve({
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
console.error("Kijiji scraping error:", error); console.error("Kijiji scraping error:", error);
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json( return Response.json(
{ {
message: `Scraping failed: ${errorMessage}`, message: `Scraping failed: ${errorMessage}`,
query: SEARCH_QUERY, query: SEARCH_QUERY,
options: { searchOptions, listingOptions } options: { searchOptions, listingOptions },
}, },
{ status: 500 }, { status: 500 },
); );
@@ -96,7 +117,14 @@ const server = Bun.serve({
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined; const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
try { try {
const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE, "./cookies/facebook.json"); const items = await fetchFacebookItems(
SEARCH_QUERY,
5,
LOCATION,
25,
COOKIES_SOURCE,
"./cookies/facebook.json",
);
if (!items || items.length === 0) if (!items || items.length === 0)
return Response.json( return Response.json(
{ message: "Search didn't return any results!" }, { message: "Search didn't return any results!" },
@@ -105,11 +133,9 @@ const server = Bun.serve({
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
console.error("Facebook scraping error:", error); console.error("Facebook scraping error:", error);
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; const errorMessage =
return Response.json( error instanceof Error ? error.message : "Unknown error occurred";
{ message: errorMessage }, return Response.json({ message: errorMessage }, { status: 400 });
{ status: 400 },
);
} }
}, },
@@ -138,9 +164,13 @@ const server = Bun.serve({
: undefined; : undefined;
const strictMode = reqUrl.searchParams.get("strictMode") === "true"; const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const exclusionsParam = reqUrl.searchParams.get("exclusions"); const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : []; const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords"); const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY]; const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
try { try {
const items = await fetchEbayItems(SEARCH_QUERY, 5, { const items = await fetchEbayItems(SEARCH_QUERY, 5, {
@@ -158,11 +188,9 @@ const server = Bun.serve({
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
console.error("eBay scraping error:", error); console.error("eBay scraping error:", error);
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; const errorMessage =
return Response.json( error instanceof Error ? error.message : "Unknown error occurred";
{ message: errorMessage }, return Response.json({ message: errorMessage }, { status: 400 });
{ status: 400 },
);
} }
}, },

View File

@@ -1,7 +1,7 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */ /* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import unidecode from "unidecode"; import unidecode from "unidecode";
import cliProgress from "cli-progress";
// const unidecode = require("unidecode"); // const unidecode = require("unidecode");
@@ -99,8 +99,8 @@ interface SearchOptions {
location?: number | string; // Location ID or name location?: number | string; // Location ID or name
category?: number | string; // Category ID or name category?: number | string; // Category ID or name
keywords?: string; keywords?: string;
sortBy?: 'relevancy' | 'date' | 'price' | 'distance'; sortBy?: "relevancy" | "date" | "price" | "distance";
sortOrder?: 'desc' | 'asc'; sortOrder?: "desc" | "asc";
maxPages?: number; // Default: 5 maxPages?: number; // Default: 5
priceMin?: number; priceMin?: number;
priceMax?: number; priceMax?: number;
@@ -108,7 +108,7 @@ interface SearchOptions {
interface ListingFetchOptions { interface ListingFetchOptions {
includeImages?: boolean; // Default: true includeImages?: boolean; // Default: true
sellerDataDepth?: 'basic' | 'detailed' | 'full'; // Default: 'detailed' sellerDataDepth?: "basic" | "detailed" | "full"; // Default: 'detailed'
includeClientSideData?: boolean; // Default: false includeClientSideData?: boolean; // Default: false
} }
@@ -116,65 +116,65 @@ interface ListingFetchOptions {
// Location mappings from KIJIJI.md // Location mappings from KIJIJI.md
const LOCATION_MAPPINGS: Record<string, number> = { const LOCATION_MAPPINGS: Record<string, number> = {
'canada': 0, canada: 0,
'ontario': 9004, ontario: 9004,
'toronto': 1700273, toronto: 1700273,
'gta': 1700272, gta: 1700272,
'oshawa': 1700275, oshawa: 1700275,
'quebec': 9001, quebec: 9001,
'nova scotia': 9002, "nova scotia": 9002,
'alberta': 9003, alberta: 9003,
'new brunswick': 9005, "new brunswick": 9005,
'manitoba': 9006, manitoba: 9006,
'british columbia': 9007, "british columbia": 9007,
'newfoundland': 9008, newfoundland: 9008,
'saskatchewan': 9009, saskatchewan: 9009,
'territories': 9010, territories: 9010,
'pei': 9011, pei: 9011,
'prince edward island': 9011, "prince edward island": 9011,
}; };
// Category mappings from KIJIJI.md (Buy & Sell main categories) // Category mappings from KIJIJI.md (Buy & Sell main categories)
const CATEGORY_MAPPINGS: Record<string, number> = { const CATEGORY_MAPPINGS: Record<string, number> = {
'all': 0, all: 0,
'buy-sell': 10, "buy-sell": 10,
'arts-collectibles': 12, "arts-collectibles": 12,
'audio': 767, audio: 767,
'baby-items': 253, "baby-items": 253,
'bags-luggage': 931, "bags-luggage": 931,
'bikes': 644, bikes: 644,
'books': 109, books: 109,
'cameras': 103, cameras: 103,
'cds': 104, cds: 104,
'clothing': 274, clothing: 274,
'computers': 16, computers: 16,
'computer-accessories': 128, "computer-accessories": 128,
'electronics': 29659001, electronics: 29659001,
'free-stuff': 17220001, "free-stuff": 17220001,
'furniture': 235, furniture: 235,
'garage-sales': 638, "garage-sales": 638,
'health-special-needs': 140, "health-special-needs": 140,
'hobbies-crafts': 139, "hobbies-crafts": 139,
'home-appliances': 107, "home-appliances": 107,
'home-indoor': 717, "home-indoor": 717,
'home-outdoor': 727, "home-outdoor": 727,
'jewellery': 133, jewellery: 133,
'musical-instruments': 17, "musical-instruments": 17,
'phones': 132, phones: 132,
'sporting-goods': 111, "sporting-goods": 111,
'tools': 110, tools: 110,
'toys-games': 108, "toys-games": 108,
'tvs-video': 15093001, "tvs-video": 15093001,
'video-games': 141, "video-games": 141,
'other': 26, other: 26,
}; };
// Sort parameter mappings // Sort parameter mappings
const SORT_MAPPINGS: Record<string, string> = { const SORT_MAPPINGS: Record<string, string> = {
'relevancy': 'MATCH', relevancy: "MATCH",
'date': 'DATE', date: "DATE",
'price': 'PRICE', price: "PRICE",
'distance': 'DISTANCE', distance: "DISTANCE",
}; };
// ----------------------------- Exports for Testing ----------------------------- // ----------------------------- Exports for Testing -----------------------------
@@ -193,9 +193,9 @@ const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]);
* Resolve location ID from name or return numeric ID * Resolve location ID from name or return numeric ID
*/ */
function resolveLocationId(location?: number | string): number { function resolveLocationId(location?: number | string): number {
if (typeof location === 'number') return location; if (typeof location === "number") return location;
if (typeof location === 'string') { if (typeof location === "string") {
const normalized = location.toLowerCase().replace(/\s+/g, '-'); const normalized = location.toLowerCase().replace(/\s+/g, "-");
return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0)
} }
return 0; // Default to Canada return 0; // Default to Canada
@@ -205,9 +205,9 @@ function resolveLocationId(location?: number | string): number {
* Resolve category ID from name or return numeric ID * Resolve category ID from name or return numeric ID
*/ */
function resolveCategoryId(category?: number | string): number { function resolveCategoryId(category?: number | string): number {
if (typeof category === 'number') return category; if (typeof category === "number") return category;
if (typeof category === 'string') { if (typeof category === "string") {
const normalized = category.toLowerCase().replace(/\s+/g, '-'); const normalized = category.toLowerCase().replace(/\s+/g, "-");
return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories
} }
return 0; // Default to all categories return 0; // Default to all categories
@@ -219,19 +219,22 @@ function resolveCategoryId(category?: number | string): number {
function buildSearchUrl( function buildSearchUrl(
keywords: string, keywords: string,
options: SearchOptions & { page?: number }, options: SearchOptions & { page?: number },
BASE_URL = "https://www.kijiji.ca" BASE_URL = "https://www.kijiji.ca",
): string { ): string {
const locationId = resolveLocationId(options.location); const locationId = resolveLocationId(options.location);
const categoryId = resolveCategoryId(options.category); const categoryId = resolveCategoryId(options.category);
const categorySlug = categoryId === 0 ? 'buy-sell' : 'buy-sell'; // Could be enhanced const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell"; // Could be enhanced
const locationSlug = locationId === 0 ? 'canada' : 'canada'; // Could be enhanced const locationSlug = locationId === 0 ? "canada" : "canada"; // Could be enhanced
let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`;
const sortParam = options.sortBy ? `&sort=${SORT_MAPPINGS[options.sortBy]}` : ''; const sortParam = options.sortBy
const sortOrder = options.sortOrder === 'asc' ? 'ASC' : 'DESC'; ? `&sort=${SORT_MAPPINGS[options.sortBy]}`
const pageParam = options.page && options.page > 1 ? `&page=${options.page}` : ''; : "";
const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC";
const pageParam =
options.page && options.page > 1 ? `&page=${options.page}` : "";
url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`;
@@ -278,8 +281,8 @@ export function formatCentsToCurrency(
if (Number.isNaN(cents)) return ""; if (Number.isNaN(cents)) return "";
const dollars = cents / 100; const dollars = cents / 100;
const formatter = new Intl.NumberFormat(locale, { const formatter = new Intl.NumberFormat(locale, {
style: 'currency', style: "currency",
currency: 'USD', currency: "USD",
minimumFractionDigits: 2, minimumFractionDigits: 2,
maximumFractionDigits: 2, maximumFractionDigits: 2,
}); });
@@ -394,7 +397,9 @@ async function fetchHtml(
if (!res.ok) { if (!res.ok) {
// Handle rate limiting // Handle rate limiting
if (res.status === 429) { if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : Number.NaN; const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000) ? Math.max(0, resetSeconds * 1000)
: calculateBackoffDelay(attempt, retryBaseMs); : calculateBackoffDelay(attempt, retryBaseMs);
@@ -428,14 +433,13 @@ async function fetchHtml(
// Respect per-request delay to maintain rate limiting // Respect per-request delay to maintain rate limiting
await delay(DELAY_MS); await delay(DELAY_MS);
return html; return html;
} catch (err) { } catch (err) {
// Handle different error types // Handle different error types
if (err instanceof RateLimitError || err instanceof HttpError) { if (err instanceof RateLimitError || err instanceof HttpError) {
throw err; // Re-throw known errors throw err; // Re-throw known errors
} }
if (err instanceof Error && err.name === 'AbortError') { if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) { if (attempt < maxRetries) {
await delay(calculateBackoffDelay(attempt, retryBaseMs)); await delay(calculateBackoffDelay(attempt, retryBaseMs));
continue; continue;
@@ -451,7 +455,7 @@ async function fetchHtml(
throw new NetworkError( throw new NetworkError(
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
url, url,
err instanceof Error ? err : undefined err instanceof Error ? err : undefined,
); );
} }
} }
@@ -463,7 +467,7 @@ async function fetchHtml(
* Calculate exponential backoff delay with jitter * Calculate exponential backoff delay with jitter
*/ */
function calculateBackoffDelay(attempt: number, baseMs: number): number { function calculateBackoffDelay(attempt: number, baseMs: number): number {
const exponentialDelay = baseMs * (2 ** attempt); const exponentialDelay = baseMs * 2 ** attempt;
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
} }
@@ -476,16 +480,16 @@ function calculateBackoffDelay(attempt: number, baseMs: number): number {
async function fetchGraphQLData( async function fetchGraphQLData(
query: string, query: string,
variables: Record<string, unknown>, variables: Record<string, unknown>,
BASE_URL = "https://www.kijiji.ca" BASE_URL = "https://www.kijiji.ca",
): Promise<unknown> { ): Promise<unknown> {
const endpoint = `${BASE_URL}/anvil/api`; const endpoint = `${BASE_URL}/anvil/api`;
try { try {
const response = await fetch(endpoint, { const response = await fetch(endpoint, {
method: 'POST', method: "POST",
headers: { headers: {
'Content-Type': 'application/json', "Content-Type": "application/json",
'apollo-require-preflight': 'true', "apollo-require-preflight": "true",
}, },
body: JSON.stringify({ body: JSON.stringify({
query, query,
@@ -497,14 +501,17 @@ async function fetchGraphQLData(
throw new HttpError( throw new HttpError(
`GraphQL request failed with status ${response.status}`, `GraphQL request failed with status ${response.status}`,
response.status, response.status,
endpoint endpoint,
); );
} }
const result = await response.json(); const result = await response.json();
if (result.errors) { if (result.errors) {
throw new ParseError(`GraphQL errors: ${JSON.stringify(result.errors)}`, result.errors); throw new ParseError(
`GraphQL errors: ${JSON.stringify(result.errors)}`,
result.errors,
);
} }
return result.data; return result.data;
@@ -515,7 +522,7 @@ async function fetchGraphQLData(
throw new NetworkError( throw new NetworkError(
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
endpoint, endpoint,
err instanceof Error ? err : undefined err instanceof Error ? err : undefined,
); );
} }
} }
@@ -567,12 +574,25 @@ const GRAPHQL_QUERIES = {
*/ */
async function fetchSellerDetails( async function fetchSellerDetails(
posterId: string, posterId: string,
BASE_URL = "https://www.kijiji.ca" BASE_URL = "https://www.kijiji.ca",
): Promise<{ reviewCount?: number; reviewScore?: number; memberSince?: string; accountType?: string }> { ): Promise<{
reviewCount?: number;
reviewScore?: number;
memberSince?: string;
accountType?: string;
}> {
try { try {
const [reviewData, profileData] = await Promise.all([ const [reviewData, profileData] = await Promise.all([
fetchGraphQLData(GRAPHQL_QUERIES.getReviewSummary, { userId: posterId }, BASE_URL), fetchGraphQLData(
fetchGraphQLData(GRAPHQL_QUERIES.getProfileMetrics, { profileId: posterId }, BASE_URL), GRAPHQL_QUERIES.getReviewSummary,
{ userId: posterId },
BASE_URL,
),
fetchGraphQLData(
GRAPHQL_QUERIES.getProfileMetrics,
{ profileId: posterId },
BASE_URL,
),
]); ]);
const reviewResponse = reviewData as GraphQLReviewResponse; const reviewResponse = reviewData as GraphQLReviewResponse;
@@ -586,7 +606,10 @@ async function fetchSellerDetails(
}; };
} catch (err) { } catch (err) {
// Silently fail for GraphQL errors - not critical for basic functionality // Silently fail for GraphQL errors - not critical for basic functionality
console.warn(`Failed to fetch seller details for ${posterId}:`, err instanceof Error ? err.message : String(err)); console.warn(
`Failed to fetch seller details for ${posterId}:`,
err instanceof Error ? err.message : String(err),
);
return {}; return {};
} }
} }
@@ -694,7 +717,8 @@ function parseListing(
listingPrice: amountFormatted listingPrice: amountFormatted
? { ? {
amountFormatted, amountFormatted,
cents: cents !== undefined && Number.isFinite(cents) ? cents : undefined, cents:
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
currency: price?.currency, currency: price?.currency,
} }
: undefined, : undefined,
@@ -702,7 +726,10 @@ function parseListing(
listingStatus: status, listingStatus: status,
creationDate: activationDate, creationDate: activationDate,
endDate, endDate,
numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined, numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null, address: location?.address ?? null,
}; };
} }
@@ -713,7 +740,7 @@ function parseListing(
async function parseDetailedListing( async function parseDetailedListing(
htmlString: HTMLString, htmlString: HTMLString,
BASE_URL: string, BASE_URL: string,
options: ListingFetchOptions = {} options: ListingFetchOptions = {},
): Promise<DetailedListing | null> { ): Promise<DetailedListing | null> {
const apolloState = extractApolloState(htmlString); const apolloState = extractApolloState(htmlString);
if (!apolloState) return null; if (!apolloState) return null;
@@ -766,8 +793,9 @@ async function parseDetailedListing(
if (!amountFormatted || cents === undefined) return null; if (!amountFormatted || cents === undefined) return null;
// Extract images if requested // Extract images if requested
const images = options.includeImages !== false && Array.isArray(imageUrls) const images =
? imageUrls.filter((url): url is string => typeof url === 'string') options.includeImages !== false && Array.isArray(imageUrls)
? imageUrls.filter((url): url is string => typeof url === "string")
: []; : [];
// Extract attributes as key-value pairs // Extract attributes as key-value pairs
@@ -781,26 +809,35 @@ async function parseDetailedListing(
} }
// Extract seller info based on depth setting // Extract seller info based on depth setting
let sellerInfo: DetailedListing['sellerInfo']; let sellerInfo: DetailedListing["sellerInfo"];
const depth = options.sellerDataDepth ?? 'detailed'; const depth = options.sellerDataDepth ?? "detailed";
if (posterInfo?.posterId) { if (posterInfo?.posterId) {
sellerInfo = { sellerInfo = {
posterId: posterInfo.posterId, posterId: posterInfo.posterId,
rating: typeof posterInfo.rating === 'number' ? posterInfo.rating : undefined, rating:
typeof posterInfo.rating === "number" ? posterInfo.rating : undefined,
}; };
// Add more detailed info if requested and client-side data is enabled // Add more detailed info if requested and client-side data is enabled
if ((depth === 'detailed' || depth === 'full') && options.includeClientSideData) { if (
(depth === "detailed" || depth === "full") &&
options.includeClientSideData
) {
try { try {
const additionalData = await fetchSellerDetails(posterInfo.posterId, BASE_URL); const additionalData = await fetchSellerDetails(
posterInfo.posterId,
BASE_URL,
);
sellerInfo = { sellerInfo = {
...sellerInfo, ...sellerInfo,
...additionalData, ...additionalData,
}; };
} catch (err) { } catch (err) {
// Silently fail - GraphQL data is optional // Silently fail - GraphQL data is optional
console.warn(`Failed to fetch additional seller data for ${posterInfo.posterId}`); console.warn(
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
);
} }
} }
} }
@@ -818,23 +855,28 @@ async function parseDetailedListing(
listingStatus: status, listingStatus: status,
creationDate: activationDate, creationDate: activationDate,
endDate, endDate,
numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined, numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null, address: location?.address ?? null,
images, images,
categoryId: typeof categoryId === 'number' ? categoryId : 0, categoryId: typeof categoryId === "number" ? categoryId : 0,
adSource: typeof adSource === 'string' ? adSource : 'UNKNOWN', adSource: typeof adSource === "string" ? adSource : "UNKNOWN",
flags: { flags: {
topAd: flags?.topAd === true, topAd: flags?.topAd === true,
priceDrop: flags?.priceDrop === true, priceDrop: flags?.priceDrop === true,
}, },
attributes: attributeMap, attributes: attributeMap,
location: { location: {
id: typeof location?.id === 'number' ? location.id : 0, id: typeof location?.id === "number" ? location.id : 0,
name: typeof location?.name === 'string' ? location.name : 'Unknown', name: typeof location?.name === "string" ? location.name : "Unknown",
coordinates: location?.coordinates ? { coordinates: location?.coordinates
? {
latitude: location.coordinates.latitude, latitude: location.coordinates.latitude,
longitude: location.coordinates.longitude, longitude: location.coordinates.longitude,
} : undefined, }
: undefined,
}, },
sellerInfo, sellerInfo,
}; };
@@ -856,8 +898,8 @@ export default async function fetchKijijiItems(
location: searchOptions.location ?? 1700272, // Default to GTA location: searchOptions.location ?? 1700272, // Default to GTA
category: searchOptions.category ?? 0, // Default to all categories category: searchOptions.category ?? 0, // Default to all categories
keywords: searchOptions.keywords ?? SEARCH_QUERY, keywords: searchOptions.keywords ?? SEARCH_QUERY,
sortBy: searchOptions.sortBy ?? 'relevancy', sortBy: searchOptions.sortBy ?? "relevancy",
sortOrder: searchOptions.sortOrder ?? 'desc', sortOrder: searchOptions.sortOrder ?? "desc",
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin, priceMin: searchOptions.priceMin,
priceMax: searchOptions.priceMax, priceMax: searchOptions.priceMax,
@@ -865,7 +907,7 @@ export default async function fetchKijijiItems(
const finalListingOptions: Required<ListingFetchOptions> = { const finalListingOptions: Required<ListingFetchOptions> = {
includeImages: listingOptions.includeImages ?? true, includeImages: listingOptions.includeImages ?? true,
sellerDataDepth: listingOptions.sellerDataDepth ?? 'detailed', sellerDataDepth: listingOptions.sellerDataDepth ?? "detailed",
includeClientSideData: listingOptions.includeClientSideData ?? false, includeClientSideData: listingOptions.includeClientSideData ?? false,
}; };
@@ -874,24 +916,32 @@ export default async function fetchKijijiItems(
// Fetch multiple pages // Fetch multiple pages
for (let page = 1; page <= finalSearchOptions.maxPages; page++) { for (let page = 1; page <= finalSearchOptions.maxPages; page++) {
const searchUrl = buildSearchUrl(finalSearchOptions.keywords, { const searchUrl = buildSearchUrl(
finalSearchOptions.keywords,
{
...finalSearchOptions, ...finalSearchOptions,
// Add page parameter for pagination // Add page parameter for pagination
...(page > 1 && { page }), ...(page > 1 && { page }),
}, BASE_URL); },
BASE_URL,
);
console.log(`Fetching search page ${page}: ${searchUrl}`); console.log(`Fetching search page ${page}: ${searchUrl}`);
const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log(`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`); console.log(
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
} }
}, },
}); });
const searchResults = parseSearch(searchHtml, BASE_URL); const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) { if (searchResults.length === 0) {
console.log(`No more results found on page ${page}. Stopping pagination.`); console.log(
`No more results found on page ${page}. Stopping pagination.`,
);
break; break;
} }
@@ -904,7 +954,9 @@ export default async function fetchKijijiItems(
seenUrls.add(link); seenUrls.add(link);
} }
console.log(`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`); console.log(
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
);
// Fetch details for this page's listings // Fetch details for this page's listings
const progressBar = new cliProgress.SingleBar( const progressBar = new cliProgress.SingleBar(
@@ -920,19 +972,29 @@ export default async function fetchKijijiItems(
const html = await fetchHtml(link, DELAY_MS, { const html = await fetchHtml(link, DELAY_MS, {
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log(`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`); console.log(
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
} }
}, },
}); });
const parsed = await parseDetailedListing(html, BASE_URL, finalListingOptions); const parsed = await parseDetailedListing(
html,
BASE_URL,
finalListingOptions,
);
if (parsed) { if (parsed) {
allListings.push(parsed); allListings.push(parsed);
} }
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.error(`\nFailed to fetch ${link}\n - ${err.status} ${err.message}`); console.error(
`\nFailed to fetch ${link}\n - ${err.status} ${err.message}`,
);
} else { } else {
console.error(`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`); console.error(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
);
} }
} finally { } finally {
currentProgress++; currentProgress++;

View File

@@ -1,14 +1,14 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import { import {
fetchFacebookItem,
extractFacebookItemData, extractFacebookItemData,
extractFacebookMarketplaceData, extractFacebookMarketplaceData,
parseFacebookItem, fetchFacebookItem,
parseFacebookAds,
formatCentsToCurrency, formatCentsToCurrency,
loadFacebookCookies,
formatCookiesForHeader, formatCookiesForHeader,
loadFacebookCookies,
parseFacebookAds,
parseFacebookCookieString, parseFacebookCookieString,
parseFacebookItem,
} from "../src/facebook"; } from "../src/facebook";
// Mock fetch globally // Mock fetch globally
@@ -28,62 +28,62 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
describe("Cookie Parsing", () => { describe("Cookie Parsing", () => {
describe("parseFacebookCookieString", () => { describe("parseFacebookCookieString", () => {
test("should parse valid cookie string", () => { test("should parse valid cookie string", () => {
const cookieString = 'c_user=123456789; xs=abcdef123456; fr=xyz789'; const cookieString = "c_user=123456789; xs=abcdef123456; fr=xyz789";
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(3); expect(result).toHaveLength(3);
expect(result[0]).toEqual({ expect(result[0]).toEqual({
name: 'c_user', name: "c_user",
value: '123456789', value: "123456789",
domain: '.facebook.com', domain: ".facebook.com",
path: '/', path: "/",
secure: true, secure: true,
httpOnly: false, httpOnly: false,
sameSite: 'lax', sameSite: "lax",
expirationDate: undefined expirationDate: undefined,
}); });
expect(result[1]).toEqual({ expect(result[1]).toEqual({
name: 'xs', name: "xs",
value: 'abcdef123456', value: "abcdef123456",
domain: '.facebook.com', domain: ".facebook.com",
path: '/', path: "/",
secure: true, secure: true,
httpOnly: false, httpOnly: false,
sameSite: 'lax', sameSite: "lax",
expirationDate: undefined expirationDate: undefined,
}); });
}); });
test("should handle URL-encoded values", () => { test("should handle URL-encoded values", () => {
const cookieString = 'c_user=123%2B456; xs=abc%3Ddef'; const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);
expect(result[0].value).toBe('123+456'); expect(result[0].value).toBe("123+456");
expect(result[1].value).toBe('abc=def'); expect(result[1].value).toBe("abc=def");
}); });
test("should filter out malformed cookies", () => { test("should filter out malformed cookies", () => {
const cookieString = 'c_user=123; invalid; xs=abc; =empty'; const cookieString = "c_user=123; invalid; xs=abc; =empty";
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(2); expect(result).toHaveLength(2);
expect(result.map(c => c.name)).toEqual(['c_user', 'xs']); expect(result.map((c) => c.name)).toEqual(["c_user", "xs"]);
}); });
test("should handle empty input", () => { test("should handle empty input", () => {
expect(parseFacebookCookieString('')).toEqual([]); expect(parseFacebookCookieString("")).toEqual([]);
expect(parseFacebookCookieString(' ')).toEqual([]); expect(parseFacebookCookieString(" ")).toEqual([]);
}); });
test("should handle extra whitespace", () => { test("should handle extra whitespace", () => {
const cookieString = ' c_user = 123 ; xs=abc '; const cookieString = " c_user = 123 ; xs=abc ";
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(2); expect(result).toHaveLength(2);
expect(result[0].name).toBe('c_user'); expect(result[0].name).toBe("c_user");
expect(result[0].value).toBe('123'); expect(result[0].value).toBe("123");
expect(result[1].name).toBe('xs'); expect(result[1].name).toBe("xs");
expect(result[1].value).toBe('abc'); expect(result[1].value).toBe("abc");
}); });
}); });
}); });
@@ -92,7 +92,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
describe("fetchFacebookItem", () => { describe("fetchFacebookItem", () => {
const mockCookies = JSON.stringify([ const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com" }, { name: "c_user", value: "12345", domain: ".facebook.com" },
{ name: "xs", value: "abc123", domain: ".facebook.com" } { name: "xs", value: "abc123", domain: ".facebook.com" },
]); ]);
test("should handle authentication errors", async () => { test("should handle authentication errors", async () => {
@@ -102,9 +102,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
status: 401, status: 401,
text: () => Promise.resolve("Authentication required"), text: () => Promise.resolve("Authentication required"),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const result = await fetchFacebookItem("123", mockCookies); const result = await fetchFacebookItem("123", mockCookies);
@@ -118,9 +118,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
status: 404, status: 404,
text: () => Promise.resolve("Not found"), text: () => Promise.resolve("Not found"),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const result = await fetchFacebookItem("nonexistent", mockCookies); const result = await fetchFacebookItem("nonexistent", mockCookies);
@@ -139,14 +139,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: (header: string) => { get: (header: string) => {
if (header === "X-RateLimit-Reset") return "1"; if (header === "X-RateLimit-Reset") return "1";
return null; return null;
}
}, },
text: () => Promise.resolve("Rate limited") },
text: () => Promise.resolve("Rate limited"),
}); });
} }
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -156,22 +160,26 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
id: "123", id: "123",
__typename: "GroupCommerceProductItem", __typename: "GroupCommerceProductItem",
marketplace_listing_title: "Test Item", marketplace_listing_title: "Test Item",
is_live: true is_live: true,
} },
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
return Promise.resolve({ return Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}); });
}); });
@@ -183,7 +191,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should handle sold items", async () => { test("should handle sold items", async () => {
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -194,25 +206,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem", __typename: "GroupCommerceProductItem",
marketplace_listing_title: "Sold Item", marketplace_listing_title: "Sold Item",
is_sold: true, is_sold: true,
is_live: false is_live: false,
} },
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const result = await fetchFacebookItem("456", mockCookies); const result = await fetchFacebookItem("456", mockCookies);
@@ -221,18 +237,22 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should handle missing authentication cookies", async () => { test("should handle missing authentication cookies", async () => {
// Use a test-specific cookie file that doesn't exist // Use a test-specific cookie file that doesn't exist
const testCookiePath = './cookies/facebook-test.json'; const testCookiePath = "./cookies/facebook-test.json";
// Test with no cookies available (test file doesn't exist) // Test with no cookies available (test file doesn't exist)
await expect(fetchFacebookItem("123", undefined, testCookiePath)).rejects.toThrow( await expect(
"No valid Facebook cookies found" fetchFacebookItem("123", undefined, testCookiePath),
); ).rejects.toThrow("No valid Facebook cookies found");
}); });
test("should handle successful item extraction", async () => { test("should handle successful item extraction", async () => {
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -243,27 +263,34 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem", __typename: "GroupCommerceProductItem",
marketplace_listing_title: "Working Item", marketplace_listing_title: "Working Item",
formatted_price: { text: "$299.00" }, formatted_price: { text: "$299.00" },
listing_price: { amount: "299.00", currency: "CAD" }, listing_price: {
amount: "299.00",
currency: "CAD",
},
is_live: true, is_live: true,
creation_time: 1640995200 creation_time: 1640995200,
} },
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const result = await fetchFacebookItem("789", mockCookies); const result = await fetchFacebookItem("789", mockCookies);
@@ -280,9 +307,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
status: 500, status: 500,
text: () => Promise.resolve("Internal Server Error"), text: () => Promise.resolve("Internal Server Error"),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const result = await fetchFacebookItem("error", mockCookies); const result = await fetchFacebookItem("error", mockCookies);
@@ -300,24 +327,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
marketplace_listing_title: "Test Item", marketplace_listing_title: "Test Item",
formatted_price: { text: "$100.00" }, formatted_price: { text: "$100.00" },
listing_price: { amount: "100.00", currency: "CAD" }, listing_price: { amount: "100.00", currency: "CAD" },
is_live: true is_live: true,
}; };
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
viewer: { viewer: {
marketplace_product_details_page: { marketplace_product_details_page: {
target: mockItemData target: mockItemData,
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`; const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -330,18 +362,23 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should handle missing item data", () => { test("should handle missing item data", () => {
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
viewer: { viewer: {
marketplace_product_details_page: {} marketplace_product_details_page: {},
} },
} },
} },
} },
}] },
] ],
],
}; };
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`; const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -350,12 +387,15 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}); });
test("should handle malformed HTML", () => { test("should handle malformed HTML", () => {
const result = extractFacebookItemData("<html><body>Invalid HTML</body></html>"); const result = extractFacebookItemData(
"<html><body>Invalid HTML</body></html>",
);
expect(result).toBeNull(); expect(result).toBeNull();
}); });
test("should handle invalid JSON in script tags", () => { test("should handle invalid JSON in script tags", () => {
const html = '<html><body><script>{invalid: json}</script></body></html>'; const html =
"<html><body><script>{invalid: json}</script></body></html>";
const result = extractFacebookItemData(html); const result = extractFacebookItemData(html);
expect(result).toBeNull(); expect(result).toBeNull();
}); });
@@ -371,24 +411,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
vehicle_model_display_name: "Civic", vehicle_model_display_name: "Civic",
vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 }, vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 },
vehicle_transmission_type: "AUTOMATIC", vehicle_transmission_type: "AUTOMATIC",
is_live: true is_live: true,
}; };
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
viewer: { viewer: {
marketplace_product_details_page: { marketplace_product_details_page: {
target: mockVehicleItem target: mockVehicleItem,
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`; const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -409,58 +454,70 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "Item 1", marketplace_listing_title: "Item 1",
listing_price: { amount: "10.00", currency: "CAD" } listing_price: { amount: "10.00", currency: "CAD" },
} },
} },
}, },
{ {
node: { node: {
listing: { listing: {
id: "2", id: "2",
marketplace_listing_title: "Item 2", marketplace_listing_title: "Item 2",
listing_price: { amount: "20.00", currency: "CAD" } listing_price: { amount: "20.00", currency: "CAD" },
} },
} },
} },
] ],
} },
}; };
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
marketplace_search: mockMarketplaceData marketplace_search: mockMarketplaceData,
} },
} },
} },
}] },
] ],
],
}; };
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`; const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
const result = extractFacebookMarketplaceData(html); const result = extractFacebookMarketplaceData(html);
expect(result).not.toBeNull(); expect(result).not.toBeNull();
expect(result).toHaveLength(2); expect(result).toHaveLength(2);
expect(result?.[0].node.listing.marketplace_listing_title).toBe("Item 1"); expect(result?.[0].node.listing.marketplace_listing_title).toBe(
"Item 1",
);
}); });
test("should handle empty search results", () => { test("should handle empty search results", () => {
const mockData = { const mockData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
marketplace_search: { marketplace_search: {
feed_units: { edges: [] } feed_units: { edges: [] },
} },
} },
} },
} },
}] },
] ],
],
}; };
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`; const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -485,9 +542,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
creation_time: 1640995200, creation_time: 1640995200,
marketplace_listing_seller: { marketplace_listing_seller: {
id: "seller1", id: "seller1",
name: "John Doe" name: "John Doe",
}, },
delivery_types: ["IN_PERSON"] delivery_types: ["IN_PERSON"],
}; };
const result = parseFacebookItem(item); const result = parseFacebookItem(item);
@@ -510,7 +567,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
marketplace_listing_title: "Free Sofa", marketplace_listing_title: "Free Sofa",
formatted_price: { text: "FREE" }, formatted_price: { text: "FREE" },
listing_price: { amount: "0.00", currency: "CAD" }, listing_price: { amount: "0.00", currency: "CAD" },
is_live: true is_live: true,
}; };
const result = parseFacebookItem(item); const result = parseFacebookItem(item);
@@ -524,7 +581,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const item = { const item = {
id: "456", id: "456",
__typename: "GroupCommerceProductItem" as const, __typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Minimal Item" marketplace_listing_title: "Minimal Item",
}; };
const result = parseFacebookItem(item); const result = parseFacebookItem(item);
@@ -543,7 +600,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing_price: { amount: "8000.00", currency: "CAD" }, listing_price: { amount: "8000.00", currency: "CAD" },
vehicle_make_display_name: "Mazda", vehicle_make_display_name: "Mazda",
vehicle_model_display_name: "3", vehicle_model_display_name: "3",
is_live: true is_live: true,
}; };
const result = parseFacebookItem(vehicleItem); const result = parseFacebookItem(vehicleItem);
@@ -556,7 +613,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem" as const, __typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Sold Item", marketplace_listing_title: "Sold Item",
is_sold: true, is_sold: true,
is_live: false is_live: false,
}; };
const pendingItem = { const pendingItem = {
@@ -564,7 +621,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem" as const, __typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Pending Item", marketplace_listing_title: "Pending Item",
is_pending: true, is_pending: true,
is_live: true is_live: true,
}; };
const hiddenItem = { const hiddenItem = {
@@ -572,7 +629,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem" as const, __typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Hidden Item", marketplace_listing_title: "Hidden Item",
is_hidden: true, is_hidden: true,
is_live: false is_live: false,
}; };
expect(parseFacebookItem(soldItem)?.listingStatus).toBe("SOLD"); expect(parseFacebookItem(soldItem)?.listingStatus).toBe("SOLD");
@@ -584,7 +641,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const invalidItem = { const invalidItem = {
id: "invalid", id: "invalid",
__typename: "GroupCommerceProductItem" as const, __typename: "GroupCommerceProductItem" as const,
is_live: true is_live: true,
}; };
const result = parseFacebookItem(invalidItem); const result = parseFacebookItem(invalidItem);
@@ -600,25 +657,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "Ad 1", marketplace_listing_title: "Ad 1",
listing_price: { amount: "50.00", formatted_amount: "$50.00", currency: "CAD" }, listing_price: {
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } }, amount: "50.00",
formatted_amount: "$50.00",
currency: "CAD",
},
location: {
reverse_geocode: { city_page: { display_name: "Toronto" } },
},
creation_time: 1640995200, creation_time: 1640995200,
is_live: true is_live: true,
} },
} },
}, },
{ {
node: { node: {
listing: { listing: {
id: "2", id: "2",
marketplace_listing_title: "Ad 2", marketplace_listing_title: "Ad 2",
listing_price: { amount: "75.00", formatted_amount: "$75.00", currency: "CAD" }, listing_price: {
location: { reverse_geocode: { city_page: { display_name: "Ottawa" } } }, amount: "75.00",
formatted_amount: "$75.00",
currency: "CAD",
},
location: {
reverse_geocode: { city_page: { display_name: "Ottawa" } },
},
creation_time: 1640995300, creation_time: 1640995300,
is_live: true is_live: true,
} },
} },
} },
]; ];
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
@@ -637,20 +706,24 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "With Price", marketplace_listing_title: "With Price",
listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" }, listing_price: {
is_live: true amount: "100.00",
} formatted_amount: "$100.00",
} currency: "CAD",
},
is_live: true,
},
},
}, },
{ {
node: { node: {
listing: { listing: {
id: "2", id: "2",
marketplace_listing_title: "No Price", marketplace_listing_title: "No Price",
is_live: true is_live: true,
} },
} },
} },
]; ];
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
@@ -665,16 +738,20 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "Valid Ad", marketplace_listing_title: "Valid Ad",
listing_price: { amount: "50.00", formatted_amount: "$50.00", currency: "CAD" }, listing_price: {
is_live: true amount: "50.00",
} formatted_amount: "$50.00",
} currency: "CAD",
},
is_live: true,
},
},
}, },
{ {
node: { node: {
// Missing listing // Missing listing
} },
} as { node: { listing?: unknown } } } as { node: { listing?: unknown } },
]; ];
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
@@ -717,7 +794,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const mockCookies = [ const mockCookies = [
{ name: "c_user", value: "123456", domain: ".facebook.com", path: "/" }, { name: "c_user", value: "123456", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" }, { name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" },
{ name: "session_id", value: "xyz", domain: "other.com", path: "/" } { name: "session_id", value: "xyz", domain: "other.com", path: "/" },
]; ];
test("should format cookies for header string", () => { test("should format cookies for header string", () => {
@@ -728,9 +805,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should filter expired cookies", () => { test("should filter expired cookies", () => {
const cookiesWithExpiration = [ const cookiesWithExpiration = [
...mockCookies, ...mockCookies,
{ name: "expired", value: "old", domain: ".facebook.com", path: "/", expirationDate: Date.now() / 1000 - 1000 } {
name: "expired",
value: "old",
domain: ".facebook.com",
path: "/",
expirationDate: Date.now() / 1000 - 1000,
},
]; ];
const result = formatCookiesForHeader(cookiesWithExpiration, "www.facebook.com"); const result = formatCookiesForHeader(
cookiesWithExpiration,
"www.facebook.com",
);
expect(result).not.toContain("expired"); expect(result).not.toContain("expired");
}); });

View File

@@ -1,4 +1,4 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/facebook"; import fetchFacebookItems, { fetchFacebookItem } from "../src/facebook";
// Mock fetch globally // Mock fetch globally
@@ -18,13 +18,17 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Main Search Function", () => { describe("Main Search Function", () => {
const mockCookies = JSON.stringify([ const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]); ]);
test("should successfully fetch search results", async () => { test("should successfully fetch search results", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -36,46 +40,72 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "iPhone 13 Pro", marketplace_listing_title: "iPhone 13 Pro",
listing_price: { amount: "800.00", formatted_amount: "$800.00", currency: "CAD" }, listing_price: {
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } }, amount: "800.00",
formatted_amount: "$800.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
},
},
creation_time: 1640995200, creation_time: 1640995200,
is_live: true is_live: true,
} },
} },
}, },
{ {
node: { node: {
listing: { listing: {
id: "2", id: "2",
marketplace_listing_title: "Samsung Galaxy", marketplace_listing_title: "Samsung Galaxy",
listing_price: { amount: "600.00", formatted_amount: "$600.00", currency: "CAD" }, listing_price: {
location: { reverse_geocode: { city_page: { display_name: "Mississauga" } } }, amount: "600.00",
formatted_amount: "$600.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Mississauga" },
},
},
creation_time: 1640995300, creation_time: 1640995300,
is_live: true is_live: true,
} },
} },
} },
] ],
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("iPhone", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"iPhone",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(2); expect(results).toHaveLength(2);
expect(results[0].title).toBe("iPhone 13 Pro"); expect(results[0].title).toBe("iPhone 13 Pro");
expect(results[1].title).toBe("Samsung Galaxy"); expect(results[1].title).toBe("Samsung Galaxy");
@@ -84,7 +114,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
test("should filter out items without price", async () => { test("should filter out items without price", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -96,41 +130,55 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "With Price", marketplace_listing_title: "With Price",
listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" }, listing_price: {
is_live: true amount: "100.00",
} formatted_amount: "$100.00",
} currency: "CAD",
},
is_live: true,
},
},
}, },
{ {
node: { node: {
listing: { listing: {
id: "2", id: "2",
marketplace_listing_title: "No Price", marketplace_listing_title: "No Price",
is_live: true is_live: true,
} },
} },
} },
] ],
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("With Price"); expect(results[0].title).toBe("With Price");
}); });
@@ -138,7 +186,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
test("should respect MAX_ITEMS parameter", async () => { test("should respect MAX_ITEMS parameter", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -149,64 +201,92 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: String(i), id: String(i),
marketplace_listing_title: `Item ${i}`, marketplace_listing_title: `Item ${i}`,
listing_price: { amount: `${(i + 1) * 10}.00`, formatted_amount: `$${(i + 1) * 10}.00`, currency: "CAD" }, listing_price: {
is_live: true amount: `${(i + 1) * 10}.00`,
} formatted_amount: `$${(i + 1) * 10}.00`,
} currency: "CAD",
})) },
} is_live: true,
} },
} },
} })),
} },
}] },
] },
},
},
},
],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("test", 1, "toronto", 5, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
5,
mockCookies,
);
expect(results).toHaveLength(5); expect(results).toHaveLength(5);
}); });
test("should return empty array for no results", async () => { test("should return empty array for no results", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
marketplace_search: { marketplace_search: {
feed_units: { feed_units: {
edges: [] edges: [],
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("nonexistent query", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"nonexistent query",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
@@ -217,19 +297,27 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
status: 401, status: 401,
text: () => Promise.resolve("Unauthorized"), text: () => Promise.resolve("Unauthorized"),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
test("should handle network errors", async () => { test("should handle network errors", async () => {
global.fetch = mock(() => Promise.reject(new Error("Network error"))); global.fetch = mock(() => Promise.reject(new Error("Network error")));
await expect(fetchFacebookItems("test", 1, "toronto", 25, mockCookies)).rejects.toThrow("Network error"); await expect(
fetchFacebookItems("test", 1, "toronto", 25, mockCookies),
).rejects.toThrow("Network error");
}); });
test("should handle rate limiting with retry", async () => { test("should handle rate limiting with retry", async () => {
@@ -244,14 +332,18 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: (header: string) => { get: (header: string) => {
if (header === "X-RateLimit-Reset") return "1"; if (header === "X-RateLimit-Reset") return "1";
return null; return null;
}
}, },
text: () => Promise.resolve("Rate limited") },
text: () => Promise.resolve("Rate limited"),
}); });
} }
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -263,30 +355,44 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "Item 1", marketplace_listing_title: "Item 1",
listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" }, listing_price: {
is_live: true amount: "100.00",
} formatted_amount: "$100.00",
} currency: "CAD",
} },
] is_live: true,
} },
} },
} },
} ],
} },
}] },
] },
},
},
},
],
],
}; };
return Promise.resolve({ return Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}); });
}); });
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(attempts).toBe(2); expect(attempts).toBe(2);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
}); });
@@ -295,13 +401,17 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Vehicle Listing Integration", () => { describe("Vehicle Listing Integration", () => {
const mockCookies = JSON.stringify([ const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]); ]);
test("should correctly identify and parse vehicle listings", async () => { test("should correctly identify and parse vehicle listings", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -313,42 +423,60 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "2006 Honda Civic", marketplace_listing_title: "2006 Honda Civic",
listing_price: { amount: "8000.00", formatted_amount: "$8,000.00", currency: "CAD" }, listing_price: {
is_live: true amount: "8000.00",
} formatted_amount: "$8,000.00",
} currency: "CAD",
},
is_live: true,
},
},
}, },
{ {
node: { node: {
listing: { listing: {
id: "2", id: "2",
marketplace_listing_title: "iPhone 13", marketplace_listing_title: "iPhone 13",
listing_price: { amount: "800.00", formatted_amount: "$800.00", currency: "CAD" }, listing_price: {
is_live: true amount: "800.00",
} formatted_amount: "$800.00",
} currency: "CAD",
} },
] is_live: true,
} },
} },
} },
} ],
} },
}] },
] },
},
},
},
],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("cars", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"cars",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(2); expect(results).toHaveLength(2);
// Both should be classified as "item" type in search results (vehicle detection is for item details) // Both should be classified as "item" type in search results (vehicle detection is for item details)
expect(results[0].title).toBe("2006 Honda Civic"); expect(results[0].title).toBe("2006 Honda Civic");
@@ -359,13 +487,17 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Different Categories", () => { describe("Different Categories", () => {
const mockCookies = JSON.stringify([ const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]); ]);
test("should handle electronics listings", async () => { test("should handle electronics listings", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -377,35 +509,54 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "Nintendo Switch", marketplace_listing_title: "Nintendo Switch",
listing_price: { amount: "250.00", formatted_amount: "$250.00", currency: "CAD" }, listing_price: {
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } }, amount: "250.00",
marketplace_listing_category_id: "479353692612078", formatted_amount: "$250.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
},
},
marketplace_listing_category_id:
"479353692612078",
condition: "USED", condition: "USED",
is_live: true is_live: true,
} },
} },
} },
] ],
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("nintendo switch", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"nintendo switch",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("Nintendo Switch"); expect(results[0].title).toBe("Nintendo Switch");
expect(results[0].categoryId).toBe("479353692612078"); expect(results[0].categoryId).toBe("479353692612078");
@@ -414,7 +565,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
test("should handle home goods/furniture listings", async () => { test("should handle home goods/furniture listings", async () => {
const mockSearchData = { const mockSearchData = {
require: [ require: [
[null, null, null, { [
null,
null,
null,
{
__bbox: { __bbox: {
result: { result: {
data: { data: {
@@ -426,35 +581,54 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: { listing: {
id: "1", id: "1",
marketplace_listing_title: "Dining Table", marketplace_listing_title: "Dining Table",
listing_price: { amount: "150.00", formatted_amount: "$150.00", currency: "CAD" }, listing_price: {
location: { reverse_geocode: { city_page: { display_name: "Mississauga" } } }, amount: "150.00",
marketplace_listing_category_id: "1569171756675761", formatted_amount: "$150.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Mississauga" },
},
},
marketplace_listing_category_id:
"1569171756675761",
condition: "USED", condition: "USED",
is_live: true is_live: true,
} },
} },
} },
] ],
} },
} },
} },
} },
} },
}] },
] ],
],
}; };
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`), text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("table", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"table",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("Dining Table"); expect(results[0].title).toBe("Dining Table");
expect(results[0].categoryId).toBe("1569171756675761"); expect(results[0].categoryId).toBe("1569171756675761");
@@ -464,21 +638,30 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Error Scenarios", () => { describe("Error Scenarios", () => {
const mockCookies = JSON.stringify([ const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" }, { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" } { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]); ]);
test("should handle malformed HTML responses", async () => { test("should handle malformed HTML responses", async () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve("<html><body>Invalid HTML without JSON data</body></html>"), text: () =>
Promise.resolve(
"<html><body>Invalid HTML without JSON data</body></html>",
),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
@@ -489,12 +672,18 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
status: 404, status: 404,
text: () => Promise.resolve("Not found"), text: () => Promise.resolve("Not found"),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
@@ -505,12 +694,18 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
status: 500, status: 500,
text: () => Promise.resolve("Internal Server Error"), text: () => Promise.resolve("Internal Server Error"),
headers: { headers: {
get: () => null get: () => null,
} },
}) }),
); );
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies); const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
}); });

View File

@@ -1,13 +1,13 @@
import { describe, test, expect } from "bun:test"; import { describe, expect, test } from "bun:test";
import { import {
resolveLocationId,
resolveCategoryId,
buildSearchUrl,
HttpError, HttpError,
NetworkError, NetworkError,
ParseError, ParseError,
RateLimitError, RateLimitError,
ValidationError ValidationError,
buildSearchUrl,
resolveCategoryId,
resolveLocationId,
} from "../src/kijiji"; } from "../src/kijiji";
describe("Location and Category Resolution", () => { describe("Location and Category Resolution", () => {
@@ -74,8 +74,8 @@ describe("URL Construction", () => {
const url = buildSearchUrl("iphone", { const url = buildSearchUrl("iphone", {
location: 1700272, location: 1700272,
category: 132, category: 132,
sortBy: 'relevancy', sortBy: "relevancy",
sortOrder: 'desc', sortOrder: "desc",
}); });
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272"); expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
@@ -95,15 +95,15 @@ describe("URL Construction", () => {
test("should handle different sort options", () => { test("should handle different sort options", () => {
const dateUrl = buildSearchUrl("iphone", { const dateUrl = buildSearchUrl("iphone", {
sortBy: 'date', sortBy: "date",
sortOrder: 'asc', sortOrder: "asc",
}); });
expect(dateUrl).toContain("sort=DATE"); expect(dateUrl).toContain("sort=DATE");
expect(dateUrl).toContain("order=ASC"); expect(dateUrl).toContain("order=ASC");
const priceUrl = buildSearchUrl("iphone", { const priceUrl = buildSearchUrl("iphone", {
sortBy: 'price', sortBy: "price",
sortOrder: 'desc', sortOrder: "desc",
}); });
expect(priceUrl).toContain("sort=PRICE"); expect(priceUrl).toContain("sort=PRICE");
expect(priceUrl).toContain("order=DESC"); expect(priceUrl).toContain("order=DESC");
@@ -131,7 +131,11 @@ describe("Error Classes", () => {
test("NetworkError should store URL and cause", () => { test("NetworkError should store URL and cause", () => {
const cause = new Error("Connection failed"); const cause = new Error("Connection failed");
const error = new NetworkError("Network error", "https://example.com", cause); const error = new NetworkError(
"Network error",
"https://example.com",
cause,
);
expect(error.message).toBe("Network error"); expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com"); expect(error.url).toBe("https://example.com");
expect(error.cause).toBe(cause); expect(error.cause).toBe(cause);

View File

@@ -1,5 +1,9 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import { extractApolloState, parseSearch, parseDetailedListing } from "../src/kijiji"; import {
extractApolloState,
parseDetailedListing,
parseSearch,
} from "../src/kijiji";
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;
@@ -18,29 +22,31 @@ describe("HTML Parsing Integration", () => {
describe("extractApolloState", () => { describe("extractApolloState", () => {
test("should extract Apollo state from valid HTML", () => { test("should extract Apollo state from valid HTML", () => {
const mockHtml = '<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>'; const mockHtml =
'<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';
const result = extractApolloState(mockHtml); const result = extractApolloState(mockHtml);
expect(result).toEqual({ expect(result).toEqual({
ROOT_QUERY: { test: "value" } ROOT_QUERY: { test: "value" },
}); });
}); });
test("should return null for HTML without Apollo state", () => { test("should return null for HTML without Apollo state", () => {
const mockHtml = '<html><body>No data here</body></html>'; const mockHtml = "<html><body>No data here</body></html>";
const result = extractApolloState(mockHtml); const result = extractApolloState(mockHtml);
expect(result).toBeNull(); expect(result).toBeNull();
}); });
test("should return null for malformed JSON", () => { test("should return null for malformed JSON", () => {
const mockHtml = '<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>'; const mockHtml =
'<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';
const result = extractApolloState(mockHtml); const result = extractApolloState(mockHtml);
expect(result).toBeNull(); expect(result).toBeNull();
}); });
test("should handle missing __NEXT_DATA__ element", () => { test("should handle missing __NEXT_DATA__ element", () => {
const mockHtml = '<html><body><div>Content</div></body></html>'; const mockHtml = "<html><body><div>Content</div></body></html>";
const result = extractApolloState(mockHtml); const result = extractApolloState(mockHtml);
expect(result).toBeNull(); expect(result).toBeNull();
}); });
@@ -63,10 +69,10 @@ describe("HTML Parsing Integration", () => {
url: "/v-samsung/k0l0", url: "/v-samsung/k0l0",
title: "Samsung Galaxy", title: "Samsung Galaxy",
}, },
"ROOT_QUERY": { test: "value" } ROOT_QUERY: { test: "value" },
} },
} },
} },
})} })}
</script> </script>
</html> </html>
@@ -76,11 +82,11 @@ describe("HTML Parsing Integration", () => {
expect(results).toHaveLength(2); expect(results).toHaveLength(2);
expect(results[0]).toEqual({ expect(results[0]).toEqual({
name: "iPhone 13 Pro", name: "iPhone 13 Pro",
listingLink: "https://www.kijiji.ca/v-iphone/k0l0" listingLink: "https://www.kijiji.ca/v-iphone/k0l0",
}); });
expect(results[1]).toEqual({ expect(results[1]).toEqual({
name: "Samsung Galaxy", name: "Samsung Galaxy",
listingLink: "https://www.kijiji.ca/v-samsung/k0l0" listingLink: "https://www.kijiji.ca/v-samsung/k0l0",
}); });
}); });
@@ -95,17 +101,19 @@ describe("HTML Parsing Integration", () => {
"Listing:123": { "Listing:123": {
url: "https://www.kijiji.ca/v-iphone/k0l0", url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13 Pro", title: "iPhone 13 Pro",
} },
} },
} },
} },
})} })}
</script> </script>
</html> </html>
`; `;
const results = parseSearch(mockHtml, "https://www.kijiji.ca"); const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results[0].listingLink).toBe("https://www.kijiji.ca/v-iphone/k0l0"); expect(results[0].listingLink).toBe(
"https://www.kijiji.ca/v-iphone/k0l0",
);
}); });
test("should filter out invalid listings", () => { test("should filter out invalid listings", () => {
@@ -127,10 +135,10 @@ describe("HTML Parsing Integration", () => {
"Other:789": { "Other:789": {
url: "/v-other/k0l0", url: "/v-other/k0l0",
title: "Other Item", title: "Other Item",
} },
} },
} },
} },
})} })}
</script> </script>
</html> </html>
@@ -142,7 +150,10 @@ describe("HTML Parsing Integration", () => {
}); });
test("should return empty array for invalid HTML", () => { test("should return empty array for invalid HTML", () => {
const results = parseSearch("<html><body>Invalid</body></html>", "https://www.kijiji.ca"); const results = parseSearch(
"<html><body>Invalid</body></html>",
"https://www.kijiji.ca",
);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
}); });
@@ -163,7 +174,7 @@ describe("HTML Parsing Integration", () => {
price: { price: {
amount: 80000, amount: 80000,
currency: "CAD", currency: "CAD",
type: "FIXED" type: "FIXED",
}, },
type: "OFFER", type: "OFFER",
status: "ACTIVE", status: "ACTIVE",
@@ -176,38 +187,47 @@ describe("HTML Parsing Integration", () => {
name: "Toronto", name: "Toronto",
coordinates: { coordinates: {
latitude: 43.6532, latitude: 43.6532,
longitude: -79.3832 longitude: -79.3832,
} },
}, },
imageUrls: [ imageUrls: [
"https://media.kijiji.ca/api/v1/image1.jpg", "https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg" "https://media.kijiji.ca/api/v1/image2.jpg",
], ],
imageCount: 2, imageCount: 2,
categoryId: 132, categoryId: 132,
adSource: "ORGANIC", adSource: "ORGANIC",
flags: { flags: {
topAd: false, topAd: false,
priceDrop: true priceDrop: true,
}, },
posterInfo: { posterInfo: {
posterId: "user123", posterId: "user123",
rating: 4.8 rating: 4.8,
}, },
attributes: [ attributes: [
{ canonicalName: "forsaleby", canonicalValues: ["ownr"] }, {
{ canonicalName: "phonecarrier", canonicalValues: ["unlocked"] } canonicalName: "forsaleby",
] canonicalValues: ["ownr"],
} },
} {
} canonicalName: "phonecarrier",
} canonicalValues: ["unlocked"],
},
],
},
},
},
},
})} })}
</script> </script>
</html> </html>
`; `;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca"); const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({ expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0", url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0",
title: "iPhone 13 Pro 256GB", title: "iPhone 13 Pro 256GB",
@@ -215,7 +235,7 @@ describe("HTML Parsing Integration", () => {
listingPrice: { listingPrice: {
amountFormatted: "$800.00", amountFormatted: "$800.00",
cents: 80000, cents: 80000,
currency: "CAD" currency: "CAD",
}, },
listingType: "OFFER", listingType: "OFFER",
listingStatus: "ACTIVE", listingStatus: "ACTIVE",
@@ -225,30 +245,30 @@ describe("HTML Parsing Integration", () => {
address: "Toronto, ON", address: "Toronto, ON",
images: [ images: [
"https://media.kijiji.ca/api/v1/image1.jpg", "https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg" "https://media.kijiji.ca/api/v1/image2.jpg",
], ],
categoryId: 132, categoryId: 132,
adSource: "ORGANIC", adSource: "ORGANIC",
flags: { flags: {
topAd: false, topAd: false,
priceDrop: true priceDrop: true,
}, },
attributes: { attributes: {
forsaleby: ["ownr"], forsaleby: ["ownr"],
phonecarrier: ["unlocked"] phonecarrier: ["unlocked"],
}, },
location: { location: {
id: 1700273, id: 1700273,
name: "Toronto", name: "Toronto",
coordinates: { coordinates: {
latitude: 43.6532, latitude: 43.6532,
longitude: -79.3832 longitude: -79.3832,
} },
}, },
sellerInfo: { sellerInfo: {
posterId: "user123", posterId: "user123",
rating: 4.8 rating: 4.8,
} },
}); });
}); });
@@ -265,18 +285,21 @@ describe("HTML Parsing Integration", () => {
title: "iPhone for Sale", title: "iPhone for Sale",
price: { price: {
type: "CONTACT", type: "CONTACT",
amount: null amount: null,
} },
} },
} },
} },
} },
})} })}
</script> </script>
</html> </html>
`; `;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca"); const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toBeNull(); expect(result).toBeNull();
}); });
@@ -291,17 +314,20 @@ describe("HTML Parsing Integration", () => {
"Listing:123": { "Listing:123": {
url: "/v-iphone/k0l0", url: "/v-iphone/k0l0",
title: "iPhone 13", title: "iPhone 13",
price: { amount: 50000 } price: { amount: 50000 },
} },
} },
} },
} },
})} })}
</script> </script>
</html> </html>
`; `;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca"); const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({ expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone/k0l0", url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13", title: "iPhone 13",
@@ -309,7 +335,7 @@ describe("HTML Parsing Integration", () => {
listingPrice: { listingPrice: {
amountFormatted: "$500.00", amountFormatted: "$500.00",
cents: 50000, cents: 50000,
currency: undefined currency: undefined,
}, },
listingType: undefined, listingType: undefined,
listingStatus: undefined, listingStatus: undefined,
@@ -322,15 +348,15 @@ describe("HTML Parsing Integration", () => {
adSource: "UNKNOWN", adSource: "UNKNOWN",
flags: { flags: {
topAd: false, topAd: false,
priceDrop: false priceDrop: false,
}, },
attributes: {}, attributes: {},
location: { location: {
id: 0, id: 0,
name: "Unknown", name: "Unknown",
coordinates: undefined coordinates: undefined,
}, },
sellerInfo: undefined sellerInfo: undefined,
}); });
}); });
}); });

View File

@@ -1,5 +1,5 @@
import { describe, test, expect, beforeEach, afterEach } from "bun:test"; import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { slugify, formatCentsToCurrency } from "../src/kijiji"; import { formatCentsToCurrency, slugify } from "../src/kijiji";
describe("Utility Functions", () => { describe("Utility Functions", () => {
describe("slugify", () => { describe("slugify", () => {

View File

@@ -5,8 +5,10 @@ import { expect } from "bun:test";
// This file is loaded before any tests run due to bunfig.toml preload // This file is loaded before any tests run due to bunfig.toml preload
// Mock fetch globally for tests // Mock fetch globally for tests
global.fetch = global.fetch || (() => { global.fetch =
throw new Error('fetch is not available in test environment'); global.fetch ||
(() => {
throw new Error("fetch is not available in test environment");
}); });
// Add any global test utilities here // Add any global test utilities here

View File

@@ -7,25 +7,21 @@
"moduleDetection": "force", "moduleDetection": "force",
"jsx": "react-jsx", "jsx": "react-jsx",
"allowJs": true, "allowJs": true,
// Bundler mode // Bundler mode
"moduleResolution": "bundler", "moduleResolution": "bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"verbatimModuleSyntax": true, "verbatimModuleSyntax": true,
"noEmit": true, "noEmit": true,
// Best practices // Best practices
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"noFallthroughCasesInSwitch": true, "noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true, "noUncheckedIndexedAccess": true,
"noImplicitAny": true, "noImplicitAny": true,
// Some stricter flags (disabled by default) // Some stricter flags (disabled by default)
"noUnusedLocals": false, "noUnusedLocals": false,
"noUnusedParameters": false, "noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false, "noPropertyAccessFromIndexSignature": false,
"paths": { "paths": {
"@/*": ["./src/*"] "@/*": ["./src/*"]
} }