chore: biome lint

Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
This commit is contained in:
2026-01-22 22:34:05 -05:00
parent 3919ec0727
commit 6ab9c4c3a5
12 changed files with 4426 additions and 3885 deletions

View File

@@ -12,7 +12,7 @@
* bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json
*/
import { parseFacebookCookieString } from '../src/facebook';
import { parseFacebookCookieString } from "../src/facebook";
interface Cookie {
name: string;
@@ -28,14 +28,14 @@ interface Cookie {
function parseFacebookCookieStringCLI(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) {
console.error('❌ Error: Empty or invalid cookie string provided');
console.error("❌ Error: Empty or invalid cookie string provided");
process.exit(1);
}
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) {
console.error('❌ Error: No valid cookies found in input string');
console.error("❌ Error: No valid cookies found in input string");
console.error('Expected format: "name1=value1; name2=value2;"');
process.exit(1);
}
@@ -48,40 +48,40 @@ async function main() {
if (args.length === 0 && process.stdin.isTTY === false) {
// Read from stdin
let input = '';
let input = "";
for await (const chunk of process.stdin) {
input += chunk;
}
input = input.trim();
if (!input) {
console.error('❌ Error: No input provided via stdin');
console.error("❌ Error: No input provided via stdin");
process.exit(1);
}
const cookies = parseFacebookCookieStringCLI(input);
await writeOutput(cookies, './cookies/facebook.json');
await writeOutput(cookies, "./cookies/facebook.json");
return;
}
let cookieString = '';
let outputPath = './cookies/facebook.json';
let inputPath = '';
let cookieString = "";
let outputPath = "./cookies/facebook.json";
let inputPath = "";
// Parse command line arguments
for (let i = 0; i < args.length; i++) {
const arg = args[i];
if (arg === '--input' || arg === '-i') {
if (arg === "--input" || arg === "-i") {
inputPath = args[i + 1];
i++; // Skip next arg
} else if (arg === '--output' || arg === '-o') {
} else if (arg === "--output" || arg === "-o") {
outputPath = args[i + 1];
i++; // Skip next arg
} else if (arg === '--help' || arg === '-h') {
} else if (arg === "--help" || arg === "-h") {
showHelp();
return;
} else if (!arg.startsWith('-')) {
} else if (!arg.startsWith("-")) {
// Assume this is the cookie string
cookieString = arg;
} else {
@@ -107,8 +107,10 @@ async function main() {
}
if (!cookieString.trim()) {
console.error('❌ Error: No cookie string provided');
console.error('Provide cookie string as argument, --input file, or via stdin');
console.error("❌ Error: No cookie string provided");
console.error(
"Provide cookie string as argument, --input file, or via stdin",
);
showHelp();
process.exit(1);
}
@@ -124,11 +126,12 @@ async function writeOutput(cookies: Cookie[], outputPath: string) {
console.log(`📁 Saved to: ${outputPath}`);
// Show summary of parsed cookies
console.log('\n📋 Parsed cookies:');
console.log("\n📋 Parsed cookies:");
for (const cookie of cookies) {
console.log(`${cookie.name}: ${cookie.value.substring(0, 20)}${cookie.value.length > 20 ? '...' : ''}`);
console.log(
`${cookie.name}: ${cookie.value.substring(0, 20)}${cookie.value.length > 20 ? "..." : ""}`,
);
}
} catch (error) {
console.error(`❌ Error writing to output file: ${error}`);
process.exit(1);
@@ -173,7 +176,7 @@ OUTPUT:
// Run the CLI
if (import.meta.main) {
main().catch(error => {
main().catch((error) => {
console.error(`❌ Unexpected error: ${error}`);
process.exit(1);
});

View File

@@ -1,6 +1,6 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
// ----------------------------- Types -----------------------------
@@ -55,8 +55,10 @@ function formatCentsToCurrency(
/**
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
*/
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null {
if (!priceText || typeof priceText !== 'string') return null;
function parseEbayPrice(
priceText: string,
): { cents: number; currency: string } | null {
if (!priceText || typeof priceText !== "string") return null;
// Clean up the price text and extract currency and amount
const cleaned = priceText.trim();
@@ -65,19 +67,23 @@ function parseEbayPrice(priceText: string): { cents: number; currency: string }
const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
if (!numberMatches) return null;
const amountStr = numberMatches[0].replace(/,/g, '');
const dollars = parseFloat(amountStr);
if (isNaN(dollars)) return null;
const amountStr = numberMatches[0].replace(/,/g, "");
const dollars = Number.parseFloat(amountStr);
if (Number.isNaN(dollars)) return null;
const cents = Math.round(dollars * 100);
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = 'USD'; // Default
let currency = "USD"; // Default
if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) {
currency = 'CAD';
} else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) {
currency = 'USD';
if (
cleaned.toUpperCase().includes("CAD") ||
cleaned.includes("CA$") ||
cleaned.includes("C $")
) {
currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
currency = "USD";
}
return { cents, currency };
@@ -135,7 +141,9 @@ async function fetchHtml(
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
@@ -176,7 +184,7 @@ function parseEbayListings(
htmlString: HTMLString,
keywords: string[],
exclusions: string[],
strictMode: boolean
strictMode: boolean,
): ListingDetails[] {
const { document } = parseHTML(htmlString);
const results: ListingDetails[] = [];
@@ -184,16 +192,17 @@ function parseEbayListings(
// Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]');
for (const linkElement of linkElements) {
try {
// Get href attribute
let href = linkElement.getAttribute('href');
let href = linkElement.getAttribute("href");
if (!href) continue;
// Make href absolute
if (!href.startsWith('http')) {
href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`;
if (!href.startsWith("http")) {
href = href.startsWith("//")
? `https:${href}`
: `https://www.ebay.com${href}`;
}
// Find the container - go up several levels to find the item container
@@ -207,15 +216,23 @@ function parseEbayListings(
// Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link
let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span');
let titleElement = container.querySelector(
'h3, [role="heading"], .s-item__title span',
);
// If no direct title element, try finding text content around the link
if (!titleElement) {
// Look for spans or divs with text near this link
const nearbySpans = container.querySelectorAll('span, div');
const nearbySpans = container.querySelectorAll("span, div");
for (const span of nearbySpans) {
const text = span.textContent?.trim();
if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) {
if (
text &&
text.length > 10 &&
text.length < 200 &&
!text.includes("$") &&
!text.includes("item")
) {
titleElement = span;
break;
}
@@ -228,12 +245,12 @@ function parseEbayListings(
if (title) {
// Remove common eBay UI strings that appear at the end of titles
const uiStrings = [
'Opens in a new window',
'Opens in a new tab',
'Opens in a new window or tab',
'opens in a new window',
'opens in a new tab',
'opens in a new window or tab'
"Opens in a new window",
"Opens in a new tab",
"Opens in a new window or tab",
"opens in a new window",
"opens in a new tab",
"opens in a new window or tab",
];
for (const uiString of uiStrings) {
@@ -256,18 +273,28 @@ function parseEbayListings(
if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices
let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]');
let priceElement = container.querySelector(
'[class*="s-item__price"], .s-item__price, [class*="price"]',
);
// If no direct price class, look for spans containing $ (but not titles)
if (!priceElement) {
const spansAndElements = container.querySelectorAll('span, div, b, em, strong');
const spansAndElements = container.querySelectorAll(
"span, div, b, em, strong",
);
for (const el of spansAndElements) {
const text = el.textContent?.trim();
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if (text && text.includes('$') && text.length < 100 &&
!text.includes('laptop') && !text.includes('computer') && !text.includes('intel') &&
!text.includes('core') && !text.includes('ram') && !text.includes('ssd') &&
! /\d{4}/.test(text) && // Avoid years like "2024"
if (
text?.includes("$") &&
text.length < 100 &&
!text.includes("laptop") &&
!text.includes("computer") &&
!text.includes("intel") &&
!text.includes("core") &&
!text.includes("ram") &&
!text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements
) {
priceElement = el;
@@ -280,17 +307,26 @@ function parseEbayListings(
// Prefer sale/current price over original/strikethrough price
if (priceElement) {
// Check if this element or its parent contains multiple price elements
const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement;
const priceContainer =
priceElement.closest('[class*="s-item__price"]') ||
priceElement.parentElement;
if (priceContainer) {
// Look for all price elements within this container, including strikethrough prices
const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike');
const allPriceElements = priceContainer.querySelectorAll(
'[class*="s-item__price"], span, b, em, strong, s, del, strike',
);
// Filter to only elements that actually contain prices (not labels)
const actualPrices: HTMLElement[] = [];
for (const el of allPriceElements) {
const text = el.textContent?.trim();
if (text && /^\s*[\$£¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) {
if (
text &&
/^\s*[\$£¥]/u.test(text) &&
text.length < 50 &&
!/\d{4}/.test(text)
) {
actualPrices.push(el);
}
}
@@ -298,11 +334,18 @@ function parseEbayListings(
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
if (actualPrices.length > 1) {
// First, look for prices that are NOT struck through
const nonStrikethroughPrices = actualPrices.filter(el => {
const nonStrikethroughPrices = actualPrices.filter((el) => {
const tagName = el.tagName.toLowerCase();
const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') ||
el.closest('s, del, strike');
return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles;
const styles =
el.classList.contains("s-strikethrough") ||
el.classList.contains("u-flStrike") ||
el.closest("s, del, strike");
return (
tagName !== "s" &&
tagName !== "del" &&
tagName !== "strike" &&
!styles
);
});
if (nonStrikethroughPrices.length > 0) {
@@ -317,7 +360,7 @@ function parseEbayListings(
}
}
let priceText = priceElement?.textContent?.trim();
const priceText = priceElement?.textContent?.trim();
if (!priceText) continue;
@@ -326,12 +369,21 @@ function parseEbayListings(
if (!priceInfo) continue;
// Apply exclusion filters
if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) {
if (
exclusions.some((exclusion) =>
title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
continue;
}
// Apply strict mode filter (title must contain at least one keyword)
if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) {
if (
strictMode &&
!keywords.some((keyword) =>
title?.toLowerCase().includes(keyword.toLowerCase()),
)
) {
continue;
}
@@ -351,7 +403,6 @@ function parseEbayListings(
results.push(listing);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
continue;
}
}
@@ -376,7 +427,7 @@ export default async function fetchEbayItems(
maxPrice = Number.MAX_SAFE_INTEGER,
strictMode = false,
exclusions = [],
keywords = [SEARCH_QUERY] // Default to search query if no keywords provided
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
} = opts;
// Build eBay search URL - use Canadian site and tracking parameters like real browser
@@ -389,18 +440,19 @@ export default async function fetchEbayItems(
try {
// Use custom headers modeled after real browser requests to bypass bot detection
const headers: Record<string, string> = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'Referer': 'https://www.ebay.ca/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Priority': 'u=0, i'
"User-Agent":
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
Priority: "u=0, i",
};
const res = await fetch(searchUrl, {
@@ -420,19 +472,23 @@ export default async function fetchEbayItems(
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
console.log(`\nParsing eBay listings...`);
console.log("\nParsing eBay listings...");
const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode);
const listings = parseEbayListings(
searchHtml,
keywords,
exclusions,
strictMode,
);
// Filter by price range (additional safety check)
const filteredListings = listings.filter(listing => {
const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents;
return cents && cents >= minPrice && cents <= maxPrice;
});
console.log(`Parsed ${filteredListings.length} eBay listings.`);
return filteredListings;
} catch (err) {
if (err instanceof HttpError) {
console.error(

View File

@@ -1,6 +1,6 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
/**
* Facebook Marketplace Scraper
@@ -213,7 +213,10 @@ async function delay(ms: number): Promise<void> {
/**
* Load Facebook cookies from file or string
*/
async function loadFacebookCookies(cookiesSource?: string, cookiePath = './cookies/facebook.json'): Promise<Cookie[]> {
async function loadFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load from provided string parameter
if (cookiesSource) {
try {
@@ -263,11 +266,11 @@ function parseFacebookCookieString(cookieString: string): Cookie[] {
}
return cookieString
.split(';')
.map(pair => pair.trim())
.filter(pair => pair.includes('='))
.map(pair => {
const [name, value] = pair.split('=', 2);
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
const [name, value] = pair.split("=", 2);
const trimmedName = name.trim();
const trimmedValue = value.trim();
@@ -279,11 +282,11 @@ function parseFacebookCookieString(cookieString: string): Cookie[] {
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
domain: '.facebook.com',
path: '/',
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: 'lax' as const,
sameSite: "lax" as const,
expirationDate: undefined, // Session cookies
};
})
@@ -293,8 +296,9 @@ function parseFacebookCookieString(cookieString: string): Cookie[] {
/**
* Ensure Facebook cookies are available, parsing from env var if needed
*/
async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Promise<Cookie[]> {
async function ensureFacebookCookies(
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load existing cookies
try {
const existing = await loadFacebookCookies(undefined, cookiePath);
@@ -309,9 +313,9 @@ async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Pr
const cookieString = process.env.FACEBOOK_COOKIE;
if (!cookieString || !cookieString.trim()) {
throw new Error(
'No valid Facebook cookies found. Either:\n' +
' 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n' +
' 2. Create ./cookies/facebook.json manually with cookie array'
"No valid Facebook cookies found. Either:\n" +
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
" 2. Create ./cookies/facebook.json manually with cookie array",
);
}
@@ -319,8 +323,8 @@ async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Pr
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) {
throw new Error(
'FACEBOOK_COOKIE environment variable contains no valid cookies. ' +
'Expected format: "name1=value1; name2=value2;"'
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
'Expected format: "name1=value1; name2=value2;"',
);
}
@@ -329,7 +333,7 @@ async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Pr
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
} catch (error) {
console.warn(`⚠️ Could not save cookies to ${cookiePath}: ${error}`);
console.warn(`! Could not save cookies to ${cookiePath}: ${error}`);
// Continue anyway, we have the cookies in memory
}
@@ -349,10 +353,9 @@ function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
domain.endsWith(cookie.domain.slice(1)) ||
domain === cookie.domain.slice(1)
);
} else {
}
// Host-only cookie
return cookie.domain === domain;
}
})
.filter((cookie) => {
// Check expiration
@@ -418,7 +421,7 @@ async function fetchHtml(
// Add cookies if provided
if (opts?.cookies) {
headers["cookie"] = opts.cookies;
headers.cookie = opts.cookies;
}
const res = await fetch(url, {
@@ -433,7 +436,9 @@ async function fetchHtml(
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
@@ -500,33 +505,38 @@ function extractFacebookMarketplaceData(
// Try multiple navigation paths to find marketplace_search
const paths = [
// Original path from example
() => parsed.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'],
() =>
parsed.require[0][3][0].__bbox.require[0][3][1].__bbox.result.data
.marketplace_search,
// Alternative path structure
() => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
() =>
parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
// Another variation
() => parsed.require[0][3][0]['__bbox']['result']['data']['marketplace_search'],
() => parsed.require[0][3][0].__bbox.result.data.marketplace_search,
// Direct access for some responses
() => {
for (const item of parsed.require) {
if (item && item.length >= 4 && item[3]) {
const bbox = item[3]?.['__bbox']?.result?.data?.marketplace_search;
const bbox = item[3]?.__bbox?.result?.data?.marketplace_search;
if (bbox) return bbox;
}
}
return null;
}
},
];
for (const getData of paths) {
try {
const result = getData();
if (result && isRecord(result) && result.feed_units?.edges?.length > 0) {
if (
result &&
isRecord(result) &&
result.feed_units?.edges?.length > 0
) {
marketplaceData = result as FacebookMarketplaceSearch;
break;
}
} catch {
continue;
}
} catch {}
}
if (marketplaceData) break;
@@ -534,7 +544,8 @@ function extractFacebookMarketplaceData(
// Also check for direct marketplace_search in the parsed data
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
const searchData = parsed.marketplace_search as FacebookMarketplaceSearch;
const searchData =
parsed.marketplace_search as FacebookMarketplaceSearch;
if (searchData.feed_units?.edges?.length > 0) {
marketplaceData = searchData;
break;
@@ -550,14 +561,16 @@ function extractFacebookMarketplaceData(
return null;
}
console.log(`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`);
console.log(
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`,
);
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
}
/**
* Monitor API extraction success/failure for detecting changes
*/
let extractionStats = {
const extractionStats = {
totalExtractions: 0,
successfulExtractions: 0,
failedExtractions: 0,
@@ -576,16 +589,27 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
}
// Log warning if extraction success rate drops below 80%
const successRate = extractionStats.successfulExtractions / extractionStats.totalExtractions;
if (extractionStats.totalExtractions > 10 && successRate < 0.8 && !extractionStats.lastApiChangeDetected) {
console.warn("⚠️ Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.");
const successRate =
extractionStats.successfulExtractions / extractionStats.totalExtractions;
if (
extractionStats.totalExtractions > 10 &&
successRate < 0.8 &&
!extractionStats.lastApiChangeDetected
) {
console.warn(
"! Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
);
extractionStats.lastApiChangeDetected = new Date();
}
if (success) {
console.log(`📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`);
console.log(
`📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`,
);
} else {
console.warn(`❌ Facebook API extraction failed for item ${itemId || 'unknown'}`);
console.warn(
`❌ Facebook API extraction failed for item ${itemId || "unknown"}`,
);
}
}
@@ -601,8 +625,8 @@ function formatCentsToCurrency(
if (Number.isNaN(cents)) return "";
const dollars = cents / 100;
const formatter = new Intl.NumberFormat(locale, {
style: 'currency',
currency: 'USD',
style: "currency",
currency: "USD",
minimumFractionDigits: 2,
maximumFractionDigits: 2,
useGrouping: true,
@@ -614,7 +638,9 @@ function formatCentsToCurrency(
Extract marketplace item details from Facebook item page HTML
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
*/
function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceItem | null {
function extractFacebookItemData(
htmlString: HTMLString,
): FacebookMarketplaceItem | null {
const { document } = parseHTML(htmlString);
const scripts = document.querySelectorAll("script");
@@ -630,24 +656,40 @@ function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceIte
// Try multiple extraction paths discovered from reverse engineering
const extractionPaths = [
// Path 1: Primary path from current API structure
() => parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target,
() =>
parsed.require[0][3].__bbox.result.data.viewer
.marketplace_product_details_page.target,
// Path 2: Alternative path with nested require
() => parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target,
() =>
parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data
.viewer.marketplace_product_details_page.target,
// Path 3: Variation without the [0] index
() => parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target,
() =>
parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data
.viewer.marketplace_product_details_page.target,
// Path 4-5: Additional fallback paths for edge cases
() => parsed.require[0][3][1]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target,
() => parsed.require[0][3][2]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target,
() =>
parsed.require[0][3][1]?.__bbox?.result?.data?.viewer
?.marketplace_product_details_page?.target,
() =>
parsed.require[0][3][2]?.__bbox?.result?.data?.viewer
?.marketplace_product_details_page?.target,
];
let pathIndex = 0;
for (const getPath of extractionPaths) {
try {
const targetData = getPath();
if (targetData && typeof targetData === 'object' &&
targetData.id && targetData.marketplace_listing_title &&
targetData.__typename === 'GroupCommerceProductItem') {
console.log(`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`);
if (
targetData &&
typeof targetData === "object" &&
targetData.id &&
targetData.marketplace_listing_title &&
targetData.__typename === "GroupCommerceProductItem"
) {
console.log(
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
);
return targetData as FacebookMarketplaceItem;
}
} catch {
@@ -657,13 +699,20 @@ function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceIte
}
// Fallback: Search recursively for marketplace data in the parsed structure
const findMarketplaceData = (obj: unknown, depth = 0, maxDepth = 10): FacebookMarketplaceItem | null => {
const findMarketplaceData = (
obj: unknown,
depth = 0,
maxDepth = 10,
): FacebookMarketplaceItem | null => {
if (depth > maxDepth) return null; // Prevent infinite recursion
if (isRecord(obj)) {
// Check if this object matches the expected marketplace item structure
if (obj.marketplace_listing_title && obj.id &&
obj.__typename === 'GroupCommerceProductItem' &&
obj.redacted_description) {
if (
obj.marketplace_listing_title &&
obj.id &&
obj.__typename === "GroupCommerceProductItem" &&
obj.redacted_description
) {
return obj as FacebookMarketplaceItem;
}
// Recursively search nested objects and arrays
@@ -687,17 +736,30 @@ function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceIte
// Search through the entire require structure
const recursiveResult = findMarketplaceData(parsed.require);
if (recursiveResult) {
console.log('Successfully extracted Facebook item data using recursive search');
console.log(
"Successfully extracted Facebook item data using recursive search",
);
return recursiveResult;
}
// Additional search in other potential locations
if (parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target) {
const bboxData = parsed.__bbox.result.data.viewer.marketplace_product_details_page.target;
if (bboxData && typeof bboxData === 'object' &&
bboxData.id && bboxData.marketplace_listing_title &&
bboxData.__typename === 'GroupCommerceProductItem') {
console.log('Successfully extracted Facebook item data from __bbox structure');
if (
parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page
?.target
) {
const bboxData =
parsed.__bbox.result.data.viewer.marketplace_product_details_page
.target;
if (
bboxData &&
typeof bboxData === "object" &&
bboxData.id &&
bboxData.marketplace_listing_title &&
bboxData.__typename === "GroupCommerceProductItem"
) {
console.log(
"Successfully extracted Facebook item data from __bbox structure",
);
return bboxData as FacebookMarketplaceItem;
}
}
@@ -734,7 +796,8 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
// - formatted_amount: human-readable price (like "CA$1")
let cents: number;
if (priceObj.amount != null) {
const dollars = typeof priceObj.amount === 'string'
const dollars =
typeof priceObj.amount === "string"
? Number.parseFloat(priceObj.amount)
: priceObj.amount;
cents = Math.round(dollars * 100);
@@ -748,7 +811,7 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
if (priceObj.formatted_amount) {
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) {
const dollars = Number.parseFloat(match[0].replace(',', ''));
const dollars = Number.parseFloat(match[0].replace(",", ""));
if (!Number.isNaN(dollars)) {
cents = Math.round(dollars * 100);
} else {
@@ -793,19 +856,24 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
// Extract image and video URLs
const imageUrl = listing.primary_listing_photo?.image?.uri;
const videoUrl = listing.listing_video ? `https://www.facebook.com/${listing.listing_video.id}/` : undefined;
const videoUrl = listing.listing_video
? `https://www.facebook.com/${listing.listing_video.id}/`
: undefined;
// Extract seller information
const seller = listing.marketplace_listing_seller ? {
const seller = listing.marketplace_listing_seller
? {
name: listing.marketplace_listing_seller.name,
id: listing.marketplace_listing_seller.id
} : undefined;
id: listing.marketplace_listing_seller.id,
}
: undefined;
const listingDetails: ListingDetails = {
url,
title,
listingPrice: {
amountFormatted: priceObj.formatted_amount || formatCentsToCurrency(cents),
amountFormatted:
priceObj.formatted_amount || formatCentsToCurrency(cents),
cents,
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
},
@@ -821,10 +889,7 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
};
results.push(listingDetails);
} catch {
// Skip malformed ads
continue;
}
} catch {}
}
return results;
@@ -834,7 +899,9 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
Parse Facebook marketplace item details into ListingDetails format
Updated for 2026 GroupCommerceProductItem structure
*/
function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null {
function parseFacebookItem(
item: FacebookMarketplaceItem,
): ListingDetails | null {
try {
const title = item.marketplace_listing_title || item.custom_title;
if (!title) return null;
@@ -849,10 +916,11 @@ function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null
if (item.listing_price) {
currency = item.listing_price.currency || "CAD";
if (item.listing_price.amount && item.listing_price.amount !== "0.00") {
const amount = parseFloat(item.listing_price.amount);
if (!isNaN(amount)) {
const amount = Number.parseFloat(item.listing_price.amount);
if (!Number.isNaN(amount)) {
cents = Math.round(amount * 100);
amountFormatted = item.formatted_price?.text || formatCentsToCurrency(cents);
amountFormatted =
item.formatted_price?.text || formatCentsToCurrency(cents);
}
}
}
@@ -864,10 +932,12 @@ function parseFacebookItem(item: FacebookMarketplaceItem): ListingDetails | null
const address = item.location_text?.text || null;
// Extract seller information
const seller = item.marketplace_listing_seller ? {
const seller = item.marketplace_listing_seller
? {
name: item.marketplace_listing_seller.name,
id: item.marketplace_listing_seller.id
} : undefined;
id: item.marketplace_listing_seller.id,
}
: undefined;
// Determine listing status
let listingStatus: string | undefined;
@@ -987,8 +1057,7 @@ export default async function fetchFacebookItems(
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
"\n" +
`Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
@@ -1022,7 +1091,7 @@ export default async function fetchFacebookItems(
cliProgress.Presets.shades_classic,
);
const totalProgress = ads.length;
let currentProgress = 0;
const currentProgress = 0;
progressBar.start(totalProgress, currentProgress);
const items = parseFacebookAds(ads);
@@ -1083,8 +1152,7 @@ export async function fetchFacebookItem(
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
"\n" +
`Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
@@ -1104,7 +1172,9 @@ export async function fetchFacebookItem(
console.warn(
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
);
console.warn("Try logging out and back into Facebook, then export fresh cookies.");
console.warn(
"Try logging out and back into Facebook, then export fresh cookies.",
);
break;
case 404:
console.warn(
@@ -1135,21 +1205,31 @@ export async function fetchFacebookItem(
if (!itemData) {
logExtractionMetrics(false, itemId);
// Enhanced checking for specific failure scenarios
if (itemHtml.includes("This listing is no longer available") ||
if (
itemHtml.includes("This listing is no longer available") ||
itemHtml.includes("listing has been removed") ||
itemHtml.includes("This item has been sold")) {
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
itemHtml.includes("This item has been sold")
) {
console.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
if (itemHtml.includes("log in to Facebook") ||
if (
itemHtml.includes("log in to Facebook") ||
itemHtml.includes("You must log in") ||
itemHtml.includes("authentication required")) {
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`);
itemHtml.includes("authentication required")
) {
console.warn(
`Authentication failed for item ${itemId}. Cookies may be expired.`,
);
return null;
}
console.warn(`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`);
console.warn(
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
);
console.warn(" - The listing was removed or sold");
console.warn(" - Authentication issues");
console.warn(" - Facebook changed their API structure");
@@ -1173,8 +1253,11 @@ export async function fetchFacebookItem(
parsedItem.listingStatus = "SOLD";
} else if (!itemData.is_live) {
console.warn(`Item ${itemId} is not live/active in the marketplace.`);
parsedItem.listingStatus = itemData.is_hidden ? "HIDDEN" :
itemData.is_pending ? "PENDING" : "INACTIVE";
parsedItem.listingStatus = itemData.is_hidden
? "HIDDEN"
: itemData.is_pending
? "PENDING"
: "INACTIVE";
}
return parsedItem;

View File

@@ -1,6 +1,6 @@
import fetchKijijiItems from "@/kijiji";
import fetchFacebookItems from "@/facebook";
import fetchEbayItems from "@/ebay";
import fetchFacebookItems from "@/facebook";
import fetchKijijiItems from "@/kijiji";
const PORT = process.env.PORT || 4005;
@@ -30,34 +30,54 @@ const server = Bun.serve({
const location = reqUrl.searchParams.get("location");
const category = reqUrl.searchParams.get("category");
const maxPagesParam = reqUrl.searchParams.get("maxPages");
const maxPages = maxPagesParam
? Number.parseInt(maxPagesParam, 10)
: 5; // Default: 5 pages
const sortBy = reqUrl.searchParams.get("sortBy") as 'relevancy' | 'date' | 'price' | 'distance' | undefined;
const sortOrder = reqUrl.searchParams.get("sortOrder") as 'asc' | 'desc' | undefined;
const maxPages = maxPagesParam ? Number.parseInt(maxPagesParam, 10) : 5; // Default: 5 pages
const sortBy = reqUrl.searchParams.get("sortBy") as
| "relevancy"
| "date"
| "price"
| "distance"
| undefined;
const sortOrder = reqUrl.searchParams.get("sortOrder") as
| "asc"
| "desc"
| undefined;
// Build search options
const locationValue = location ? (/^\d+$/.test(location) ? Number(location) : location) : 1700272;
const categoryValue = category ? (/^\d+$/.test(category) ? Number(category) : category) : 0;
const locationValue = location
? /^\d+$/.test(location)
? Number(location)
: location
: 1700272;
const categoryValue = category
? /^\d+$/.test(category)
? Number(category)
: category
: 0;
const searchOptions: import("@/kijiji").SearchOptions = {
location: locationValue,
category: categoryValue,
keywords: SEARCH_QUERY,
sortBy: sortBy || 'relevancy',
sortOrder: sortOrder || 'desc',
sortBy: sortBy || "relevancy",
sortOrder: sortOrder || "desc",
maxPages,
};
// Build listing fetch options with enhanced defaults
const listingOptions: import("@/kijiji").ListingFetchOptions = {
includeImages: true, // Always include full image arrays
sellerDataDepth: 'detailed', // Default: detailed seller info
sellerDataDepth: "detailed", // Default: detailed seller info
includeClientSideData: false, // GraphQL reviews disabled by default
};
try {
const items = await fetchKijijiItems(SEARCH_QUERY, 1, undefined, searchOptions, listingOptions);
const items = await fetchKijijiItems(
SEARCH_QUERY,
1,
undefined,
searchOptions,
listingOptions,
);
if (!items || items.length === 0)
return Response.json(
{ message: "Search didn't return any results!" },
@@ -66,12 +86,13 @@ const server = Bun.serve({
return Response.json(items, { status: 200 });
} catch (error) {
console.error("Kijiji scraping error:", error);
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json(
{
message: `Scraping failed: ${errorMessage}`,
query: SEARCH_QUERY,
options: { searchOptions, listingOptions }
options: { searchOptions, listingOptions },
},
{ status: 500 },
);
@@ -96,7 +117,14 @@ const server = Bun.serve({
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
try {
const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE, "./cookies/facebook.json");
const items = await fetchFacebookItems(
SEARCH_QUERY,
5,
LOCATION,
25,
COOKIES_SOURCE,
"./cookies/facebook.json",
);
if (!items || items.length === 0)
return Response.json(
{ message: "Search didn't return any results!" },
@@ -105,11 +133,9 @@ const server = Bun.serve({
return Response.json(items, { status: 200 });
} catch (error) {
console.error("Facebook scraping error:", error);
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
return Response.json(
{ message: errorMessage },
{ status: 400 },
);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });
}
},
@@ -138,9 +164,13 @@ const server = Bun.serve({
: undefined;
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : [];
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY];
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
try {
const items = await fetchEbayItems(SEARCH_QUERY, 5, {
@@ -158,11 +188,9 @@ const server = Bun.serve({
return Response.json(items, { status: 200 });
} catch (error) {
console.error("eBay scraping error:", error);
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
return Response.json(
{ message: errorMessage },
{ status: 400 },
);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });
}
},

View File

@@ -1,7 +1,7 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import unidecode from "unidecode";
import cliProgress from "cli-progress";
// const unidecode = require("unidecode");
@@ -99,8 +99,8 @@ interface SearchOptions {
location?: number | string; // Location ID or name
category?: number | string; // Category ID or name
keywords?: string;
sortBy?: 'relevancy' | 'date' | 'price' | 'distance';
sortOrder?: 'desc' | 'asc';
sortBy?: "relevancy" | "date" | "price" | "distance";
sortOrder?: "desc" | "asc";
maxPages?: number; // Default: 5
priceMin?: number;
priceMax?: number;
@@ -108,7 +108,7 @@ interface SearchOptions {
/**
 * Per-listing fetch settings. Every field is optional; the defaults noted
 * below are the ones fetchKijijiItems fills in when a field is omitted.
 */
interface ListingFetchOptions {
  /** Include the listing's image URLs. Default: true */
  includeImages?: boolean;
  /** How much seller information to collect. Default: "detailed" */
  sellerDataDepth?: "basic" | "detailed" | "full";
  /** Allow the extra client-side (GraphQL) seller lookups. Default: false */
  includeClientSideData?: boolean;
}
@@ -116,65 +116,65 @@ interface ListingFetchOptions {
// Location mappings from KIJIJI.md.
// Keys are lower-case location names, values are Kijiji location IDs.
// Multi-word names are listed both with spaces and with hyphens:
// resolveLocationId() replaces whitespace with "-" before the lookup, so
// without the hyphenated aliases "nova scotia" etc. would silently resolve
// to the Canada-wide default (0).
const LOCATION_MAPPINGS: Record<string, number> = {
  canada: 0,
  ontario: 9004,
  toronto: 1700273,
  gta: 1700272,
  oshawa: 1700275,
  quebec: 9001,
  "nova scotia": 9002,
  "nova-scotia": 9002,
  alberta: 9003,
  "new brunswick": 9005,
  "new-brunswick": 9005,
  manitoba: 9006,
  "british columbia": 9007,
  "british-columbia": 9007,
  newfoundland: 9008,
  saskatchewan: 9009,
  territories: 9010,
  pei: 9011,
  "prince edward island": 9011,
  "prince-edward-island": 9011,
};
// Category mappings from KIJIJI.md (Buy & Sell main categories).
// Keys are lower-case, hyphen-separated category names (the same form
// resolveCategoryId() normalizes its input to); values are Kijiji category IDs.
const CATEGORY_MAPPINGS: Record<string, number> = {
  all: 0,
  "buy-sell": 10,
  "arts-collectibles": 12,
  audio: 767,
  "baby-items": 253,
  "bags-luggage": 931,
  bikes: 644,
  books: 109,
  cameras: 103,
  cds: 104,
  clothing: 274,
  computers: 16,
  "computer-accessories": 128,
  electronics: 29659001,
  "free-stuff": 17220001,
  furniture: 235,
  "garage-sales": 638,
  "health-special-needs": 140,
  "hobbies-crafts": 139,
  "home-appliances": 107,
  "home-indoor": 717,
  "home-outdoor": 727,
  jewellery: 133,
  "musical-instruments": 17,
  phones: 132,
  "sporting-goods": 111,
  tools: 110,
  "toys-games": 108,
  "tvs-video": 15093001,
  "video-games": 141,
  other: 26,
};
// Sort parameter mappings: SearchOptions.sortBy value -> value placed in the
// "sort" query parameter by buildSearchUrl().
const SORT_MAPPINGS: Record<string, string> = {
  relevancy: "MATCH",
  date: "DATE",
  price: "PRICE",
  distance: "DISTANCE",
};
// ----------------------------- Exports for Testing -----------------------------
@@ -193,9 +193,9 @@ const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]);
* Resolve location ID from name or return numeric ID
*/
function resolveLocationId(location?: number | string): number {
if (typeof location === 'number') return location;
if (typeof location === 'string') {
const normalized = location.toLowerCase().replace(/\s+/g, '-');
if (typeof location === "number") return location;
if (typeof location === "string") {
const normalized = location.toLowerCase().replace(/\s+/g, "-");
return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0)
}
return 0; // Default to Canada
@@ -205,9 +205,9 @@ function resolveLocationId(location?: number | string): number {
* Resolve category ID from name or return numeric ID
*/
function resolveCategoryId(category?: number | string): number {
if (typeof category === 'number') return category;
if (typeof category === 'string') {
const normalized = category.toLowerCase().replace(/\s+/g, '-');
if (typeof category === "number") return category;
if (typeof category === "string") {
const normalized = category.toLowerCase().replace(/\s+/g, "-");
return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories
}
return 0; // Default to all categories
@@ -219,19 +219,22 @@ function resolveCategoryId(category?: number | string): number {
function buildSearchUrl(
keywords: string,
options: SearchOptions & { page?: number },
BASE_URL = "https://www.kijiji.ca"
BASE_URL = "https://www.kijiji.ca",
): string {
const locationId = resolveLocationId(options.location);
const categoryId = resolveCategoryId(options.category);
const categorySlug = categoryId === 0 ? 'buy-sell' : 'buy-sell'; // Could be enhanced
const locationSlug = locationId === 0 ? 'canada' : 'canada'; // Could be enhanced
const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell"; // Could be enhanced
const locationSlug = locationId === 0 ? "canada" : "canada"; // Could be enhanced
let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`;
const sortParam = options.sortBy ? `&sort=${SORT_MAPPINGS[options.sortBy]}` : '';
const sortOrder = options.sortOrder === 'asc' ? 'ASC' : 'DESC';
const pageParam = options.page && options.page > 1 ? `&page=${options.page}` : '';
const sortParam = options.sortBy
? `&sort=${SORT_MAPPINGS[options.sortBy]}`
: "";
const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC";
const pageParam =
options.page && options.page > 1 ? `&page=${options.page}` : "";
url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`;
@@ -278,8 +281,8 @@ export function formatCentsToCurrency(
if (Number.isNaN(cents)) return "";
const dollars = cents / 100;
const formatter = new Intl.NumberFormat(locale, {
style: 'currency',
currency: 'USD',
style: "currency",
currency: "USD",
minimumFractionDigits: 2,
maximumFractionDigits: 2,
});
@@ -394,7 +397,9 @@ async function fetchHtml(
if (!res.ok) {
// Handle rate limiting
if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : Number.NaN;
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: calculateBackoffDelay(attempt, retryBaseMs);
@@ -428,14 +433,13 @@ async function fetchHtml(
// Respect per-request delay to maintain rate limiting
await delay(DELAY_MS);
return html;
} catch (err) {
// Handle different error types
if (err instanceof RateLimitError || err instanceof HttpError) {
throw err; // Re-throw known errors
}
if (err instanceof Error && err.name === 'AbortError') {
if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) {
await delay(calculateBackoffDelay(attempt, retryBaseMs));
continue;
@@ -451,7 +455,7 @@ async function fetchHtml(
throw new NetworkError(
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
url,
err instanceof Error ? err : undefined
err instanceof Error ? err : undefined,
);
}
}
@@ -463,7 +467,7 @@ async function fetchHtml(
* Calculate exponential backoff delay with jitter
*/
function calculateBackoffDelay(attempt: number, baseMs: number): number {
  // The delay doubles with every attempt: baseMs, 2*baseMs, 4*baseMs, ...
  // (`**` binds tighter than `*`, so no parentheses are needed.)
  const exponentialDelay = baseMs * 2 ** attempt;
  // Random extra of up to 10% of the delay.
  const jitter = Math.random() * 0.1 * exponentialDelay;
  // Cap at 30 seconds
  return Math.min(exponentialDelay + jitter, 30000);
}
@@ -476,16 +480,16 @@ function calculateBackoffDelay(attempt: number, baseMs: number): number {
async function fetchGraphQLData(
query: string,
variables: Record<string, unknown>,
BASE_URL = "https://www.kijiji.ca"
BASE_URL = "https://www.kijiji.ca",
): Promise<unknown> {
const endpoint = `${BASE_URL}/anvil/api`;
try {
const response = await fetch(endpoint, {
method: 'POST',
method: "POST",
headers: {
'Content-Type': 'application/json',
'apollo-require-preflight': 'true',
"Content-Type": "application/json",
"apollo-require-preflight": "true",
},
body: JSON.stringify({
query,
@@ -497,14 +501,17 @@ async function fetchGraphQLData(
throw new HttpError(
`GraphQL request failed with status ${response.status}`,
response.status,
endpoint
endpoint,
);
}
const result = await response.json();
if (result.errors) {
throw new ParseError(`GraphQL errors: ${JSON.stringify(result.errors)}`, result.errors);
throw new ParseError(
`GraphQL errors: ${JSON.stringify(result.errors)}`,
result.errors,
);
}
return result.data;
@@ -515,7 +522,7 @@ async function fetchGraphQLData(
throw new NetworkError(
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
endpoint,
err instanceof Error ? err : undefined
err instanceof Error ? err : undefined,
);
}
}
@@ -567,12 +574,25 @@ const GRAPHQL_QUERIES = {
*/
async function fetchSellerDetails(
posterId: string,
BASE_URL = "https://www.kijiji.ca"
): Promise<{ reviewCount?: number; reviewScore?: number; memberSince?: string; accountType?: string }> {
BASE_URL = "https://www.kijiji.ca",
): Promise<{
reviewCount?: number;
reviewScore?: number;
memberSince?: string;
accountType?: string;
}> {
try {
const [reviewData, profileData] = await Promise.all([
fetchGraphQLData(GRAPHQL_QUERIES.getReviewSummary, { userId: posterId }, BASE_URL),
fetchGraphQLData(GRAPHQL_QUERIES.getProfileMetrics, { profileId: posterId }, BASE_URL),
fetchGraphQLData(
GRAPHQL_QUERIES.getReviewSummary,
{ userId: posterId },
BASE_URL,
),
fetchGraphQLData(
GRAPHQL_QUERIES.getProfileMetrics,
{ profileId: posterId },
BASE_URL,
),
]);
const reviewResponse = reviewData as GraphQLReviewResponse;
@@ -586,7 +606,10 @@ async function fetchSellerDetails(
};
} catch (err) {
// Silently fail for GraphQL errors - not critical for basic functionality
console.warn(`Failed to fetch seller details for ${posterId}:`, err instanceof Error ? err.message : String(err));
console.warn(
`Failed to fetch seller details for ${posterId}:`,
err instanceof Error ? err.message : String(err),
);
return {};
}
}
@@ -694,7 +717,8 @@ function parseListing(
listingPrice: amountFormatted
? {
amountFormatted,
cents: cents !== undefined && Number.isFinite(cents) ? cents : undefined,
cents:
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
currency: price?.currency,
}
: undefined,
@@ -702,7 +726,10 @@ function parseListing(
listingStatus: status,
creationDate: activationDate,
endDate,
numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined,
numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null,
};
}
@@ -713,7 +740,7 @@ function parseListing(
async function parseDetailedListing(
htmlString: HTMLString,
BASE_URL: string,
options: ListingFetchOptions = {}
options: ListingFetchOptions = {},
): Promise<DetailedListing | null> {
const apolloState = extractApolloState(htmlString);
if (!apolloState) return null;
@@ -766,8 +793,9 @@ async function parseDetailedListing(
if (!amountFormatted || cents === undefined) return null;
// Extract images if requested
const images = options.includeImages !== false && Array.isArray(imageUrls)
? imageUrls.filter((url): url is string => typeof url === 'string')
const images =
options.includeImages !== false && Array.isArray(imageUrls)
? imageUrls.filter((url): url is string => typeof url === "string")
: [];
// Extract attributes as key-value pairs
@@ -781,26 +809,35 @@ async function parseDetailedListing(
}
// Extract seller info based on depth setting
let sellerInfo: DetailedListing['sellerInfo'];
const depth = options.sellerDataDepth ?? 'detailed';
let sellerInfo: DetailedListing["sellerInfo"];
const depth = options.sellerDataDepth ?? "detailed";
if (posterInfo?.posterId) {
sellerInfo = {
posterId: posterInfo.posterId,
rating: typeof posterInfo.rating === 'number' ? posterInfo.rating : undefined,
rating:
typeof posterInfo.rating === "number" ? posterInfo.rating : undefined,
};
// Add more detailed info if requested and client-side data is enabled
if ((depth === 'detailed' || depth === 'full') && options.includeClientSideData) {
if (
(depth === "detailed" || depth === "full") &&
options.includeClientSideData
) {
try {
const additionalData = await fetchSellerDetails(posterInfo.posterId, BASE_URL);
const additionalData = await fetchSellerDetails(
posterInfo.posterId,
BASE_URL,
);
sellerInfo = {
...sellerInfo,
...additionalData,
};
} catch (err) {
// Silently fail - GraphQL data is optional
console.warn(`Failed to fetch additional seller data for ${posterInfo.posterId}`);
console.warn(
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
);
}
}
}
@@ -818,23 +855,28 @@ async function parseDetailedListing(
listingStatus: status,
creationDate: activationDate,
endDate,
numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined,
numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null,
images,
categoryId: typeof categoryId === 'number' ? categoryId : 0,
adSource: typeof adSource === 'string' ? adSource : 'UNKNOWN',
categoryId: typeof categoryId === "number" ? categoryId : 0,
adSource: typeof adSource === "string" ? adSource : "UNKNOWN",
flags: {
topAd: flags?.topAd === true,
priceDrop: flags?.priceDrop === true,
},
attributes: attributeMap,
location: {
id: typeof location?.id === 'number' ? location.id : 0,
name: typeof location?.name === 'string' ? location.name : 'Unknown',
coordinates: location?.coordinates ? {
id: typeof location?.id === "number" ? location.id : 0,
name: typeof location?.name === "string" ? location.name : "Unknown",
coordinates: location?.coordinates
? {
latitude: location.coordinates.latitude,
longitude: location.coordinates.longitude,
} : undefined,
}
: undefined,
},
sellerInfo,
};
@@ -856,8 +898,8 @@ export default async function fetchKijijiItems(
location: searchOptions.location ?? 1700272, // Default to GTA
category: searchOptions.category ?? 0, // Default to all categories
keywords: searchOptions.keywords ?? SEARCH_QUERY,
sortBy: searchOptions.sortBy ?? 'relevancy',
sortOrder: searchOptions.sortOrder ?? 'desc',
sortBy: searchOptions.sortBy ?? "relevancy",
sortOrder: searchOptions.sortOrder ?? "desc",
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin,
priceMax: searchOptions.priceMax,
@@ -865,7 +907,7 @@ export default async function fetchKijijiItems(
const finalListingOptions: Required<ListingFetchOptions> = {
includeImages: listingOptions.includeImages ?? true,
sellerDataDepth: listingOptions.sellerDataDepth ?? 'detailed',
sellerDataDepth: listingOptions.sellerDataDepth ?? "detailed",
includeClientSideData: listingOptions.includeClientSideData ?? false,
};
@@ -874,24 +916,32 @@ export default async function fetchKijijiItems(
// Fetch multiple pages
for (let page = 1; page <= finalSearchOptions.maxPages; page++) {
const searchUrl = buildSearchUrl(finalSearchOptions.keywords, {
const searchUrl = buildSearchUrl(
finalSearchOptions.keywords,
{
...finalSearchOptions,
// Add page parameter for pagination
...(page > 1 && { page }),
}, BASE_URL);
},
BASE_URL,
);
console.log(`Fetching search page ${page}: ${searchUrl}`);
const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`);
console.log(
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
});
const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) {
console.log(`No more results found on page ${page}. Stopping pagination.`);
console.log(
`No more results found on page ${page}. Stopping pagination.`,
);
break;
}
@@ -904,7 +954,9 @@ export default async function fetchKijijiItems(
seenUrls.add(link);
}
console.log(`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`);
console.log(
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
);
// Fetch details for this page's listings
const progressBar = new cliProgress.SingleBar(
@@ -920,19 +972,29 @@ export default async function fetchKijijiItems(
const html = await fetchHtml(link, DELAY_MS, {
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`);
console.log(
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
});
const parsed = await parseDetailedListing(html, BASE_URL, finalListingOptions);
const parsed = await parseDetailedListing(
html,
BASE_URL,
finalListingOptions,
);
if (parsed) {
allListings.push(parsed);
}
} catch (err) {
if (err instanceof HttpError) {
console.error(`\nFailed to fetch ${link}\n - ${err.status} ${err.message}`);
console.error(
`\nFailed to fetch ${link}\n - ${err.status} ${err.message}`,
);
} else {
console.error(`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`);
console.error(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
);
}
} finally {
currentProgress++;

View File

@@ -1,14 +1,14 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test";
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
fetchFacebookItem,
extractFacebookItemData,
extractFacebookMarketplaceData,
parseFacebookItem,
parseFacebookAds,
fetchFacebookItem,
formatCentsToCurrency,
loadFacebookCookies,
formatCookiesForHeader,
loadFacebookCookies,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
} from "../src/facebook";
// Mock fetch globally
@@ -28,62 +28,62 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
describe("Cookie Parsing", () => {
  describe("parseFacebookCookieString", () => {
    // Each "name=value" pair becomes a cookie object with fixed Facebook defaults.
    test("should parse valid cookie string", () => {
      const cookieString = "c_user=123456789; xs=abcdef123456; fr=xyz789";
      const result = parseFacebookCookieString(cookieString);

      expect(result).toHaveLength(3);
      expect(result[0]).toEqual({
        name: "c_user",
        value: "123456789",
        domain: ".facebook.com",
        path: "/",
        secure: true,
        httpOnly: false,
        sameSite: "lax",
        expirationDate: undefined,
      });
      expect(result[1]).toEqual({
        name: "xs",
        value: "abcdef123456",
        domain: ".facebook.com",
        path: "/",
        secure: true,
        httpOnly: false,
        sameSite: "lax",
        expirationDate: undefined,
      });
    });

    // Percent-encoded values are expected to come back decoded.
    test("should handle URL-encoded values", () => {
      const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
      const result = parseFacebookCookieString(cookieString);

      expect(result[0].value).toBe("123+456");
      expect(result[1].value).toBe("abc=def");
    });

    // Entries without "=" or without a name are dropped.
    test("should filter out malformed cookies", () => {
      const cookieString = "c_user=123; invalid; xs=abc; =empty";
      const result = parseFacebookCookieString(cookieString);

      expect(result).toHaveLength(2);
      expect(result.map((c) => c.name)).toEqual(["c_user", "xs"]);
    });

    test("should handle empty input", () => {
      expect(parseFacebookCookieString("")).toEqual([]);
      expect(parseFacebookCookieString(" ")).toEqual([]);
    });

    // Whitespace around names, values and separators is trimmed.
    test("should handle extra whitespace", () => {
      const cookieString = " c_user = 123 ; xs=abc ";
      const result = parseFacebookCookieString(cookieString);

      expect(result).toHaveLength(2);
      expect(result[0].name).toBe("c_user");
      expect(result[0].value).toBe("123");
      expect(result[1].name).toBe("xs");
      expect(result[1].value).toBe("abc");
    });
  });
});
@@ -92,7 +92,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
describe("fetchFacebookItem", () => {
const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com" },
{ name: "xs", value: "abc123", domain: ".facebook.com" }
{ name: "xs", value: "abc123", domain: ".facebook.com" },
]);
test("should handle authentication errors", async () => {
@@ -102,9 +102,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
status: 401,
text: () => Promise.resolve("Authentication required"),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const result = await fetchFacebookItem("123", mockCookies);
@@ -118,9 +118,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
status: 404,
text: () => Promise.resolve("Not found"),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const result = await fetchFacebookItem("nonexistent", mockCookies);
@@ -139,14 +139,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: (header: string) => {
if (header === "X-RateLimit-Reset") return "1";
return null;
}
},
text: () => Promise.resolve("Rate limited")
},
text: () => Promise.resolve("Rate limited"),
});
}
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -156,22 +160,26 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
id: "123",
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Test Item",
is_live: true
}
}
}
}
}
}
}]
]
is_live: true,
},
},
},
},
},
},
},
],
],
};
return Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: {
get: () => null
}
get: () => null,
},
});
});
@@ -183,7 +191,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should handle sold items", async () => {
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -194,25 +206,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Sold Item",
is_sold: true,
is_live: false
}
}
}
}
}
}
}]
]
is_live: false,
},
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const result = await fetchFacebookItem("456", mockCookies);
@@ -221,18 +237,22 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should handle missing authentication cookies", async () => {
// Use a test-specific cookie file that doesn't exist
const testCookiePath = './cookies/facebook-test.json';
const testCookiePath = "./cookies/facebook-test.json";
// Test with no cookies available (test file doesn't exist)
await expect(fetchFacebookItem("123", undefined, testCookiePath)).rejects.toThrow(
"No valid Facebook cookies found"
);
await expect(
fetchFacebookItem("123", undefined, testCookiePath),
).rejects.toThrow("No valid Facebook cookies found");
});
test("should handle successful item extraction", async () => {
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -243,27 +263,34 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Working Item",
formatted_price: { text: "$299.00" },
listing_price: { amount: "299.00", currency: "CAD" },
listing_price: {
amount: "299.00",
currency: "CAD",
},
is_live: true,
creation_time: 1640995200
}
}
}
}
}
}
}]
]
creation_time: 1640995200,
},
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const result = await fetchFacebookItem("789", mockCookies);
@@ -280,9 +307,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
status: 500,
text: () => Promise.resolve("Internal Server Error"),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const result = await fetchFacebookItem("error", mockCookies);
@@ -300,24 +327,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
marketplace_listing_title: "Test Item",
formatted_price: { text: "$100.00" },
listing_price: { amount: "100.00", currency: "CAD" },
is_live: true
is_live: true,
};
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
viewer: {
marketplace_product_details_page: {
target: mockItemData
}
}
}
}
}
}]
]
target: mockItemData,
},
},
},
},
},
},
],
],
};
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -330,18 +362,23 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should handle missing item data", () => {
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
viewer: {
marketplace_product_details_page: {}
}
}
}
}
}]
]
marketplace_product_details_page: {},
},
},
},
},
},
],
],
};
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -350,12 +387,15 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("should handle malformed HTML", () => {
const result = extractFacebookItemData("<html><body>Invalid HTML</body></html>");
const result = extractFacebookItemData(
"<html><body>Invalid HTML</body></html>",
);
expect(result).toBeNull();
});
test("should handle invalid JSON in script tags", () => {
const html = '<html><body><script>{invalid: json}</script></body></html>';
const html =
"<html><body><script>{invalid: json}</script></body></html>";
const result = extractFacebookItemData(html);
expect(result).toBeNull();
});
@@ -371,24 +411,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
vehicle_model_display_name: "Civic",
vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 },
vehicle_transmission_type: "AUTOMATIC",
is_live: true
is_live: true,
};
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
viewer: {
marketplace_product_details_page: {
target: mockVehicleItem
}
}
}
}
}
}]
]
target: mockVehicleItem,
},
},
},
},
},
},
],
],
};
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -409,58 +454,70 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "Item 1",
listing_price: { amount: "10.00", currency: "CAD" }
}
}
listing_price: { amount: "10.00", currency: "CAD" },
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Item 2",
listing_price: { amount: "20.00", currency: "CAD" }
}
}
}
]
}
listing_price: { amount: "20.00", currency: "CAD" },
},
},
},
],
},
};
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
marketplace_search: mockMarketplaceData
}
}
}
}]
]
marketplace_search: mockMarketplaceData,
},
},
},
},
],
],
};
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
const result = extractFacebookMarketplaceData(html);
expect(result).not.toBeNull();
expect(result).toHaveLength(2);
expect(result?.[0].node.listing.marketplace_listing_title).toBe("Item 1");
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
"Item 1",
);
});
test("should handle empty search results", () => {
const mockData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
marketplace_search: {
feed_units: { edges: [] }
}
}
}
}
}]
]
feed_units: { edges: [] },
},
},
},
},
},
],
],
};
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
@@ -485,9 +542,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
creation_time: 1640995200,
marketplace_listing_seller: {
id: "seller1",
name: "John Doe"
name: "John Doe",
},
delivery_types: ["IN_PERSON"]
delivery_types: ["IN_PERSON"],
};
const result = parseFacebookItem(item);
@@ -510,7 +567,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
marketplace_listing_title: "Free Sofa",
formatted_price: { text: "FREE" },
listing_price: { amount: "0.00", currency: "CAD" },
is_live: true
is_live: true,
};
const result = parseFacebookItem(item);
@@ -524,7 +581,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const item = {
id: "456",
__typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Minimal Item"
marketplace_listing_title: "Minimal Item",
};
const result = parseFacebookItem(item);
@@ -543,7 +600,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing_price: { amount: "8000.00", currency: "CAD" },
vehicle_make_display_name: "Mazda",
vehicle_model_display_name: "3",
is_live: true
is_live: true,
};
const result = parseFacebookItem(vehicleItem);
@@ -556,7 +613,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Sold Item",
is_sold: true,
is_live: false
is_live: false,
};
const pendingItem = {
@@ -564,7 +621,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Pending Item",
is_pending: true,
is_live: true
is_live: true,
};
const hiddenItem = {
@@ -572,7 +629,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
__typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Hidden Item",
is_hidden: true,
is_live: false
is_live: false,
};
expect(parseFacebookItem(soldItem)?.listingStatus).toBe("SOLD");
@@ -584,7 +641,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const invalidItem = {
id: "invalid",
__typename: "GroupCommerceProductItem" as const,
is_live: true
is_live: true,
};
const result = parseFacebookItem(invalidItem);
@@ -600,25 +657,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "Ad 1",
listing_price: { amount: "50.00", formatted_amount: "$50.00", currency: "CAD" },
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } },
listing_price: {
amount: "50.00",
formatted_amount: "$50.00",
currency: "CAD",
},
location: {
reverse_geocode: { city_page: { display_name: "Toronto" } },
},
creation_time: 1640995200,
is_live: true
}
}
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Ad 2",
listing_price: { amount: "75.00", formatted_amount: "$75.00", currency: "CAD" },
location: { reverse_geocode: { city_page: { display_name: "Ottawa" } } },
listing_price: {
amount: "75.00",
formatted_amount: "$75.00",
currency: "CAD",
},
location: {
reverse_geocode: { city_page: { display_name: "Ottawa" } },
},
creation_time: 1640995300,
is_live: true
}
}
}
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
@@ -637,20 +706,24 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "With Price",
listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" },
is_live: true
}
}
listing_price: {
amount: "100.00",
formatted_amount: "$100.00",
currency: "CAD",
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "No Price",
is_live: true
}
}
}
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
@@ -665,16 +738,20 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "Valid Ad",
listing_price: { amount: "50.00", formatted_amount: "$50.00", currency: "CAD" },
is_live: true
}
}
listing_price: {
amount: "50.00",
formatted_amount: "$50.00",
currency: "CAD",
},
is_live: true,
},
},
},
{
node: {
// Missing listing
}
} as { node: { listing?: unknown } }
},
} as { node: { listing?: unknown } },
];
const results = parseFacebookAds(ads);
@@ -717,7 +794,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const mockCookies = [
{ name: "c_user", value: "123456", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" },
{ name: "session_id", value: "xyz", domain: "other.com", path: "/" }
{ name: "session_id", value: "xyz", domain: "other.com", path: "/" },
];
test("should format cookies for header string", () => {
@@ -728,9 +805,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
test("should filter expired cookies", () => {
const cookiesWithExpiration = [
...mockCookies,
{ name: "expired", value: "old", domain: ".facebook.com", path: "/", expirationDate: Date.now() / 1000 - 1000 }
{
name: "expired",
value: "old",
domain: ".facebook.com",
path: "/",
expirationDate: Date.now() / 1000 - 1000,
},
];
const result = formatCookiesForHeader(cookiesWithExpiration, "www.facebook.com");
const result = formatCookiesForHeader(
cookiesWithExpiration,
"www.facebook.com",
);
expect(result).not.toContain("expired");
});

View File

@@ -1,4 +1,4 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test";
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/facebook";
// Mock fetch globally
@@ -18,13 +18,17 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Main Search Function", () => {
const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]);
test("should successfully fetch search results", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -36,46 +40,72 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "iPhone 13 Pro",
listing_price: { amount: "800.00", formatted_amount: "$800.00", currency: "CAD" },
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } },
listing_price: {
amount: "800.00",
formatted_amount: "$800.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
},
},
creation_time: 1640995200,
is_live: true
}
}
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Samsung Galaxy",
listing_price: { amount: "600.00", formatted_amount: "$600.00", currency: "CAD" },
location: { reverse_geocode: { city_page: { display_name: "Mississauga" } } },
listing_price: {
amount: "600.00",
formatted_amount: "$600.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Mississauga" },
},
},
creation_time: 1640995300,
is_live: true
}
}
}
]
}
}
}
}
}
}]
]
is_live: true,
},
},
},
],
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("iPhone", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"iPhone",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(2);
expect(results[0].title).toBe("iPhone 13 Pro");
expect(results[1].title).toBe("Samsung Galaxy");
@@ -84,7 +114,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
test("should filter out items without price", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -96,41 +130,55 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "With Price",
listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" },
is_live: true
}
}
listing_price: {
amount: "100.00",
formatted_amount: "$100.00",
currency: "CAD",
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "No Price",
is_live: true
}
}
}
]
}
}
}
}
}
}]
]
is_live: true,
},
},
},
],
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("With Price");
});
@@ -138,7 +186,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
test("should respect MAX_ITEMS parameter", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -149,64 +201,92 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: String(i),
marketplace_listing_title: `Item ${i}`,
listing_price: { amount: `${(i + 1) * 10}.00`, formatted_amount: `$${(i + 1) * 10}.00`, currency: "CAD" },
is_live: true
}
}
}))
}
}
}
}
}
}]
]
listing_price: {
amount: `${(i + 1) * 10}.00`,
formatted_amount: `$${(i + 1) * 10}.00`,
currency: "CAD",
},
is_live: true,
},
},
})),
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("test", 1, "toronto", 5, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
5,
mockCookies,
);
expect(results).toHaveLength(5);
});
test("should return empty array for no results", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
marketplace_search: {
feed_units: {
edges: []
}
}
}
}
}
}]
]
edges: [],
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("nonexistent query", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"nonexistent query",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]);
});
@@ -217,19 +297,27 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
status: 401,
text: () => Promise.resolve("Unauthorized"),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]);
});
test("should handle network errors", async () => {
global.fetch = mock(() => Promise.reject(new Error("Network error")));
await expect(fetchFacebookItems("test", 1, "toronto", 25, mockCookies)).rejects.toThrow("Network error");
await expect(
fetchFacebookItems("test", 1, "toronto", 25, mockCookies),
).rejects.toThrow("Network error");
});
test("should handle rate limiting with retry", async () => {
@@ -244,14 +332,18 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: (header: string) => {
if (header === "X-RateLimit-Reset") return "1";
return null;
}
},
text: () => Promise.resolve("Rate limited")
},
text: () => Promise.resolve("Rate limited"),
});
}
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -263,30 +355,44 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "Item 1",
listing_price: { amount: "100.00", formatted_amount: "$100.00", currency: "CAD" },
is_live: true
}
}
}
]
}
}
}
}
}
}]
]
listing_price: {
amount: "100.00",
formatted_amount: "$100.00",
currency: "CAD",
},
is_live: true,
},
},
},
],
},
},
},
},
},
},
],
],
};
return Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
get: () => null,
},
});
});
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(attempts).toBe(2);
expect(results).toHaveLength(1);
});
@@ -295,13 +401,17 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Vehicle Listing Integration", () => {
const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]);
test("should correctly identify and parse vehicle listings", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -313,42 +423,60 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "2006 Honda Civic",
listing_price: { amount: "8000.00", formatted_amount: "$8,000.00", currency: "CAD" },
is_live: true
}
}
listing_price: {
amount: "8000.00",
formatted_amount: "$8,000.00",
currency: "CAD",
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "iPhone 13",
listing_price: { amount: "800.00", formatted_amount: "$800.00", currency: "CAD" },
is_live: true
}
}
}
]
}
}
}
}
}
}]
]
listing_price: {
amount: "800.00",
formatted_amount: "$800.00",
currency: "CAD",
},
is_live: true,
},
},
},
],
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("cars", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"cars",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(2);
// Both should be classified as "item" type in search results (vehicle detection is for item details)
expect(results[0].title).toBe("2006 Honda Civic");
@@ -359,13 +487,17 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Different Categories", () => {
const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]);
test("should handle electronics listings", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -377,35 +509,54 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "Nintendo Switch",
listing_price: { amount: "250.00", formatted_amount: "$250.00", currency: "CAD" },
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } },
marketplace_listing_category_id: "479353692612078",
listing_price: {
amount: "250.00",
formatted_amount: "$250.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
},
},
marketplace_listing_category_id:
"479353692612078",
condition: "USED",
is_live: true
}
}
}
]
}
}
}
}
}
}]
]
is_live: true,
},
},
},
],
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("nintendo switch", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"nintendo switch",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Nintendo Switch");
expect(results[0].categoryId).toBe("479353692612078");
@@ -414,7 +565,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
test("should handle home goods/furniture listings", async () => {
const mockSearchData = {
require: [
[null, null, null, {
[
null,
null,
null,
{
__bbox: {
result: {
data: {
@@ -426,35 +581,54 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
listing: {
id: "1",
marketplace_listing_title: "Dining Table",
listing_price: { amount: "150.00", formatted_amount: "$150.00", currency: "CAD" },
location: { reverse_geocode: { city_page: { display_name: "Mississauga" } } },
marketplace_listing_category_id: "1569171756675761",
listing_price: {
amount: "150.00",
formatted_amount: "$150.00",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Mississauga" },
},
},
marketplace_listing_category_id:
"1569171756675761",
condition: "USED",
is_live: true
}
}
}
]
}
}
}
}
}
}]
]
is_live: true,
},
},
},
],
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`),
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("table", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"table",
1,
"toronto",
25,
mockCookies,
);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Dining Table");
expect(results[0].categoryId).toBe("1569171756675761");
@@ -464,21 +638,30 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Error Scenarios", () => {
const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" }
{ name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
]);
test("should handle malformed HTML responses", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve("<html><body>Invalid HTML without JSON data</body></html>"),
text: () =>
Promise.resolve(
"<html><body>Invalid HTML without JSON data</body></html>",
),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]);
});
@@ -489,12 +672,18 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
status: 404,
text: () => Promise.resolve("Not found"),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]);
});
@@ -505,12 +694,18 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
status: 500,
text: () => Promise.resolve("Internal Server Error"),
headers: {
get: () => null
}
})
get: () => null,
},
}),
);
const results = await fetchFacebookItems("test", 1, "toronto", 25, mockCookies);
const results = await fetchFacebookItems(
"test",
1,
"toronto",
25,
mockCookies,
);
expect(results).toEqual([]);
});
});

View File

@@ -1,13 +1,13 @@
import { describe, test, expect } from "bun:test";
import { describe, expect, test } from "bun:test";
import {
resolveLocationId,
resolveCategoryId,
buildSearchUrl,
HttpError,
NetworkError,
ParseError,
RateLimitError,
ValidationError
ValidationError,
buildSearchUrl,
resolveCategoryId,
resolveLocationId,
} from "../src/kijiji";
describe("Location and Category Resolution", () => {
@@ -74,8 +74,8 @@ describe("URL Construction", () => {
const url = buildSearchUrl("iphone", {
location: 1700272,
category: 132,
sortBy: 'relevancy',
sortOrder: 'desc',
sortBy: "relevancy",
sortOrder: "desc",
});
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
@@ -95,15 +95,15 @@ describe("URL Construction", () => {
test("should handle different sort options", () => {
const dateUrl = buildSearchUrl("iphone", {
sortBy: 'date',
sortOrder: 'asc',
sortBy: "date",
sortOrder: "asc",
});
expect(dateUrl).toContain("sort=DATE");
expect(dateUrl).toContain("order=ASC");
const priceUrl = buildSearchUrl("iphone", {
sortBy: 'price',
sortOrder: 'desc',
sortBy: "price",
sortOrder: "desc",
});
expect(priceUrl).toContain("sort=PRICE");
expect(priceUrl).toContain("order=DESC");
@@ -131,7 +131,11 @@ describe("Error Classes", () => {
test("NetworkError should store URL and cause", () => {
const cause = new Error("Connection failed");
const error = new NetworkError("Network error", "https://example.com", cause);
const error = new NetworkError(
"Network error",
"https://example.com",
cause,
);
expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com");
expect(error.cause).toBe(cause);

View File

@@ -1,5 +1,9 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test";
import { extractApolloState, parseSearch, parseDetailedListing } from "../src/kijiji";
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
extractApolloState,
parseDetailedListing,
parseSearch,
} from "../src/kijiji";
// Mock fetch globally
const originalFetch = global.fetch;
@@ -18,29 +22,31 @@ describe("HTML Parsing Integration", () => {
describe("extractApolloState", () => {
test("should extract Apollo state from valid HTML", () => {
const mockHtml = '<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';
const mockHtml =
'<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';
const result = extractApolloState(mockHtml);
expect(result).toEqual({
ROOT_QUERY: { test: "value" }
ROOT_QUERY: { test: "value" },
});
});
test("should return null for HTML without Apollo state", () => {
const mockHtml = '<html><body>No data here</body></html>';
const mockHtml = "<html><body>No data here</body></html>";
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
test("should return null for malformed JSON", () => {
const mockHtml = '<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';
const mockHtml =
'<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
test("should handle missing __NEXT_DATA__ element", () => {
const mockHtml = '<html><body><div>Content</div></body></html>';
const mockHtml = "<html><body><div>Content</div></body></html>";
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
@@ -63,10 +69,10 @@ describe("HTML Parsing Integration", () => {
url: "/v-samsung/k0l0",
title: "Samsung Galaxy",
},
"ROOT_QUERY": { test: "value" }
}
}
}
ROOT_QUERY: { test: "value" },
},
},
},
})}
</script>
</html>
@@ -76,11 +82,11 @@ describe("HTML Parsing Integration", () => {
expect(results).toHaveLength(2);
expect(results[0]).toEqual({
name: "iPhone 13 Pro",
listingLink: "https://www.kijiji.ca/v-iphone/k0l0"
listingLink: "https://www.kijiji.ca/v-iphone/k0l0",
});
expect(results[1]).toEqual({
name: "Samsung Galaxy",
listingLink: "https://www.kijiji.ca/v-samsung/k0l0"
listingLink: "https://www.kijiji.ca/v-samsung/k0l0",
});
});
@@ -95,17 +101,19 @@ describe("HTML Parsing Integration", () => {
"Listing:123": {
url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13 Pro",
}
}
}
}
},
},
},
},
})}
</script>
</html>
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results[0].listingLink).toBe("https://www.kijiji.ca/v-iphone/k0l0");
expect(results[0].listingLink).toBe(
"https://www.kijiji.ca/v-iphone/k0l0",
);
});
test("should filter out invalid listings", () => {
@@ -127,10 +135,10 @@ describe("HTML Parsing Integration", () => {
"Other:789": {
url: "/v-other/k0l0",
title: "Other Item",
}
}
}
}
},
},
},
},
})}
</script>
</html>
@@ -142,7 +150,10 @@ describe("HTML Parsing Integration", () => {
});
test("should return empty array for invalid HTML", () => {
const results = parseSearch("<html><body>Invalid</body></html>", "https://www.kijiji.ca");
const results = parseSearch(
"<html><body>Invalid</body></html>",
"https://www.kijiji.ca",
);
expect(results).toEqual([]);
});
});
@@ -163,7 +174,7 @@ describe("HTML Parsing Integration", () => {
price: {
amount: 80000,
currency: "CAD",
type: "FIXED"
type: "FIXED",
},
type: "OFFER",
status: "ACTIVE",
@@ -176,38 +187,47 @@ describe("HTML Parsing Integration", () => {
name: "Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832
}
longitude: -79.3832,
},
},
imageUrls: [
"https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg"
"https://media.kijiji.ca/api/v1/image2.jpg",
],
imageCount: 2,
categoryId: 132,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: true
priceDrop: true,
},
posterInfo: {
posterId: "user123",
rating: 4.8
rating: 4.8,
},
attributes: [
{ canonicalName: "forsaleby", canonicalValues: ["ownr"] },
{ canonicalName: "phonecarrier", canonicalValues: ["unlocked"] }
]
}
}
}
}
{
canonicalName: "forsaleby",
canonicalValues: ["ownr"],
},
{
canonicalName: "phonecarrier",
canonicalValues: ["unlocked"],
},
],
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0",
title: "iPhone 13 Pro 256GB",
@@ -215,7 +235,7 @@ describe("HTML Parsing Integration", () => {
listingPrice: {
amountFormatted: "$800.00",
cents: 80000,
currency: "CAD"
currency: "CAD",
},
listingType: "OFFER",
listingStatus: "ACTIVE",
@@ -225,30 +245,30 @@ describe("HTML Parsing Integration", () => {
address: "Toronto, ON",
images: [
"https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg"
"https://media.kijiji.ca/api/v1/image2.jpg",
],
categoryId: 132,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: true
priceDrop: true,
},
attributes: {
forsaleby: ["ownr"],
phonecarrier: ["unlocked"]
phonecarrier: ["unlocked"],
},
location: {
id: 1700273,
name: "Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832
}
longitude: -79.3832,
},
},
sellerInfo: {
posterId: "user123",
rating: 4.8
}
rating: 4.8,
},
});
});
@@ -265,18 +285,21 @@ describe("HTML Parsing Integration", () => {
title: "iPhone for Sale",
price: {
type: "CONTACT",
amount: null
}
}
}
}
}
amount: null,
},
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toBeNull();
});
@@ -291,17 +314,20 @@ describe("HTML Parsing Integration", () => {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13",
price: { amount: 50000 }
}
}
}
}
price: { amount: 50000 },
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13",
@@ -309,7 +335,7 @@ describe("HTML Parsing Integration", () => {
listingPrice: {
amountFormatted: "$500.00",
cents: 50000,
currency: undefined
currency: undefined,
},
listingType: undefined,
listingStatus: undefined,
@@ -322,15 +348,15 @@ describe("HTML Parsing Integration", () => {
adSource: "UNKNOWN",
flags: {
topAd: false,
priceDrop: false
priceDrop: false,
},
attributes: {},
location: {
id: 0,
name: "Unknown",
coordinates: undefined
coordinates: undefined,
},
sellerInfo: undefined
sellerInfo: undefined,
});
});
});

View File

@@ -1,5 +1,5 @@
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
import { slugify, formatCentsToCurrency } from "../src/kijiji";
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { formatCentsToCurrency, slugify } from "../src/kijiji";
describe("Utility Functions", () => {
describe("slugify", () => {

View File

@@ -5,8 +5,10 @@ import { expect } from "bun:test";
// This file is loaded before any tests run due to bunfig.toml preload
// Mock fetch globally for tests
global.fetch = global.fetch || (() => {
throw new Error('fetch is not available in test environment');
});
global.fetch =
global.fetch ||
(() => {
throw new Error("fetch is not available in test environment");
});
// Add any global test utilities here

View File

@@ -7,25 +7,21 @@
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
// Bundler mode
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"noEmit": true,
// Best practices
"strict": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitAny": true,
// Some stricter flags (disabled by default)
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false,
"paths": {
"@/*": ["./src/*"]
}