Compare commits

..

7 Commits

20 changed files with 2352 additions and 2342 deletions

View File

@@ -6,7 +6,9 @@
},
"private": true,
"type": "module",
"workspaces": ["packages/*"],
"workspaces": [
"packages/*"
],
"devDependencies": {
"@biomejs/biome": "2.3.11"
}

View File

@@ -1,45 +1,42 @@
// Export all scrapers
export type { EbayListingDetails } from "./scrapers/ebay";
export { default as fetchEbayItems } from "./scrapers/ebay";
export type { FacebookListingDetails } from "./scrapers/facebook";
export {
default as fetchFacebookItems,
ensureFacebookCookies,
extractFacebookItemData,
extractFacebookMarketplaceData,
fetchFacebookItem,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
} from "./scrapers/facebook";
export type {
DetailedListing,
KijijiListingDetails,
ListingFetchOptions,
SearchOptions,
} from "./scrapers/kijiji";
export {
default as fetchKijijiItems,
slugify,
resolveLocationId,
resolveCategoryId,
buildSearchUrl,
default as fetchKijijiItems,
extractApolloState,
parseSearch,
parseDetailedListing,
HttpError,
NetworkError,
ParseError,
parseDetailedListing,
parseSearch,
RateLimitError,
resolveCategoryId,
resolveLocationId,
slugify,
ValidationError,
} from "./scrapers/kijiji";
export type {
KijijiListingDetails,
DetailedListing,
SearchOptions,
ListingFetchOptions,
} from "./scrapers/kijiji";
export {
default as fetchFacebookItems,
fetchFacebookItem,
parseFacebookCookieString,
ensureFacebookCookies,
extractFacebookMarketplaceData,
extractFacebookItemData,
parseFacebookAds,
parseFacebookItem,
} from "./scrapers/facebook";
export type { FacebookListingDetails } from "./scrapers/facebook";
export { default as fetchEbayItems } from "./scrapers/ebay";
export type { EbayListingDetails } from "./scrapers/ebay";
// Export shared utilities
export * from "./utils/http";
export * from "./utils/delay";
export * from "./utils/format";
// Export shared types
export * from "./types/common";
export * from "./utils/delay";
export * from "./utils/format";
// Export shared utilities
export * from "./utils/http";

View File

@@ -1,9 +1,4 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";
// ----------------------------- Types -----------------------------
@@ -43,7 +38,7 @@ function parseEbayPrice(
const amountStr = numberMatches[0].replace(/,/g, "");
const dollars = parseFloat(amountStr);
if (isNaN(dollars)) return null;
if (Number.isNaN(dollars)) return null;
const cents = Math.round(dollars * 100);
@@ -185,8 +180,7 @@ function parseEbayListings(
const text = el.textContent?.trim();
// Must contain $, be reasonably short (a price shouldn't be a paragraph), and not contain product words
if (
text &&
text.includes("$") &&
text?.includes("$") &&
text.length < 100 &&
!text.includes("laptop") &&
!text.includes("computer") &&

View File

@@ -1,10 +1,11 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
import { isRecord } from "../utils/http";
import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import type { HTMLString } from "../types/common";
import { isRecord } from "../utils/http";
/**
* Facebook Marketplace Scraper
@@ -209,7 +210,7 @@ export interface FacebookListingDetails {
*/
async function loadFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json"
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load from provided string parameter
if (cookiesSource) {
@@ -220,7 +221,7 @@ async function loadFacebookCookies(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string"
typeof cookie.value === "string",
);
}
} catch (e) {
@@ -240,7 +241,7 @@ async function loadFacebookCookies(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string"
typeof cookie.value === "string",
);
}
}
@@ -291,7 +292,7 @@ export function parseFacebookCookieString(cookieString: string): Cookie[] {
* Ensure Facebook cookies are available, parsing from env var if needed
*/
export async function ensureFacebookCookies(
cookiePath = "./cookies/facebook.json"
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load existing cookies
try {
@@ -309,7 +310,7 @@ export async function ensureFacebookCookies(
throw new Error(
"No valid Facebook cookies found. Either:\n" +
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
" 2. Create ./cookies/facebook.json manually with cookie array"
" 2. Create ./cookies/facebook.json manually with cookie array",
);
}
@@ -318,7 +319,7 @@ export async function ensureFacebookCookies(
if (cookies.length === 0) {
throw new Error(
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
'Expected format: "name1=value1; name2=value2;"'
'Expected format: "name1=value1; name2=value2;"',
);
}
@@ -368,7 +369,7 @@ class HttpError extends Error {
constructor(
message: string,
public readonly status: number,
public readonly url: string
public readonly url: string,
) {
super(message);
this.name = "HttpError";
@@ -407,7 +408,7 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
!extractionStats.lastApiChangeDetected
) {
console.warn(
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes."
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
);
extractionStats.lastApiChangeDetected = new Date();
}
@@ -433,7 +434,7 @@ async function fetchHtml(
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
cookies?: string;
}
},
): Promise<HTMLString> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
@@ -487,7 +488,7 @@ async function fetchHtml(
throw new HttpError(
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
res.status,
url
url,
);
}
// Retry on 5xx
@@ -498,7 +499,7 @@ async function fetchHtml(
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url
url,
);
}
@@ -521,7 +522,7 @@ async function fetchHtml(
Extract marketplace search data from Facebook page script tags
*/
export function extractFacebookMarketplaceData(
htmlString: HTMLString
htmlString: HTMLString,
): FacebookAdNode[] | null {
const { document } = parseHTML(htmlString);
const scripts = document.querySelectorAll("script");
@@ -567,13 +568,12 @@ export function extractFacebookMarketplaceData(
if (
result &&
isRecord(result) &&
(result as any).feed_units?.edges?.length > 0
(result as Record<string, unknown>).feed_units?.edges?.length > 0
) {
marketplaceData = result as FacebookMarketplaceSearch;
break;
}
} catch {
}
} catch {}
}
if (marketplaceData) break;
@@ -583,13 +583,13 @@ export function extractFacebookMarketplaceData(
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
const searchData =
parsed.marketplace_search as FacebookMarketplaceSearch;
if (searchData.feed_units?.edges?.length ?? 0 > 0) {
const feedLength = searchData.feed_units?.edges?.length ?? 0;
if (feedLength > 0) {
marketplaceData = searchData;
break;
}
}
} catch {
}
} catch {}
}
if (!marketplaceData?.feed_units?.edges?.length) {
@@ -598,7 +598,7 @@ export function extractFacebookMarketplaceData(
}
console.log(
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`,
);
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
}
@@ -608,7 +608,7 @@ export function extractFacebookMarketplaceData(
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
*/
export function extractFacebookItemData(
htmlString: HTMLString
htmlString: HTMLString,
): FacebookMarketplaceItem | null {
const { document } = parseHTML(htmlString);
const scripts = document.querySelectorAll("script");
@@ -657,7 +657,7 @@ export function extractFacebookItemData(
targetData.__typename === "GroupCommerceProductItem"
) {
console.log(
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
);
return targetData as FacebookMarketplaceItem;
}
@@ -671,18 +671,19 @@ export function extractFacebookItemData(
const findMarketplaceData = (
obj: unknown,
depth = 0,
maxDepth = 10
maxDepth = 10,
): FacebookMarketplaceItem | null => {
if (depth > maxDepth) return null; // Prevent infinite recursion
if (isRecord(obj)) {
// Check if this object matches the expected marketplace item structure
const candidate = obj as Record<string, unknown>;
if (
(obj as any).marketplace_listing_title &&
(obj as any).id &&
(obj as any).__typename === "GroupCommerceProductItem" &&
(obj as any).redacted_description
candidate.marketplace_listing_title &&
candidate.id &&
candidate.__typename === "GroupCommerceProductItem" &&
candidate.redacted_description
) {
return obj as unknown as FacebookMarketplaceItem;
return candidate as unknown as FacebookMarketplaceItem;
}
// Recursively search nested objects and arrays
for (const key in obj) {
@@ -706,7 +707,7 @@ export function extractFacebookItemData(
const recursiveResult = findMarketplaceData(parsed.require);
if (recursiveResult) {
console.log(
"Successfully extracted Facebook item data using recursive search"
"Successfully extracted Facebook item data using recursive search",
);
return recursiveResult;
}
@@ -727,14 +728,13 @@ export function extractFacebookItemData(
bboxData.__typename === "GroupCommerceProductItem"
) {
console.log(
"Successfully extracted Facebook item data from __bbox structure"
"Successfully extracted Facebook item data from __bbox structure",
);
return bboxData as FacebookMarketplaceItem;
}
}
}
} catch {
}
} catch {}
}
return null;
@@ -743,7 +743,9 @@ export function extractFacebookItemData(
/**
Parse Facebook marketplace search results into ListingDetails[]
*/
export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
export function parseFacebookAds(
ads: FacebookAdNode[],
): FacebookListingDetails[] {
const results: FacebookListingDetails[] = [];
for (const adJson of ads) {
@@ -805,7 +807,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
const address = cityName || null;
// Determine listing status from Facebook flags
let listingStatus: string | undefined ;
let listingStatus: string | undefined;
if (listing.is_sold) {
listingStatus = "SOLD";
} else if (listing.is_pending) {
@@ -840,7 +842,8 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
title,
listingPrice: {
amountFormatted:
priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"),
priceObj.formatted_amount ||
formatCentsToCurrency(cents / 100, "en-CA"),
cents,
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
},
@@ -856,8 +859,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
};
results.push(listingDetails);
} catch {
}
} catch {}
}
return results;
@@ -868,7 +870,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
Updated for 2026 GroupCommerceProductItem structure
*/
export function parseFacebookItem(
item: FacebookMarketplaceItem
item: FacebookMarketplaceItem,
): FacebookListingDetails | null {
try {
const title = item.marketplace_listing_title || item.custom_title;
@@ -888,7 +890,8 @@ export function parseFacebookItem(
if (!Number.isNaN(amount)) {
cents = Math.round(amount * 100);
amountFormatted =
item.formatted_price?.text || formatCentsToCurrency(cents / 100, "en-CA");
item.formatted_price?.text ||
formatCentsToCurrency(cents / 100, "en-CA");
}
}
}
@@ -963,7 +966,7 @@ export default async function fetchFacebookItems(
LOCATION = "toronto",
MAX_ITEMS = 25,
cookiesSource?: string,
cookiePath?: string
cookiePath?: string,
) {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
@@ -978,7 +981,7 @@ export default async function fetchFacebookItems(
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
@@ -987,7 +990,7 @@ export default async function fetchFacebookItems(
const cookiesHeader = formatCookiesForHeader(cookies, domain);
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
);
}
@@ -1009,7 +1012,7 @@ export default async function fetchFacebookItems(
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
@@ -1018,11 +1021,11 @@ export default async function fetchFacebookItems(
} catch (err) {
if (err instanceof HttpError) {
console.warn(
`\nFacebook marketplace access failed (${err.status}): ${err.message}`
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
);
if (err.status === 400 || err.status === 401 || err.status === 403) {
console.warn(
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
);
}
return [];
@@ -1040,7 +1043,7 @@ export default async function fetchFacebookItems(
const progressBar = new cliProgress.SingleBar(
{},
cliProgress.Presets.shades_classic
cliProgress.Presets.shades_classic,
);
const totalProgress = ads.length;
const currentProgress = 0;
@@ -1050,7 +1053,7 @@ export default async function fetchFacebookItems(
// Filter to only priced items (already done in parseFacebookAds)
const pricedItems = items.filter(
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
);
progressBar.update(totalProgress);
@@ -1066,7 +1069,7 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem(
itemId: string,
cookiesSource?: string,
cookiePath?: string
cookiePath?: string,
): Promise<FacebookListingDetails | null> {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
@@ -1081,7 +1084,7 @@ export async function fetchFacebookItem(
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
@@ -1090,7 +1093,7 @@ export async function fetchFacebookItem(
const cookiesHeader = formatCookiesForHeader(cookies, domain);
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
);
}
@@ -1104,7 +1107,7 @@ export async function fetchFacebookItem(
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
@@ -1113,7 +1116,7 @@ export async function fetchFacebookItem(
} catch (err) {
if (err instanceof HttpError) {
console.warn(
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
);
// Enhanced error handling based on status codes
@@ -1122,27 +1125,27 @@ export async function fetchFacebookItem(
case 401:
case 403:
console.warn(
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
);
console.warn(
"Try logging out and back into Facebook, then export fresh cookies."
"Try logging out and back into Facebook, then export fresh cookies.",
);
break;
case 404:
console.warn(
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid."
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
);
break;
case 429:
console.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily."
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
);
break;
case 500:
case 502:
case 503:
console.warn(
"Facebook server error: Marketplace may be temporarily unavailable."
"Facebook server error: Marketplace may be temporarily unavailable.",
);
break;
default:
@@ -1163,7 +1166,7 @@ export async function fetchFacebookItem(
itemHtml.includes("This item has been sold")
) {
console.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
@@ -1174,13 +1177,13 @@ export async function fetchFacebookItem(
itemHtml.includes("authentication required")
) {
console.warn(
`Authentication failed for item ${itemId}. Cookies may be expired.`
`Authentication failed for item ${itemId}. Cookies may be expired.`,
);
return null;
}
console.warn(
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
);
console.warn(" - The listing was removed or sold");
console.warn(" - Authentication issues");

View File

@@ -1,19 +1,19 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import unidecode from "unidecode";
import cliProgress from "cli-progress";
import type { HTMLString } from "../types/common";
import { formatCentsToCurrency } from "../utils/format";
import {
fetchHtml,
isRecord,
HttpError,
isRecord,
NetworkError,
ParseError,
RateLimitError,
ValidationError,
} from "../utils/http";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import type { HTMLString } from "../types/common";
// ----------------------------- Types -----------------------------
@@ -219,7 +219,7 @@ export function resolveCategoryId(category?: number | string): number {
export function buildSearchUrl(
keywords: string,
options: SearchOptions & { page?: number },
BASE_URL = "https://www.kijiji.ca"
BASE_URL = "https://www.kijiji.ca",
): string {
const locationId = resolveLocationId(options.location);
const categoryId = resolveCategoryId(options.category);
@@ -319,7 +319,7 @@ const GRAPHQL_QUERIES = {
async function fetchGraphQLData(
query: string,
variables: Record<string, unknown>,
BASE_URL = "https://www.kijiji.ca"
BASE_URL = "https://www.kijiji.ca",
): Promise<unknown> {
const endpoint = `${BASE_URL}/anvil/api`;
@@ -340,7 +340,7 @@ async function fetchGraphQLData(
throw new HttpError(
`GraphQL request failed with status ${response.status}`,
response.status,
endpoint
endpoint,
);
}
@@ -349,7 +349,7 @@ async function fetchGraphQLData(
if (result.errors) {
throw new ParseError(
`GraphQL errors: ${JSON.stringify(result.errors)}`,
result.errors
result.errors,
);
}
@@ -361,7 +361,7 @@ async function fetchGraphQLData(
throw new NetworkError(
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
endpoint,
err instanceof Error ? err : undefined
err instanceof Error ? err : undefined,
);
}
}
@@ -371,7 +371,7 @@ async function fetchGraphQLData(
*/
async function fetchSellerDetails(
posterId: string,
BASE_URL = "https://www.kijiji.ca"
BASE_URL = "https://www.kijiji.ca",
): Promise<{
reviewCount?: number;
reviewScore?: number;
@@ -383,12 +383,12 @@ async function fetchSellerDetails(
fetchGraphQLData(
GRAPHQL_QUERIES.getReviewSummary,
{ userId: posterId },
BASE_URL
BASE_URL,
),
fetchGraphQLData(
GRAPHQL_QUERIES.getProfileMetrics,
{ profileId: posterId },
BASE_URL
BASE_URL,
),
]);
@@ -405,7 +405,7 @@ async function fetchSellerDetails(
// Best-effort: log a warning and return an empty result on GraphQL errors - not critical for basic functionality
console.warn(
`Failed to fetch seller details for ${posterId}:`,
err instanceof Error ? err.message : String(err)
err instanceof Error ? err.message : String(err),
);
return {};
}
@@ -416,7 +416,9 @@ async function fetchSellerDetails(
/**
Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML.
*/
export function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
export function extractApolloState(
htmlString: HTMLString,
): ApolloRecord | null {
const { document } = parseHTML(htmlString);
const nextData = document.getElementById("__NEXT_DATA__");
if (!nextData || !nextData.textContent) return null;
@@ -436,7 +438,7 @@ export function extractApolloState(htmlString: HTMLString): ApolloRecord | null
*/
export function parseSearch(
htmlString: HTMLString,
BASE_URL: string
BASE_URL: string,
): SearchListing[] {
const apolloState = extractApolloState(htmlString);
if (!apolloState) return [];
@@ -463,16 +465,16 @@ export function parseSearch(
/**
Parse a listing page into a typed object (backward compatible).
*/
function parseListing(
function _parseListing(
htmlString: HTMLString,
BASE_URL: string
BASE_URL: string,
): KijijiListingDetails | null {
const apolloState = extractApolloState(htmlString);
if (!apolloState) return null;
// Find the listing root key
const listingKey = Object.keys(apolloState).find((k) =>
k.includes("Listing")
k.includes("Listing"),
);
if (!listingKey) return null;
@@ -515,7 +517,8 @@ function parseListing(
listingPrice: amountFormatted
? {
amountFormatted,
cents: Number.isFinite(cents!) ? cents : undefined,
cents:
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
currency: price?.currency,
}
: undefined,
@@ -523,7 +526,10 @@ function parseListing(
listingStatus: status,
creationDate: activationDate,
endDate,
numberOfViews: Number.isFinite(numberOfViews!) ? numberOfViews : undefined,
numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null,
};
}
@@ -534,14 +540,14 @@ function parseListing(
export async function parseDetailedListing(
htmlString: HTMLString,
BASE_URL: string,
options: ListingFetchOptions = {}
options: ListingFetchOptions = {},
): Promise<DetailedListing | null> {
const apolloState = extractApolloState(htmlString);
if (!apolloState) return null;
// Find the listing root key
const listingKey = Object.keys(apolloState).find((k) =>
k.includes("Listing")
k.includes("Listing"),
);
if (!listingKey) return null;
@@ -621,7 +627,7 @@ export async function parseDetailedListing(
try {
const additionalData = await fetchSellerDetails(
posterInfo.posterId,
BASE_URL
BASE_URL,
);
sellerInfo = {
...sellerInfo,
@@ -630,7 +636,7 @@ export async function parseDetailedListing(
} catch {
// Log a warning and carry on - GraphQL data is optional
console.warn(
`Failed to fetch additional seller data for ${posterInfo.posterId}`
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
);
}
}
@@ -683,7 +689,7 @@ export default async function fetchKijijiItems(
REQUESTS_PER_SECOND = 1,
BASE_URL = "https://www.kijiji.ca",
searchOptions: SearchOptions = {},
listingOptions: ListingFetchOptions = {}
listingOptions: ListingFetchOptions = {},
) {
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
@@ -717,7 +723,7 @@ export default async function fetchKijijiItems(
// Add page parameter for pagination
...(page > 1 && { page }),
},
BASE_URL
BASE_URL,
);
console.log(`Fetching search page ${page}: ${searchUrl}`);
@@ -725,7 +731,7 @@ export default async function fetchKijijiItems(
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
@@ -734,7 +740,7 @@ export default async function fetchKijijiItems(
const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) {
console.log(
`No more results found on page ${page}. Stopping pagination.`
`No more results found on page ${page}. Stopping pagination.`,
);
break;
}
@@ -749,13 +755,13 @@ export default async function fetchKijijiItems(
}
console.log(
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
);
// Fetch details for this page's listings
const progressBar = new cliProgress.SingleBar(
{},
cliProgress.Presets.shades_classic
cliProgress.Presets.shades_classic,
);
const totalProgress = newListingLinks.length;
let currentProgress = 0;
@@ -767,7 +773,7 @@ export default async function fetchKijijiItems(
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
@@ -775,7 +781,7 @@ export default async function fetchKijijiItems(
const parsed = await parseDetailedListing(
html,
BASE_URL,
finalListingOptions
finalListingOptions,
);
if (parsed) {
allListings.push(parsed);
@@ -783,11 +789,11 @@ export default async function fetchKijijiItems(
} catch (err) {
if (err instanceof HttpError) {
console.error(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
);
} else {
console.error(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
);
}
} finally {
@@ -809,10 +815,4 @@ export default async function fetchKijijiItems(
}
// Re-export error classes for convenience
export {
HttpError,
NetworkError,
ParseError,
RateLimitError,
ValidationError,
};
export { HttpError, NetworkError, ParseError, RateLimitError, ValidationError };

View File

@@ -4,7 +4,10 @@
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
* @returns Formatted currency string
*/
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string {
export function formatCentsToCurrency(
cents: number,
locale: string = "en-CA",
): string {
try {
const formatter = new Intl.NumberFormat(locale, {
style: "currency",
@@ -13,7 +16,7 @@ export function formatCentsToCurrency(cents: number, locale: string = "en-CA"):
maximumFractionDigits: 2,
});
return formatter.format(cents / 100);
} catch (error) {
} catch {
// Fallback if locale is not supported
const dollars = (cents / 100).toFixed(2);
return `$${dollars}`;

View File

@@ -3,7 +3,7 @@ export class HttpError extends Error {
constructor(
message: string,
public readonly statusCode: number,
public readonly url?: string
public readonly url?: string,
) {
super(message);
this.name = "HttpError";
@@ -15,7 +15,7 @@ export class NetworkError extends Error {
constructor(
message: string,
public readonly url: string,
public readonly cause?: Error
public readonly cause?: Error,
) {
super(message);
this.name = "NetworkError";
@@ -26,7 +26,7 @@ export class NetworkError extends Error {
export class ParseError extends Error {
constructor(
message: string,
public readonly data?: unknown
public readonly data?: unknown,
) {
super(message);
this.name = "ParseError";
@@ -38,7 +38,7 @@ export class RateLimitError extends Error {
constructor(
message: string,
public readonly url: string,
public readonly resetTime?: number
public readonly resetTime?: number,
) {
super(message);
this.name = "RateLimitError";
@@ -87,7 +87,7 @@ export interface FetchHtmlOptions {
export async function fetchHtml(
url: string,
delayMs: number,
opts?: FetchHtmlOptions
opts?: FetchHtmlOptions,
): Promise<string> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 1000;
@@ -137,14 +137,14 @@ export async function fetchHtml(
throw new RateLimitError(
`Rate limit exceeded for ${url}`,
url,
resetSeconds
resetSeconds,
);
}
// Retry on server errors
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
);
continue;
}
@@ -152,7 +152,7 @@ export async function fetchHtml(
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url
url,
);
}
@@ -174,7 +174,7 @@ export async function fetchHtml(
if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) {
await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
);
continue;
}
@@ -184,14 +184,14 @@ export async function fetchHtml(
// Network or other errors
if (attempt < maxRetries) {
await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
);
continue;
}
throw new NetworkError(
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
url,
err instanceof Error ? err : undefined
err instanceof Error ? err : undefined,
);
}
}

View File

@@ -5,7 +5,6 @@ import {
fetchFacebookItem,
formatCentsToCurrency,
formatCookiesForHeader,
loadFacebookCookies,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
});
const result = await fetchFacebookItem("123", mockCookies);
const _result = await fetchFacebookItem("123", mockCookies);
expect(attempts).toBe(2);
// Should eventually succeed after retry (NOTE: only the attempt count is asserted; the returned value is not checked here)
});

View File

@@ -1,5 +1,5 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
import { fetchFacebookItems } from "../src/scrapers/facebook";
// Mock fetch globally
const originalFetch = global.fetch;

View File

@@ -1,13 +1,12 @@
import { describe, expect, test } from "bun:test";
import {
HttpError,
buildSearchUrl,
NetworkError,
ParseError,
RateLimitError,
ValidationError,
buildSearchUrl,
resolveCategoryId,
resolveLocationId,
ValidationError,
} from "../src/scrapers/kijiji";
describe("Location and Category Resolution", () => {
@@ -121,20 +120,12 @@ describe("URL Construction", () => {
});
describe("Error Classes", () => {
test("HttpError should store status and URL", () => {
const error = new HttpError("Not found", 404, "https://example.com");
expect(error.message).toBe("Not found");
expect(error.statusCode).toBe(404);
expect(error.url).toBe("https://example.com");
expect(error.name).toBe("HttpError");
});
test("NetworkError should store URL and cause", () => {
const cause = new Error("Connection failed");
const error = new NetworkError(
"Network error",
"https://example.com",
cause
cause,
);
expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com");

View File

@@ -1,4 +1,4 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { describe, expect, test } from "bun:test";
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
describe("Utility Functions", () => {

View File

@@ -8,9 +8,12 @@ const server = Bun.serve({
idleTimeout: 0,
routes: {
// MCP metadata discovery endpoint
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), {
"/.well-known/mcp/server-card.json": new Response(
JSON.stringify(serverCard),
{
headers: { "Content-Type": "application/json" },
}),
},
),
// MCP JSON-RPC 2.0 protocol endpoint
"/mcp": async (req: Request) => {
@@ -19,13 +22,13 @@ const server = Bun.serve({
}
return Response.json(
{ message: "MCP endpoint requires POST request" },
{ status: 405 }
{ status: 405 },
);
},
},
// Fallback for all other routes
fetch(req: Request) {
fetch(_req: Request) {
return new Response("Not Found", { status: 404 });
},
});

View File

@@ -1,4 +1,8 @@
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
import {
fetchEbayItems,
fetchFacebookItems,
fetchKijijiItems,
} from "@marketplace-scrapers/core";
import { tools } from "./tools";
/**
@@ -16,7 +20,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
error: { code: -32600, message: "Invalid Request" },
id: body.id,
},
{ status: 400 }
{ status: 400 },
);
}
@@ -38,7 +42,8 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
name: "marketplace-scrapers",
version: "1.0.0",
},
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
instructions:
"Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
},
});
}
@@ -78,15 +83,18 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
{
jsonrpc: "2.0",
id,
error: { code: -32602, message: "Invalid params: name and arguments required" },
error: {
code: -32602,
message: "Invalid params: name and arguments required",
},
{ status: 400 }
},
{ status: 400 },
);
}
// Route tool calls to appropriate handlers
try {
let result;
let result: unknown;
if (name === "search_kijiji") {
const query = args.query;
@@ -112,7 +120,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
1,
"https://www.kijiji.ca",
searchOptions,
{}
{},
);
result = items || [];
} else if (name === "search_facebook") {
@@ -130,7 +138,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
args.location || "toronto",
args.maxItems || 25,
args.cookiesSource,
undefined
undefined,
);
result = items || [];
} else if (name === "search_ebay") {
@@ -175,11 +183,15 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
},
});
} catch (error) {
const errorMessage = error instanceof Error ? error.message : "Unknown error";
const errorMessage =
error instanceof Error ? error.message : "Unknown error";
return Response.json({
jsonrpc: "2.0",
id,
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` },
error: {
code: -32603,
message: `Tool execution failed: ${errorMessage}`,
},
});
}
}
@@ -191,16 +203,17 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
id,
error: { code: -32601, message: `Method not found: ${method}` },
},
{ status: 404 }
{ status: 404 },
);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : "Unknown error";
const errorMessage =
error instanceof Error ? error.message : "Unknown error";
return Response.json(
{
jsonrpc: "2.0",
error: { code: -32700, message: `Parse error: ${errorMessage}` },
},
{ status: 400 }
{ status: 400 },
);
}
}

View File

@@ -3,7 +3,8 @@
*/
export const serverCard = {
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
$schema:
"https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
version: "1.0",
protocolVersion: "2025-06-18",
serverInfo: {
@@ -20,6 +21,7 @@ export const serverCard = {
listChanged: true,
},
},
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
description:
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
tools: "dynamic",
};

View File

@@ -15,11 +15,13 @@ export const tools = [
},
location: {
type: "string",
description: "Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
description:
"Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
},
category: {
type: "string",
description: "Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
description:
"Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
},
keywords: {
type: "string",
@@ -84,7 +86,8 @@ export const tools = [
},
{
name: "search_ebay",
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
description:
"Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
inputSchema: {
type: "object",
properties: {