migrate to monorepo?
This commit is contained in:
21
packages/api-server/package.json
Normal file
21
packages/api-server/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "@marketplace-scrapers/api-server",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"module": "./src/index.ts",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"start": "bun ./src/index.ts",
|
||||
"dev": "bun --watch ./src/index.ts",
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api"
|
||||
},
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
30
packages/api-server/src/index.ts
Normal file
30
packages/api-server/src/index.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import { statusRoute } from "./routes/status";
|
||||
import { kijijiRoute } from "./routes/kijiji";
|
||||
import { facebookRoute } from "./routes/facebook";
|
||||
import { ebayRoute } from "./routes/ebay";
|
||||
|
||||
// Listening port: the PORT environment variable when it is set and non-empty,
// otherwise 4005.
const PORT = process.env.PORT || 4005;

const server = Bun.serve({
  port: PORT as number | string,
  // NOTE(review): 0 presumably disables Bun's idle-connection timeout so slow
  // scraping requests are not cut off — confirm against the Bun.serve docs.
  idleTimeout: 0,
  routes: {
    // Health check endpoint
    "/api/status": statusRoute,

    // Marketplace search endpoints
    "/api/kijiji": kijijiRoute,
    "/api/facebook": facebookRoute,
    "/api/ebay": ebayRoute,

    // Any other /api path gets a prebuilt JSON 404
    "/api/*": Response.json({ message: "Not found" }, { status: 404 }),
  },

  // Everything outside the route table gets a plain-text 404
  fetch: (_req: Request) => new Response("Not Found", { status: 404 }),
});

console.log(`API Server running on ${server.hostname}:${server.port}`);
|
||||
56
packages/api-server/src/routes/ebay.ts
Normal file
56
packages/api-server/src/routes/ebay.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
|
||||
/**
 * Parse an optional integer query parameter (base 10).
 *
 * @param raw Raw parameter value, or `null` when the parameter is absent.
 * @returns `undefined` when the parameter is absent or empty, the parsed
 *          integer otherwise, and `NaN` when the value is not numeric (the
 *          caller decides how to report that).
 */
function parseOptionalIntParam(raw: string | null): number | undefined {
  if (!raw) return undefined;
  return parseInt(raw, 10);
}

/**
 * Split a comma-separated query parameter into trimmed, non-empty entries.
 *
 * Blank entries (from input such as `"a,,b"` or a trailing comma) are dropped:
 * an empty string is a substring of every title, so keeping one would make an
 * exclusion filter reject every listing.
 */
function parseCommaList(raw: string | null): string[] {
  if (!raw) return [];
  return raw
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}

/**
 * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}
 *
 * Search eBay for listings.
 *
 * - The search text comes from the `query` header or, failing that, the `q`
 *   search parameter; a request with neither is answered with 400.
 * - `minPrice` / `maxPrice` are optional integers; a non-numeric value is
 *   answered with 400 instead of silently filtering out every result.
 * - `strictMode` is enabled only by the literal value `true`.
 * - `exclusions` / `keywords` are comma-separated lists; `keywords` falls back
 *   to the search text when it is absent or contains only blanks.
 *
 * @returns 200 with the listings, 404 when the search produced none, or 400
 *          with the error message when the scraper throws.
 */
export async function ebayRoute(req: Request): Promise<Response> {
  const reqUrl = new URL(req.url);
  const params = reqUrl.searchParams;

  const SEARCH_QUERY = req.headers.get("query") || params.get("q") || null;
  if (!SEARCH_QUERY)
    return Response.json(
      {
        message: "Request didn't have 'query' header or 'q' search parameter!",
      },
      { status: 400 },
    );

  // Parse optional parameters with defaults
  const minPrice = parseOptionalIntParam(params.get("minPrice"));
  const maxPrice = parseOptionalIntParam(params.get("maxPrice"));
  if (Number.isNaN(minPrice) || Number.isNaN(maxPrice))
    return Response.json(
      { message: "'minPrice' and 'maxPrice' must be integers!" },
      { status: 400 },
    );

  const strictMode = params.get("strictMode") === "true";
  const exclusions = parseCommaList(params.get("exclusions"));
  const requestedKeywords = parseCommaList(params.get("keywords"));
  const keywords = requestedKeywords.length > 0 ? requestedKeywords : [SEARCH_QUERY];

  try {
    // Second argument is the scraper's requests-per-second budget.
    const items = await fetchEbayItems(SEARCH_QUERY, 5, {
      minPrice,
      maxPrice,
      strictMode,
      exclusions,
      keywords,
    });
    if (!items || items.length === 0)
      return Response.json(
        { message: "Search didn't return any results!" },
        { status: 404 },
      );
    return Response.json(items, { status: 200 });
  } catch (error) {
    console.error("eBay scraping error:", error);
    const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
    // NOTE(review): scraper failures are reported as 400 to stay compatible
    // with existing clients; a 5xx status would describe them better.
    return Response.json({ message: errorMessage }, { status: 400 });
  }
}
|
||||
40
packages/api-server/src/routes/facebook.ts
Normal file
40
packages/api-server/src/routes/facebook.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { fetchFacebookItems } from "@marketplace-scrapers/core";
|
||||
|
||||
/**
 * GET /api/facebook?q={query}&location={location}&cookies={cookies}
 *
 * Search Facebook Marketplace for listings.
 *
 * The search text is read from the `query` header first and the `q` search
 * parameter second. `location` defaults to "toronto"; `cookies` is forwarded
 * to the scraper only when it is present and non-empty.
 *
 * @returns 200 with the listings, 404 when there are none, 400 when the search
 *          text is missing or the scraper throws.
 */
export async function facebookRoute(req: Request): Promise<Response> {
  const { searchParams } = new URL(req.url);

  const SEARCH_QUERY = req.headers.get("query") || searchParams.get("q") || null;
  if (!SEARCH_QUERY) {
    const missingQuery = {
      message: "Request didn't have 'query' header or 'q' search parameter!",
    };
    return Response.json(missingQuery, { status: 400 });
  }

  const LOCATION = searchParams.get("location") || "toronto";
  const COOKIES_SOURCE = searchParams.get("cookies") || undefined;

  try {
    // NOTE(review): 5 and 25 are passed positionally; their meaning is defined
    // by fetchFacebookItems, which is not visible here — confirm there.
    const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE);

    if (items && items.length !== 0) {
      return Response.json(items, { status: 200 });
    }
    return Response.json(
      { message: "Search didn't return any results!" },
      { status: 404 },
    );
  } catch (error) {
    console.error("Facebook scraping error:", error);
    let errorMessage = "Unknown error occurred";
    if (error instanceof Error) {
      errorMessage = error.message;
    }
    return Response.json({ message: errorMessage }, { status: 400 });
  }
}
|
||||
37
packages/api-server/src/routes/kijiji.ts
Normal file
37
packages/api-server/src/routes/kijiji.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { fetchKijijiItems } from "@marketplace-scrapers/core";
|
||||
|
||||
/**
 * GET /api/kijiji?q={query}
 *
 * Search Kijiji marketplace for listings.
 *
 * The search text is read from the `query` header first and the `q` search
 * parameter second; a request with neither is answered with 400.
 *
 * @returns 200 with the listings, 404 when the search produced nothing
 *          (a missing result or an empty list, matching the eBay and Facebook
 *          routes), or 400 with the error message when the scraper throws.
 */
export async function kijijiRoute(req: Request): Promise<Response> {
  const reqUrl = new URL(req.url);

  const SEARCH_QUERY =
    req.headers.get("query") || reqUrl.searchParams.get("q") || null;
  if (!SEARCH_QUERY)
    return Response.json(
      {
        message: "Request didn't have 'query' header or 'q' search parameter!",
      },
      { status: 400 },
    );

  try {
    // NOTE(review): the second argument (5) is defined by fetchKijijiItems,
    // which is not visible here — presumably a requests-per-second budget.
    const items = await fetchKijijiItems(SEARCH_QUERY, 5);

    // An empty list is "no results" as well, exactly as in the sibling routes.
    // Array.isArray keeps this safe whatever shape the scraper returns.
    const isEmptyList = Array.isArray(items) && items.length === 0;
    if (!items || isEmptyList)
      return Response.json(
        { message: "Search didn't return any results!" },
        { status: 404 },
      );
    return Response.json(items, { status: 200 });
  } catch (error) {
    console.error("Kijiji scraping error:", error);
    const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
    return Response.json({ message: errorMessage }, { status: 400 });
  }
}
|
||||
6
packages/api-server/src/routes/status.ts
Normal file
6
packages/api-server/src/routes/status.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
/**
 * Health check endpoint.
 *
 * @returns A 200 response whose body is the plain text "OK".
 */
export function statusRoute(): Response {
  const healthy = new Response("OK", { status: 200 });
  return healthy;
}
|
||||
13
packages/api-server/tsconfig.json
Normal file
13
packages/api-server/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"lib": ["dom"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
},
|
||||
"strict": true,
|
||||
"noEmit": true
|
||||
}
|
||||
}
|
||||
21
packages/core/package.json
Normal file
21
packages/core/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "@marketplace-scrapers/core",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"main": "./src/index.ts",
|
||||
"module": "./src/index.ts",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"cli-progress": "^3.12.0",
|
||||
"linkedom": "^0.18.12",
|
||||
"unidecode": "^1.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest",
|
||||
"@types/unidecode": "^1.1.0",
|
||||
"@types/cli-progress": "^3.11.6"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
17
packages/core/src/index.ts
Normal file
17
packages/core/src/index.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
// Export all scrapers
|
||||
export { default as fetchKijijiItems, slugify } from "./scrapers/kijiji";
|
||||
export type { KijijiListingDetails } from "./scrapers/kijiji";
|
||||
|
||||
export { default as fetchFacebookItems } from "./scrapers/facebook";
|
||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||
|
||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||
export type { EbayListingDetails } from "./scrapers/ebay";
|
||||
|
||||
// Export shared utilities
|
||||
export * from "./utils/http";
|
||||
export * from "./utils/delay";
|
||||
export * from "./utils/format";
|
||||
|
||||
// Export shared types
|
||||
export * from "./types/common";
|
||||
346
packages/core/src/scrapers/ebay.ts
Normal file
346
packages/core/src/scrapers/ebay.ts
Normal file
@@ -0,0 +1,346 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import { isRecord } from "../utils/http";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
/**
 * One eBay listing as returned by the eBay scraper.
 *
 * Only `url` and `title` are mandatory; every other field is filled in when
 * the scraper can determine it.
 */
export interface EbayListingDetails {
  /** Link to the listing. */
  url: string;
  /** Listing title. */
  title: string;
  description?: string;
  /** Price as displayed, plus its parsed form. */
  listingPrice?: {
    /** Price text as it appeared on the page. */
    amountFormatted: string;
    /** Price expressed in cents. */
    cents?: number;
    /** Currency code such as "CAD" or "USD". */
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;
}
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/** First numeric amount in a price string; must start with a digit so stray commas are skipped. */
const EBAY_AMOUNT_PATTERN = /\d[\d,]*(?:\.\d+)?/;

/** Canadian-dollar markers around a dollar sign: "C $", "CA$", "CA $", "$CA". Applied to upper-cased text. */
const EBAY_CAD_MARKER_PATTERN = /\bCA?\s*\$|\$\s*CA\b/;

/**
 * Parse an eBay price string like "$1.50 CAD", "C $1.50" or "CA $1.50" into cents.
 *
 * The first numeric amount in the text is used (thousands separators are
 * ignored), so a range such as "$10.00 to $20.00" yields the lower bound.
 *
 * @param priceText Price text as scraped from the page.
 * @returns The amount in cents and a currency code — "CAD" when the text
 *          carries a Canadian marker, otherwise "USD" — or `null` when the
 *          text holds no parsable amount.
 */
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null {
  if (!priceText || typeof priceText !== "string") return null;

  const cleaned = priceText.trim();

  const amountMatch = EBAY_AMOUNT_PATTERN.exec(cleaned);
  if (!amountMatch) return null;

  const dollars = parseFloat(amountMatch[0].replace(/,/g, ""));
  if (isNaN(dollars)) return null;

  // Round rather than truncate: binary floats make e.g. 19.99 * 100 land just below 1999.
  const cents = Math.round(dollars * 100);

  const upper = cleaned.toUpperCase();
  const isCanadian = upper.includes("CAD") || EBAY_CAD_MARKER_PATTERN.test(upper);
  const currency = isCanadian ? "CAD" : "USD";

  return { cents, currency };
}
|
||||
|
||||
/**
 * Error raised for a failed HTTP request; carries the response status and the
 * URL that was requested.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
/** Origin used to absolutise relative listing links; the search page scraped by this module is on ebay.ca. */
const EBAY_LINK_ORIGIN = "https://www.ebay.ca";

/** Accessibility text eBay appends to link titles ("Opens in a new window or tab", any casing). */
const EBAY_UI_SUFFIX_PATTERN = /opens in a new (?:window|tab)/i;

/** Text that starts like a price: optional currency prefix ("C", "CA", "US", ...) then a currency symbol. */
const EBAY_PRICE_TEXT_PATTERN = /^(?:[A-Z]{1,3}\s*)?[$£€¥]/;

/** Four consecutive digits — used to reject years and other non-price numbers. */
const EBAY_FOUR_DIGITS_PATTERN = /\d{4}/;

/** Words that mark a "$"-containing text as a product description rather than a price. */
const EBAY_NON_PRICE_WORDS = ["laptop", "computer", "intel", "core", "ram", "ssd"];

/** Tags that render struck-through (original, pre-discount) prices. */
const EBAY_STRUCK_TAGS = new Set(["s", "del", "strike"]);

/** Turn a listing href into an absolute URL, or `null` when it cannot be resolved. */
function resolveEbayHref(href: string): string | null {
  if (href.startsWith("http")) return href;
  try {
    // Handles protocol-relative ("//host/..."), root-relative and plain relative links.
    return new URL(href, `${EBAY_LINK_ORIGIN}/`).href;
  } catch {
    return null;
  }
}

/** Identity of a listing for de-duplication: its numeric item id, else the URL without query/fragment. */
function ebayListingKey(href: string): string {
  const idMatch = /\/itm\/(?:[^/?#]*\/)?(\d{6,})/.exec(href);
  return idMatch?.[1] ?? href.split(/[?#]/)[0] ?? href;
}

/** Find a usable title inside a listing container, or `null` when there is none. */
function extractEbayTitle(container: Element): string | null {
  let titleElement: Element | null = container.querySelector(
    'h3, [role="heading"], .s-item__title span',
  );

  // No dedicated title element: take the first medium-length text that is
  // neither a price nor an "item ..." label.
  if (!titleElement) {
    for (const candidate of container.querySelectorAll("span, div")) {
      const text = candidate.textContent?.trim();
      if (
        text &&
        text.length > 10 &&
        text.length < 200 &&
        !text.includes("$") &&
        !text.includes("item")
      ) {
        titleElement = candidate;
        break;
      }
    }
  }

  let title = titleElement?.textContent?.trim();
  if (!title) return null;

  // Cut off the accessibility suffix eBay appends to link titles.
  const uiSuffix = EBAY_UI_SUFFIX_PATTERN.exec(title);
  if (uiSuffix) title = title.slice(0, uiSuffix.index).trim();

  // Too short to be a real title, or the "Shop on eBay" ad placeholder.
  if (title.length < 10 || title === "Shop on eBay") return null;
  return title;
}

/** True when the element is rendered as a struck-through (original) price. */
function isStruckThroughPrice(el: Element): boolean {
  return (
    EBAY_STRUCK_TAGS.has(el.tagName.toLowerCase()) ||
    el.classList.contains("s-strikethrough") ||
    el.classList.contains("u-flStrike") ||
    el.closest("s, del, strike") !== null
  );
}

/** Locate the element holding the current price of a listing, preferring sale prices over struck-through ones. */
function findEbayPriceElement(container: Element): Element | null {
  let priceElement: Element | null = container.querySelector(
    '[class*="s-item__price"], .s-item__price, [class*="price"]',
  );

  // No price class: fall back to a short "$" text that does not look like a
  // product description, a year or a measurement.
  if (!priceElement) {
    for (const el of container.querySelectorAll("span, div, b, em, strong")) {
      const text = el.textContent?.trim();
      if (
        text &&
        text.includes("$") &&
        text.length < 100 &&
        !EBAY_NON_PRICE_WORDS.some((word) => text.includes(word)) &&
        !EBAY_FOUR_DIGITS_PATTERN.test(text) &&
        !text.includes('"')
      ) {
        priceElement = el;
        break;
      }
    }
  }
  if (!priceElement) return null;

  // Discounted items show both the original and the sale price.
  const priceContainer =
    priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement;
  if (!priceContainer) return priceElement;

  const actualPrices = Array.from(
    priceContainer.querySelectorAll(
      '[class*="s-item__price"], span, b, em, strong, s, del, strike',
    ),
  ).filter((el) => {
    const text = el.textContent?.trim();
    return (
      !!text &&
      EBAY_PRICE_TEXT_PATTERN.test(text) &&
      text.length < 50 &&
      !EBAY_FOUR_DIGITS_PATTERN.test(text)
    );
  });
  if (actualPrices.length <= 1) return priceElement;

  // Prefer the first price that is not struck through; otherwise the last one.
  const currentPrice = actualPrices.find((el) => !isStruckThroughPrice(el));
  return currentPrice ?? actualPrices[actualPrices.length - 1] ?? priceElement;
}

/**
 * Parse eBay search page HTML and extract listings using DOM selectors.
 *
 * Every anchor whose href contains "itm/" is treated as a listing link; title
 * and price are looked up in the anchor's enclosing container. A listing is
 * skipped when it has no usable title or price, when its title contains one of
 * `exclusions`, or — in strict mode — when its title contains none of
 * `keywords`. Comparisons are case-insensitive. Each item is returned once even
 * when the page links to it several times (image link and title link).
 *
 * @param htmlString Search results page HTML.
 * @param keywords   Terms of which at least one must occur in the title when `strictMode` is on.
 * @param exclusions Terms that disqualify a listing; blank entries are ignored.
 * @param strictMode Whether the keyword filter is applied.
 * @returns The listings that passed all filters, in page order.
 */
function parseEbayListings(
  htmlString: HTMLString,
  keywords: string[],
  exclusions: string[],
  strictMode: boolean,
): EbayListingDetails[] {
  const { document } = parseHTML(htmlString);
  const results: EbayListingDetails[] = [];
  const seenListings = new Set<string>();

  // Lower-case the filters once. A blank exclusion is dropped because "" is a
  // substring of every title and would reject every listing.
  const loweredExclusions = exclusions
    .filter((term) => term.trim().length > 0)
    .map((term) => term.toLowerCase());
  const loweredKeywords = keywords.map((term) => term.toLowerCase());

  // Find all listing links by looking for eBay item URLs (/itm/)
  const linkElements = document.querySelectorAll('a[href*="itm/"]');

  for (const linkElement of linkElements) {
    try {
      const rawHref = linkElement.getAttribute("href");
      if (!rawHref) continue;

      const href = resolveEbayHref(rawHref);
      if (!href) continue;

      const listingKey = ebayListingKey(href);
      if (seenListings.has(listingKey)) continue;

      // The item container sits a few levels above the link; try three levels
      // up first, then two.
      const container =
        linkElement.parentElement?.parentElement?.parentElement ??
        linkElement.parentElement?.parentElement;
      if (!container) continue;

      const title = extractEbayTitle(container);
      if (!title) continue;

      const priceText = findEbayPriceElement(container)?.textContent?.trim();
      if (!priceText) continue;

      const priceInfo = parseEbayPrice(priceText);
      if (!priceInfo) continue;

      const loweredTitle = title.toLowerCase();

      // Apply exclusion filters
      if (loweredExclusions.some((term) => loweredTitle.includes(term))) continue;

      // Apply strict mode filter (title must contain at least one keyword)
      if (strictMode && !loweredKeywords.some((term) => loweredTitle.includes(term))) continue;

      seenListings.add(listingKey);
      results.push({
        url: href,
        title,
        listingPrice: {
          amountFormatted: priceText,
          cents: priceInfo.cents,
          currency: priceInfo.currency,
        },
        listingType: "OFFER", // eBay listings are typically offers
        listingStatus: "ACTIVE",
        address: null, // eBay doesn't typically show detailed addresses in search results
      });
    } catch (err) {
      // One malformed listing must not abort the whole page.
      console.warn(`Error parsing eBay listing: ${err}`);
      continue;
    }
  }

  return results;
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Search ebay.ca and return the listings found on the first results page.
 *
 * @param SEARCH_QUERY        Search text.
 * @param REQUESTS_PER_SECOND Request budget; determines the pause after the
 *                            page download. Non-positive or non-finite values
 *                            fall back to 1.
 * @param opts.minPrice       Lowest accepted price. It is compared against the
 *                            listing price in cents. NOTE(review): confirm
 *                            callers pass cents rather than dollars.
 * @param opts.maxPrice       Highest accepted price, same unit as `minPrice`.
 * @param opts.strictMode     Keep only listings whose title contains a keyword.
 * @param opts.exclusions     Terms that disqualify a listing when found in its title.
 * @param opts.keywords       Terms for strict mode; defaults to the search text.
 * @returns The matching listings; an empty list when eBay answers with a
 *          non-success status.
 * @throws Any non-HTTP failure (network error, parser error) is rethrown.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  opts: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
  } = {},
) {
  const {
    minPrice = 0,
    maxPrice = Number.MAX_SAFE_INTEGER,
    strictMode = false,
    exclusions = [],
    keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
  } = opts;

  // Build eBay search URL - use Canadian site and tracking parameters like real browser.
  // Parameters are joined with a plain "&"; a "^&" separator (Windows cmd
  // escaping) would leak a literal "^" into every parameter value.
  const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}&_sacat=0&_from=R40&_trksid=p4432023.m570.l1313`;

  // Guard the division below against zero, negative and non-finite rates.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  console.log(`Fetching eBay search: ${searchUrl}`);

  try {
    // Use custom headers modeled after real browser requests to bypass bot detection
    const headers: Record<string, string> = {
      'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
      'Accept-Encoding': 'gzip, deflate, br',
      'Referer': 'https://www.ebay.ca/',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
      'Sec-Fetch-Dest': 'document',
      'Sec-Fetch-Mode': 'navigate',
      'Sec-Fetch-Site': 'same-origin',
      'Sec-Fetch-User': '?1',
      'Priority': 'u=0, i',
    };

    const res = await fetch(searchUrl, {
      method: "GET",
      headers,
    });

    if (!res.ok) {
      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        searchUrl,
      );
    }

    const searchHtml = await res.text();
    // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
    await delay(DELAY_MS);

    console.log(`\nParsing eBay listings...`);

    const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode);

    // Filter by price range. The explicit undefined check keeps zero-priced
    // listings eligible (a truthiness test would drop them).
    const filteredListings = listings.filter((listing) => {
      const cents = listing.listingPrice?.cents;
      return cents !== undefined && cents >= minPrice && cents <= maxPrice;
    });

    console.log(`Parsed ${filteredListings.length} eBay listings.`);
    return filteredListings;
  } catch (err) {
    // An HTTP-level failure is reported and treated as "no results";
    // anything else is unexpected and propagates to the caller.
    if (err instanceof HttpError) {
      console.error(
        `Failed to fetch eBay search (${err.status}): ${err.message}`,
      );
      return [];
    }
    throw err;
  }
}
|
||||
570
packages/core/src/scrapers/facebook.ts
Normal file
570
packages/core/src/scrapers/facebook.ts
Normal file
@@ -0,0 +1,570 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import cliProgress from "cli-progress";
|
||||
import { isRecord } from "../utils/http";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
|
||||
/**
|
||||
* Facebook Marketplace Scraper
|
||||
*
|
||||
* Note: Facebook Marketplace requires authentication cookies for full access.
|
||||
* This implementation will return limited or no results without proper authentication.
|
||||
* This is by design to respect Facebook's authentication requirements.
|
||||
*/
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
/**
 * A single cookie record as loaded from the cookies JSON (a provided string or
 * the cookies file).
 */
interface Cookie {
  /** Cookie name. */
  name: string;
  /** Cookie value. */
  value: string;
  /** Domain the cookie belongs to; a leading "." marks a domain cookie that also applies to subdomains. */
  domain: string;
  path: string;
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  session?: boolean;
  /** Expiry as seconds since the Unix epoch. */
  expirationDate?: number;
  partitionKey?: any;
  storeId?: string;
}
|
||||
|
||||
/**
 * One search result as found in Facebook's embedded marketplace data.
 *
 * Only the fields declared here are typed; both `node` and `listing` accept
 * additional, untyped properties.
 */
interface FacebookAdNode {
  node: {
    listing: {
      /** Listing identifier. */
      id: string;
      marketplace_listing_title?: string;
      listing_price?: {
        amount?: string | number;
        currency?: string;
      };
      location?: {
        reverse_geocode?: {
          city_page?: {
            display_name?: string;
          };
        };
      };
      // NOTE(review): unit of creation_time is not established here — presumably a Unix timestamp; confirm.
      creation_time?: number;
      [k: string]: unknown;
    };
    [k: string]: unknown;
  };
}
|
||||
|
||||
/**
 * One entry of the `edges` list in Facebook's marketplace search feed: a
 * listing node plus any additional, untyped properties.
 */
interface FacebookEdge {
  /** The listing node carried by this edge. */
  node: FacebookAdNode["node"];
  [k: string]: unknown;
}
|
||||
|
||||
/**
 * The `marketplace_search` object embedded in a Facebook Marketplace page.
 * Only the feed edges are typed; other properties are left untyped.
 */
interface FacebookMarketplaceSearch {
  feed_units?: {
    /** Search results, one edge per listing. */
    edges?: FacebookEdge[];
  };
  [k: string]: unknown;
}
|
||||
|
||||
/**
 * One Facebook Marketplace listing as returned by the Facebook scraper.
 *
 * The first group of fields mirrors the other marketplaces' listing shape;
 * the second group is specific to Facebook. Only `url` and `title` are
 * mandatory.
 */
export interface FacebookListingDetails {
  /** Link to the listing. */
  url: string;
  /** Listing title. */
  title: string;
  description?: string;
  listingPrice?: {
    /** Human-readable price text. */
    amountFormatted: string;
    /** Price expressed in cents. */
    cents?: number;
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;

  // Facebook-specific fields
  imageUrl?: string;
  videoUrl?: string;
  seller?: {
    name?: string;
    id?: string;
  };
  categoryId?: string;
  deliveryTypes?: string[];
}
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/**
 * Check that a parsed JSON value has the cookie fields this module relies on:
 * string `name`, `value` and `domain`. `domain` is required because cookies
 * are later matched against the request host by domain.
 */
function isUsableFacebookCookie(candidate: unknown): candidate is Cookie {
  if (typeof candidate !== "object" || candidate === null) return false;
  const fields = candidate as Record<string, unknown>;
  return (
    typeof fields.name === "string" &&
    typeof fields.value === "string" &&
    typeof fields.domain === "string"
  );
}

/**
 * Load Facebook cookies from a JSON string or from ./cookies/facebook.json.
 *
 * Resolution order:
 * 1. `cookiesSource`, when given, is parsed as JSON. If it is an array, its
 *    usable entries are returned.
 * 2. Otherwise (no source, or a source that is valid JSON but not an array)
 *    ./cookies/facebook.json is read when it exists.
 * 3. If neither yields an array, the result is an empty list.
 *
 * Entries lacking a string `name`, `value` or `domain` are dropped.
 *
 * @param cookiesSource Optional JSON text holding an array of cookie objects.
 * @returns The usable cookies, possibly none.
 * @throws Error when `cookiesSource` is not valid JSON. Problems with the
 *         cookies file are only logged.
 */
async function loadFacebookCookies(cookiesSource?: string): Promise<Cookie[]> {
  // First try to load from provided string parameter
  if (cookiesSource) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(cookiesSource);
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    if (Array.isArray(parsed)) {
      return parsed.filter(isUsableFacebookCookie);
    }
    console.warn("Provided cookies JSON is not an array; falling back to ./cookies/facebook.json");
  }

  // Try to load from ./cookies/facebook.json
  try {
    const cookiesPath = "./cookies/facebook.json";
    const file = Bun.file(cookiesPath);
    if (await file.exists()) {
      const parsed: unknown = JSON.parse(await file.text());
      if (Array.isArray(parsed)) {
        return parsed.filter(isUsableFacebookCookie);
      }
    }
  } catch (e) {
    // Best effort: an unreadable or malformed cookies file means "no cookies".
    console.warn(`Could not load cookies from ./cookies/facebook.json: ${e}`);
  }

  return [];
}
|
||||
|
||||
/**
 * Format cookies array into Cookie header string.
 *
 * A cookie is included when it applies to `domain` and has not expired:
 * - a domain cookie (leading ".") applies to that domain itself and to its
 *   subdomains — the match is made on a label boundary, so ".facebook.com"
 *   applies to "www.facebook.com" but not to "notfacebook.com";
 * - any other cookie is host-only and applies to exactly that host;
 * - a cookie whose `expirationDate` (seconds since the epoch) lies in the past
 *   is dropped; cookies without an expiration date are kept.
 *
 * Host comparison is case-insensitive. Entries without a string `domain` are
 * skipped rather than causing a crash.
 *
 * @param cookies Candidate cookies.
 * @param domain  Host the request is sent to, e.g. "www.facebook.com".
 * @returns `name=value` pairs joined by "; " (empty string when none apply).
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
  const host = domain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    if (typeof cookie.domain !== "string") return false;
    const cookieDomain = cookie.domain.toLowerCase();

    if (cookieDomain.startsWith(".")) {
      // Domain cookie (applies to subdomains)
      const bareDomain = cookieDomain.slice(1);
      return host === bareDomain || host.endsWith(`.${bareDomain}`);
    }
    // Host-only cookie
    return cookieDomain === host;
  };

  const isExpired = (cookie: Cookie): boolean =>
    Boolean(cookie.expirationDate) && (cookie.expirationDate as number) < nowSeconds;

  return cookies
    .filter((cookie) => appliesToHost(cookie) && !isExpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||
|
||||
/**
 * Error describing a failed HTTP request, tagged with the response status and
 * the requested URL.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
|
||||
|
||||
// ----------------------------- HTTP Client -----------------------------
|
||||
|
||||
/**
 * Fetch HTML with a basic retry strategy and a simple rate-limit delay between calls.
 *
 * - Network failures, 429 and 5xx responses are retried, up to `maxRetries`
 *   extra attempts, with a linearly growing back-off.
 * - On 429 the X-RateLimit-Reset header, when numeric, is used as the number
 *   of seconds to wait. NOTE(review): assumes the header is a relative delay,
 *   not an epoch timestamp — confirm.
 * - Other 4xx responses fail immediately; they are never retried.
 * - Custom cookies can be supplied for Facebook authentication.
 *
 * @param url      Page to download.
 * @param DELAY_MS Pause applied after a successful download.
 * @param opts.maxRetries  Extra attempts after the first one (default 3).
 * @param opts.retryBaseMs Back-off unit in milliseconds (default 500).
 * @param opts.onRateInfo  Receives the X-RateLimit-Remaining / -Reset header values of every response.
 * @param opts.cookies     Ready-made Cookie header value.
 * @returns The response body.
 * @throws HttpError for a non-success status that is not (or no longer) retried;
 *         the underlying error when the last attempt fails at network level.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
    cookies?: string;
  },
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  // Identical for every attempt, so built once.
  const headers: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "accept-encoding": "gzip, deflate, br",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };

  // Add cookies if provided
  if (opts?.cookies) {
    headers["cookie"] = opts.cookies;
  }

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const isLastAttempt = attempt >= maxRetries;

    try {
      const res = await fetch(url, {
        method: "GET",
        headers,
      });

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (!res.ok) {
        // Respect 429 reset if provided; on the last attempt there is nothing
        // left to wait for, so fail with the real status instead.
        if (res.status === 429 && !isLastAttempt) {
          const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
          const waitMs = Number.isFinite(resetSeconds)
            ? Math.max(0, resetSeconds * 1000)
            : (attempt + 1) * retryBaseMs;
          await delay(waitMs);
          continue;
        }

        // For Facebook, 400 often means authentication required.
        // Don't retry 4xx client errors except 429.
        if (res.status >= 400 && res.status < 500 && res.status !== 429) {
          throw new HttpError(
            `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
            res.status,
            url,
          );
        }

        // Retry on 5xx
        if (res.status >= 500 && res.status < 600 && !isLastAttempt) {
          await delay((attempt + 1) * retryBaseMs);
          continue;
        }

        throw new HttpError(
          `Request failed with status ${res.status}`,
          res.status,
          url,
        );
      }

      const html = await res.text();
      // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
      await delay(DELAY_MS);
      return html;
    } catch (err) {
      // An HttpError is a final verdict reached above (non-retryable status or
      // retries used up); it must not re-enter the retry loop. Anything else is
      // a transport-level failure and is retried while attempts remain.
      if (err instanceof HttpError || isLastAttempt) throw err;
      await delay((attempt + 1) * retryBaseMs);
    }
  }

  throw new Error("Exhausted retries without response");
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
/**
 * Extract marketplace search data from Facebook page script tags.
 *
 * Each <script> whose text parses as JSON is inspected for a
 * `marketplace_search` object that has `feed_units.edges`. It is looked for at
 * several known positions below the top-level `require` array and, failing
 * that, directly on the parsed object. The first script that yields one wins.
 *
 * @param htmlString Marketplace search page HTML.
 * @returns One `{ node }` wrapper per feed edge, or `null` when no script
 *          carries marketplace data.
 */
function extractFacebookMarketplaceData(
  htmlString: HTMLString,
): FacebookAdNode[] | null {
  const { document } = parseHTML(htmlString);
  const scriptNodes = document.querySelectorAll("script") as unknown as HTMLScriptElement[];

  // Look for marketplace search data inside one parsed script payload.
  // May throw on unexpected shapes; the caller treats that as "not found".
  const locateSearch = (parsed: any): FacebookMarketplaceSearch | null => {
    if (parsed.require && Array.isArray(parsed.require)) {
      // Candidate locations of marketplace_search, tried in order.
      const accessors = [
        // Original path from example
        () => parsed.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'],
        // Alternative path structure
        () => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
        // Another variation
        () => parsed.require[0][3][0]['__bbox']['result']['data']['marketplace_search'],
        // Direct access for some responses
        () => {
          for (const item of parsed.require) {
            if (item && item.length >= 4 && item[3]) {
              const bbox = item[3]?.['__bbox']?.result?.data?.marketplace_search;
              if (bbox) return bbox;
            }
          }
          return null;
        },
      ];

      for (const readCandidate of accessors) {
        try {
          const candidate = readCandidate();
          if (candidate && isRecord(candidate) && candidate.feed_units?.edges) {
            return candidate as FacebookMarketplaceSearch;
          }
        } catch {
          // This location does not exist in the payload; try the next one.
        }
      }
    }

    // Also accept marketplace_search directly on the parsed object.
    const direct = parsed.marketplace_search;
    if (direct && isRecord(direct) && direct.feed_units?.edges) {
      return direct as FacebookMarketplaceSearch;
    }
    return null;
  };

  let marketplaceData: FacebookMarketplaceSearch | null = null;

  for (const scriptNode of scriptNodes) {
    const scriptText = scriptNode.textContent;
    if (!scriptText) continue;

    try {
      marketplaceData = locateSearch(JSON.parse(scriptText));
    } catch {
      // Not JSON, or not the expected shape: ignore this script.
    }
    if (marketplaceData) break;
  }

  const edges = marketplaceData?.feed_units?.edges;
  if (!edges) {
    console.warn("No marketplace data found in HTML response");
    return null;
  }

  console.log(`Successfully parsed ${edges.length} Facebook marketplace listings`);
  return edges.map((edge) => ({ node: edge.node }));
}
|
||||
|
||||
/** Structural view of the price fields read from a Facebook listing. */
type FacebookPriceFields = {
  amount?: string | number | null;
  amount_with_offset_in_currency?: string | number | null;
  formatted_amount?: string | null;
};

/**
 * Work out a listing price in cents.
 *
 * Preference order:
 *  1. `amount` (dollars, e.g. "1.00");
 *  2. the number embedded in `formatted_amount` (e.g. "CA$1,234"), used only
 *     when `amount_with_offset_in_currency` is a positive number;
 *  3. the raw `amount_with_offset_in_currency` value as a last resort.
 *
 * @returns The price in cents (may be NaN if `amount` is unparsable), or
 *          `null` when no usable price field exists.
 */
function resolveFacebookPriceCents(price: FacebookPriceFields): number | null {
  if (price.amount != null) {
    const dollars =
      typeof price.amount === "string"
        ? Number.parseFloat(price.amount)
        : price.amount;
    return Math.round(dollars * 100);
  }

  if (price.amount_with_offset_in_currency == null) return null; // No price available

  const encodedAmount = Number(price.amount_with_offset_in_currency);
  if (Number.isNaN(encodedAmount) || encodedAmount <= 0) return null; // Invalid price

  const match = price.formatted_amount?.match(/[\d,]+\.?\d*/);
  if (match) {
    // Strip EVERY thousands separator: a plain replace(',', '') only removes
    // the first one, so "1,234,567" used to parse as 1234.
    const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
    if (!Number.isNaN(dollars)) return Math.round(dollars * 100);
  }

  return encodedAmount; // fallback
}

/**
 * Parse Facebook marketplace search results into FacebookListingDetails[].
 *
 * Ads without a title, without a price object, or with a non-positive or
 * non-finite price are dropped. Any ad that throws while being read is
 * skipped rather than failing the whole batch.
 *
 * @param ads - Raw `{ node }` wrappers extracted from the search payload.
 * @returns The listings that could be parsed, in input order.
 */
function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
  const results: FacebookListingDetails[] = [];

  for (const adJson of ads) {
    try {
      const listing = adJson.node.listing;
      const title = listing.marketplace_listing_title;
      const priceObj = listing.listing_price;

      if (!title || !priceObj) continue;

      const cents = resolveFacebookPriceCents(priceObj);
      if (cents === null || !Number.isFinite(cents) || cents <= 0) continue;

      const id = listing.id;
      const url = `https://www.facebook.com/marketplace/item/${id}`;

      // Extract address from location data if available
      const cityName =
        listing.location?.reverse_geocode?.city_page?.display_name;
      const address = cityName || null;

      // Determine listing status from Facebook flags (first matching flag wins)
      let listingStatus: string | undefined;
      if (listing.is_sold) {
        listingStatus = "SOLD";
      } else if (listing.is_pending) {
        listingStatus = "PENDING";
      } else if (listing.is_live) {
        listingStatus = "ACTIVE";
      } else if (listing.is_hidden) {
        listingStatus = "HIDDEN";
      }

      // creation_time is multiplied by 1000 before building the Date
      const creationDate = listing.creation_time
        ? new Date(listing.creation_time * 1000).toISOString()
        : undefined;

      // Extract image and video URLs
      const imageUrl = listing.primary_listing_photo?.image?.uri;
      const videoUrl = listing.listing_video
        ? `https://www.facebook.com/${listing.listing_video.id}/`
        : undefined;

      // Extract seller information
      const seller = listing.marketplace_listing_seller
        ? {
            name: listing.marketplace_listing_seller.name,
            id: listing.marketplace_listing_seller.id,
          }
        : undefined;

      const listingDetails: FacebookListingDetails = {
        url,
        title,
        listingPrice: {
          // formatCentsToCurrency takes cents and divides by 100 itself, so
          // pass `cents` directly; pre-dividing made the amount 100x too small.
          // NOTE(review): assumes this file uses the shared utils/format helper.
          amountFormatted:
            priceObj.formatted_amount || formatCentsToCurrency(cents, "en-CA"),
          cents,
          currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
        },
        address,
        creationDate,
        listingType: "item", // Default type for marketplace listings
        listingStatus,
        categoryId: listing.marketplace_listing_category_id,
        imageUrl,
        videoUrl,
        seller,
        deliveryTypes: listing.delivery_types,
      };

      results.push(listingDetails);
    } catch {
      // Skip malformed ads
      continue;
    }
  }

  return results;
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Search Facebook Marketplace and return the priced listings found on the
 * first results page.
 *
 * @param SEARCH_QUERY - Free-text search terms.
 * @param REQUESTS_PER_SECOND - Request rate used to derive the delay passed to
 *        `fetchHtml`; non-finite or non-positive values fall back to 1.
 * @param LOCATION - Marketplace location path segment (e.g. "toronto").
 * @param MAX_ITEMS - Upper bound on returned listings; negative values are
 *        treated as 0.
 * @param cookiesSource - Optional source handed to `loadFacebookCookies`.
 * @returns Parsed listings, or `[]` when the page could not be fetched with an
 *          HTTP error or contained no ads.
 * @throws Error when no cookies are available, or on non-HTTP fetch failures.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  LOCATION = "toronto",
  MAX_ITEMS = 25,
  cookiesSource?: string,
) {
  // Load Facebook cookies - required for Facebook Marketplace access
  const cookies = await loadFacebookCookies(cookiesSource);
  if (cookies.length === 0) {
    throw new Error(
      "Facebook cookies are required for marketplace access. " +
        "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
    );
  }

  // Format cookies for HTTP header
  const domain = "www.facebook.com";
  const cookiesHeader = formatCookiesForHeader(cookies, domain);
  if (!cookiesHeader) {
    throw new Error(
      "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
    );
  }

  // A zero, negative or NaN rate would produce an Infinity/garbage delay.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
      ? REQUESTS_PER_SECOND
      : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  // Both values are caller-supplied; encode them so they cannot alter the
  // URL structure (extra path segments, query parameters, ...).
  const encodedQuery = encodeURIComponent(SEARCH_QUERY);
  const encodedLocation = encodeURIComponent(LOCATION);

  // Facebook marketplace URL structure
  const searchUrl = `https://www.facebook.com/marketplace/${encodedLocation}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;

  console.log(`Fetching Facebook marketplace: ${searchUrl}`);
  console.log(`Using ${cookies.length} cookies for authentication`);

  let searchHtml: string;
  try {
    searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
      maxRetries: 3,
      onRateInfo: (remaining, reset) => {
        if (remaining && reset) {
          console.log(
            "\n" +
              `Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
          );
        }
      },
      cookies: cookiesHeader,
    });
  } catch (err) {
    if (err instanceof HttpError) {
      console.warn(
        `\nFacebook marketplace access failed (${err.status}): ${err.message}`,
      );
      if (err.status === 400 || err.status === 401 || err.status === 403) {
        console.warn(
          "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
        );
      }
      return [];
    }
    throw err;
  }

  const ads = extractFacebookMarketplaceData(searchHtml);
  if (!ads || ads.length === 0) {
    console.warn("No ads parsed from Facebook marketplace page.");
    return [];
  }

  console.log(`\nFound ${ads.length} raw ads. Processing...`);

  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );
  const totalProgress = ads.length;
  progressBar.start(totalProgress, 0);

  const items = parseFacebookAds(ads);

  // Filter to only priced items (already done in parseFacebookAds)
  const pricedItems = items.filter(
    (item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
  );

  progressBar.update(totalProgress);
  progressBar.stop();

  console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);

  // A negative limit would make slice() drop items from the end instead of
  // limiting the result, so clamp at zero.
  const limit = Math.max(0, Math.floor(MAX_ITEMS));
  return pricedItems.slice(0, limit); // Limit results
}
|
||||
290
packages/core/src/scrapers/kijiji.ts
Normal file
290
packages/core/src/scrapers/kijiji.ts
Normal file
@@ -0,0 +1,290 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import cliProgress from "cli-progress";
|
||||
import { fetchHtml, isRecord, HttpError } from "../utils/http";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
/** One hit taken from a search results page. */
interface SearchListing {
  /** Listing title as shown in the search results. */
  name: string;
  /** Absolute URL of the listing's detail page. */
  listingLink: string;
}

/** The `__APOLLO_STATE__` map: entity key -> arbitrary entity payload. */
type ApolloRecord = Record<string, unknown>;

/** Fields read from a listing entity on a search page. */
interface ApolloSearchItem {
  url?: string;
  title?: string;
  [k: string]: unknown;
}

/** Fields read from the listing entity on a detail page. */
interface ApolloListingRoot {
  url?: string;
  title?: string;
  description?: string;
  price?: { amount?: number | string; currency?: string };
  type?: string;
  status?: string;
  activationDate?: string;
  endDate?: string;
  metrics?: { views?: number | string };
  location?: { address?: string | null };
  [k: string]: unknown;
}
|
||||
|
||||
/** Normalised details of a single Kijiji listing, as produced by the scraper. */
export interface KijijiListingDetails {
  /** Absolute listing URL. */
  url: string;
  title: string;
  description?: string;
  /** Present only when the listing exposes a price. */
  listingPrice?: {
    /** Human-readable price string. */
    amountFormatted: string;
    cents?: number;
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  /** Filled from the listing's `activationDate`. */
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;
}
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/** Characters that are turned into a (collapsed) hyphen by `slugify`. */
const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]);

/**
 * Slugifies a string for Kijiji search URLs.
 *
 * The input is transliterated with `unidecode` and lower-cased; `a-z` and
 * `0-9` are kept, runs of separator characters become a single "-", and
 * every other character is dropped.
 */
export function slugify(input: string): string {
  const normalized = unidecode(input).toLowerCase();
  let slug = "";
  let previousWasHyphen = false;

  for (const ch of normalized) {
    const isAlphanumeric =
      (ch >= "a" && ch <= "z") || (ch >= "0" && ch <= "9");

    if (isAlphanumeric) {
      slug += ch;
      previousWasHyphen = false;
      continue;
    }

    // Separators collapse: emit a hyphen only if the previous output
    // character was not already one. Anything else is dropped.
    if (SEPS.has(ch) && !previousWasHyphen) {
      slug += "-";
      previousWasHyphen = true;
    }
  }

  return slug;
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
/**
 * Read `props.pageProps.__APOLLO_STATE__` from the `__NEXT_DATA__` element of
 * a Kijiji page.
 *
 * @returns The Apollo state record, or `null` when the element is missing or
 *          empty, its JSON is invalid, or the state is not a plain record.
 */
function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
  const { document } = parseHTML(htmlString);
  const payload = document.getElementById("__NEXT_DATA__")?.textContent;
  if (!payload) return null;

  let state: unknown;
  try {
    state = JSON.parse(payload)?.props?.pageProps?.__APOLLO_STATE__;
  } catch {
    return null;
  }

  return isRecord(state) ? state : null;
}
|
||||
|
||||
/**
 * Turn a search page's Apollo state into SearchListing[].
 *
 * Only entries whose key contains "Listing" and whose value is a record with
 * string `url` and `title` fields are kept. Relative URLs are prefixed with
 * `BASE_URL`.
 */
function parseSearch(
  htmlString: HTMLString,
  BASE_URL: string,
): SearchListing[] {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return [];

  return Object.entries(apolloState).flatMap(([key, value]): SearchListing[] => {
    // Heuristic: Kijiji listing keys usually contain "Listing"
    if (!key.includes("Listing") || !isRecord(value)) return [];

    const { url, title } = value as ApolloSearchItem;
    if (typeof url !== "string" || typeof title !== "string") return [];

    const listingLink = url.startsWith("http") ? url : `${BASE_URL}${url}`;
    return [{ listingLink, name: title }];
  });
}
|
||||
|
||||
/**
 * Parse a listing page into a typed object.
 *
 * The listing entity is the first Apollo state entry whose key contains
 * "Listing". A price is reported only when `price.amount` converts to a
 * finite number; that number is treated as cents.
 *
 * @param htmlString - Raw HTML of the listing page.
 * @param BASE_URL - Prefix applied to relative listing URLs.
 * @returns The listing details, or `null` when the page has no usable Apollo
 *          state, no listing entity, or the entity lacks a URL or title.
 */
function parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  // Find the listing root key
  const listingKey = Object.keys(apolloState).find((k) =>
    k.includes("Listing"),
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
  } = root as ApolloListingRoot;

  // Number() of a non-numeric string is NaN; treat that as "no price" rather
  // than emitting a formatted "NaN" amount.
  const rawCents = price?.amount != null ? Number(price.amount) : undefined;
  const cents =
    rawCents !== undefined && Number.isFinite(rawCents) ? rawCents : undefined;

  // formatCentsToCurrency expects cents and divides by 100 itself, so the
  // value must NOT be pre-divided (that made the amount 100x too small).
  const amountFormatted =
    cents !== undefined ? formatCentsToCurrency(cents, "en-CA") : undefined;

  const rawViews = metrics?.views != null ? Number(metrics.views) : undefined;
  const numberOfViews =
    rawViews !== undefined && Number.isFinite(rawViews) ? rawViews : undefined;

  const listingUrl =
    typeof url === "string"
      ? url.startsWith("http")
        ? url
        : `${BASE_URL}${url}`
      : "";

  if (!listingUrl || !title) return null;

  return {
    url: listingUrl,
    title,
    description,
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents,
          currency: price?.currency,
        }
      : undefined,
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews,
    address: location?.address ?? null,
  };
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Search Kijiji (Greater Toronto Area) and fetch details for every listing
 * found on the first results page.
 *
 * Listing pages are fetched one at a time. A listing that fails to download
 * is logged and skipped; only listings with a truthy price in cents are
 * returned.
 *
 * @param SEARCH_QUERY - Free-text search terms (slugified into the URL).
 * @param REQUESTS_PER_SECOND - Request rate used to derive the delay passed to
 *        `fetchHtml`; non-finite or non-positive values fall back to 1.
 * @param BASE_URL - Site origin used for the search URL and relative links.
 * @returns The parsed listings, or `undefined` when the search page yielded
 *          no results.
 */
export default async function fetchKijijiItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  BASE_URL = "https://www.kijiji.ca",
): Promise<KijijiListingDetails[] | undefined> {
  // A zero, negative or NaN rate would produce an Infinity/garbage delay.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
      ? REQUESTS_PER_SECOND
      : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  // Builds the rate-limit logger shared by the search and item requests.
  const logRateInfo =
    (label: string) => (remaining: unknown, reset: unknown) => {
      if (remaining && reset) {
        console.log(
          "\n" +
            `${label} - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
        );
      }
    };

  const searchUrl = `${BASE_URL}/b-gta-greater-toronto-area/${slugify(SEARCH_QUERY)}/k0l1700272?sort=relevancyDesc&view=list`;

  console.log(`Fetching search: ${searchUrl}`);
  const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
    maxRetries: 3,
    onRateInfo: logRateInfo("Search"),
  });

  const searchResults = parseSearch(searchHtml, BASE_URL);
  if (searchResults.length === 0) {
    console.warn("No search results parsed from page.");
    return;
  }

  // Deduplicate links
  const listingLinks = [...new Set(searchResults.map((r) => r.listingLink))];

  console.log(
    "\n" + `Found ${listingLinks.length} listing links. Fetching details...`,
  );

  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );
  let currentProgress = 0;
  progressBar.start(listingLinks.length, currentProgress);

  const items: KijijiListingDetails[] = [];
  try {
    for (const link of listingLinks) {
      try {
        const html = await fetchHtml(link, DELAY_MS, {
          maxRetries: 3,
          onRateInfo: logRateInfo("Item"),
        });
        const parsed = parseListing(html, BASE_URL);
        if (parsed?.listingPrice?.cents) items.push(parsed);
      } catch (err) {
        if (err instanceof HttpError) {
          console.error(
            "\n" + `Failed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
          );
        } else {
          console.error(
            "\n" +
              `Failed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
          );
        }
      } finally {
        currentProgress++;
        progressBar.update(currentProgress);
      }
    }
  } finally {
    // The bar was previously started but never stopped.
    progressBar.stop();
  }

  console.log("\n" + `Parsed ${items.length} listings.`);
  return items;
}
|
||||
20
packages/core/src/types/common.ts
Normal file
20
packages/core/src/types/common.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
/** Alias marking a string that holds an HTML document. */
export type HTMLString = string;

/** A price together with its display form. */
export interface Price {
  /** Human-readable price string. */
  amountFormatted: string;
  /** Amount in cents. */
  cents: number;
  /** Currency code for the amount. */
  currency: string;
}

/** Listing fields shared by every marketplace. */
export interface ListingDetails {
  url: string;
  title: string;
  listingPrice: Price;
  listingType: string;
  listingStatus: string;
  address?: string | null;
  creationDate?: string;
}
|
||||
8
packages/core/src/utils/delay.ts
Normal file
8
packages/core/src/utils/delay.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Pause for a given amount of time.
 * @param ms - How long to wait, in milliseconds
 * @returns A promise that resolves (with no value) once the timer fires
 */
export function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
21
packages/core/src/utils/format.ts
Normal file
21
packages/core/src/utils/format.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
 * Format an amount given in cents as a human-readable currency string.
 *
 * @param cents - Amount in cents; it is divided by 100 before formatting
 * @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
 * @param currency - Currency code to format with; defaults to "CAD", which
 *        was previously the only (hard-coded) option
 * @returns Formatted currency string with exactly two fraction digits. If
 *          `Intl.NumberFormat` rejects the locale or currency, a plain
 *          fallback is returned ("$12.34" for CAD, "<code> 12.34" otherwise).
 */
export function formatCentsToCurrency(
  cents: number,
  locale: string = "en-CA",
  currency: string = "CAD",
): string {
  const dollars = cents / 100;
  try {
    return new Intl.NumberFormat(locale, {
      style: "currency",
      currency,
      minimumFractionDigits: 2,
      maximumFractionDigits: 2,
    }).format(dollars);
  } catch {
    // Fallback if the locale (or currency code) is not supported
    const prefix = currency === "CAD" ? "$" : `${currency} `;
    return `${prefix}${dollars.toFixed(2)}`;
  }
}
|
||||
87
packages/core/src/utils/http.ts
Normal file
87
packages/core/src/utils/http.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
/** Custom error class for HTTP-related failures */
export class HttpError extends Error {
  /**
   * @param statusCode - HTTP status of the failed response (0 when no
   *        response was ever received)
   * @param message - Human-readable description
   */
  constructor(
    public statusCode: number,
    message: string
  ) {
    super(message);
    this.name = "HttpError";
  }
}

/** Type guard to check if a value is a record (non-null, non-array object) */
export function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Options accepted by `fetchHtml`: standard fetch options plus retry controls. */
export interface FetchHtmlOptions extends RequestInit {
  /**
   * Number of retries after the first attempt. When omitted (or not a
   * non-negative integer) three attempts are made in total.
   */
  maxRetries?: number;
  /**
   * Called after every response with the raw rate-limit header values.
   * NOTE(review): the header names read are `X-RateLimit-Remaining` and
   * `X-RateLimit-Reset` — confirm these match what the target sites send.
   */
  onRateInfo?: (remaining: string | null, reset: string | null) => void;
}

/** Only rate limiting (429) and server errors (5xx) are worth retrying. */
function isRetryableStatus(status: number): boolean {
  return status === 429 || status >= 500;
}

/**
 * Convert a `Retry-After` header to milliseconds. Handles both the
 * delay-seconds form and the HTTP-date form; returns `null` if unusable.
 */
function parseRetryAfterMs(header: string | null): number | null {
  if (!header) return null;
  const value = header.trim();
  if (/^\d+$/.test(value)) return Number(value) * 1000;
  const at = Date.parse(value);
  return Number.isNaN(at) ? null : Math.max(0, at - Date.now());
}

/**
 * Fetch HTML content from a URL with automatic retries
 *
 * Network failures, 429 and 5xx responses are retried with a linearly growing
 * wait (`delayMs * attemptNumber`, or the server's `Retry-After` for 429).
 * Other non-2xx responses fail immediately, since repeating the same request
 * cannot fix a client error.
 *
 * @param url - The URL to fetch
 * @param delayMs - Base delay in milliseconds between retries
 * @param opts - Optional fetch options, plus `maxRetries` / `onRateInfo`
 * @returns The HTML content as a string
 * @throws HttpError for non-2xx responses (carrying the last status seen);
 *         the underlying error when the request itself keeps failing
 */
export async function fetchHtml(
  url: string,
  delayMs: number,
  opts?: FetchHtmlOptions
): Promise<string> {
  // Keep the retry controls out of the object handed to fetch().
  const { maxRetries, onRateInfo, ...init } = opts ?? {};
  const maxAttempts =
    typeof maxRetries === "number" && Number.isInteger(maxRetries) && maxRetries >= 0
      ? maxRetries + 1
      : 3;
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    let waitMs = delayMs * (attempt + 1);

    try {
      const response = await fetch(url, init);
      onRateInfo?.(
        response.headers.get("X-RateLimit-Remaining"),
        response.headers.get("X-RateLimit-Reset")
      );

      if (response.ok) return await response.text();

      if (response.status === 429) {
        // Honour the server's requested back-off when it is parseable.
        waitMs = parseRetryAfterMs(response.headers.get("Retry-After")) ?? waitMs;
      }
      throw new HttpError(
        response.status,
        response.status >= 500
          ? `Server error: ${response.status}`
          : `HTTP ${response.status}: ${response.statusText}`
      );
    } catch (error) {
      // Client errors (4xx other than 429) are final: surface them right away.
      if (error instanceof HttpError && !isRetryableStatus(error.statusCode)) {
        throw error;
      }
      lastError =
        error instanceof Error
          ? error
          : new Error("Unknown error during fetch");
    }

    if (attempt === maxAttempts - 1) break; // nothing left to wait for

    if (lastError instanceof HttpError && lastError.statusCode === 429) {
      console.warn(`Rate limited. Retrying after ${waitMs}ms...`);
    }
    await new Promise((resolve) => setTimeout(resolve, waitMs));
  }

  throw lastError || new HttpError(0, "Failed to fetch after retries");
}
|
||||
13
packages/core/tsconfig.json
Normal file
13
packages/core/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
  "compilerOptions": {
    "lib": ["ESNext", "DOM"],
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "paths": {
      "@/*": ["./src/*"]
    },
    "strict": true,
    "noEmit": true
  }
}
|
||||
21
packages/mcp-server/package.json
Normal file
21
packages/mcp-server/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "@marketplace-scrapers/mcp-server",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"module": "./src/index.ts",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"start": "bun ./src/index.ts",
|
||||
"dev": "bun --watch ./src/index.ts",
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp"
|
||||
},
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
33
packages/mcp-server/src/index.ts
Normal file
33
packages/mcp-server/src/index.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import { handleMcpRequest } from "./protocol/handler";
|
||||
import { serverCard } from "./protocol/metadata";
|
||||
|
||||
const PORT = process.env.MCP_PORT || 4006;

// The server card is serialised once at startup and registered as a static
// route value.
const serverCardResponse = new Response(JSON.stringify(serverCard), {
  headers: { "Content-Type": "application/json" },
});

/** MCP JSON-RPC 2.0 protocol endpoint: POST is handled, anything else is a 405. */
async function mcpRoute(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    return Response.json(
      { message: "MCP endpoint requires POST request" },
      { status: 405 }
    );
  }
  return await handleMcpRequest(req);
}

const server = Bun.serve({
  port: PORT as number | string,
  idleTimeout: 0,
  routes: {
    // MCP metadata discovery endpoint
    "/.well-known/mcp/server-card.json": serverCardResponse,
    "/mcp": mcpRoute,
  },

  // Fallback for all other routes
  fetch(req: Request) {
    return new Response("Not Found", { status: 404 });
  },
});

console.log(`MCP Server running on ${server.hostname}:${server.port}`);
|
||||
185
packages/mcp-server/src/protocol/handler.ts
Normal file
185
packages/mcp-server/src/protocol/handler.ts
Normal file
@@ -0,0 +1,185 @@
|
||||
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
import { tools } from "./tools";
|
||||
|
||||
/** Number of listings returned when `maxItems` is missing or not a positive integer. */
const DEFAULT_MAX_ITEMS = 5;

/** Build a JSON-RPC 2.0 error response, optionally with a non-200 HTTP status. */
function jsonRpcError(
  id: unknown,
  code: number,
  message: string,
  status?: number
): Response {
  const payload = { jsonrpc: "2.0", id, error: { code, message } };
  return status === undefined
    ? Response.json(payload)
    : Response.json(payload, { status });
}

/** Normalise the caller-supplied `maxItems` argument to a positive integer. */
function resolveMaxItems(raw: unknown): number {
  return typeof raw === "number" && Number.isInteger(raw) && raw > 0
    ? raw
    : DEFAULT_MAX_ITEMS;
}

/**
 * Handle MCP JSON-RPC 2.0 protocol requests
 *
 * Supported methods: `initialize`, `tools/list`, `tools/call`. Messages
 * without an id (other than the two methods answered first) are treated as
 * notifications and acknowledged with an empty 204.
 *
 * @param req - Incoming HTTP request whose body is a single JSON-RPC message.
 * @returns A JSON-RPC response. Errors are reported in the body: -32700 for
 *          unparsable JSON, -32600 for malformed requests, -32601 for unknown
 *          methods/tools, -32602 for bad params, -32603 for tool failures.
 */
export async function handleMcpRequest(req: Request): Promise<Response> {
  // Only a failure to read/parse the body is a JSON-RPC "Parse error".
  let body;
  try {
    body = await req.json();
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    return jsonRpcError(null, -32700, `Parse error: ${errorMessage}`, 400);
  }

  try {
    // Validate JSON-RPC 2.0 format
    if (
      body === null ||
      typeof body !== "object" ||
      body.jsonrpc !== "2.0" ||
      !body.method
    ) {
      return jsonRpcError(body?.id ?? null, -32600, "Invalid Request", 400);
    }

    const { method, params, id } = body;

    // Handle initialize method
    if (method === "initialize") {
      return Response.json({
        jsonrpc: "2.0",
        id,
        result: {
          protocolVersion: "2025-06-18",
          capabilities: {
            tools: {
              listChanged: true,
            },
          },
          serverInfo: {
            name: "marketplace-scrapers",
            version: "1.0.0",
          },
          instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
        },
      });
    }

    // Handle tools/list method
    if (method === "tools/list") {
      return Response.json({
        jsonrpc: "2.0",
        id,
        result: {
          tools,
        },
      });
    }

    // Notifications carry no id and must not get a JSON-RPC response. Test
    // for absence explicitly: `!id` would also swallow the valid id `0`.
    if (id === undefined || id === null) {
      return new Response(null, { status: 204 });
    }

    // Handle tools/call method
    if (method === "tools/call") {
      const { name, arguments: args } = params || {};

      if (!name || !args) {
        return jsonRpcError(
          id,
          -32602,
          "Invalid params: name and arguments required",
          400
        );
      }

      if (
        name !== "search_kijiji" &&
        name !== "search_facebook" &&
        name !== "search_ebay"
      ) {
        return jsonRpcError(id, -32601, `Unknown tool: ${name}`);
      }

      const query = args.query;
      if (!query) {
        return jsonRpcError(id, -32602, "query parameter is required");
      }

      // Route tool calls to appropriate handlers
      try {
        const maxItems = resolveMaxItems(args.maxItems);
        let result;

        if (name === "search_kijiji") {
          // The scraper's second parameter is REQUESTS_PER_SECOND, not a
          // result limit, so keep its default rate and cap the result here.
          const items = await fetchKijijiItems(query);
          result = (items || []).slice(0, maxItems);
        } else if (name === "search_facebook") {
          // Positional order is (query, REQUESTS_PER_SECOND, LOCATION,
          // MAX_ITEMS, cookiesSource); `undefined` keeps the default rate.
          const items = await fetchFacebookItems(
            query,
            undefined,
            args.location || "toronto",
            maxItems,
            args.cookiesSource
          );
          result = items || [];
        } else {
          // NOTE(review): fetchEbayItems' signature is not visible here; the
          // original call is kept as-is — confirm what its second positional
          // parameter means.
          const items = await fetchEbayItems(query, args.maxItems || 5, {
            minPrice: args.minPrice,
            maxPrice: args.maxPrice,
            strictMode: args.strictMode || false,
            exclusions: args.exclusions || [],
            keywords: args.keywords || [query],
          });
          result = items || [];
        }

        return Response.json({
          jsonrpc: "2.0",
          id,
          result: {
            content: [
              {
                type: "text",
                text: JSON.stringify(result, null, 2),
              },
            ],
          },
        });
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        return jsonRpcError(id, -32603, `Tool execution failed: ${errorMessage}`);
      }
    }

    // Method not found
    return jsonRpcError(id, -32601, `Method not found: ${method}`, 404);
  } catch (error) {
    // Anything unexpected after parsing is an internal error, not a parse error.
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    return jsonRpcError(
      body?.id ?? null,
      -32603,
      `Internal error: ${errorMessage}`,
      500
    );
  }
}
|
||||
25
packages/mcp-server/src/protocol/metadata.ts
Normal file
25
packages/mcp-server/src/protocol/metadata.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
 * MCP Server metadata for discovery
 */

// Identity block reported under `serverInfo`.
const serverInfo = {
  name: "marketplace-scrapers",
  title: "Marketplace Scrapers MCP Server",
  version: "1.0.0",
};

// How clients reach the server.
const transport = {
  type: "streamable-http",
  endpoint: "/mcp",
};

// Advertised capabilities.
const capabilities = {
  tools: {
    listChanged: true,
  },
};

/** Server card object; key order is kept stable because it is serialised as-is. */
export const serverCard = {
  $schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
  version: "1.0",
  protocolVersion: "2025-06-18",
  serverInfo,
  transport,
  capabilities,
  description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
  tools: "dynamic",
};
|
||||
95
packages/mcp-server/src/protocol/tools.ts
Normal file
95
packages/mcp-server/src/protocol/tools.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
/**
 * Tool catalogue for the marketplace scrapers MCP server.
 *
 * Every entry carries a tool `name`, a human-readable `description`, and an
 * `inputSchema` object listing the accepted properties and which are required.
 * The small builders below produce the property schemas that recur across tools.
 */

/** Builds a string-typed property schema with the given description. */
function textProperty(description: string) {
  return { type: "string", description };
}

/** Builds a number-typed property schema (no default) with the given description. */
function numericProperty(description: string) {
  return { type: "number", description };
}

/** Builds an array-of-strings property schema with the given description. */
function textListProperty(description: string) {
  return { type: "array", items: { type: "string" }, description };
}

/** Builds the `maxItems` property shared by every search tool (number, default 5). */
function maxItemsProperty() {
  return {
    type: "number",
    description: "Maximum number of items to return",
    default: 5,
  };
}

export const tools = [
  // Kijiji: free-text query plus an optional result cap.
  {
    name: "search_kijiji",
    description: "Search Kijiji marketplace for listings matching a query",
    inputSchema: {
      type: "object",
      properties: {
        query: textProperty("Search query for Kijiji listings"),
        maxItems: maxItemsProperty(),
      },
      required: ["query"],
    },
  },
  // Facebook Marketplace: query, location (defaults to "toronto"), result cap, optional cookies source.
  {
    name: "search_facebook",
    description: "Search Facebook Marketplace for listings matching a query",
    inputSchema: {
      type: "object",
      properties: {
        query: textProperty("Search query for Facebook Marketplace listings"),
        location: {
          type: "string",
          description: "Location for search (e.g., 'toronto')",
          default: "toronto",
        },
        maxItems: maxItemsProperty(),
        cookiesSource: textProperty("Optional Facebook session cookies source"),
      },
      required: ["query"],
    },
  },
  // eBay: query with optional price bounds, strict mode, exclusion/keyword lists, and result cap.
  {
    name: "search_ebay",
    description: "Search eBay for listings matching a query",
    inputSchema: {
      type: "object",
      properties: {
        query: textProperty("Search query for eBay listings"),
        minPrice: numericProperty("Minimum price filter"),
        maxPrice: numericProperty("Maximum price filter"),
        strictMode: {
          type: "boolean",
          description: "Enable strict search mode",
          default: false,
        },
        exclusions: textListProperty("Terms to exclude from results"),
        keywords: textListProperty("Keywords to include in search"),
        maxItems: maxItemsProperty(),
      },
      required: ["query"],
    },
  },
];
|
||||
13
packages/mcp-server/tsconfig.json
Normal file
13
packages/mcp-server/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
  "compilerOptions": {
    "lib": ["ESNext", "DOM"],
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "paths": {
      "@/*": ["./src/*"]
    },
    "strict": true,
    "noEmit": true
  }
}
|
||||
Reference in New Issue
Block a user