Compare commits

...

31 Commits

Author SHA1 Message Date
e4ab145d70 feat: add cookie support to kijiji scraper
Add optional cookie parameter to bypass bot detection (403 errors).
Cookies can be provided via parameter, KIJIJI_COOKIE env var, or
cookies/kijiji.json file. Supports both JSON array and string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:13 -05:00
1dce0392e3 refactor: use shared cookie utility in ebay scraper
Replace inline cookie loading with shared utility functions.
Now supports both JSON array and cookie string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:02 -05:00
251fcbb7d9 refactor: use shared cookie utility in facebook scraper
Replace inline cookie parsing with shared utility functions.
Maintains backward compatibility with existing exports.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:52 -05:00
9bc57d6b54 refactor: add shared cookie utility to core package
Move cookie parsing logic to a dedicated utility module that can be
shared across all scrapers. Supports both JSON array and cookie string
formats for all input sources (parameter, env var, file).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:44 -05:00
4a467c9f02 fix: support both json and string cookies for facebook 2026-01-23 19:00:51 -05:00
f944d319c2 chore: update dockerignore 2026-01-23 15:43:13 -05:00
cf9784a565 feat: implement cookie priority hierarchy (URL param > env var > file) for Facebook and eBay scrapers 2026-01-23 15:32:17 -05:00
df0c528535 fix: correct formatCentsToCurrency usage in facebook scraper 2026-01-23 14:50:41 -05:00
2f97d3eafd fix: correct formatCentsToCurrency usage in kijiji scraper 2026-01-23 14:50:41 -05:00
65eb8d1724 refactor: increase kijiji scraping request rate to 4 rps
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 14:50:37 -05:00
f3839aba54 fix: increase kijiji rate limit to 4 rps
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:59:47 -05:00
90b98bfb09 chore: testing mcp server
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:59:28 -05:00
eb6705df0f feat: add 60-second timeouts to MCP request handlers for reliability 2026-01-23 13:59:28 -05:00
72525609ed fix: set idle timeout to 255 seconds in MCP server to prevent premature shutdown 2026-01-23 13:59:28 -05:00
8b0a65860c chore: add imports for linkedom and delay utils in ebay scraper 2026-01-23 13:10:44 -05:00
f9b1c7e096 fix: remove eslint-disable directives
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:08:38 -05:00
9edc74cbeb chore: local dev scripts
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:07:01 -05:00
ee0fca826d style: fix formatting in MCP server 2026-01-23 11:56:54 -05:00
f7372612fb test: fix formatting in test setup 2026-01-23 11:56:51 -05:00
bce126664e test: remove unused imports in Kijiji utils tests 2026-01-23 11:56:47 -05:00
8cbf11538e test: fix formatting and remove unused HttpError import in Kijiji tests 2026-01-23 11:56:44 -05:00
79f47fdaef test: remove unused import in Facebook integration tests 2026-01-23 11:56:41 -05:00
de5069bf2b test: fix unused variable in Facebook core tests 2026-01-23 11:56:38 -05:00
637f1a4e75 fix: resolve biome lint errors and warnings 2026-01-23 10:33:15 -05:00
441ff436c4 feat(mcp): extend Kijiji tool with filtering parameters 2026-01-23 09:55:37 -05:00
1f53ec912a feat(mcp): add search options to Kijiji and eBay tools 2026-01-23 09:55:21 -05:00
053efd815b feat(api/kijiji): add filtering and pagination parameters 2026-01-23 09:54:30 -05:00
d619fa5d77 feat(api/facebook): add maxItems parameter support 2026-01-23 09:53:51 -05:00
050fd0adba feat(api/ebay): add maxItems parameter and error handling 2026-01-23 09:53:00 -05:00
7b106c91ce style: format ebay scraper with consistent indentation 2026-01-23 09:52:25 -05:00
6e0487f8f3 style: format api-server index with consistent indentation 2026-01-23 09:52:22 -05:00
33 changed files with 3407 additions and 2871 deletions

View File

@@ -1,145 +1,84 @@
# Dependencies # =============================================================================
# Dependencies & Build Output
# =============================================================================
node_modules/ node_modules/
npm-debug.log* dist/
yarn-debug.log* out/
yarn-error.log*
bun.sum
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage/
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
jspm_packages/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz *.tgz
# Yarn Integrity file # =============================================================================
.yarn-integrity # Sensitive Files
# =============================================================================
# dotenv environment variables file
.env .env
.env.local .env.*
.env.development.local .envrc
.env.test.local cookies/
.env.production.local *.pem
*.key
*.cert
*secret*
*credential*
# parcel-bundler cache (https://parceljs.org/) # =============================================================================
.cache # Development Tools & Config
.parcel-cache # =============================================================================
# Nix/Devenv
.devenv/
.devenv.flake.nix
devenv.*
.direnv/
# Next.js build output # Linting/Formatting
.next biome.json
.eslintcache
.pre-commit-config.yaml
# Nuxt.js build / generate output # IDE/Editor
.nuxt
dist
# Gatsby files
.cache/
public
# Vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# IDE and editor files
.vscode/ .vscode/
.idea/ .idea/
*.swp *.swp
*.swo *.swo
*~ *~
# OS generated files # AI Assistant Config
.DS_Store .claude/
.DS_Store? CLAUDE.md
._* AGENTS.md
.Spotlight-V100 opencode.jsonc
.Trashes
ehthumbs.db
Thumbs.db
# Git # =============================================================================
.git # Documentation (not needed at runtime)
# =============================================================================
README.md
*.md
docs/
# =============================================================================
# Git & Docker (avoid recursive inclusion)
# =============================================================================
.git/
.gitignore .gitignore
# Docker
Dockerfile* Dockerfile*
.dockerignore .dockerignore
# Documentation # =============================================================================
README.md # Testing & Coverage
docs/ # =============================================================================
# Test files
test/ test/
tests/ tests/
*.test.js
*.test.ts *.test.ts
*.spec.js
*.spec.ts *.spec.ts
coverage/
*.lcov
.nyc_output/
# Development files # =============================================================================
CLAUDE.md # OS & Misc
devenv.* # =============================================================================
.DS_Store
Thumbs.db
*.log *.log
*.pid
# Runtime cookies/config .cache/
cookies/ examples/
scripts/

View File

@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
- `GET /api/status` - Health check - `GET /api/status` - Health check
- `GET /api/kijiji?q={query}` - Search Kijiji - `GET /api/kijiji?q={query}` - Search Kijiji
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook - `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay - `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
- `GET /api/*` - 404 fallback - `GET /api/*` - 404 fallback
### MCP Server (`@marketplace-scrapers/mcp-server`) ### MCP Server (`@marketplace-scrapers/mcp-server`)
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
**Tools:** **Tools:**
- `search_kijiji` - Search Kijiji (query, maxItems) - `search_kijiji` - Search Kijiji (query, maxItems)
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource) - `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems) - `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
## API Response Formats ## API Response Formats
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
### eBay-specific fields ### eBay-specific fields
Minimal - mainly the common fields Minimal - mainly the common fields
## Cookie Management
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
### Cookie Priority Hierarchy (High → Low)
All scrapers follow this loading order:
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
### Facebook Cookies
- **Required for**: Facebook Marketplace scraping
- **Format**: JSON array or cookie string `"name=value; name2=value2"` (see `cookies/README.md`)
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
**Setup:**
```bash
# Option 1: File (fallback)
# Create cookies/facebook.json with cookie array
# Option 2: Environment variable
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
# Option 3: URL parameter (highest priority)
curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]"
```
### eBay Cookies
- **Required for**: Bypassing bot detection
- **Format**: Cookie string `"name=value; name2=value2"` (a JSON array is also accepted)
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
**Setup:**
```bash
# Option 1: File (fallback)
# Create cookies/ebay.json with cookie string
# Option 2: Environment variable
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
# Option 3: URL parameter (highest priority)
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
```
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
## Technical Details ## Technical Details
- **TypeScript** with path mapping (`@/*` → `src/*`) per package - **TypeScript** with path mapping (`@/*` → `src/*`) per package
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
## Development Notes ## Development Notes
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json` - **Cookie files** are git-ignored for security (see `cookies/README.md`)
- eBay uses custom headers to bypass basic bot detection
- Kijiji parses Apollo state from Next.js hydration data - Kijiji parses Apollo state from Next.js hydration data
- All scrapers handle retries on 429/5xx errors - All scrapers handle retries on 429/5xx errors
- Cookie priority ensures flexibility across different deployment environments

View File

@@ -1,34 +1,34 @@
{ {
"$schema": "https://biomejs.dev/schemas/2.3.11/schema.json", "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
"vcs": { "vcs": {
"enabled": true, "enabled": true,
"clientKind": "git", "clientKind": "git",
"useIgnoreFile": true "useIgnoreFile": true
}, },
"files": { "files": {
"includes": ["**", "!!**/dist"] "includes": ["**", "!!**/dist"]
}, },
"formatter": { "formatter": {
"enabled": true, "enabled": true,
"indentStyle": "space" "indentStyle": "space"
}, },
"linter": { "linter": {
"enabled": true, "enabled": true,
"rules": { "rules": {
"recommended": true "recommended": true
} }
}, },
"javascript": { "javascript": {
"formatter": { "formatter": {
"quoteStyle": "double" "quoteStyle": "double"
} }
}, },
"assist": { "assist": {
"enabled": true, "enabled": true,
"actions": { "actions": {
"source": { "source": {
"organizeImports": "on" "organizeImports": "on"
} }
} }
} }
} }

View File

@@ -1,24 +1,33 @@
# Facebook Marketplace Cookies Setup # Marketplace Cookies Setup
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies. Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
## Option 1: Cookies File (`facebook.json`) ## Cookie Priority Hierarchy
1. Log into Facebook in your browser All scrapers follow this priority order (highest to lowest):
2. Open Developer Tools → Network tab 1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
3. Visit facebook.com/marketplace (ensure you're logged in) 2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
4. Look for any marketplace-related requests in the Network tab 3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
5. Export cookies from the browser's Application/Storage → Cookies section
6. Save the cookies as a JSON array to `facebook.json`
The `facebook.json` file should contain Facebook session cookies, particularly: ---
## Facebook Marketplace (`facebook.json`)
### Required Cookies
- `c_user`: Your Facebook user ID - `c_user`: Your Facebook user ID
- `xs`: Facebook session token - `xs`: Facebook session token
- `fr`: Facebook request token - `fr`: Facebook request token
- `datr`: Data attribution token - `datr`: Data attribution token
- `sb`: Session browser token - `sb`: Session browser token
Example structure: ### Setup Methods
**Method 1: Cookie File (Lowest Priority)**
1. Log into Facebook in your browser
2. Open Developer Tools → Application/Storage → Cookies
3. Export cookies as JSON array to `facebook.json`
Example `facebook.json`:
```json ```json
[ [
{ {
@@ -27,26 +36,59 @@ Example structure:
"domain": ".facebook.com", "domain": ".facebook.com",
"path": "/", "path": "/",
"secure": true "secure": true
}, }
// ... other cookies
] ]
``` ```
## Option 2: URL Parameter **Method 2: Environment Variable**
```bash
You can pass cookies directly via the `cookies` URL parameter: export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
``` ```
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
**Method 3: URL Parameter (Highest Priority)**
``` ```
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
```
---
## eBay (`ebay.json`)
eBay has aggressive bot detection that blocks requests without valid session cookies.
### Setup Methods
**Method 1: Cookie File (Lowest Priority)**
1. Log into eBay in your browser
2. Open Developer Tools → Network tab
3. Visit ebay.ca and inspect any request headers
4. Copy the full `Cookie` header value
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
Example `ebay.json`:
```
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
```
**Method 2: Environment Variable**
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
**Method 3: URL Parameter (Highest Priority)**
```
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
```
---
## Important Notes ## Important Notes
- Cookies must be from an active Facebook session - Cookies must be from active browser sessions
- Cookies expire, so you may need to refresh them periodically - Cookies expire and need periodic refresh
- Never share real cookies or commit them to version control - **NEVER** commit real cookies to version control
- Facebook may block automated scraping even with valid cookies - Platforms may still block automated scraping despite valid cookies
## Security ## Security
The cookies file is intentionally left out of version control for security reasons. All `*.json` files in this directory are git-ignored for security.

View File

@@ -0,0 +1 @@
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE

9
opencode.jsonc Normal file
View File

@@ -0,0 +1,9 @@
{
"$schema": "https://opencode.ai/config.json",
"mcp": {
"marketplace-scrape": {
"type": "remote",
"url": "http://localhost:4006/mcp"
}
}
}

View File

@@ -2,11 +2,19 @@
"name": "marketplace-scrapers-monorepo", "name": "marketplace-scrapers-monorepo",
"version": "1.0.0", "version": "1.0.0",
"scripts": { "scripts": {
"ci": "biome ci" "ci": "biome ci",
"clean": "rm -rf dist",
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp",
"build": "bun run clean && bun run build:all",
"start": "./scripts/start.sh"
}, },
"private": true, "private": true,
"type": "module", "type": "module",
"workspaces": ["packages/*"], "workspaces": [
"packages/*"
],
"devDependencies": { "devDependencies": {
"@biomejs/biome": "2.3.11" "@biomejs/biome": "2.3.11"
} }

View File

@@ -1,30 +1,30 @@
import { statusRoute } from "./routes/status";
import { kijijiRoute } from "./routes/kijiji";
import { facebookRoute } from "./routes/facebook";
import { ebayRoute } from "./routes/ebay"; import { ebayRoute } from "./routes/ebay";
import { facebookRoute } from "./routes/facebook";
import { kijijiRoute } from "./routes/kijiji";
import { statusRoute } from "./routes/status";
const PORT = process.env.PORT || 4005; const PORT = process.env.PORT || 4005;
const server = Bun.serve({ const server = Bun.serve({
port: PORT as number | string, port: PORT as number | string,
idleTimeout: 0, idleTimeout: 0,
routes: { routes: {
// Health check endpoint // Health check endpoint
"/api/status": statusRoute, "/api/status": statusRoute,
// Marketplace search endpoints // Marketplace search endpoints
"/api/kijiji": kijijiRoute, "/api/kijiji": kijijiRoute,
"/api/facebook": facebookRoute, "/api/facebook": facebookRoute,
"/api/ebay": ebayRoute, "/api/ebay": ebayRoute,
// Fallback for unmatched /api routes // Fallback for unmatched /api routes
"/api/*": Response.json({ message: "Not found" }, { status: 404 }), "/api/*": Response.json({ message: "Not found" }, { status: 404 }),
}, },
// Fallback for all other routes // Fallback for all other routes
fetch(req: Request) { fetch(_req: Request) {
return new Response("Not Found", { status: 404 }); return new Response("Not Found", { status: 404 });
}, },
}); });
console.log(`API Server running on ${server.hostname}:${server.port}`); console.log(`API Server running on ${server.hostname}:${server.port}`);

View File

@@ -1,60 +1,68 @@
import { fetchEbayItems } from "@marketplace-scrapers/core"; import { fetchEbayItems } from "@marketplace-scrapers/core";
/** /**
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly} * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
* Search eBay for listings (default: Buy It Now only, Canada only) * Search eBay for listings (default: Buy It Now only, Canada only)
* Optional: Pass cookies parameter to bypass bot detection
*/ */
export async function ebayRoute(req: Request): Promise<Response> { export async function ebayRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); try {
const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null; req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY) if (!SEARCH_QUERY)
return Response.json( return Response.json(
{ {
message: message:
"Request didn't have 'query' header or 'q' search parameter!", "Request didn't have 'query' header or 'q' search parameter!",
}, },
{ status: 400 }, { status: 400 },
); );
// Parse optional parameters with defaults const minPriceParam = reqUrl.searchParams.get("minPrice");
const minPrice = reqUrl.searchParams.get("minPrice") const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
? parseInt(reqUrl.searchParams.get("minPrice")!) const maxPriceParam = reqUrl.searchParams.get("maxPrice");
: undefined; const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
const maxPrice = reqUrl.searchParams.get("maxPrice") const strictMode = reqUrl.searchParams.get("strictMode") === "true";
? parseInt(reqUrl.searchParams.get("maxPrice")!) const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
: undefined; const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const strictMode = reqUrl.searchParams.get("strictMode") === "true"; const exclusionsParam = reqUrl.searchParams.get("exclusions");
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false"; const exclusions = exclusionsParam
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false"; ? exclusionsParam.split(",").map((s) => s.trim())
const exclusionsParam = reqUrl.searchParams.get("exclusions"); : [];
const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : []; const keywordsParam = reqUrl.searchParams.get("keywords");
const keywordsParam = reqUrl.searchParams.get("keywords"); const keywords = keywordsParam
const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY]; ? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
try { const maxItemsParam = reqUrl.searchParams.get("maxItems");
const items = await fetchEbayItems(SEARCH_QUERY, 5, { const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
minPrice, const cookies = reqUrl.searchParams.get("cookies") || undefined;
maxPrice,
strictMode, const items = await fetchEbayItems(SEARCH_QUERY, 1, {
exclusions, minPrice,
keywords, maxPrice,
buyItNowOnly, strictMode,
canadaOnly, exclusions,
}); keywords,
if (!items || items.length === 0) buyItNowOnly,
return Response.json( canadaOnly,
{ message: "Search didn't return any results!" }, cookies,
{ status: 404 }, });
);
return Response.json(items, { status: 200 }); const results = maxItems ? items.slice(0, maxItems) : items;
} catch (error) {
console.error("eBay scraping error:", error); if (!results || results.length === 0)
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; return Response.json(
return Response.json( { message: "Search didn't return any results!" },
{ message: errorMessage }, { status: 404 },
{ status: 400 }, );
); return Response.json(results, { status: 200 });
} } catch (error) {
console.error("eBay scraping error:", error);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });
}
} }

View File

@@ -5,36 +5,42 @@ import { fetchFacebookItems } from "@marketplace-scrapers/core";
* Search Facebook Marketplace for listings * Search Facebook Marketplace for listings
*/ */
export async function facebookRoute(req: Request): Promise<Response> { export async function facebookRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null; req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY) if (!SEARCH_QUERY)
return Response.json( return Response.json(
{ {
message: message: "Request didn't have 'query' header or 'q' search parameter!",
"Request didn't have 'query' header or 'q' search parameter!", },
}, { status: 400 },
{ status: 400 }, );
);
const LOCATION = reqUrl.searchParams.get("location") || "toronto"; const LOCATION = reqUrl.searchParams.get("location") || "toronto";
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined; const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25;
try { try {
const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE); const items = await fetchFacebookItems(
if (!items || items.length === 0) SEARCH_QUERY,
return Response.json( 1,
{ message: "Search didn't return any results!" }, LOCATION,
{ status: 404 }, maxItems,
); COOKIES_SOURCE,
return Response.json(items, { status: 200 }); undefined,
} catch (error) { );
console.error("Facebook scraping error:", error); if (!items || items.length === 0)
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; return Response.json(
return Response.json( { message: "Search didn't return any results!" },
{ message: errorMessage }, { status: 404 },
{ status: 400 }, );
); return Response.json(items, { status: 200 });
} } catch (error) {
console.error("Facebook scraping error:", error);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });
}
} }

View File

@@ -5,33 +5,63 @@ import { fetchKijijiItems } from "@marketplace-scrapers/core";
* Search Kijiji marketplace for listings * Search Kijiji marketplace for listings
*/ */
export async function kijijiRoute(req: Request): Promise<Response> { export async function kijijiRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null; req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY) if (!SEARCH_QUERY)
return Response.json( return Response.json(
{ {
message: message: "Request didn't have 'query' header or 'q' search parameter!",
"Request didn't have 'query' header or 'q' search parameter!", },
}, { status: 400 },
{ status: 400 }, );
);
try { const maxPagesParam = reqUrl.searchParams.get("maxPages");
const items = await fetchKijijiItems(SEARCH_QUERY, 5); const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5;
if (!items) const priceMinParam = reqUrl.searchParams.get("priceMin");
return Response.json( const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined;
{ message: "Search didn't return any results!" }, const priceMaxParam = reqUrl.searchParams.get("priceMax");
{ status: 404 }, const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined;
);
return Response.json(items, { status: 200 }); const searchOptions = {
} catch (error) { location: reqUrl.searchParams.get("location") || undefined,
console.error("Kijiji scraping error:", error); category: reqUrl.searchParams.get("category") || undefined,
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred"; keywords: reqUrl.searchParams.get("keywords") || undefined,
return Response.json( sortBy: reqUrl.searchParams.get("sortBy") as
{ message: errorMessage }, | "relevancy"
{ status: 400 }, | "date"
); | "price"
} | "distance"
| undefined,
sortOrder: reqUrl.searchParams.get("sortOrder") as
| "desc"
| "asc"
| undefined,
maxPages,
priceMin,
priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
};
try {
const items = await fetchKijijiItems(
SEARCH_QUERY,
4, // 4 requests per second for faster scraping
"https://www.kijiji.ca",
searchOptions,
{},
);
if (!items)
return Response.json(
{ message: "Search didn't return any results!" },
{ status: 404 },
);
return Response.json(items, { status: 200 });
} catch (error) {
console.error("Kijiji scraping error:", error);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });
}
} }

View File

@@ -2,5 +2,5 @@
* Health check endpoint * Health check endpoint
*/ */
export function statusRoute(): Response { export function statusRoute(): Response {
return new Response("OK", { status: 200 }); return new Response("OK", { status: 200 });
} }

View File

@@ -1,45 +1,43 @@
// Export all scrapers // Export all scrapers
export {
default as fetchKijijiItems,
slugify,
resolveLocationId,
resolveCategoryId,
buildSearchUrl,
extractApolloState,
parseSearch,
parseDetailedListing,
HttpError,
NetworkError,
ParseError,
RateLimitError,
ValidationError,
} from "./scrapers/kijiji";
export type {
KijijiListingDetails,
DetailedListing,
SearchOptions,
ListingFetchOptions,
} from "./scrapers/kijiji";
export {
default as fetchFacebookItems,
fetchFacebookItem,
parseFacebookCookieString,
ensureFacebookCookies,
extractFacebookMarketplaceData,
extractFacebookItemData,
parseFacebookAds,
parseFacebookItem,
} from "./scrapers/facebook";
export type { FacebookListingDetails } from "./scrapers/facebook";
export { default as fetchEbayItems } from "./scrapers/ebay";
export type { EbayListingDetails } from "./scrapers/ebay"; export type { EbayListingDetails } from "./scrapers/ebay";
export { default as fetchEbayItems } from "./scrapers/ebay";
// Export shared utilities export type { FacebookListingDetails } from "./scrapers/facebook";
export * from "./utils/http"; export {
export * from "./utils/delay"; default as fetchFacebookItems,
export * from "./utils/format"; ensureFacebookCookies,
extractFacebookItemData,
extractFacebookMarketplaceData,
fetchFacebookItem,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
} from "./scrapers/facebook";
export type {
DetailedListing,
KijijiListingDetails,
ListingFetchOptions,
SearchOptions,
} from "./scrapers/kijiji";
export {
buildSearchUrl,
default as fetchKijijiItems,
extractApolloState,
HttpError,
NetworkError,
ParseError,
parseDetailedListing,
parseSearch,
RateLimitError,
resolveCategoryId,
resolveLocationId,
slugify,
ValidationError,
} from "./scrapers/kijiji";
// Export shared types // Export shared types
export * from "./types/common"; export * from "./types/common";
// Export shared utilities
export * from "./utils/cookies";
export * from "./utils/delay";
export * from "./utils/format";
export * from "./utils/http";

View File

@@ -1,27 +1,36 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import { isRecord } from "../utils/http"; import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay"; import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import type { HTMLString } from "../types/common"; // eBay cookie configuration
const EBAY_COOKIE_CONFIG: CookieConfig = {
name: "eBay",
domain: ".ebay.ca",
envVar: "EBAY_COOKIE",
filePath: "./cookies/ebay.json",
};
// ----------------------------- Types ----------------------------- // ----------------------------- Types -----------------------------
export interface EbayListingDetails { export interface EbayListingDetails {
url: string; url: string;
title: string; title: string;
description?: string; description?: string;
listingPrice?: { listingPrice?: {
amountFormatted: string; amountFormatted: string;
cents?: number; cents?: number;
currency?: string; currency?: string;
}; };
listingType?: string; listingType?: string;
listingStatus?: string; listingStatus?: string;
creationDate?: string; creationDate?: string;
endDate?: string; endDate?: string;
numberOfViews?: number; numberOfViews?: number;
address?: string | null; address?: string | null;
} }
// ----------------------------- Utilities ----------------------------- // ----------------------------- Utilities -----------------------------
@@ -29,43 +38,49 @@ export interface EbayListingDetails {
/** /**
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents * Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
*/ */
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null { function parseEbayPrice(
if (!priceText || typeof priceText !== 'string') return null; priceText: string,
): { cents: number; currency: string } | null {
if (!priceText || typeof priceText !== "string") return null;
// Clean up the price text and extract currency and amount // Clean up the price text and extract currency and amount
const cleaned = priceText.trim(); const cleaned = priceText.trim();
// Find all numbers in the string (including decimals) // Find all numbers in the string (including decimals)
const numberMatches = cleaned.match(/[\d,]+\.?\d*/); const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
if (!numberMatches) return null; if (!numberMatches) return null;
const amountStr = numberMatches[0].replace(/,/g, ''); const amountStr = numberMatches[0].replace(/,/g, "");
const dollars = parseFloat(amountStr); const dollars = parseFloat(amountStr);
if (isNaN(dollars)) return null; if (Number.isNaN(dollars)) return null;
const cents = Math.round(dollars * 100); const cents = Math.round(dollars * 100);
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc. // Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = 'USD'; // Default let currency = "USD"; // Default
if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) { if (
currency = 'CAD'; cleaned.toUpperCase().includes("CAD") ||
} else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) { cleaned.includes("CA$") ||
currency = 'USD'; cleaned.includes("C $")
} ) {
currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
currency = "USD";
}
return { cents, currency }; return { cents, currency };
} }
class HttpError extends Error { class HttpError extends Error {
constructor( constructor(
message: string, message: string,
public readonly status: number, public readonly status: number,
public readonly url: string, public readonly url: string,
) { ) {
super(message); super(message);
this.name = "HttpError"; this.name = "HttpError";
} }
} }
// ----------------------------- Parsing ----------------------------- // ----------------------------- Parsing -----------------------------
@@ -74,290 +89,390 @@ class HttpError extends Error {
Parse eBay search page HTML and extract listings using DOM selectors Parse eBay search page HTML and extract listings using DOM selectors
*/ */
function parseEbayListings( function parseEbayListings(
htmlString: HTMLString, htmlString: HTMLString,
keywords: string[], keywords: string[],
exclusions: string[], exclusions: string[],
strictMode: boolean strictMode: boolean,
): EbayListingDetails[] { ): EbayListingDetails[] {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const results: EbayListingDetails[] = []; const results: EbayListingDetails[] = [];
// Find all listing links by looking for eBay item URLs (/itm/) // Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]'); const linkElements = document.querySelectorAll('a[href*="itm/"]');
for (const linkElement of linkElements) {
try {
// Get href attribute
let href = linkElement.getAttribute("href");
if (!href) continue;
for (const linkElement of linkElements) { // Make href absolute
try { if (!href.startsWith("http")) {
// Get href attribute href = href.startsWith("//")
let href = linkElement.getAttribute('href'); ? `https:${href}`
if (!href) continue; : `https://www.ebay.com${href}`;
}
// Make href absolute // Find the container - go up several levels to find the item container
if (!href.startsWith('http')) { // Modern eBay uses complex nested structures (often 5-10 levels deep)
href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`; let container: Element | null = linkElement;
} let depth = 0;
const maxDepth = 15;
// Find the container - go up several levels to find the item container // Walk up until we find a list item or results container
// Modern eBay uses complex nested structures while (container && depth < maxDepth) {
let container = linkElement.parentElement?.parentElement?.parentElement; const classes = container.className || "";
if (!container) { if (
// Try a different level classes.includes("s-item") ||
container = linkElement.parentElement?.parentElement; classes.includes("srp-results") ||
} container.tagName === "LI"
if (!container) continue; ) {
break;
}
container = container.parentElement;
depth++;
}
// Extract title - look for heading or title-related elements near the link if (!container || depth >= maxDepth) continue;
// Modern eBay often uses h3, span, or div with text content near the link
let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span');
// If no direct title element, try finding text content around the link // Extract title - look for heading or title-related elements near the link
if (!titleElement) { // Modern eBay often uses h3, span, or div with text content near the link
// Look for spans or divs with text near this link let titleElement = container.querySelector(
const nearbySpans = container.querySelectorAll('span, div'); 'h3, [role="heading"], .s-item__title span',
for (const span of nearbySpans) { );
const text = span.textContent?.trim();
if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) {
titleElement = span;
break;
}
}
}
let title = titleElement?.textContent?.trim(); // If no direct title element, try finding text content around the link
if (!titleElement) {
// Look for spans or divs with text near this link
const nearbySpans = container.querySelectorAll("span, div");
for (const span of nearbySpans) {
const text = span.textContent?.trim();
if (
text &&
text.length > 10 &&
text.length < 200 &&
!text.includes("$") &&
!text.includes("item")
) {
titleElement = span;
break;
}
}
}
// Clean up eBay UI strings that get included in titles let title = titleElement?.textContent?.trim();
if (title) {
// Remove common eBay UI strings that appear at the end of titles
const uiStrings = [
'Opens in a new window',
'Opens in a new tab',
'Opens in a new window or tab',
'opens in a new window',
'opens in a new tab',
'opens in a new window or tab'
];
for (const uiString of uiStrings) { // Clean up eBay UI strings that get included in titles
const uiIndex = title.indexOf(uiString); if (title) {
if (uiIndex !== -1) { // Remove common eBay UI strings that appear at the end of titles
title = title.substring(0, uiIndex).trim(); const uiStrings = [
break; // Only remove one UI string per title "Opens in a new window",
} "Opens in a new tab",
} "Opens in a new window or tab",
"opens in a new window",
"opens in a new tab",
"opens in a new window or tab",
];
// If the title became empty or too short after cleaning, skip this item for (const uiString of uiStrings) {
if (title.length < 10) { const uiIndex = title.indexOf(uiString);
continue; if (uiIndex !== -1) {
} title = title.substring(0, uiIndex).trim();
} break; // Only remove one UI string per title
}
}
if (!title) continue; // If the title became empty or too short after cleaning, skip this item
if (title.length < 10) {
continue;
}
}
// Skip irrelevant eBay ads if (!title) continue;
if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices // Skip irrelevant eBay ads
let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]'); if (title === "Shop on eBay" || title.length < 3) continue;
// If no direct price class, look for spans containing $ (but not titles) // Extract price - look for eBay's price classes, preferring sale/discount prices
if (!priceElement) { // Updated for 2026 eBay HTML structure
const spansAndElements = container.querySelectorAll('span, div, b, em, strong'); let priceElement = container.querySelector(
for (const el of spansAndElements) { '[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
const text = el.textContent?.trim(); );
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if (text && text.includes('$') && text.length < 100 &&
!text.includes('laptop') && !text.includes('computer') && !text.includes('intel') &&
!text.includes('core') && !text.includes('ram') && !text.includes('ssd') &&
! /\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements
) {
priceElement = el;
break;
}
}
}
// For discounted items, eBay shows both original and sale price // If no direct price class, look for spans containing $ (but not titles)
// Prefer sale/current price over original/strikethrough price if (!priceElement) {
if (priceElement) { const spansAndElements = container.querySelectorAll(
// Check if this element or its parent contains multiple price elements "span, div, b, em, strong",
const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement; );
for (const el of spansAndElements) {
const text = el.textContent?.trim();
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if (
text?.includes("$") &&
text.length < 100 &&
!text.includes("laptop") &&
!text.includes("computer") &&
!text.includes("intel") &&
!text.includes("core") &&
!text.includes("ram") &&
!text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements
) {
priceElement = el;
break;
}
}
}
if (priceContainer) { // For discounted items, eBay shows both original and sale price
// Look for all price elements within this container, including strikethrough prices // Prefer sale/current price over original/strikethrough price
const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike'); if (priceElement) {
// Check if this element or its parent contains multiple price elements
const priceContainer =
priceElement.closest('[class*="s-item__price"]') ||
priceElement.parentElement;
// Filter to only elements that actually contain prices (not labels) if (priceContainer) {
const actualPrices: HTMLElement[] = []; // Look for all price elements within this container, including strikethrough prices
for (const el of allPriceElements) { const allPriceElements = priceContainer.querySelectorAll(
const text = el.textContent?.trim(); '[class*="s-item__price"], span, b, em, strong, s, del, strike',
if (text && /^\s*[$£¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) { );
actualPrices.push(el);
}
}
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices) // Filter to only elements that actually contain prices (not labels)
if (actualPrices.length > 1) { const actualPrices: HTMLElement[] = [];
// First, look for prices that are NOT struck through for (const el of allPriceElements) {
const nonStrikethroughPrices = actualPrices.filter(el => { const text = el.textContent?.trim();
const tagName = el.tagName.toLowerCase(); if (
const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') || text &&
el.closest('s, del, strike'); /^\s*[$£¥]/u.test(text) &&
return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles; text.length < 50 &&
}); !/\d{4}/.test(text)
) {
actualPrices.push(el);
}
}
if (nonStrikethroughPrices.length > 0) { // Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
// Use the first non-strikethrough price (sale price) if (actualPrices.length > 1) {
priceElement = nonStrikethroughPrices[0]; // First, look for prices that are NOT struck through
} else { const nonStrikethroughPrices = actualPrices.filter((el) => {
// Fallback: use the last price (likely the most current) const tagName = el.tagName.toLowerCase();
const lastPrice = actualPrices[actualPrices.length - 1]; const styles =
priceElement = lastPrice; el.classList.contains("s-strikethrough") ||
} el.classList.contains("u-flStrike") ||
} el.closest("s, del, strike");
} return (
} tagName !== "s" &&
tagName !== "del" &&
tagName !== "strike" &&
!styles
);
});
const priceText = priceElement?.textContent?.trim(); if (nonStrikethroughPrices.length > 0) {
// Use the first non-strikethrough price (sale price)
priceElement = nonStrikethroughPrices[0];
} else {
// Fallback: use the last price (likely the most current)
const lastPrice = actualPrices[actualPrices.length - 1];
priceElement = lastPrice;
}
}
}
}
if (!priceText) continue; const priceText = priceElement?.textContent?.trim();
// Parse price into cents and currency if (!priceText) continue;
const priceInfo = parseEbayPrice(priceText);
if (!priceInfo) continue;
// Apply exclusion filters // Parse price into cents and currency
if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) { const priceInfo = parseEbayPrice(priceText);
continue; if (!priceInfo) continue;
}
// Apply strict mode filter (title must contain at least one keyword) // Apply exclusion filters
if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) { if (
continue; exclusions.some((exclusion) =>
} title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
continue;
}
const listing: EbayListingDetails = { // Apply strict mode filter (title must contain at least one keyword)
url: href, if (
title, strictMode &&
listingPrice: { title &&
amountFormatted: priceText, !keywords.some((keyword) =>
cents: priceInfo.cents, title.toLowerCase().includes(keyword.toLowerCase()),
currency: priceInfo.currency, )
}, ) {
listingType: "OFFER", // eBay listings are typically offers continue;
listingStatus: "ACTIVE", }
address: null, // eBay doesn't typically show detailed addresses in search results
};
results.push(listing); const listing: EbayListingDetails = {
} catch (err) { url: href,
console.warn(`Error parsing eBay listing: ${err}`); title,
} listingPrice: {
} amountFormatted: priceText,
cents: priceInfo.cents,
currency: priceInfo.currency,
},
listingType: "OFFER", // eBay listings are typically offers
listingStatus: "ACTIVE",
address: null, // eBay doesn't typically show detailed addresses in search results
};
return results; results.push(listing);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
}
}
return results;
}
// ----------------------------- Cookie Loading -----------------------------
/**
* Load eBay cookies with priority: URL param > ENV var > file
* Uses shared cookie utility for consistent handling across all scrapers
*/
async function loadEbayCookies(
cookiesSource?: string,
): Promise<string | undefined> {
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
if (cookies.length === 0) {
console.warn(
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
"Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. EBAY_COOKIE environment variable, or\n" +
" 3. ./cookies/ebay.json file (lowest priority)\n" +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
return undefined;
}
return formatCookiesForHeader(cookies, "www.ebay.ca");
} }
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchEbayItems( export default async function fetchEbayItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
opts: { opts: {
minPrice?: number; minPrice?: number;
maxPrice?: number; maxPrice?: number;
strictMode?: boolean; strictMode?: boolean;
exclusions?: string[]; exclusions?: string[];
keywords?: string[]; keywords?: string[];
buyItNowOnly?: boolean; buyItNowOnly?: boolean;
canadaOnly?: boolean; canadaOnly?: boolean;
} = {}, cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
} = {},
) { ) {
const { const {
minPrice = 0, minPrice = 0,
maxPrice = Number.MAX_SAFE_INTEGER, maxPrice = Number.MAX_SAFE_INTEGER,
strictMode = false, strictMode = false,
exclusions = [], exclusions = [],
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
buyItNowOnly = true, buyItNowOnly = true,
canadaOnly = true, canadaOnly = true,
} = opts; cookies: cookiesSource,
} = opts;
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference // Load eBay cookies with priority: URL param > ENV var > file
const urlParams = new URLSearchParams({ const cookies = await loadEbayCookies(cookiesSource);
_nkw: SEARCH_QUERY,
_sacat: "0",
_from: "R40",
});
if (buyItNowOnly) { // Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
urlParams.set("LH_BIN", "1"); const urlParams = new URLSearchParams({
} _nkw: SEARCH_QUERY,
_sacat: "0",
_from: "R40",
});
if (canadaOnly) { if (buyItNowOnly) {
urlParams.set("LH_PrefLoc", "1"); urlParams.set("LH_BIN", "1");
} }
const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`; if (canadaOnly) {
urlParams.set("LH_PrefLoc", "1");
}
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`;
console.log(`Fetching eBay search: ${searchUrl}`); const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
try { console.log(`Fetching eBay search: ${searchUrl}`);
// Use custom headers modeled after real browser requests to bypass bot detection
const headers: Record<string, string> = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'Referer': 'https://www.ebay.ca/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
'Priority': 'u=0, i'
};
const res = await fetch(searchUrl, { try {
method: "GET", // Use custom headers modeled after real browser requests to bypass bot detection
headers, const headers: Record<string, string> = {
}); "User-Agent":
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
Priority: "u=0, i",
};
if (!res.ok) { // Add cookies if available (helps bypass bot detection)
throw new HttpError( if (cookies) {
`Request failed with status ${res.status}`, headers.Cookie = cookies;
res.status, }
searchUrl,
);
}
const searchHtml = await res.text(); const res = await fetch(searchUrl, {
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND method: "GET",
await delay(DELAY_MS); headers,
});
console.log(`\nParsing eBay listings...`); if (!res.ok) {
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
searchUrl,
);
}
const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode); const searchHtml = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
// Filter by price range (additional safety check) console.log(`\nParsing eBay listings...`);
const filteredListings = listings.filter(listing => {
const cents = listing.listingPrice?.cents;
return cents && cents >= minPrice && cents <= maxPrice;
});
console.log(`Parsed ${filteredListings.length} eBay listings.`); const listings = parseEbayListings(
return filteredListings; searchHtml,
keywords,
exclusions,
strictMode,
);
} catch (err) { // Filter by price range (additional safety check)
if (err instanceof HttpError) { const filteredListings = listings.filter((listing) => {
console.error( const cents = listing.listingPrice?.cents;
`Failed to fetch eBay search (${err.status}): ${err.message}`, return cents && cents >= minPrice && cents <= maxPrice;
); });
return [];
} console.log(`Parsed ${filteredListings.length} eBay listings.`);
throw err; return filteredListings;
} } catch (err) {
if (err instanceof HttpError) {
console.error(
`Failed to fetch eBay search (${err.status}): ${err.message}`,
);
return [];
}
throw err;
}
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -3,18 +3,18 @@ export type HTMLString = string;
/** Currency price object with formatting options */ /** Currency price object with formatting options */
export interface Price { export interface Price {
amountFormatted: string; amountFormatted: string;
cents: number; cents: number;
currency: string; currency: string;
} }
/** Base listing details common across all marketplaces */ /** Base listing details common across all marketplaces */
export interface ListingDetails { export interface ListingDetails {
url: string; url: string;
title: string; title: string;
listingPrice: Price; listingPrice: Price;
listingType: string; listingType: string;
listingStatus: string; listingStatus: string;
address?: string | null; address?: string | null;
creationDate?: string; creationDate?: string;
} }

View File

@@ -0,0 +1,227 @@
/**
* Shared cookie handling utilities for marketplace scrapers
*/
/**
 * A single cookie record as handled by the parsers and header formatter in
 * this module. The field set presumably mirrors browser cookie-export JSON
 * (TODO confirm against the export tool actually used).
 */
export interface Cookie {
// Cookie name; emitted as the left-hand side of `name=value` in the header.
name: string;
// Cookie value; emitted verbatim as the right-hand side of `name=value`.
value: string;
// Scope of the cookie. A leading "." marks a domain cookie that
// formatCookiesForHeader also sends to subdomains; without it the target
// host must equal this string exactly.
domain: string;
// Cookie path. Not consulted by formatCookiesForHeader.
path: string;
// The following attributes are carried along but not consulted by
// formatCookiesForHeader.
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
// Expiry in seconds since the Unix epoch (it is compared against
// Date.now() / 1000). Missing or 0 is treated as "never expires".
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
/**
 * Per-marketplace settings used by ensureCookies: which environment variable
 * and file to read cookies from, which domain to assign to cookies parsed
 * from a plain "name=value" string, and the label used in log messages.
 */
export interface CookieConfig {
/** Name used in log messages (e.g., "Facebook", "Kijiji") */
name: string;
/** Domain for cookies (e.g., ".facebook.com", ".kijiji.ca") */
domain: string;
/** Environment variable name (e.g., "FACEBOOK_COOKIE") */
envVar: string;
/** Path to cookie file (e.g., "./cookies/facebook.json") */
filePath: string;
}
/**
 * Parse a Cookie-header style string into a Cookie array.
 * Supports format: "name1=value1; name2=value2"
 *
 * Pairs without "=", with an empty name, or with an empty value are dropped.
 * Only the first "=" separates name from value, so values that themselves
 * contain "=" (e.g. base64 padding) are preserved.
 *
 * Values are percent-decoded when possible. A value containing a malformed
 * percent sequence (e.g. "100%") is kept as-is instead of throwing, so one
 * odd cookie cannot abort parsing of the whole string.
 *
 * @param cookieString - Raw cookie string; empty/whitespace-only yields [].
 * @param domain - Domain assigned to every parsed cookie.
 * @returns Parsed cookies with path "/", secure=true, httpOnly=false,
 *          sameSite="lax" and no expiration.
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separator = pair.indexOf("=");
    if (separator === -1) {
      continue;
    }

    const name = pair.slice(0, separator).trim();
    const rawValue = pair.slice(separator + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    // decodeURIComponent throws URIError on malformed escapes such as a
    // lone "%"; fall back to the raw text rather than failing the parse.
    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }

  return cookies;
}
/**
 * Decode a JSON document that is expected to be an array of cookie objects,
 * e.g. [{"name": "foo", "value": "bar", ...}].
 *
 * Anything that is not an array yields []. Array entries are kept only when
 * they are truthy and carry string `name` and `value` properties; no other
 * field is checked. Invalid JSON propagates the error thrown by JSON.parse.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded = JSON.parse(jsonString);
  if (!Array.isArray(decoded)) {
    return [];
  }

  const accepted: Cookie[] = [];
  for (const entry of decoded) {
    if (!entry) {
      continue;
    }
    const hasName = typeof entry.name === "string";
    const hasValue = typeof entry.value === "string";
    if (hasName && hasValue) {
      accepted.push(entry);
    }
  }
  return accepted;
}
/**
 * Parse cookies from a string of unknown format: a JSON array is tried
 * first, then the "name1=value1; name2=value2" cookie-string format.
 *
 * JSON entries are only validated for `name` and `value`, so hand-written
 * JSON may omit `domain` or `path`. Those are filled in here (with
 * `defaultDomain` and "/") so that every returned cookie has a usable
 * string domain for later domain matching.
 *
 * @param input - Raw cookie text (JSON array or cookie string).
 * @param defaultDomain - Domain applied to cookie-string entries and to
 *                        JSON entries that lack a domain.
 * @returns Parsed cookies, or [] when neither format yields any.
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  // Try JSON array format first
  try {
    const cookies = parseJsonCookies(input);
    if (cookies.length > 0) {
      return cookies.map((cookie) => ({
        ...cookie,
        domain:
          typeof cookie.domain === "string" && cookie.domain
            ? cookie.domain
            : defaultDomain,
        path:
          typeof cookie.path === "string" && cookie.path ? cookie.path : "/",
      }));
    }
  } catch {
    // JSON parse failed, try cookie string format
  }

  // Try cookie string format
  return parseCookieString(input, defaultDomain);
}
/**
 * Read a cookie file and parse its contents.
 *
 * The file may hold either a JSON array or a cookie string; the trimmed
 * text is handed to parseCookiesAuto. A file that does not exist yields [].
 *
 * @param filePath - Location of the cookie file.
 * @param defaultDomain - Domain forwarded to parseCookiesAuto.
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);
  const present = await cookieFile.exists();

  if (present) {
    const rawText = await cookieFile.text();
    return parseCookiesAuto(rawText.trim(), defaultDomain);
  }

  return [];
}
/**
 * Format a cookie array into a Cookie request-header value
 * ("name1=value1; name2=value2") for the given host.
 *
 * A cookie is included when:
 *  - its domain starts with "." and the target host equals the bare domain
 *    or is a subdomain of it (match is on a label boundary, so ".ebay.ca"
 *    matches "www.ebay.ca" and "ebay.ca" but not "notebay.ca"), or
 *  - its domain has no leading "." and equals the target host; and
 *  - it has no expirationDate, or the expirationDate (seconds since the
 *    Unix epoch) is not in the past.
 *
 * Domain comparison is case-insensitive. Cookies without a string domain
 * are skipped rather than raising.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is sent to (e.g. "www.ebay.ca").
 * @returns Header value; "" when no cookie applies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const validCookies = cookies
    .filter((cookie) => {
      // JSON-sourced cookies are not guaranteed to carry a domain.
      if (typeof cookie.domain !== "string") {
        return false;
      }
      const cookieDomain = cookie.domain.toLowerCase();

      if (cookieDomain.startsWith(".")) {
        // Domain cookie: exact bare-domain match, or a subdomain. Keeping
        // the leading "." in the suffix test enforces a label boundary.
        return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
      }

      // Host-only cookie
      return cookieDomain === host;
    })
    .filter(
      (cookie) =>
        !(cookie.expirationDate && cookie.expirationDate < nowSeconds),
    );

  return validCookies
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
/**
 * Resolve cookies for a marketplace, consulting sources in a fixed order and
 * returning the first one that yields at least one cookie:
 *   1. the explicit `cookiesSource` argument,
 *   2. the environment variable named by `config.envVar`,
 *   3. the file at `config.filePath`.
 * Every source may hold either a JSON array or a cookie string.
 *
 * A source that is present but yields no cookies logs a warning and the
 * lookup falls through to the next one. A failure while reading the file is
 * logged and treated as "no cookies".
 *
 * @throws Error when no source provides any valid cookie.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const { name, domain, envVar, filePath } = config;

  // Source 1: caller-supplied parameter
  if (cookiesSource) {
    const fromParam = parseCookiesAuto(cookiesSource, domain);
    if (fromParam.length === 0) {
      console.warn(
        `${name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(`Loaded ${fromParam.length} ${name} cookies from parameter`);
      return fromParam;
    }
  }

  // Source 2: environment variable (ignored when blank)
  const rawEnv = process.env[envVar];
  if (rawEnv?.trim()) {
    const fromEnv = parseCookiesAuto(rawEnv, domain);
    if (fromEnv.length === 0) {
      console.warn(`${envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${name} cookies from ${envVar} env var`,
      );
      return fromEnv;
    }
  }

  // Source 3: cookie file
  try {
    const fromFile = await loadCookiesFromFile(filePath, domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${name} cookies from ${filePath}`,
      );
      return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${filePath}: ${e}`);
  }

  // Nothing usable anywhere: report every option to the caller.
  const guidance = [
    `No valid ${name} cookies found. Provide cookies via (in priority order):`,
    ` 1. 'cookies' parameter (highest priority), or`,
    ` 2. ${envVar} environment variable, or`,
    ` 3. ${filePath} file (lowest priority)`,
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ].join("\n");
  throw new Error(guidance);
}
/**
 * Non-throwing variant of ensureCookies: performs the same lookup but
 * resolves to an empty array when ensureCookies rejects.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const lookup = ensureCookies(config, cookiesSource);
  // Any rejection (including "no cookies found") collapses to an empty list.
  return lookup.catch((): Cookie[] => []);
}

View File

@@ -4,5 +4,5 @@
* @returns A promise that resolves after the specified delay * @returns A promise that resolves after the specified delay
*/ */
export function delay(ms: number): Promise<void> { export function delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms)); return new Promise((resolve) => setTimeout(resolve, ms));
} }

View File

@@ -4,18 +4,21 @@
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US') * @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
* @returns Formatted currency string * @returns Formatted currency string
*/ */
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string { export function formatCentsToCurrency(
try { cents: number,
const formatter = new Intl.NumberFormat(locale, { locale: string = "en-CA",
style: "currency", ): string {
currency: "CAD", try {
minimumFractionDigits: 2, const formatter = new Intl.NumberFormat(locale, {
maximumFractionDigits: 2, style: "currency",
}); currency: "CAD",
return formatter.format(cents / 100); minimumFractionDigits: 2,
} catch (error) { maximumFractionDigits: 2,
// Fallback if locale is not supported });
const dollars = (cents / 100).toFixed(2); return formatter.format(cents / 100);
return `$${dollars}`; } catch {
} // Fallback if locale is not supported
const dollars = (cents / 100).toFixed(2);
return `$${dollars}`;
}
} }

View File

@@ -1,79 +1,79 @@
/** Custom error class for HTTP-related failures */ /** Custom error class for HTTP-related failures */
export class HttpError extends Error { export class HttpError extends Error {
constructor( constructor(
message: string, message: string,
public readonly statusCode: number, public readonly statusCode: number,
public readonly url?: string public readonly url?: string,
) { ) {
super(message); super(message);
this.name = "HttpError"; this.name = "HttpError";
} }
} }
/** Error class for network failures (timeouts, connection issues) */ /** Error class for network failures (timeouts, connection issues) */
export class NetworkError extends Error { export class NetworkError extends Error {
constructor( constructor(
message: string, message: string,
public readonly url: string, public readonly url: string,
public readonly cause?: Error public readonly cause?: Error,
) { ) {
super(message); super(message);
this.name = "NetworkError"; this.name = "NetworkError";
} }
} }
/** Error class for parsing failures */ /** Error class for parsing failures */
export class ParseError extends Error { export class ParseError extends Error {
constructor( constructor(
message: string, message: string,
public readonly data?: unknown public readonly data?: unknown,
) { ) {
super(message); super(message);
this.name = "ParseError"; this.name = "ParseError";
} }
} }
/** Error class for rate limiting */ /** Error class for rate limiting */
export class RateLimitError extends Error { export class RateLimitError extends Error {
constructor( constructor(
message: string, message: string,
public readonly url: string, public readonly url: string,
public readonly resetTime?: number public readonly resetTime?: number,
) { ) {
super(message); super(message);
this.name = "RateLimitError"; this.name = "RateLimitError";
} }
} }
/** Error class for validation failures */ /** Error class for validation failures */
export class ValidationError extends Error { export class ValidationError extends Error {
constructor(message: string) { constructor(message: string) {
super(message); super(message);
this.name = "ValidationError"; this.name = "ValidationError";
} }
} }
/** Type guard to check if a value is a record (object) */ /** Type guard to check if a value is a record (object) */
export function isRecord(value: unknown): value is Record<string, unknown> { export function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value); return typeof value === "object" && value !== null && !Array.isArray(value);
} }
/** /**
* Calculate exponential backoff delay with jitter * Calculate exponential backoff delay with jitter
*/ */
function calculateBackoffDelay(attempt: number, baseMs: number): number { function calculateBackoffDelay(attempt: number, baseMs: number): number {
const exponentialDelay = baseMs * 2 ** attempt; const exponentialDelay = baseMs * 2 ** attempt;
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
} }
/** Options for fetchHtml */ /** Options for fetchHtml */
export interface FetchHtmlOptions { export interface FetchHtmlOptions {
maxRetries?: number; maxRetries?: number;
retryBaseMs?: number; retryBaseMs?: number;
timeoutMs?: number; timeoutMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void; onRateInfo?: (remaining: string | null, reset: string | null) => void;
headers?: Record<string, string>; headers?: Record<string, string>;
} }
/** /**
@@ -85,116 +85,116 @@ export interface FetchHtmlOptions {
* @throws HttpError, NetworkError, or RateLimitError on failure * @throws HttpError, NetworkError, or RateLimitError on failure
*/ */
export async function fetchHtml( export async function fetchHtml(
url: string, url: string,
delayMs: number, delayMs: number,
opts?: FetchHtmlOptions opts?: FetchHtmlOptions,
): Promise<string> { ): Promise<string> {
const maxRetries = opts?.maxRetries ?? 3; const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 1000; const retryBaseMs = opts?.retryBaseMs ?? 1000;
const timeoutMs = opts?.timeoutMs ?? 30000; const timeoutMs = opts?.timeoutMs ?? 30000;
const defaultHeaders: Record<string, string> = { const defaultHeaders: Record<string, string> = {
accept: accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8", "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"cache-control": "no-cache", "cache-control": "no-cache",
"upgrade-insecure-requests": "1", "upgrade-insecure-requests": "1",
"user-agent": "user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
}; };
for (let attempt = 0; attempt <= maxRetries; attempt++) { for (let attempt = 0; attempt <= maxRetries; attempt++) {
try { try {
const controller = new AbortController(); const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs); const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
const res = await fetch(url, { const res = await fetch(url, {
method: "GET", method: "GET",
headers: { ...defaultHeaders, ...opts?.headers }, headers: { ...defaultHeaders, ...opts?.headers },
signal: controller.signal, signal: controller.signal,
}); });
clearTimeout(timeoutId); clearTimeout(timeoutId);
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset"); const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
if (!res.ok) { if (!res.ok) {
// Handle rate limiting // Handle rate limiting
if (res.status === 429) { if (res.status === 429) {
const resetSeconds = rateLimitReset const resetSeconds = rateLimitReset
? Number(rateLimitReset) ? Number(rateLimitReset)
: Number.NaN; : Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000) ? Math.max(0, resetSeconds * 1000)
: calculateBackoffDelay(attempt, retryBaseMs); : calculateBackoffDelay(attempt, retryBaseMs);
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => setTimeout(resolve, waitMs)); await new Promise((resolve) => setTimeout(resolve, waitMs));
continue; continue;
} }
throw new RateLimitError( throw new RateLimitError(
`Rate limit exceeded for ${url}`, `Rate limit exceeded for ${url}`,
url, url,
resetSeconds resetSeconds,
); );
} }
// Retry on server errors // Retry on server errors
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
); );
continue; continue;
} }
throw new HttpError( throw new HttpError(
`Request failed with status ${res.status}`, `Request failed with status ${res.status}`,
res.status, res.status,
url url,
); );
} }
const html = await res.text(); const html = await res.text();
// Respect per-request delay to maintain rate limiting // Respect per-request delay to maintain rate limiting
await new Promise((resolve) => setTimeout(resolve, delayMs)); await new Promise((resolve) => setTimeout(resolve, delayMs));
return html; return html;
} catch (err) { } catch (err) {
// Re-throw known errors // Re-throw known errors
if ( if (
err instanceof RateLimitError || err instanceof RateLimitError ||
err instanceof HttpError || err instanceof HttpError ||
err instanceof NetworkError err instanceof NetworkError
) { ) {
throw err; throw err;
} }
if (err instanceof Error && err.name === "AbortError") { if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
); );
continue; continue;
} }
throw new NetworkError(`Request timeout for ${url}`, url, err); throw new NetworkError(`Request timeout for ${url}`, url, err);
} }
// Network or other errors // Network or other errors
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
); );
continue; continue;
} }
throw new NetworkError( throw new NetworkError(
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
url, url,
err instanceof Error ? err : undefined err instanceof Error ? err : undefined,
); );
} }
} }
throw new NetworkError(`Exhausted retries without response for ${url}`, url); throw new NetworkError(`Exhausted retries without response for ${url}`, url);
} }

View File

@@ -5,7 +5,6 @@ import {
fetchFacebookItem, fetchFacebookItem,
formatCentsToCurrency, formatCentsToCurrency,
formatCookiesForHeader, formatCookiesForHeader,
loadFacebookCookies,
parseFacebookAds, parseFacebookAds,
parseFacebookCookieString, parseFacebookCookieString,
parseFacebookItem, parseFacebookItem,
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}); });
}); });
const result = await fetchFacebookItem("123", mockCookies); const _result = await fetchFacebookItem("123", mockCookies);
expect(attempts).toBe(2); expect(attempts).toBe(2);
// Should eventually succeed after retry // Should eventually succeed after retry
}); });

View File

@@ -1,5 +1,5 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook"; import { fetchFacebookItems } from "../src/scrapers/facebook";
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;

View File

@@ -1,166 +1,157 @@
import { describe, expect, test } from "bun:test"; import { describe, expect, test } from "bun:test";
import { import {
HttpError, buildSearchUrl,
NetworkError, NetworkError,
ParseError, ParseError,
RateLimitError, RateLimitError,
ValidationError, resolveCategoryId,
buildSearchUrl, resolveLocationId,
resolveCategoryId, ValidationError,
resolveLocationId,
} from "../src/scrapers/kijiji"; } from "../src/scrapers/kijiji";
describe("Location and Category Resolution", () => { describe("Location and Category Resolution", () => {
describe("resolveLocationId", () => { describe("resolveLocationId", () => {
test("should return numeric IDs as-is", () => { test("should return numeric IDs as-is", () => {
expect(resolveLocationId(1700272)).toBe(1700272); expect(resolveLocationId(1700272)).toBe(1700272);
expect(resolveLocationId(0)).toBe(0); expect(resolveLocationId(0)).toBe(0);
}); });
test("should resolve string location names", () => { test("should resolve string location names", () => {
expect(resolveLocationId("canada")).toBe(0); expect(resolveLocationId("canada")).toBe(0);
expect(resolveLocationId("ontario")).toBe(9004); expect(resolveLocationId("ontario")).toBe(9004);
expect(resolveLocationId("toronto")).toBe(1700273); expect(resolveLocationId("toronto")).toBe(1700273);
expect(resolveLocationId("gta")).toBe(1700272); expect(resolveLocationId("gta")).toBe(1700272);
}); });
test("should handle case insensitive matching", () => { test("should handle case insensitive matching", () => {
expect(resolveLocationId("Canada")).toBe(0); expect(resolveLocationId("Canada")).toBe(0);
expect(resolveLocationId("ONTARIO")).toBe(9004); expect(resolveLocationId("ONTARIO")).toBe(9004);
}); });
test("should default to Canada for unknown locations", () => { test("should default to Canada for unknown locations", () => {
expect(resolveLocationId("unknown")).toBe(0); expect(resolveLocationId("unknown")).toBe(0);
expect(resolveLocationId("")).toBe(0); expect(resolveLocationId("")).toBe(0);
}); });
test("should handle undefined input", () => { test("should handle undefined input", () => {
expect(resolveLocationId(undefined)).toBe(0); expect(resolveLocationId(undefined)).toBe(0);
}); });
}); });
describe("resolveCategoryId", () => { describe("resolveCategoryId", () => {
test("should return numeric IDs as-is", () => { test("should return numeric IDs as-is", () => {
expect(resolveCategoryId(132)).toBe(132); expect(resolveCategoryId(132)).toBe(132);
expect(resolveCategoryId(0)).toBe(0); expect(resolveCategoryId(0)).toBe(0);
}); });
test("should resolve string category names", () => { test("should resolve string category names", () => {
expect(resolveCategoryId("all")).toBe(0); expect(resolveCategoryId("all")).toBe(0);
expect(resolveCategoryId("phones")).toBe(132); expect(resolveCategoryId("phones")).toBe(132);
expect(resolveCategoryId("electronics")).toBe(29659001); expect(resolveCategoryId("electronics")).toBe(29659001);
expect(resolveCategoryId("buy-sell")).toBe(10); expect(resolveCategoryId("buy-sell")).toBe(10);
}); });
test("should handle case insensitive matching", () => { test("should handle case insensitive matching", () => {
expect(resolveCategoryId("All")).toBe(0); expect(resolveCategoryId("All")).toBe(0);
expect(resolveCategoryId("PHONES")).toBe(132); expect(resolveCategoryId("PHONES")).toBe(132);
}); });
test("should default to all categories for unknown categories", () => { test("should default to all categories for unknown categories", () => {
expect(resolveCategoryId("unknown")).toBe(0); expect(resolveCategoryId("unknown")).toBe(0);
expect(resolveCategoryId("")).toBe(0); expect(resolveCategoryId("")).toBe(0);
}); });
test("should handle undefined input", () => { test("should handle undefined input", () => {
expect(resolveCategoryId(undefined)).toBe(0); expect(resolveCategoryId(undefined)).toBe(0);
}); });
}); });
}); });
describe("URL Construction", () => { describe("URL Construction", () => {
describe("buildSearchUrl", () => { describe("buildSearchUrl", () => {
test("should build basic search URL", () => { test("should build basic search URL", () => {
const url = buildSearchUrl("iphone", { const url = buildSearchUrl("iphone", {
location: 1700272, location: 1700272,
category: 132, category: 132,
sortBy: "relevancy", sortBy: "relevancy",
sortOrder: "desc", sortOrder: "desc",
}); });
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272"); expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
expect(url).toContain("sort=relevancyDesc"); expect(url).toContain("sort=relevancyDesc");
expect(url).toContain("order=DESC"); expect(url).toContain("order=DESC");
}); });
test("should handle pagination", () => { test("should handle pagination", () => {
const url = buildSearchUrl("iphone", { const url = buildSearchUrl("iphone", {
location: 1700272, location: 1700272,
category: 132, category: 132,
page: 2, page: 2,
}); });
expect(url).toContain("&page=2"); expect(url).toContain("&page=2");
}); });
test("should handle different sort options", () => { test("should handle different sort options", () => {
const dateUrl = buildSearchUrl("iphone", { const dateUrl = buildSearchUrl("iphone", {
sortBy: "date", sortBy: "date",
sortOrder: "asc", sortOrder: "asc",
}); });
expect(dateUrl).toContain("sort=DATE"); expect(dateUrl).toContain("sort=DATE");
expect(dateUrl).toContain("order=ASC"); expect(dateUrl).toContain("order=ASC");
const priceUrl = buildSearchUrl("iphone", { const priceUrl = buildSearchUrl("iphone", {
sortBy: "price", sortBy: "price",
sortOrder: "desc", sortOrder: "desc",
}); });
expect(priceUrl).toContain("sort=PRICE"); expect(priceUrl).toContain("sort=PRICE");
expect(priceUrl).toContain("order=DESC"); expect(priceUrl).toContain("order=DESC");
}); });
test("should handle string location/category inputs", () => { test("should handle string location/category inputs", () => {
const url = buildSearchUrl("iphone", { const url = buildSearchUrl("iphone", {
location: "toronto", location: "toronto",
category: "phones", category: "phones",
}); });
expect(url).toContain("k0c132l1700273"); // phones + toronto expect(url).toContain("k0c132l1700273"); // phones + toronto
}); });
}); });
}); });
describe("Error Classes", () => { describe("Error Classes", () => {
test("HttpError should store status and URL", () => { test("NetworkError should store URL and cause", () => {
const error = new HttpError("Not found", 404, "https://example.com"); const cause = new Error("Connection failed");
expect(error.message).toBe("Not found"); const error = new NetworkError(
expect(error.statusCode).toBe(404); "Network error",
expect(error.url).toBe("https://example.com"); "https://example.com",
expect(error.name).toBe("HttpError"); cause,
}); );
expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com");
expect(error.cause).toBe(cause);
expect(error.name).toBe("NetworkError");
});
test("NetworkError should store URL and cause", () => { test("ParseError should store data", () => {
const cause = new Error("Connection failed"); const data = { invalid: "json" };
const error = new NetworkError( const error = new ParseError("Invalid JSON", data);
"Network error", expect(error.message).toBe("Invalid JSON");
"https://example.com", expect(error.data).toBe(data);
cause expect(error.name).toBe("ParseError");
); });
expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com");
expect(error.cause).toBe(cause);
expect(error.name).toBe("NetworkError");
});
test("ParseError should store data", () => { test("RateLimitError should store URL and reset time", () => {
const data = { invalid: "json" }; const error = new RateLimitError("Rate limited", "https://example.com", 60);
const error = new ParseError("Invalid JSON", data); expect(error.message).toBe("Rate limited");
expect(error.message).toBe("Invalid JSON"); expect(error.url).toBe("https://example.com");
expect(error.data).toBe(data); expect(error.resetTime).toBe(60);
expect(error.name).toBe("ParseError"); expect(error.name).toBe("RateLimitError");
}); });
test("RateLimitError should store URL and reset time", () => { test("ValidationError should work without field", () => {
const error = new RateLimitError("Rate limited", "https://example.com", 60); const error = new ValidationError("Invalid value");
expect(error.message).toBe("Rate limited"); expect(error.message).toBe("Invalid value");
expect(error.url).toBe("https://example.com"); expect(error.name).toBe("ValidationError");
expect(error.resetTime).toBe(60); });
expect(error.name).toBe("RateLimitError");
});
test("ValidationError should work without field", () => {
const error = new ValidationError("Invalid value");
expect(error.message).toBe("Invalid value");
expect(error.name).toBe("ValidationError");
});
}); });

View File

@@ -1,4 +1,4 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { describe, expect, test } from "bun:test";
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji"; import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
describe("Utility Functions", () => { describe("Utility Functions", () => {

View File

@@ -3,9 +3,9 @@
// Mock fetch globally for tests // Mock fetch globally for tests
global.fetch = global.fetch =
global.fetch || global.fetch ||
(() => { (() => {
throw new Error("fetch is not available in test environment"); throw new Error("fetch is not available in test environment");
}); });
// Add any global test utilities here // Add any global test utilities here

View File

@@ -4,30 +4,33 @@ import { serverCard } from "./protocol/metadata";
const PORT = process.env.MCP_PORT || 4006; const PORT = process.env.MCP_PORT || 4006;
const server = Bun.serve({ const server = Bun.serve({
port: PORT as number | string, port: PORT as number | string,
idleTimeout: 0, idleTimeout: 255, // 255 seconds (max allowed)
routes: { routes: {
// MCP metadata discovery endpoint // MCP metadata discovery endpoint
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), { "/.well-known/mcp/server-card.json": new Response(
headers: { "Content-Type": "application/json" }, JSON.stringify(serverCard),
}), {
headers: { "Content-Type": "application/json" },
},
),
// MCP JSON-RPC 2.0 protocol endpoint // MCP JSON-RPC 2.0 protocol endpoint
"/mcp": async (req: Request) => { "/mcp": async (req: Request) => {
if (req.method === "POST") { if (req.method === "POST") {
return await handleMcpRequest(req); return await handleMcpRequest(req);
} }
return Response.json( return Response.json(
{ message: "MCP endpoint requires POST request" }, { message: "MCP endpoint requires POST request" },
{ status: 405 } { status: 405 },
); );
}, },
}, },
// Fallback for all other routes // Fallback for all other routes
fetch(req: Request) { fetch(_req: Request) {
return new Response("Not Found", { status: 404 }); return new Response("Not Found", { status: 404 });
}, },
}); });
console.log(`MCP Server running on ${server.hostname}:${server.port}`); console.log(`MCP Server running on ${server.hostname}:${server.port}`);

View File

@@ -1,187 +1,292 @@
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
import { tools } from "./tools"; import { tools } from "./tools";
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
/** /**
* Handle MCP JSON-RPC 2.0 protocol requests * Handle MCP JSON-RPC 2.0 protocol requests
*/ */
export async function handleMcpRequest(req: Request): Promise<Response> { export async function handleMcpRequest(req: Request): Promise<Response> {
try { try {
const body = await req.json(); const body = await req.json();
// Validate JSON-RPC 2.0 format // Validate JSON-RPC 2.0 format
if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) { if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) {
return Response.json( return Response.json(
{ {
jsonrpc: "2.0", jsonrpc: "2.0",
error: { code: -32600, message: "Invalid Request" }, error: { code: -32600, message: "Invalid Request" },
id: body.id, id: body.id,
}, },
{ status: 400 } { status: 400 },
); );
} }
const { method, params, id } = body; const { method, params, id } = body;
// Handle initialize method // Handle initialize method
if (method === "initialize") { if (method === "initialize") {
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",
id, id,
result: { result: {
protocolVersion: "2025-06-18", protocolVersion: "2025-06-18",
capabilities: { capabilities: {
tools: { tools: {
listChanged: true, listChanged: true,
}, },
}, },
serverInfo: { serverInfo: {
name: "marketplace-scrapers", name: "marketplace-scrapers",
version: "1.0.0", version: "1.0.0",
}, },
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces", instructions:
}, "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
}); },
} });
}
// Handle tools/list method // Handle tools/list method
if (method === "tools/list") { if (method === "tools/list") {
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",
id, id,
result: { result: {
tools, tools,
}, },
}); });
} }
// Handle notifications (messages without id field should not get a response) // Handle notifications (messages without id field should not get a response)
if (!id) { if (!id) {
// Notifications don't require a response // Notifications don't require a response
if (method === "notifications/initialized") { if (method === "notifications/initialized") {
// Client initialized successfully, no response needed // Client initialized successfully, no response needed
return new Response(null, { status: 204 }); return new Response(null, { status: 204 });
} }
if (method === "notifications/progress") { if (method === "notifications/progress") {
// Progress notifications, no response needed // Progress notifications, no response needed
return new Response(null, { status: 204 }); return new Response(null, { status: 204 });
} }
// Unknown notification - still no response for notifications // Unknown notification - still no response for notifications
return new Response(null, { status: 204 }); return new Response(null, { status: 204 });
} }
// Handle tools/call method // Handle tools/call method
if (method === "tools/call") { if (method === "tools/call") {
const { name, arguments: args } = params || {}; const { name, arguments: args } = params || {};
if (!name || !args) { if (!name || !args) {
return Response.json( return Response.json(
{ {
jsonrpc: "2.0", jsonrpc: "2.0",
id, id,
error: { code: -32602, message: "Invalid params: name and arguments required" }, error: {
}, code: -32602,
{ status: 400 } message: "Invalid params: name and arguments required",
); },
} },
{ status: 400 },
);
}
// Route tool calls to appropriate handlers // Route tool calls to appropriate handlers
try { try {
let result; let result: unknown;
if (name === "search_kijiji") { if (name === "search_kijiji") {
const query = args.query; const query = args.query;
if (!query) { if (!query) {
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",
id, id,
error: { code: -32602, message: "query parameter is required" }, error: { code: -32602, message: "query parameter is required" },
}); });
} }
const items = await fetchKijijiItems(query, args.maxItems || 5); const params = new URLSearchParams({ q: query });
result = items || []; if (args.location) params.append("location", args.location);
} else if (name === "search_facebook") { if (args.category) params.append("category", args.category);
const query = args.query; if (args.keywords) params.append("keywords", args.keywords);
if (!query) { if (args.sortBy) params.append("sortBy", args.sortBy);
return Response.json({ if (args.sortOrder) params.append("sortOrder", args.sortOrder);
jsonrpc: "2.0", if (args.maxPages)
id, params.append("maxPages", args.maxPages.toString());
error: { code: -32602, message: "query parameter is required" }, if (args.priceMin)
}); params.append("priceMin", args.priceMin.toString());
} if (args.priceMax)
const items = await fetchFacebookItems( params.append("priceMax", args.priceMax.toString());
query, if (args.cookies) params.append("cookies", args.cookies);
args.maxItems || 5,
args.location || "toronto",
25,
args.cookiesSource
);
result = items || [];
} else if (name === "search_ebay") {
const query = args.query;
if (!query) {
return Response.json({
jsonrpc: "2.0",
id,
error: { code: -32602, message: "query parameter is required" },
});
}
const items = await fetchEbayItems(query, args.maxItems || 5, {
minPrice: args.minPrice,
maxPrice: args.maxPrice,
strictMode: args.strictMode || false,
exclusions: args.exclusions || [],
keywords: args.keywords || [query],
buyItNowOnly: args.buyItNowOnly !== false,
canadaOnly: args.canadaOnly !== false,
});
result = items || [];
} else {
return Response.json({
jsonrpc: "2.0",
id,
error: { code: -32601, message: `Unknown tool: ${name}` },
});
}
return Response.json({ console.log(
jsonrpc: "2.0", `[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
id, );
result: { const response = await Promise.race([
content: [ fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
{ new Promise<Response>((_, reject) =>
type: "text", setTimeout(
text: JSON.stringify(result, null, 2), () =>
}, reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
], API_TIMEOUT,
}, ),
}); ),
} catch (error) { ]);
const errorMessage = error instanceof Error ? error.message : "Unknown error";
return Response.json({
jsonrpc: "2.0",
id,
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` },
});
}
}
// Method not found if (!response.ok) {
return Response.json( const errorText = await response.text();
{ console.error(
jsonrpc: "2.0", `[MCP] Kijiji API error ${response.status}: ${errorText}`,
id, );
error: { code: -32601, message: `Method not found: ${method}` }, throw new Error(`API returned ${response.status}: ${errorText}`);
}, }
{ status: 404 } result = await response.json();
); console.log(
} catch (error) { `[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
const errorMessage = error instanceof Error ? error.message : "Unknown error"; );
return Response.json( } else if (name === "search_facebook") {
{ const query = args.query;
jsonrpc: "2.0", if (!query) {
error: { code: -32700, message: `Parse error: ${errorMessage}` }, return Response.json({
}, jsonrpc: "2.0",
{ status: 400 } id,
); error: { code: -32602, message: "query parameter is required" },
} });
}
const params = new URLSearchParams({ q: query });
if (args.location) params.append("location", args.location);
if (args.maxItems)
params.append("maxItems", args.maxItems.toString());
if (args.cookiesSource) params.append("cookies", args.cookiesSource);
console.log(
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] Facebook API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else if (name === "search_ebay") {
const query = args.query;
if (!query) {
return Response.json({
jsonrpc: "2.0",
id,
error: { code: -32602, message: "query parameter is required" },
});
}
const params = new URLSearchParams({ q: query });
if (args.minPrice)
params.append("minPrice", args.minPrice.toString());
if (args.maxPrice)
params.append("maxPrice", args.maxPrice.toString());
if (args.strictMode !== undefined)
params.append("strictMode", args.strictMode.toString());
if (args.exclusions?.length)
params.append("exclusions", args.exclusions.join(","));
if (args.keywords?.length)
params.append("keywords", args.keywords.join(","));
if (args.buyItNowOnly !== undefined)
params.append("buyItNowOnly", args.buyItNowOnly.toString());
if (args.canadaOnly !== undefined)
params.append("canadaOnly", args.canadaOnly.toString());
if (args.maxItems)
params.append("maxItems", args.maxItems.toString());
if (args.cookies) params.append("cookies", args.cookies);
console.log(
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] eBay API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else {
return Response.json({
jsonrpc: "2.0",
id,
error: { code: -32601, message: `Unknown tool: ${name}` },
});
}
return Response.json({
jsonrpc: "2.0",
id,
result: {
content: [
{
type: "text",
text: JSON.stringify(result, null, 2),
},
],
},
});
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : "Unknown error";
return Response.json({
jsonrpc: "2.0",
id,
error: {
code: -32603,
message: `Tool execution failed: ${errorMessage}`,
},
});
}
}
// Method not found
return Response.json(
{
jsonrpc: "2.0",
id,
error: { code: -32601, message: `Method not found: ${method}` },
},
{ status: 404 },
);
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : "Unknown error";
return Response.json(
{
jsonrpc: "2.0",
error: { code: -32700, message: `Parse error: ${errorMessage}` },
},
{ status: 400 },
);
}
} }

View File

@@ -3,23 +3,25 @@
*/ */
export const serverCard = { export const serverCard = {
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json", $schema:
version: "1.0", "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
protocolVersion: "2025-06-18", version: "1.0",
serverInfo: { protocolVersion: "2025-06-18",
name: "marketplace-scrapers", serverInfo: {
title: "Marketplace Scrapers MCP Server", name: "marketplace-scrapers",
version: "1.0.0", title: "Marketplace Scrapers MCP Server",
}, version: "1.0.0",
transport: { },
type: "streamable-http", transport: {
endpoint: "/mcp", type: "streamable-http",
}, endpoint: "/mcp",
capabilities: { },
tools: { capabilities: {
listChanged: true, tools: {
}, listChanged: true,
}, },
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay", },
tools: "dynamic", description:
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
tools: "dynamic",
}; };

View File

@@ -3,103 +3,148 @@
*/ */
export const tools = [ export const tools = [
{ {
name: "search_kijiji", name: "search_kijiji",
description: "Search Kijiji marketplace for listings matching a query", description: "Search Kijiji marketplace for listings matching a query",
inputSchema: { inputSchema: {
type: "object", type: "object",
properties: { properties: {
query: { query: {
type: "string", type: "string",
description: "Search query for Kijiji listings", description: "Search query for Kijiji listings",
}, },
maxItems: { location: {
type: "number", type: "string",
description: "Maximum number of items to return", description:
default: 5, "Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
}, },
}, category: {
required: ["query"], type: "string",
}, description:
}, "Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
{ },
name: "search_facebook", keywords: {
description: "Search Facebook Marketplace for listings matching a query", type: "string",
inputSchema: { description: "Additional keywords to filter results",
type: "object", },
properties: { sortBy: {
query: { type: "string",
type: "string", description: "Sort results by field",
description: "Search query for Facebook Marketplace listings", enum: ["relevancy", "date", "price", "distance"],
}, default: "relevancy",
location: { },
type: "string", sortOrder: {
description: "Location for search (e.g., 'toronto')", type: "string",
default: "toronto", description: "Sort order",
}, enum: ["asc", "desc"],
maxItems: { default: "desc",
type: "number", },
description: "Maximum number of items to return", maxPages: {
default: 5, type: "number",
}, description: "Maximum pages to fetch (~40 items per page)",
cookiesSource: { default: 5,
type: "string", },
description: "Optional Facebook session cookies source", priceMin: {
}, type: "number",
}, description: "Minimum price in cents",
required: ["query"], },
}, priceMax: {
}, type: "number",
{ description: "Maximum price in cents",
name: "search_ebay", },
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)", cookies: {
inputSchema: { type: "string",
type: "object", description:
properties: { "Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
query: { },
type: "string", },
description: "Search query for eBay listings", required: ["query"],
}, },
minPrice: { },
type: "number", {
description: "Minimum price filter", name: "search_facebook",
}, description: "Search Facebook Marketplace for listings matching a query",
maxPrice: { inputSchema: {
type: "number", type: "object",
description: "Maximum price filter", properties: {
}, query: {
strictMode: { type: "string",
type: "boolean", description: "Search query for Facebook Marketplace listings",
description: "Enable strict search mode", },
default: false, location: {
}, type: "string",
exclusions: { description: "Location for search (e.g., 'toronto')",
type: "array", default: "toronto",
items: { type: "string" }, },
description: "Terms to exclude from results", maxItems: {
}, type: "number",
keywords: { description: "Maximum number of items to return",
type: "array", default: 5,
items: { type: "string" }, },
description: "Keywords to include in search", cookiesSource: {
}, type: "string",
buyItNowOnly: { description: "Optional Facebook session cookies source",
type: "boolean", },
description: "Include only Buy It Now listings (exclude auctions)", },
default: true, required: ["query"],
}, },
canadaOnly: { },
type: "boolean", {
description: "Include only Canadian sellers/listings", name: "search_ebay",
default: true, description:
}, "Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
maxItems: { inputSchema: {
type: "number", type: "object",
description: "Maximum number of items to return", properties: {
default: 5, query: {
}, type: "string",
}, description: "Search query for eBay listings",
required: ["query"], },
}, minPrice: {
}, type: "number",
description: "Minimum price filter",
},
maxPrice: {
type: "number",
description: "Maximum price filter",
},
strictMode: {
type: "boolean",
description: "Enable strict search mode",
default: false,
},
exclusions: {
type: "array",
items: { type: "string" },
description: "Terms to exclude from results",
},
keywords: {
type: "array",
items: { type: "string" },
description: "Keywords to include in search",
},
buyItNowOnly: {
type: "boolean",
description: "Include only Buy It Now listings (exclude auctions)",
default: true,
},
canadaOnly: {
type: "boolean",
description: "Include only Canadian sellers/listings",
default: true,
},
maxItems: {
type: "number",
description: "Maximum number of items to return",
default: 5,
},
cookies: {
type: "string",
description:
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
},
},
required: ["query"],
},
},
]; ];

26
scripts/biome-symlink.sh Executable file
View File

@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# Replace every biome executable that fd finds under node_modules with a
# symlink to the biome binary found on PATH.
#
# Requires: biome and fd on PATH. Run from the directory that contains
# node_modules.
set -euo pipefail

# Get the path to the system biome executable. `command -v` is a shell
# builtin; `which` is an external tool whose presence and output vary.
BIOME_PATH=$(command -v biome || true)

if [[ -z "$BIOME_PATH" ]]; then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

# Without this check a missing fd would be reported as "nothing found".
if ! command -v fd >/dev/null 2>&1; then
  echo "Error: fd executable not found in PATH" >&2
  exit 1
fi

# Find all biome executables in node_modules. The list is collected before
# anything is modified, and read NUL-delimited so that paths containing
# whitespace or glob characters stay intact.
files=()
while IFS= read -r -d '' file; do
  files+=("$file")
done < <(fd biome node_modules --type executable --no-ignore --follow --print0)

if ((${#files[@]} == 0)); then
  echo "No biome executables found in node_modules"
  exit 0
fi

# Replace each with a symlink to the system biome
for file in "${files[@]}"; do
  # -ef is true when both paths resolve to the same file. That covers links
  # created by an earlier run and the case where BIOME_PATH itself lives in
  # node_modules; replacing it would leave a symlink pointing at itself.
  if [[ "$file" -ef "$BIOME_PATH" ]]; then
    echo "Skipping $file (already resolves to $BIOME_PATH)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  rm -- "$file"
  ln -s -- "$BIOME_PATH" "$file"
done

echo "Done."

30
scripts/remove-eslint.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Delete every line matching PATTERN from each regular file that fd lists
# under the current directory. sed writes a "<file>.bak" backup next to every
# file it modifies.
#
# Requires: fd, rg (ripgrep), awk and sed on PATH.

PATTERN="eslint"

# Read `rg --line-number` output ("N:matched text") on stdin and print one sed
# delete command per match ("Nd"), one command per line.
#
# A sed address takes at most two comma-separated numbers, and "A,B" means the
# whole range A..B. A joined list such as "3,7d" would therefore also delete
# the unrelated lines between two matches, and "3,7,12d" is a syntax error.
# Separate "Nd" commands delete exactly the matched lines.
matches_to_sed_script() {
  awk -F':' '$1 ~ /^[0-9]+$/ { print $1 "d" }'
}

# Walk the tree and strip matching lines from every regular file.
# Returns 1 when a required tool is missing, 0 otherwise.
main() {
  local tool file sed_script line_numbers

  for tool in fd rg; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "Error: $tool executable not found in PATH" >&2
      return 1
    fi
  done

  # NUL-delimited paths keep names with spaces or glob characters intact.
  while IFS= read -r -d '' file; do
    [[ -f "$file" ]] || continue

    # rg prints "N:line" for every match; convert that into sed commands.
    sed_script=$(
      rg --line-number --no-filename -e "$PATTERN" -- "$file" \
        | matches_to_sed_script
    )

    if [[ -n "$sed_script" ]]; then
      # Rebuild the comma-separated list for the log message only.
      line_numbers=${sed_script//d/}
      line_numbers=${line_numbers//$'\n'/,}
      echo "Deleting lines $line_numbers from $file..."

      # Delete the listed lines in-place, keeping a .bak backup.
      # NOTE: the attached-suffix form -i.bak works with GNU and BSD sed.
      sed -i.bak -e "$sed_script" -- "$file"

      # Optional: Remove the backup file created by sed -i.bak
      # rm -- "${file}.bak"
    else
      echo "$file: No lines matching pattern found."
    fi
  done < <(fd . --print0)
}

main "$@"

25
scripts/start.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# Start the API server and the MCP server as background jobs, forward
# SIGTERM/SIGINT to both, and block until they exit.
set -e

# PIDs of the background services; empty until the service has been started.
API_PID=""
MCP_PID=""

# Signal handler: send SIGTERM to every service started so far, then wait for
# all background jobs to finish.
stop_services() {
  local pid
  echo "Received shutdown signal, stopping services..."
  for pid in "$API_PID" "$MCP_PID"; do
    # A service may not have been started yet, or may already have exited.
    # Under `set -e` a failing kill would end the handler before `wait`.
    if [[ -n "$pid" ]]; then
      kill -TERM "$pid" 2>/dev/null || true
    fi
  done
  wait
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap stop_services TERM INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Start MCP Server in background
# NOTE(review): this message previously read API_PORT, so it printed the API
# port whenever that variable was set. MCP_PORT is assumed to be the variable
# the MCP server reads - confirm against dist/mcp/index.js.
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Wait for both processes; the exit status is that of the last PID listed.
wait "$API_PID" "$MCP_PID"