Compare commits

..

24 Commits

Author SHA1 Message Date
e4ab145d70 feat: add cookie support to kijiji scraper
Add optional cookie parameter to bypass bot detection (403 errors).
Cookies can be provided via parameter, KIJIJI_COOKIE env var, or
cookies/kijiji.json file. Supports both JSON array and string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:13 -05:00
1dce0392e3 refactor: use shared cookie utility in ebay scraper
Replace inline cookie loading with shared utility functions.
Now supports both JSON array and cookie string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:02 -05:00
251fcbb7d9 refactor: use shared cookie utility in facebook scraper
Replace inline cookie parsing with shared utility functions.
Maintains backward compatibility with existing exports.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:52 -05:00
9bc57d6b54 refactor: add shared cookie utility to core package
Move cookie parsing logic to a dedicated utility module that can be
shared across all scrapers. Supports both JSON array and cookie string
formats for all input sources (parameter, env var, file).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:44 -05:00
4a467c9f02 fix: support both json and string cookies for facebook 2026-01-23 19:00:51 -05:00
f944d319c2 chore: update dockerignore 2026-01-23 15:43:13 -05:00
cf9784a565 feat: implement cookie priority hierarchy (URL param > env var > file) for Facebook and eBay scrapers 2026-01-23 15:32:17 -05:00
df0c528535 fix: correct formatCentsToCurrency usage in facebook scraper 2026-01-23 14:50:41 -05:00
2f97d3eafd fix: correct formatCentsToCurrency usage in kijiji scraper 2026-01-23 14:50:41 -05:00
65eb8d1724 refactor: increase kijiji scraping request rate to 4 rps
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 14:50:37 -05:00
f3839aba54 fix: increase kijiji rate limit to 4 rps
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:59:47 -05:00
90b98bfb09 chore: testing mcp server
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:59:28 -05:00
eb6705df0f feat: add 60-second timeouts to MCP request handlers for reliability 2026-01-23 13:59:28 -05:00
72525609ed fix: set idle timeout to 255 seconds in MCP server to prevent premature shutdown 2026-01-23 13:59:28 -05:00
8b0a65860c chore: add imports for linkedom and delay utils in ebay scraper 2026-01-23 13:10:44 -05:00
f9b1c7e096 fix: remove eslint-disable directives
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:08:38 -05:00
9edc74cbeb chore: local dev scripts
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-23 13:07:01 -05:00
ee0fca826d style: fix formatting in MCP server 2026-01-23 11:56:54 -05:00
f7372612fb test: fix formatting in test setup 2026-01-23 11:56:51 -05:00
bce126664e test: remove unused imports in Kijiji utils tests 2026-01-23 11:56:47 -05:00
8cbf11538e test: fix formatting and remove unused HttpError import in Kijiji tests 2026-01-23 11:56:44 -05:00
79f47fdaef test: remove unused import in Facebook integration tests 2026-01-23 11:56:41 -05:00
de5069bf2b test: fix unused variable in Facebook core tests 2026-01-23 11:56:38 -05:00
637f1a4e75 fix: resolve biome lint errors and warnings 2026-01-23 10:33:15 -05:00
31 changed files with 2920 additions and 2532 deletions

View File

@@ -1,145 +1,84 @@
# Dependencies # =============================================================================
# Dependencies & Build Output
# =============================================================================
node_modules/ node_modules/
npm-debug.log* dist/
yarn-debug.log* out/
yarn-error.log*
bun.sum
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage/
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
jspm_packages/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz *.tgz
# Yarn Integrity file # =============================================================================
.yarn-integrity # Sensitive Files
# =============================================================================
# dotenv environment variables file
.env .env
.env.local .env.*
.env.development.local .envrc
.env.test.local cookies/
.env.production.local *.pem
*.key
*.cert
*secret*
*credential*
# parcel-bundler cache (https://parceljs.org/) # =============================================================================
.cache # Development Tools & Config
.parcel-cache # =============================================================================
# Nix/Devenv
.devenv/
.devenv.flake.nix
devenv.*
.direnv/
# Next.js build output # Linting/Formatting
.next biome.json
.eslintcache
.pre-commit-config.yaml
# Nuxt.js build / generate output # IDE/Editor
.nuxt
dist
# Gatsby files
.cache/
public
# Vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# IDE and editor files
.vscode/ .vscode/
.idea/ .idea/
*.swp *.swp
*.swo *.swo
*~ *~
# OS generated files # AI Assistant Config
.DS_Store .claude/
.DS_Store? CLAUDE.md
._* AGENTS.md
.Spotlight-V100 opencode.jsonc
.Trashes
ehthumbs.db
Thumbs.db
# Git # =============================================================================
.git # Documentation (not needed at runtime)
# =============================================================================
README.md
*.md
docs/
# =============================================================================
# Git & Docker (avoid recursive inclusion)
# =============================================================================
.git/
.gitignore .gitignore
# Docker
Dockerfile* Dockerfile*
.dockerignore .dockerignore
# Documentation # =============================================================================
README.md # Testing & Coverage
docs/ # =============================================================================
# Test files
test/ test/
tests/ tests/
*.test.js
*.test.ts *.test.ts
*.spec.js
*.spec.ts *.spec.ts
coverage/
*.lcov
.nyc_output/
# Development files # =============================================================================
CLAUDE.md # OS & Misc
devenv.* # =============================================================================
.DS_Store
Thumbs.db
*.log *.log
*.pid
# Runtime cookies/config .cache/
cookies/ examples/
scripts/

View File

@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
- `GET /api/status` - Health check - `GET /api/status` - Health check
- `GET /api/kijiji?q={query}` - Search Kijiji - `GET /api/kijiji?q={query}` - Search Kijiji
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook - `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay - `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
- `GET /api/*` - 404 fallback - `GET /api/*` - 404 fallback
### MCP Server (`@marketplace-scrapers/mcp-server`) ### MCP Server (`@marketplace-scrapers/mcp-server`)
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
**Tools:** **Tools:**
- `search_kijiji` - Search Kijiji (query, maxItems) - `search_kijiji` - Search Kijiji (query, maxItems)
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource) - `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems) - `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
## API Response Formats ## API Response Formats
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
### eBay-specific fields ### eBay-specific fields
Minimal - mainly the common fields Minimal - mainly the common fields
## Cookie Management
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
### Cookie Priority Hierarchy (High → Low)
All scrapers follow this loading order:
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
### Facebook Cookies
- **Required for**: Facebook Marketplace scraping
- **Format**: JSON array or cookie string (see `cookies/README.md`)
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
**Setup:**
```bash
# Option 1: File (fallback)
# Create cookies/facebook.json with cookie array
# Option 2: Environment variable
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
# Option 3: URL parameter (highest priority)
curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]"
```
### eBay Cookies
- **Required for**: Bypassing bot detection
- **Format**: Cookie string `"name=value; name2=value2"` or JSON array
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
**Setup:**
```bash
# Option 1: File (fallback)
# Create cookies/ebay.json with cookie string
# Option 2: Environment variable
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
# Option 3: URL parameter (highest priority)
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
```
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
## Technical Details ## Technical Details
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
## Development Notes ## Development Notes
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json` - **Cookie files** are git-ignored for security (see `cookies/README.md`)
- eBay uses custom headers to bypass basic bot detection
- Kijiji parses Apollo state from Next.js hydration data - Kijiji parses Apollo state from Next.js hydration data
- All scrapers handle retries on 429/5xx errors - All scrapers handle retries on 429/5xx errors
- Cookie priority ensures flexibility across different deployment environments

View File

@@ -1,24 +1,33 @@
# Facebook Marketplace Cookies Setup # Marketplace Cookies Setup
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies. Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
## Option 1: Cookies File (`facebook.json`) ## Cookie Priority Hierarchy
1. Log into Facebook in your browser All scrapers follow this priority order (highest to lowest):
2. Open Developer Tools → Network tab 1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
3. Visit facebook.com/marketplace (ensure you're logged in) 2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
4. Look for any marketplace-related requests in the Network tab 3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
5. Export cookies from the browser's Application/Storage → Cookies section
6. Save the cookies as a JSON array to `facebook.json`
The `facebook.json` file should contain Facebook session cookies, particularly: ---
## Facebook Marketplace (`facebook.json`)
### Required Cookies
- `c_user`: Your Facebook user ID - `c_user`: Your Facebook user ID
- `xs`: Facebook session token - `xs`: Facebook session token
- `fr`: Facebook request token - `fr`: Facebook request token
- `datr`: Data attribution token - `datr`: Data attribution token
- `sb`: Session browser token - `sb`: Session browser token
Example structure: ### Setup Methods
**Method 1: Cookie File (Lowest Priority)**
1. Log into Facebook in your browser
2. Open Developer Tools → Application/Storage → Cookies
3. Export cookies as JSON array to `facebook.json`
Example `facebook.json`:
```json ```json
[ [
{ {
@@ -27,26 +36,59 @@ Example structure:
"domain": ".facebook.com", "domain": ".facebook.com",
"path": "/", "path": "/",
"secure": true "secure": true
}, }
// ... other cookies
] ]
``` ```
## Option 2: URL Parameter **Method 2: Environment Variable**
```bash
You can pass cookies directly via the `cookies` URL parameter: export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
``` ```
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
**Method 3: URL Parameter (Highest Priority)**
``` ```
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
```
---
## eBay (`ebay.json`)
eBay has aggressive bot detection that blocks requests without valid session cookies.
### Setup Methods
**Method 1: Cookie File (Lowest Priority)**
1. Log into eBay in your browser
2. Open Developer Tools → Network tab
3. Visit ebay.ca and inspect any request headers
4. Copy the full `Cookie` header value
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
Example `ebay.json`:
```
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
```
**Method 2: Environment Variable**
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
**Method 3: URL Parameter (Highest Priority)**
```
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
```
---
## Important Notes ## Important Notes
- Cookies must be from an active Facebook session - Cookies must be from active browser sessions
- Cookies expire, so you may need to refresh them periodically - Cookies expire and need periodic refresh
- Never share real cookies or commit them to version control - **NEVER** commit real cookies to version control
- Facebook may block automated scraping even with valid cookies - Platforms may still block automated scraping despite valid cookies
## Security ## Security
The cookies file is intentionally left out of version control for security reasons. All `*.json` files in this directory are git-ignored for security.

View File

@@ -0,0 +1 @@
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE

9
opencode.jsonc Normal file
View File

@@ -0,0 +1,9 @@
{
"$schema": "https://opencode.ai/config.json",
"mcp": {
"marketplace-scrape": {
"type": "remote",
"url": "http://localhost:4006/mcp"
}
}
}

View File

@@ -2,11 +2,19 @@
"name": "marketplace-scrapers-monorepo", "name": "marketplace-scrapers-monorepo",
"version": "1.0.0", "version": "1.0.0",
"scripts": { "scripts": {
"ci": "biome ci" "ci": "biome ci",
"clean": "rm -rf dist",
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp",
"build": "bun run clean && bun run build:all",
"start": "./scripts/start.sh"
}, },
"private": true, "private": true,
"type": "module", "type": "module",
"workspaces": ["packages/*"], "workspaces": [
"packages/*"
],
"devDependencies": { "devDependencies": {
"@biomejs/biome": "2.3.11" "@biomejs/biome": "2.3.11"
} }

View File

@@ -1,8 +1,9 @@
import { fetchEbayItems } from "@marketplace-scrapers/core"; import { fetchEbayItems } from "@marketplace-scrapers/core";
/** /**
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly} * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
* Search eBay for listings (default: Buy It Now only, Canada only) * Search eBay for listings (default: Buy It Now only, Canada only)
* Optional: Pass cookies parameter to bypass bot detection
*/ */
export async function ebayRoute(req: Request): Promise<Response> { export async function ebayRoute(req: Request): Promise<Response> {
try { try {
@@ -37,6 +38,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
const maxItemsParam = reqUrl.searchParams.get("maxItems"); const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined; const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
const cookies = reqUrl.searchParams.get("cookies") || undefined;
const items = await fetchEbayItems(SEARCH_QUERY, 1, { const items = await fetchEbayItems(SEARCH_QUERY, 1, {
minPrice, minPrice,
@@ -46,6 +48,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
keywords, keywords,
buyItNowOnly, buyItNowOnly,
canadaOnly, canadaOnly,
cookies,
}); });
const results = maxItems ? items.slice(0, maxItems) : items; const results = maxItems ? items.slice(0, maxItems) : items;

View File

@@ -41,12 +41,13 @@ export async function kijijiRoute(req: Request): Promise<Response> {
maxPages, maxPages,
priceMin, priceMin,
priceMax, priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
}; };
try { try {
const items = await fetchKijijiItems( const items = await fetchKijijiItems(
SEARCH_QUERY, SEARCH_QUERY,
1, 4, // 4 requests per second for faster scraping
"https://www.kijiji.ca", "https://www.kijiji.ca",
searchOptions, searchOptions,
{}, {},

View File

@@ -1,45 +1,43 @@
// Export all scrapers // Export all scrapers
export type { EbayListingDetails } from "./scrapers/ebay";
export { default as fetchEbayItems } from "./scrapers/ebay";
export type { FacebookListingDetails } from "./scrapers/facebook";
export {
default as fetchFacebookItems,
ensureFacebookCookies,
extractFacebookItemData,
extractFacebookMarketplaceData,
fetchFacebookItem,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
} from "./scrapers/facebook";
export type {
DetailedListing,
KijijiListingDetails,
ListingFetchOptions,
SearchOptions,
} from "./scrapers/kijiji";
export { export {
default as fetchKijijiItems,
slugify,
resolveLocationId,
resolveCategoryId,
buildSearchUrl, buildSearchUrl,
default as fetchKijijiItems,
extractApolloState, extractApolloState,
parseSearch,
parseDetailedListing,
HttpError, HttpError,
NetworkError, NetworkError,
ParseError, ParseError,
parseDetailedListing,
parseSearch,
RateLimitError, RateLimitError,
resolveCategoryId,
resolveLocationId,
slugify,
ValidationError, ValidationError,
} from "./scrapers/kijiji"; } from "./scrapers/kijiji";
export type {
KijijiListingDetails,
DetailedListing,
SearchOptions,
ListingFetchOptions,
} from "./scrapers/kijiji";
export {
default as fetchFacebookItems,
fetchFacebookItem,
parseFacebookCookieString,
ensureFacebookCookies,
extractFacebookMarketplaceData,
extractFacebookItemData,
parseFacebookAds,
parseFacebookItem,
} from "./scrapers/facebook";
export type { FacebookListingDetails } from "./scrapers/facebook";
export { default as fetchEbayItems } from "./scrapers/ebay";
export type { EbayListingDetails } from "./scrapers/ebay";
// Export shared utilities
export * from "./utils/http";
export * from "./utils/delay";
export * from "./utils/format";
// Export shared types // Export shared types
export * from "./types/common"; export * from "./types/common";
// Export shared utilities
export * from "./utils/cookies";
export * from "./utils/delay";
export * from "./utils/format";
export * from "./utils/http";

View File

@@ -1,9 +1,18 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common"; import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay"; import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http"; // eBay cookie configuration
const EBAY_COOKIE_CONFIG: CookieConfig = {
name: "eBay",
domain: ".ebay.ca",
envVar: "EBAY_COOKIE",
filePath: "./cookies/ebay.json",
};
// ----------------------------- Types ----------------------------- // ----------------------------- Types -----------------------------
@@ -43,7 +52,7 @@ function parseEbayPrice(
const amountStr = numberMatches[0].replace(/,/g, ""); const amountStr = numberMatches[0].replace(/,/g, "");
const dollars = parseFloat(amountStr); const dollars = parseFloat(amountStr);
if (isNaN(dollars)) return null; if (Number.isNaN(dollars)) return null;
const cents = Math.round(dollars * 100); const cents = Math.round(dollars * 100);
@@ -105,13 +114,26 @@ function parseEbayListings(
} }
// Find the container - go up several levels to find the item container // Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures // Modern eBay uses complex nested structures (often 5-10 levels deep)
let container = linkElement.parentElement?.parentElement?.parentElement; let container: Element | null = linkElement;
if (!container) { let depth = 0;
// Try a different level const maxDepth = 15;
container = linkElement.parentElement?.parentElement;
// Walk up until we find a list item or results container
while (container && depth < maxDepth) {
const classes = container.className || "";
if (
classes.includes("s-item") ||
classes.includes("srp-results") ||
container.tagName === "LI"
) {
break;
} }
if (!container) continue; container = container.parentElement;
depth++;
}
if (!container || depth >= maxDepth) continue;
// Extract title - look for heading or title-related elements near the link // Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link // Modern eBay often uses h3, span, or div with text content near the link
@@ -172,8 +194,9 @@ function parseEbayListings(
if (title === "Shop on eBay" || title.length < 3) continue; if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices // Extract price - look for eBay's price classes, preferring sale/discount prices
// Updated for 2026 eBay HTML structure
let priceElement = container.querySelector( let priceElement = container.querySelector(
'[class*="s-item__price"], .s-item__price, [class*="price"]', '[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
); );
// If no direct price class, look for spans containing $ (but not titles) // If no direct price class, look for spans containing $ (but not titles)
@@ -185,8 +208,7 @@ function parseEbayListings(
const text = el.textContent?.trim(); const text = el.textContent?.trim();
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words // Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if ( if (
text && text?.includes("$") &&
text.includes("$") &&
text.length < 100 && text.length < 100 &&
!text.includes("laptop") && !text.includes("laptop") &&
!text.includes("computer") && !text.includes("computer") &&
@@ -310,6 +332,32 @@ function parseEbayListings(
return results; return results;
} }
// ----------------------------- Cookie Loading -----------------------------
/**
* Load eBay cookies with priority: URL param > ENV var > file
* Uses shared cookie utility for consistent handling across all scrapers
*/
async function loadEbayCookies(
cookiesSource?: string,
): Promise<string | undefined> {
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
if (cookies.length === 0) {
console.warn(
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
"Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. EBAY_COOKIE environment variable, or\n" +
" 3. ./cookies/ebay.json file (lowest priority)\n" +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
return undefined;
}
return formatCookiesForHeader(cookies, "www.ebay.ca");
}
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchEbayItems( export default async function fetchEbayItems(
@@ -323,6 +371,7 @@ export default async function fetchEbayItems(
keywords?: string[]; keywords?: string[];
buyItNowOnly?: boolean; buyItNowOnly?: boolean;
canadaOnly?: boolean; canadaOnly?: boolean;
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
} = {}, } = {},
) { ) {
const { const {
@@ -333,8 +382,12 @@ export default async function fetchEbayItems(
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
buyItNowOnly = true, buyItNowOnly = true,
canadaOnly = true, canadaOnly = true,
cookies: cookiesSource,
} = opts; } = opts;
// Load eBay cookies with priority: URL param > ENV var > file
const cookies = await loadEbayCookies(cookiesSource);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference // Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({ const urlParams = new URLSearchParams({
_nkw: SEARCH_QUERY, _nkw: SEARCH_QUERY,
@@ -363,7 +416,7 @@ export default async function fetchEbayItems(
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0", "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5", "Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br", "Accept-Encoding": "gzip, deflate, br, zstd",
Referer: "https://www.ebay.ca/", Referer: "https://www.ebay.ca/",
Connection: "keep-alive", Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1", "Upgrade-Insecure-Requests": "1",
@@ -374,6 +427,11 @@ export default async function fetchEbayItems(
Priority: "u=0, i", Priority: "u=0, i",
}; };
// Add cookies if available (helps bypass bot detection)
if (cookies) {
headers.Cookie = cookies;
}
const res = await fetch(searchUrl, { const res = await fetch(searchUrl, {
method: "GET", method: "GET",
headers, headers,

View File

@@ -1,10 +1,16 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress"; import cliProgress from "cli-progress";
import { isRecord } from "../utils/http"; import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import {
type Cookie,
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay"; import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import type { HTMLString } from "../types/common"; import { isRecord } from "../utils/http";
/** /**
* Facebook Marketplace Scraper * Facebook Marketplace Scraper
@@ -14,21 +20,13 @@ import type { HTMLString } from "../types/common";
* This is by design to respect Facebook's authentication requirements. * This is by design to respect Facebook's authentication requirements.
*/ */
// ----------------------------- Types ----------------------------- // Facebook cookie configuration
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
interface Cookie { name: "Facebook",
name: string; domain: ".facebook.com",
value: string; envVar: "FACEBOOK_COOKIE",
domain: string; filePath: "./cookies/facebook.json",
path: string; };
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
interface FacebookAdNode { interface FacebookAdNode {
node: { node: {
@@ -204,171 +202,31 @@ export interface FacebookListingDetails {
// ----------------------------- Utilities ----------------------------- // ----------------------------- Utilities -----------------------------
/**
* Load Facebook cookies from file or string
*/
async function loadFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json"
): Promise<Cookie[]> {
// First try to load from provided string parameter
if (cookiesSource) {
try {
const cookies = JSON.parse(cookiesSource);
if (Array.isArray(cookies)) {
return cookies.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string"
);
}
} catch (e) {
throw new Error(`Invalid cookies JSON provided: ${e}`);
}
}
// Try to load from specified path
try {
const cookiesPath = cookiePath;
const file = Bun.file(cookiesPath);
if (await file.exists()) {
const content = await file.text();
const cookies = JSON.parse(content);
if (Array.isArray(cookies)) {
return cookies.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string"
);
}
}
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
return [];
}
/** /**
* Parse Facebook cookie string into Cookie array format * Parse Facebook cookie string into Cookie array format
* @deprecated Use parseCookieString from utils/cookies instead
*/ */
export function parseFacebookCookieString(cookieString: string): Cookie[] { export function parseFacebookCookieString(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) { return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
return [];
}
return cookieString
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
const [name, value] = pair.split("=", 2);
const trimmedName = name.trim();
const trimmedValue = value.trim();
// Skip empty names or values
if (!trimmedName || !trimmedValue) {
return null;
}
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax" as const,
expirationDate: undefined, // Session cookies
};
})
.filter((cookie): cookie is Cookie => cookie !== null);
} }
/** /**
* Ensure Facebook cookies are available, parsing from env var if needed * Load Facebook cookies with priority: URL param > ENV var > file
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
* @param _cookiePath - Deprecated, uses default path from config
*/ */
export async function ensureFacebookCookies( export async function ensureFacebookCookies(
cookiePath = "./cookies/facebook.json" cookiesSource?: string,
_cookiePath?: string,
): Promise<Cookie[]> { ): Promise<Cookie[]> {
// First try to load existing cookies return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
try {
const existing = await loadFacebookCookies(undefined, cookiePath);
if (existing.length > 0) {
return existing;
}
} catch {
// File doesn't exist or is invalid, continue to check env var
}
// Try to parse from environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (!cookieString || !cookieString.trim()) {
throw new Error(
"No valid Facebook cookies found. Either:\n" +
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
" 2. Create ./cookies/facebook.json manually with cookie array"
);
}
// Parse the cookie string
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) {
throw new Error(
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
'Expected format: "name1=value1; name2=value2;"'
);
}
// Save to file for future use
try {
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
} catch (error) {
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
// Continue anyway, we have the cookies in memory
}
return cookies;
}
/**
* Format cookies array into Cookie header string
*/
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
const validCookies = cookies
.filter((cookie) => {
// Check if cookie applies to this domain
if (cookie.domain.startsWith(".")) {
// Domain cookie (applies to subdomains)
return (
domain.endsWith(cookie.domain.slice(1)) ||
domain === cookie.domain.slice(1)
);
}
// Host-only cookie
return cookie.domain === domain;
})
.filter((cookie) => {
// Check expiration
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
return false; // Expired
}
return true;
});
return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`)
.join("; ");
} }
class HttpError extends Error { class HttpError extends Error {
constructor( constructor(
message: string, message: string,
public readonly status: number, public readonly status: number,
public readonly url: string public readonly url: string,
) { ) {
super(message); super(message);
this.name = "HttpError"; this.name = "HttpError";
@@ -407,7 +265,7 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
!extractionStats.lastApiChangeDetected !extractionStats.lastApiChangeDetected
) { ) {
console.warn( console.warn(
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes." "Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
); );
extractionStats.lastApiChangeDetected = new Date(); extractionStats.lastApiChangeDetected = new Date();
} }
@@ -433,7 +291,7 @@ async function fetchHtml(
retryBaseMs?: number; retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void; onRateInfo?: (remaining: string | null, reset: string | null) => void;
cookies?: string; cookies?: string;
} },
): Promise<HTMLString> { ): Promise<HTMLString> {
const maxRetries = opts?.maxRetries ?? 3; const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500; const retryBaseMs = opts?.retryBaseMs ?? 500;
@@ -487,7 +345,7 @@ async function fetchHtml(
throw new HttpError( throw new HttpError(
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`, `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
res.status, res.status,
url url,
); );
} }
// Retry on 5xx // Retry on 5xx
@@ -498,7 +356,7 @@ async function fetchHtml(
throw new HttpError( throw new HttpError(
`Request failed with status ${res.status}`, `Request failed with status ${res.status}`,
res.status, res.status,
url url,
); );
} }
@@ -521,7 +379,7 @@ async function fetchHtml(
Extract marketplace search data from Facebook page script tags Extract marketplace search data from Facebook page script tags
*/ */
export function extractFacebookMarketplaceData( export function extractFacebookMarketplaceData(
htmlString: HTMLString htmlString: HTMLString,
): FacebookAdNode[] | null { ): FacebookAdNode[] | null {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const scripts = document.querySelectorAll("script"); const scripts = document.querySelectorAll("script");
@@ -567,13 +425,12 @@ export function extractFacebookMarketplaceData(
if ( if (
result && result &&
isRecord(result) && isRecord(result) &&
(result as any).feed_units?.edges?.length > 0 (result as Record<string, unknown>).feed_units?.edges?.length > 0
) { ) {
marketplaceData = result as FacebookMarketplaceSearch; marketplaceData = result as FacebookMarketplaceSearch;
break; break;
} }
} catch { } catch {}
}
} }
if (marketplaceData) break; if (marketplaceData) break;
@@ -583,13 +440,13 @@ export function extractFacebookMarketplaceData(
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) { if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
const searchData = const searchData =
parsed.marketplace_search as FacebookMarketplaceSearch; parsed.marketplace_search as FacebookMarketplaceSearch;
if (searchData.feed_units?.edges?.length ?? 0 > 0) { const feedLength = searchData.feed_units?.edges?.length ?? 0;
if (feedLength > 0) {
marketplaceData = searchData; marketplaceData = searchData;
break; break;
} }
} }
} catch { } catch {}
}
} }
if (!marketplaceData?.feed_units?.edges?.length) { if (!marketplaceData?.feed_units?.edges?.length) {
@@ -598,7 +455,7 @@ export function extractFacebookMarketplaceData(
} }
console.log( console.log(
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings` `Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`,
); );
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node })); return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
} }
@@ -608,7 +465,7 @@ export function extractFacebookMarketplaceData(
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
*/ */
export function extractFacebookItemData( export function extractFacebookItemData(
htmlString: HTMLString htmlString: HTMLString,
): FacebookMarketplaceItem | null { ): FacebookMarketplaceItem | null {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const scripts = document.querySelectorAll("script"); const scripts = document.querySelectorAll("script");
@@ -657,7 +514,7 @@ export function extractFacebookItemData(
targetData.__typename === "GroupCommerceProductItem" targetData.__typename === "GroupCommerceProductItem"
) { ) {
console.log( console.log(
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}` `Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
); );
return targetData as FacebookMarketplaceItem; return targetData as FacebookMarketplaceItem;
} }
@@ -671,18 +528,19 @@ export function extractFacebookItemData(
const findMarketplaceData = ( const findMarketplaceData = (
obj: unknown, obj: unknown,
depth = 0, depth = 0,
maxDepth = 10 maxDepth = 10,
): FacebookMarketplaceItem | null => { ): FacebookMarketplaceItem | null => {
if (depth > maxDepth) return null; // Prevent infinite recursion if (depth > maxDepth) return null; // Prevent infinite recursion
if (isRecord(obj)) { if (isRecord(obj)) {
// Check if this object matches the expected marketplace item structure // Check if this object matches the expected marketplace item structure
const candidate = obj as Record<string, unknown>;
if ( if (
(obj as any).marketplace_listing_title && candidate.marketplace_listing_title &&
(obj as any).id && candidate.id &&
(obj as any).__typename === "GroupCommerceProductItem" && candidate.__typename === "GroupCommerceProductItem" &&
(obj as any).redacted_description candidate.redacted_description
) { ) {
return obj as unknown as FacebookMarketplaceItem; return candidate as unknown as FacebookMarketplaceItem;
} }
// Recursively search nested objects and arrays // Recursively search nested objects and arrays
for (const key in obj) { for (const key in obj) {
@@ -706,7 +564,7 @@ export function extractFacebookItemData(
const recursiveResult = findMarketplaceData(parsed.require); const recursiveResult = findMarketplaceData(parsed.require);
if (recursiveResult) { if (recursiveResult) {
console.log( console.log(
"Successfully extracted Facebook item data using recursive search" "Successfully extracted Facebook item data using recursive search",
); );
return recursiveResult; return recursiveResult;
} }
@@ -727,14 +585,13 @@ export function extractFacebookItemData(
bboxData.__typename === "GroupCommerceProductItem" bboxData.__typename === "GroupCommerceProductItem"
) { ) {
console.log( console.log(
"Successfully extracted Facebook item data from __bbox structure" "Successfully extracted Facebook item data from __bbox structure",
); );
return bboxData as FacebookMarketplaceItem; return bboxData as FacebookMarketplaceItem;
} }
} }
} }
} catch { } catch {}
}
} }
return null; return null;
@@ -743,7 +600,9 @@ export function extractFacebookItemData(
/** /**
Parse Facebook marketplace search results into ListingDetails[] Parse Facebook marketplace search results into ListingDetails[]
*/ */
export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] { export function parseFacebookAds(
ads: FacebookAdNode[],
): FacebookListingDetails[] {
const results: FacebookListingDetails[] = []; const results: FacebookListingDetails[] = [];
for (const adJson of ads) { for (const adJson of ads) {
@@ -805,7 +664,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
const address = cityName || null; const address = cityName || null;
// Determine listing status from Facebook flags // Determine listing status from Facebook flags
let listingStatus: string | undefined ; let listingStatus: string | undefined;
if (listing.is_sold) { if (listing.is_sold) {
listingStatus = "SOLD"; listingStatus = "SOLD";
} else if (listing.is_pending) { } else if (listing.is_pending) {
@@ -840,7 +699,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
title, title,
listingPrice: { listingPrice: {
amountFormatted: amountFormatted:
priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"), priceObj.formatted_amount || formatCentsToCurrency(cents, "en-CA"),
cents, cents,
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
}, },
@@ -856,8 +715,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
}; };
results.push(listingDetails); results.push(listingDetails);
} catch { } catch {}
}
} }
return results; return results;
@@ -868,7 +726,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
Updated for 2026 GroupCommerceProductItem structure Updated for 2026 GroupCommerceProductItem structure
*/ */
export function parseFacebookItem( export function parseFacebookItem(
item: FacebookMarketplaceItem item: FacebookMarketplaceItem,
): FacebookListingDetails | null { ): FacebookListingDetails | null {
try { try {
const title = item.marketplace_listing_title || item.custom_title; const title = item.marketplace_listing_title || item.custom_title;
@@ -888,7 +746,7 @@ export function parseFacebookItem(
if (!Number.isNaN(amount)) { if (!Number.isNaN(amount)) {
cents = Math.round(amount * 100); cents = Math.round(amount * 100);
amountFormatted = amountFormatted =
item.formatted_price?.text || formatCentsToCurrency(cents / 100, "en-CA"); item.formatted_price?.text || formatCentsToCurrency(cents, "en-CA");
} }
} }
} }
@@ -963,31 +821,17 @@ export default async function fetchFacebookItems(
LOCATION = "toronto", LOCATION = "toronto",
MAX_ITEMS = 25, MAX_ITEMS = 25,
cookiesSource?: string, cookiesSource?: string,
cookiePath?: string cookiePath?: string,
) { ) {
// Load Facebook cookies - required for Facebook Marketplace access // Load Facebook cookies with priority: URL param > ENV var > file
let cookies: Cookie[]; const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
);
}
// Format cookies for HTTP header // Format cookies for HTTP header
const domain = "www.facebook.com"; const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain); const cookiesHeader = formatCookiesForHeader(cookies, domain);
if (!cookiesHeader) { if (!cookiesHeader) {
throw new Error( throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain." "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
); );
} }
@@ -1009,7 +853,7 @@ export default async function fetchFacebookItems(
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( console.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s` `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
@@ -1018,11 +862,11 @@ export default async function fetchFacebookItems(
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.warn( console.warn(
`\nFacebook marketplace access failed (${err.status}): ${err.message}` `\nFacebook marketplace access failed (${err.status}): ${err.message}`,
); );
if (err.status === 400 || err.status === 401 || err.status === 403) { if (err.status === 400 || err.status === 401 || err.status === 403) {
console.warn( console.warn(
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies." "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
); );
} }
return []; return [];
@@ -1040,7 +884,7 @@ export default async function fetchFacebookItems(
const progressBar = new cliProgress.SingleBar( const progressBar = new cliProgress.SingleBar(
{}, {},
cliProgress.Presets.shades_classic cliProgress.Presets.shades_classic,
); );
const totalProgress = ads.length; const totalProgress = ads.length;
const currentProgress = 0; const currentProgress = 0;
@@ -1050,7 +894,7 @@ export default async function fetchFacebookItems(
// Filter to only priced items (already done in parseFacebookAds) // Filter to only priced items (already done in parseFacebookAds)
const pricedItems = items.filter( const pricedItems = items.filter(
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0 (item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
); );
progressBar.update(totalProgress); progressBar.update(totalProgress);
@@ -1066,31 +910,16 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem( export async function fetchFacebookItem(
itemId: string, itemId: string,
cookiesSource?: string, cookiesSource?: string,
cookiePath?: string _cookiePath?: string,
): Promise<FacebookListingDetails | null> { ): Promise<FacebookListingDetails | null> {
// Load Facebook cookies - required for Facebook Marketplace access // Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[]; const cookies = await ensureFacebookCookies(cookiesSource);
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
);
}
// Format cookies for HTTP header // Format cookies for HTTP header
const domain = "www.facebook.com"; const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
const cookiesHeader = formatCookiesForHeader(cookies, domain);
if (!cookiesHeader) { if (!cookiesHeader) {
throw new Error( throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain." "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
); );
} }
@@ -1104,7 +933,7 @@ export async function fetchFacebookItem(
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( console.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s` `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
@@ -1113,7 +942,7 @@ export async function fetchFacebookItem(
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.warn( console.warn(
`\nFacebook marketplace item access failed (${err.status}): ${err.message}` `\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
); );
// Enhanced error handling based on status codes // Enhanced error handling based on status codes
@@ -1122,27 +951,27 @@ export async function fetchFacebookItem(
case 401: case 401:
case 403: case 403:
console.warn( console.warn(
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies." "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
); );
console.warn( console.warn(
"Try logging out and back into Facebook, then export fresh cookies." "Try logging out and back into Facebook, then export fresh cookies.",
); );
break; break;
case 404: case 404:
console.warn( console.warn(
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid." "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
); );
break; break;
case 429: case 429:
console.warn( console.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily." "Rate limited: Too many requests. Facebook is blocking access temporarily.",
); );
break; break;
case 500: case 500:
case 502: case 502:
case 503: case 503:
console.warn( console.warn(
"Facebook server error: Marketplace may be temporarily unavailable." "Facebook server error: Marketplace may be temporarily unavailable.",
); );
break; break;
default: default:
@@ -1163,7 +992,7 @@ export async function fetchFacebookItem(
itemHtml.includes("This item has been sold") itemHtml.includes("This item has been sold")
) { ) {
console.warn( console.warn(
`Item ${itemId} appears to be sold or removed from marketplace.` `Item ${itemId} appears to be sold or removed from marketplace.`,
); );
return null; return null;
} }
@@ -1174,13 +1003,13 @@ export async function fetchFacebookItem(
itemHtml.includes("authentication required") itemHtml.includes("authentication required")
) { ) {
console.warn( console.warn(
`Authentication failed for item ${itemId}. Cookies may be expired.` `Authentication failed for item ${itemId}. Cookies may be expired.`,
); );
return null; return null;
} }
console.warn( console.warn(
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:` `No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
); );
console.warn(" - The listing was removed or sold"); console.warn(" - The listing was removed or sold");
console.warn(" - Authentication issues"); console.warn(" - Authentication issues");

View File

@@ -1,19 +1,30 @@
/* eslint-disable @typescript-eslint/no-explicit-any */ import cliProgress from "cli-progress";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import unidecode from "unidecode"; import unidecode from "unidecode";
import cliProgress from "cli-progress"; import type { HTMLString } from "../types/common";
import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { formatCentsToCurrency } from "../utils/format";
import { import {
fetchHtml, fetchHtml,
isRecord,
HttpError, HttpError,
isRecord,
NetworkError, NetworkError,
ParseError, ParseError,
RateLimitError, RateLimitError,
ValidationError, ValidationError,
} from "../utils/http"; } from "../utils/http";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format"; // Kijiji cookie configuration
import type { HTMLString } from "../types/common"; const KIJIJI_COOKIE_CONFIG: CookieConfig = {
name: "Kijiji",
domain: ".kijiji.ca",
envVar: "KIJIJI_COOKIE",
filePath: "./cookies/kijiji.json",
};
// ----------------------------- Types ----------------------------- // ----------------------------- Types -----------------------------
@@ -112,6 +123,7 @@ export interface SearchOptions {
maxPages?: number; // Default: 5 maxPages?: number; // Default: 5
priceMin?: number; priceMin?: number;
priceMax?: number; priceMax?: number;
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
} }
export interface ListingFetchOptions { export interface ListingFetchOptions {
@@ -219,7 +231,7 @@ export function resolveCategoryId(category?: number | string): number {
export function buildSearchUrl( export function buildSearchUrl(
keywords: string, keywords: string,
options: SearchOptions & { page?: number }, options: SearchOptions & { page?: number },
BASE_URL = "https://www.kijiji.ca" BASE_URL = "https://www.kijiji.ca",
): string { ): string {
const locationId = resolveLocationId(options.location); const locationId = resolveLocationId(options.location);
const categoryId = resolveCategoryId(options.category); const categoryId = resolveCategoryId(options.category);
@@ -319,7 +331,7 @@ const GRAPHQL_QUERIES = {
async function fetchGraphQLData( async function fetchGraphQLData(
query: string, query: string,
variables: Record<string, unknown>, variables: Record<string, unknown>,
BASE_URL = "https://www.kijiji.ca" BASE_URL = "https://www.kijiji.ca",
): Promise<unknown> { ): Promise<unknown> {
const endpoint = `${BASE_URL}/anvil/api`; const endpoint = `${BASE_URL}/anvil/api`;
@@ -340,7 +352,7 @@ async function fetchGraphQLData(
throw new HttpError( throw new HttpError(
`GraphQL request failed with status ${response.status}`, `GraphQL request failed with status ${response.status}`,
response.status, response.status,
endpoint endpoint,
); );
} }
@@ -349,7 +361,7 @@ async function fetchGraphQLData(
if (result.errors) { if (result.errors) {
throw new ParseError( throw new ParseError(
`GraphQL errors: ${JSON.stringify(result.errors)}`, `GraphQL errors: ${JSON.stringify(result.errors)}`,
result.errors result.errors,
); );
} }
@@ -361,7 +373,7 @@ async function fetchGraphQLData(
throw new NetworkError( throw new NetworkError(
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
endpoint, endpoint,
err instanceof Error ? err : undefined err instanceof Error ? err : undefined,
); );
} }
} }
@@ -371,7 +383,7 @@ async function fetchGraphQLData(
*/ */
async function fetchSellerDetails( async function fetchSellerDetails(
posterId: string, posterId: string,
BASE_URL = "https://www.kijiji.ca" BASE_URL = "https://www.kijiji.ca",
): Promise<{ ): Promise<{
reviewCount?: number; reviewCount?: number;
reviewScore?: number; reviewScore?: number;
@@ -383,12 +395,12 @@ async function fetchSellerDetails(
fetchGraphQLData( fetchGraphQLData(
GRAPHQL_QUERIES.getReviewSummary, GRAPHQL_QUERIES.getReviewSummary,
{ userId: posterId }, { userId: posterId },
BASE_URL BASE_URL,
), ),
fetchGraphQLData( fetchGraphQLData(
GRAPHQL_QUERIES.getProfileMetrics, GRAPHQL_QUERIES.getProfileMetrics,
{ profileId: posterId }, { profileId: posterId },
BASE_URL BASE_URL,
), ),
]); ]);
@@ -405,7 +417,7 @@ async function fetchSellerDetails(
// Silently fail for GraphQL errors - not critical for basic functionality // Silently fail for GraphQL errors - not critical for basic functionality
console.warn( console.warn(
`Failed to fetch seller details for ${posterId}:`, `Failed to fetch seller details for ${posterId}:`,
err instanceof Error ? err.message : String(err) err instanceof Error ? err.message : String(err),
); );
return {}; return {};
} }
@@ -416,7 +428,9 @@ async function fetchSellerDetails(
/** /**
Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML. Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML.
*/ */
export function extractApolloState(htmlString: HTMLString): ApolloRecord | null { export function extractApolloState(
htmlString: HTMLString,
): ApolloRecord | null {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const nextData = document.getElementById("__NEXT_DATA__"); const nextData = document.getElementById("__NEXT_DATA__");
if (!nextData || !nextData.textContent) return null; if (!nextData || !nextData.textContent) return null;
@@ -436,7 +450,7 @@ export function extractApolloState(htmlString: HTMLString): ApolloRecord | null
*/ */
export function parseSearch( export function parseSearch(
htmlString: HTMLString, htmlString: HTMLString,
BASE_URL: string BASE_URL: string,
): SearchListing[] { ): SearchListing[] {
const apolloState = extractApolloState(htmlString); const apolloState = extractApolloState(htmlString);
if (!apolloState) return []; if (!apolloState) return [];
@@ -463,16 +477,16 @@ export function parseSearch(
/** /**
Parse a listing page into a typed object (backward compatible). Parse a listing page into a typed object (backward compatible).
*/ */
function parseListing( function _parseListing(
htmlString: HTMLString, htmlString: HTMLString,
BASE_URL: string BASE_URL: string,
): KijijiListingDetails | null { ): KijijiListingDetails | null {
const apolloState = extractApolloState(htmlString); const apolloState = extractApolloState(htmlString);
if (!apolloState) return null; if (!apolloState) return null;
// Find the listing root key // Find the listing root key
const listingKey = Object.keys(apolloState).find((k) => const listingKey = Object.keys(apolloState).find((k) =>
k.includes("Listing") k.includes("Listing"),
); );
if (!listingKey) return null; if (!listingKey) return null;
@@ -494,7 +508,7 @@ function parseListing(
const cents = price?.amount != null ? Number(price.amount) : undefined; const cents = price?.amount != null ? Number(price.amount) : undefined;
const amountFormatted = const amountFormatted =
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined; cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
const numberOfViews = const numberOfViews =
metrics?.views != null ? Number(metrics.views) : undefined; metrics?.views != null ? Number(metrics.views) : undefined;
@@ -515,7 +529,8 @@ function parseListing(
listingPrice: amountFormatted listingPrice: amountFormatted
? { ? {
amountFormatted, amountFormatted,
cents: Number.isFinite(cents!) ? cents : undefined, cents:
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
currency: price?.currency, currency: price?.currency,
} }
: undefined, : undefined,
@@ -523,7 +538,10 @@ function parseListing(
listingStatus: status, listingStatus: status,
creationDate: activationDate, creationDate: activationDate,
endDate, endDate,
numberOfViews: Number.isFinite(numberOfViews!) ? numberOfViews : undefined, numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null, address: location?.address ?? null,
}; };
} }
@@ -534,14 +552,14 @@ function parseListing(
export async function parseDetailedListing( export async function parseDetailedListing(
htmlString: HTMLString, htmlString: HTMLString,
BASE_URL: string, BASE_URL: string,
options: ListingFetchOptions = {} options: ListingFetchOptions = {},
): Promise<DetailedListing | null> { ): Promise<DetailedListing | null> {
const apolloState = extractApolloState(htmlString); const apolloState = extractApolloState(htmlString);
if (!apolloState) return null; if (!apolloState) return null;
// Find the listing root key // Find the listing root key
const listingKey = Object.keys(apolloState).find((k) => const listingKey = Object.keys(apolloState).find((k) =>
k.includes("Listing") k.includes("Listing"),
); );
if (!listingKey) return null; if (!listingKey) return null;
@@ -569,7 +587,7 @@ export async function parseDetailedListing(
const cents = price?.amount != null ? Number(price.amount) : undefined; const cents = price?.amount != null ? Number(price.amount) : undefined;
const amountFormatted = const amountFormatted =
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined; cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
const numberOfViews = const numberOfViews =
metrics?.views != null ? Number(metrics.views) : undefined; metrics?.views != null ? Number(metrics.views) : undefined;
@@ -621,7 +639,7 @@ export async function parseDetailedListing(
try { try {
const additionalData = await fetchSellerDetails( const additionalData = await fetchSellerDetails(
posterInfo.posterId, posterInfo.posterId,
BASE_URL BASE_URL,
); );
sellerInfo = { sellerInfo = {
...sellerInfo, ...sellerInfo,
@@ -630,7 +648,7 @@ export async function parseDetailedListing(
} catch { } catch {
// Silently fail - GraphQL data is optional // Silently fail - GraphQL data is optional
console.warn( console.warn(
`Failed to fetch additional seller data for ${posterInfo.posterId}` `Failed to fetch additional seller data for ${posterInfo.posterId}`,
); );
} }
} }
@@ -683,10 +701,20 @@ export default async function fetchKijijiItems(
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
BASE_URL = "https://www.kijiji.ca", BASE_URL = "https://www.kijiji.ca",
searchOptions: SearchOptions = {}, searchOptions: SearchOptions = {},
listingOptions: ListingFetchOptions = {} listingOptions: ListingFetchOptions = {},
) { ) {
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
// Load Kijiji cookies (optional - helps bypass bot detection)
const cookies = await loadCookiesOptional(
KIJIJI_COOKIE_CONFIG,
searchOptions.cookies,
);
const cookieHeader =
cookies.length > 0
? formatCookiesForHeader(cookies, "www.kijiji.ca")
: undefined;
// Set defaults for configuration // Set defaults for configuration
const finalSearchOptions: Required<SearchOptions> = { const finalSearchOptions: Required<SearchOptions> = {
location: searchOptions.location ?? 1700272, // Default to GTA location: searchOptions.location ?? 1700272, // Default to GTA
@@ -697,6 +725,7 @@ export default async function fetchKijijiItems(
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin as number, priceMin: searchOptions.priceMin as number,
priceMax: searchOptions.priceMax as number, priceMax: searchOptions.priceMax as number,
cookies: searchOptions.cookies ?? "",
}; };
const finalListingOptions: Required<ListingFetchOptions> = { const finalListingOptions: Required<ListingFetchOptions> = {
@@ -717,7 +746,7 @@ export default async function fetchKijijiItems(
// Add page parameter for pagination // Add page parameter for pagination
...(page > 1 && { page }), ...(page > 1 && { page }),
}, },
BASE_URL BASE_URL,
); );
console.log(`Fetching search page ${page}: ${searchUrl}`); console.log(`Fetching search page ${page}: ${searchUrl}`);
@@ -725,16 +754,17 @@ export default async function fetchKijijiItems(
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( console.log(
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s` `\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
}); });
const searchResults = parseSearch(searchHtml, BASE_URL); const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) { if (searchResults.length === 0) {
console.log( console.log(
`No more results found on page ${page}. Stopping pagination.` `No more results found on page ${page}. Stopping pagination.`,
); );
break; break;
} }
@@ -749,54 +779,79 @@ export default async function fetchKijijiItems(
} }
console.log( console.log(
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}` `\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
); );
// Fetch details for this page's listings // Fetch details for this page's listings with controlled concurrency
const progressBar = new cliProgress.SingleBar( const isTTY = process.stdout?.isTTY ?? false;
{}, const progressBar = isTTY
cliProgress.Presets.shades_classic ? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
); : null;
const totalProgress = newListingLinks.length; const totalProgress = newListingLinks.length;
let currentProgress = 0; let currentProgress = 0;
progressBar.start(totalProgress, currentProgress); progressBar?.start(totalProgress, currentProgress);
for (const link of newListingLinks) { // Process in batches for controlled concurrency
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing
const results: (DetailedListing | null)[] = [];
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
const batchPromises = batch.map(async (link) => {
try { try {
const html = await fetchHtml(link, DELAY_MS, { const html = await fetchHtml(link, 0, {
// No per-request delay, batch handles rate limit
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( console.log(
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s` `\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
}); });
const parsed = await parseDetailedListing( const parsed = await parseDetailedListing(
html, html,
BASE_URL, BASE_URL,
finalListingOptions finalListingOptions,
); );
if (parsed) { return parsed;
allListings.push(parsed);
}
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.error( console.error(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}` `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
); );
} else { } else {
console.error( console.error(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}` `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
); );
} }
return null;
} finally { } finally {
currentProgress++; currentProgress++;
progressBar.update(currentProgress); progressBar?.update(currentProgress);
if (!progressBar) {
console.log(`Progress: ${currentProgress}/${totalProgress}`);
}
}
});
const batchResults = await Promise.all(batchPromises);
results.push(...batchResults);
// Wait between batches to respect rate limit
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) =>
setTimeout(resolve, DELAY_MS * batch.length),
);
} }
} }
progressBar.stop(); allListings.push(
...results.filter((r): r is DetailedListing => r !== null),
);
progressBar?.stop();
// If we got fewer results than expected (40 per page), we've reached the end // If we got fewer results than expected (40 per page), we've reached the end
if (searchResults.length < 40) { if (searchResults.length < 40) {
@@ -809,10 +864,4 @@ export default async function fetchKijijiItems(
} }
// Re-export error classes for convenience // Re-export error classes for convenience
export { export { HttpError, NetworkError, ParseError, RateLimitError, ValidationError };
HttpError,
NetworkError,
ParseError,
RateLimitError,
ValidationError,
};

View File

@@ -0,0 +1,227 @@
/**
* Shared cookie handling utilities for marketplace scrapers
*/
/**
 * A single cookie as handled by the helpers in this module.
 *
 * Only `name`, `value`, `domain` and `expirationDate` are read by the code in
 * this file; the remaining fields are carried along untouched.
 * NOTE(review): the optional fields look like a browser cookie-export shape;
 * confirm against the tool that produces the JSON files.
 */
export interface Cookie {
// Cookie name; emitted as the left side of `name=value` in the Cookie header.
name: string;
// Cookie value; emitted verbatim as the right side of `name=value`.
value: string;
// A leading "." marks a domain cookie that also applies to subdomains;
// without it the cookie is host-only and must equal the target host exactly.
domain: string;
// parseCookieString always sets this to "/"; it is not used for matching here.
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
// Compared against `Date.now() / 1000`, i.e. treated as Unix time in seconds;
// cookies whose value lies in the past are left out of the header.
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
/**
 * Per-marketplace settings consumed by ensureCookies: where to look for
 * cookies (environment variable, file) and how to label log messages.
 */
export interface CookieConfig {
/** Name used in log messages (e.g., "Facebook", "Kijiji") */
name: string;
/** Domain for cookies (e.g., ".facebook.com", ".kijiji.ca") */
domain: string;
/** Environment variable name (e.g., "FACEBOOK_COOKIE") */
envVar: string;
/** Path to cookie file (e.g., "./cookies/facebook.json") */
filePath: string;
}
/**
 * Parse a Cookie-header style string into a Cookie array.
 *
 * Supports format: "name1=value1; name2=value2". Pairs without "=", with an
 * empty name, or with an empty value are skipped. Only the first "=" splits
 * name from value, so values may themselves contain "=".
 *
 * @param cookieString - Raw cookie string; empty/whitespace-only yields [].
 * @param domain - Domain assigned to every parsed cookie.
 * @returns Parsed cookies with path "/", secure=true, httpOnly=false,
 *   sameSite "lax" and no expiration.
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    // decodeURIComponent throws URIError on malformed percent sequences
    // (e.g. "100%"). One odd value must not discard every other cookie, so
    // fall back to the raw text when decoding fails.
    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax" as const,
      expirationDate: undefined,
    });
  }

  return cookies;
}
/**
 * Parse a JSON array of cookie objects.
 *
 * Supports format: [{"name": "foo", "value": "bar", ...}]
 * Entries are kept only when both `name` and `value` are strings; valid JSON
 * that is not an array yields []. Invalid JSON makes JSON.parse throw, and
 * that error is passed on to the caller.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const data = JSON.parse(jsonString);
  const accepted: Cookie[] = [];

  if (Array.isArray(data)) {
    for (const entry of data) {
      const hasStringFields =
        entry &&
        typeof entry.name === "string" &&
        typeof entry.value === "string";
      if (hasStringFields) {
        accepted.push(entry as Cookie);
      }
    }
  }

  return accepted;
}
/**
 * Parse cookies from a string of unknown format.
 *
 * The input is first tried as a JSON array; if that fails to parse or yields
 * no valid cookies, it is parsed as a "name1=value1; name2=value2" string.
 *
 * JSON entries are only validated for `name` and `value`, so entries without
 * a usable `domain` or `path` get `defaultDomain` and "/" here. That keeps
 * every returned object in line with the Cookie interface and stops later
 * domain matching from failing on an undefined domain.
 *
 * @param input - JSON array text or cookie string.
 * @param defaultDomain - Domain used for cookie-string input and for JSON
 *   entries that carry no domain of their own.
 * @returns Parsed cookies, or [] when nothing valid was found.
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  // Try JSON array format first
  let jsonCookies: Cookie[] = [];
  try {
    jsonCookies = parseJsonCookies(input);
  } catch {
    // JSON parse failed, try cookie string format
  }

  if (jsonCookies.length > 0) {
    return jsonCookies.map((cookie) => ({
      ...cookie,
      domain:
        typeof cookie.domain === "string" && cookie.domain
          ? cookie.domain
          : defaultDomain,
      path:
        typeof cookie.path === "string" && cookie.path ? cookie.path : "/",
    }));
  }

  // Try cookie string format
  return parseCookieString(input, defaultDomain);
}
/**
 * Read cookies from a file on disk.
 *
 * The file content is trimmed and handed to parseCookiesAuto, so it may hold
 * either a JSON array or a cookie string. A missing file is not an error and
 * yields [].
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);
  const isPresent = await cookieFile.exists();

  if (isPresent) {
    const rawText = await cookieFile.text();
    return parseCookiesAuto(rawText.trim(), defaultDomain);
  }

  return [];
}
/**
 * Format cookies array into Cookie header string for HTTP requests.
 *
 * A cookie is included when it applies to `targetDomain` and has not expired:
 * - a domain starting with "." matches the bare domain itself and any
 *   subdomain of it (the match is anchored on a label boundary, so
 *   ".kijiji.ca" does NOT match "evilkijiji.ca");
 * - any other domain is host-only and must equal `targetDomain`;
 * - cookies without a string domain are skipped instead of throwing;
 * - a truthy `expirationDate` (Unix seconds) in the past excludes the cookie.
 * Host names are compared case-insensitively.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is sent to (e.g. "www.kijiji.ca").
 * @returns "name=value" pairs joined by "; ", or "" when nothing applies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    if (typeof cookie.domain !== "string" || cookie.domain === "") {
      return false;
    }
    const cookieDomain = cookie.domain.toLowerCase();
    if (cookieDomain.startsWith(".")) {
      // Domain cookie: exact bare-domain match, or a suffix match that keeps
      // the leading dot so only whole labels can match.
      return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
    }
    // Host-only cookie
    return cookieDomain === host;
  };

  const isUnexpired = (cookie: Cookie): boolean =>
    !(cookie.expirationDate && cookie.expirationDate < nowSeconds);

  return cookies
    .filter(appliesToHost)
    .filter(isUnexpired)
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
/**
 * Resolve cookies for a marketplace, trying each source in priority order:
 * explicit parameter, then environment variable, then cookie file.
 * Every source accepts both JSON array and cookie string formats.
 *
 * The first source that yields at least one cookie wins and is logged. A
 * parameter or env var that is present but yields nothing produces a warning
 * and the search continues. A file read failure is logged as a warning.
 *
 * @throws Error describing all accepted sources when none provides cookies.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Priority 1: URL/API parameter (if provided)
  if (cookiesSource) {
    const fromParameter = parseCookiesAuto(cookiesSource, config.domain);
    if (fromParameter.length === 0) {
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(
        `Loaded ${fromParameter.length} ${config.name} cookies from parameter`,
      );
      return fromParameter;
    }
  }

  // Priority 2: Environment variable
  const rawEnv = process.env[config.envVar];
  if (rawEnv?.trim()) {
    const fromEnv = parseCookiesAuto(rawEnv, config.domain);
    if (fromEnv.length === 0) {
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return fromEnv;
    }
  }

  // Priority 3: Cookie file (fallback)
  try {
    const fromFile = await loadCookiesFromFile(config.filePath, config.domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${config.name} cookies from ${config.filePath}`,
      );
      return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // No cookies found from any source
  const failureMessage =
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
    ` 1. 'cookies' parameter (highest priority), or\n` +
    ` 2. ${config.envVar} environment variable, or\n` +
    ` 3. ${config.filePath} file (lowest priority)\n` +
    'Format: JSON array or cookie string like "name1=value1; name2=value2"';
  throw new Error(failureMessage);
}
/**
 * Non-throwing variant of ensureCookies: resolves to the cookies it finds,
 * or to an empty array when ensureCookies rejects for any reason.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const noCookies: Cookie[] = [];
  return ensureCookies(config, cookiesSource).catch(() => noCookies);
}

View File

@@ -4,7 +4,10 @@
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US') * @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
* @returns Formatted currency string * @returns Formatted currency string
*/ */
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string { export function formatCentsToCurrency(
cents: number,
locale: string = "en-CA",
): string {
try { try {
const formatter = new Intl.NumberFormat(locale, { const formatter = new Intl.NumberFormat(locale, {
style: "currency", style: "currency",
@@ -13,7 +16,7 @@ export function formatCentsToCurrency(cents: number, locale: string = "en-CA"):
maximumFractionDigits: 2, maximumFractionDigits: 2,
}); });
return formatter.format(cents / 100); return formatter.format(cents / 100);
} catch (error) { } catch {
// Fallback if locale is not supported // Fallback if locale is not supported
const dollars = (cents / 100).toFixed(2); const dollars = (cents / 100).toFixed(2);
return `$${dollars}`; return `$${dollars}`;

View File

@@ -3,7 +3,7 @@ export class HttpError extends Error {
constructor( constructor(
message: string, message: string,
public readonly statusCode: number, public readonly statusCode: number,
public readonly url?: string public readonly url?: string,
) { ) {
super(message); super(message);
this.name = "HttpError"; this.name = "HttpError";
@@ -15,7 +15,7 @@ export class NetworkError extends Error {
constructor( constructor(
message: string, message: string,
public readonly url: string, public readonly url: string,
public readonly cause?: Error public readonly cause?: Error,
) { ) {
super(message); super(message);
this.name = "NetworkError"; this.name = "NetworkError";
@@ -26,7 +26,7 @@ export class NetworkError extends Error {
export class ParseError extends Error { export class ParseError extends Error {
constructor( constructor(
message: string, message: string,
public readonly data?: unknown public readonly data?: unknown,
) { ) {
super(message); super(message);
this.name = "ParseError"; this.name = "ParseError";
@@ -38,7 +38,7 @@ export class RateLimitError extends Error {
constructor( constructor(
message: string, message: string,
public readonly url: string, public readonly url: string,
public readonly resetTime?: number public readonly resetTime?: number,
) { ) {
super(message); super(message);
this.name = "RateLimitError"; this.name = "RateLimitError";
@@ -87,7 +87,7 @@ export interface FetchHtmlOptions {
export async function fetchHtml( export async function fetchHtml(
url: string, url: string,
delayMs: number, delayMs: number,
opts?: FetchHtmlOptions opts?: FetchHtmlOptions,
): Promise<string> { ): Promise<string> {
const maxRetries = opts?.maxRetries ?? 3; const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 1000; const retryBaseMs = opts?.retryBaseMs ?? 1000;
@@ -137,14 +137,14 @@ export async function fetchHtml(
throw new RateLimitError( throw new RateLimitError(
`Rate limit exceeded for ${url}`, `Rate limit exceeded for ${url}`,
url, url,
resetSeconds resetSeconds,
); );
} }
// Retry on server errors // Retry on server errors
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
); );
continue; continue;
} }
@@ -152,7 +152,7 @@ export async function fetchHtml(
throw new HttpError( throw new HttpError(
`Request failed with status ${res.status}`, `Request failed with status ${res.status}`,
res.status, res.status,
url url,
); );
} }
@@ -174,7 +174,7 @@ export async function fetchHtml(
if (err instanceof Error && err.name === "AbortError") { if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
); );
continue; continue;
} }
@@ -184,14 +184,14 @@ export async function fetchHtml(
// Network or other errors // Network or other errors
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
); );
continue; continue;
} }
throw new NetworkError( throw new NetworkError(
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
url, url,
err instanceof Error ? err : undefined err instanceof Error ? err : undefined,
); );
} }
} }

View File

@@ -5,7 +5,6 @@ import {
fetchFacebookItem, fetchFacebookItem,
formatCentsToCurrency, formatCentsToCurrency,
formatCookiesForHeader, formatCookiesForHeader,
loadFacebookCookies,
parseFacebookAds, parseFacebookAds,
parseFacebookCookieString, parseFacebookCookieString,
parseFacebookItem, parseFacebookItem,
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}); });
}); });
const result = await fetchFacebookItem("123", mockCookies); const _result = await fetchFacebookItem("123", mockCookies);
expect(attempts).toBe(2); expect(attempts).toBe(2);
// Should eventually succeed after retry // Should eventually succeed after retry
}); });

View File

@@ -1,5 +1,5 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook"; import { fetchFacebookItems } from "../src/scrapers/facebook";
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;

View File

@@ -1,13 +1,12 @@
import { describe, expect, test } from "bun:test"; import { describe, expect, test } from "bun:test";
import { import {
HttpError, buildSearchUrl,
NetworkError, NetworkError,
ParseError, ParseError,
RateLimitError, RateLimitError,
ValidationError,
buildSearchUrl,
resolveCategoryId, resolveCategoryId,
resolveLocationId, resolveLocationId,
ValidationError,
} from "../src/scrapers/kijiji"; } from "../src/scrapers/kijiji";
describe("Location and Category Resolution", () => { describe("Location and Category Resolution", () => {
@@ -121,20 +120,12 @@ describe("URL Construction", () => {
}); });
describe("Error Classes", () => { describe("Error Classes", () => {
test("HttpError should store status and URL", () => {
const error = new HttpError("Not found", 404, "https://example.com");
expect(error.message).toBe("Not found");
expect(error.statusCode).toBe(404);
expect(error.url).toBe("https://example.com");
expect(error.name).toBe("HttpError");
});
test("NetworkError should store URL and cause", () => { test("NetworkError should store URL and cause", () => {
const cause = new Error("Connection failed"); const cause = new Error("Connection failed");
const error = new NetworkError( const error = new NetworkError(
"Network error", "Network error",
"https://example.com", "https://example.com",
cause cause,
); );
expect(error.message).toBe("Network error"); expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com"); expect(error.url).toBe("https://example.com");

View File

@@ -1,4 +1,4 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test"; import { describe, expect, test } from "bun:test";
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji"; import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
describe("Utility Functions", () => { describe("Utility Functions", () => {

View File

@@ -5,12 +5,15 @@ const PORT = process.env.MCP_PORT || 4006;
const server = Bun.serve({ const server = Bun.serve({
port: PORT as number | string, port: PORT as number | string,
idleTimeout: 0, idleTimeout: 255, // 255 seconds (max allowed)
routes: { routes: {
// MCP metadata discovery endpoint // MCP metadata discovery endpoint
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), { "/.well-known/mcp/server-card.json": new Response(
JSON.stringify(serverCard),
{
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
}), },
),
// MCP JSON-RPC 2.0 protocol endpoint // MCP JSON-RPC 2.0 protocol endpoint
"/mcp": async (req: Request) => { "/mcp": async (req: Request) => {
@@ -19,13 +22,13 @@ const server = Bun.serve({
} }
return Response.json( return Response.json(
{ message: "MCP endpoint requires POST request" }, { message: "MCP endpoint requires POST request" },
{ status: 405 } { status: 405 },
); );
}, },
}, },
// Fallback for all other routes // Fallback for all other routes
fetch(req: Request) { fetch(_req: Request) {
return new Response("Not Found", { status: 404 }); return new Response("Not Found", { status: 404 });
}, },
}); });

View File

@@ -1,6 +1,8 @@
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
import { tools } from "./tools"; import { tools } from "./tools";
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
/** /**
* Handle MCP JSON-RPC 2.0 protocol requests * Handle MCP JSON-RPC 2.0 protocol requests
*/ */
@@ -16,7 +18,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
error: { code: -32600, message: "Invalid Request" }, error: { code: -32600, message: "Invalid Request" },
id: body.id, id: body.id,
}, },
{ status: 400 } { status: 400 },
); );
} }
@@ -38,7 +40,8 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
name: "marketplace-scrapers", name: "marketplace-scrapers",
version: "1.0.0", version: "1.0.0",
}, },
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces", instructions:
"Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
}, },
}); });
} }
@@ -78,15 +81,18 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
{ {
jsonrpc: "2.0", jsonrpc: "2.0",
id, id,
error: { code: -32602, message: "Invalid params: name and arguments required" }, error: {
code: -32602,
message: "Invalid params: name and arguments required",
}, },
{ status: 400 } },
{ status: 400 },
); );
} }
// Route tool calls to appropriate handlers // Route tool calls to appropriate handlers
try { try {
let result; let result: unknown;
if (name === "search_kijiji") { if (name === "search_kijiji") {
const query = args.query; const query = args.query;
@@ -97,24 +103,45 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
error: { code: -32602, message: "query parameter is required" }, error: { code: -32602, message: "query parameter is required" },
}); });
} }
const searchOptions = { const params = new URLSearchParams({ q: query });
location: args.location, if (args.location) params.append("location", args.location);
category: args.category, if (args.category) params.append("category", args.category);
keywords: args.keywords, if (args.keywords) params.append("keywords", args.keywords);
sortBy: args.sortBy, if (args.sortBy) params.append("sortBy", args.sortBy);
sortOrder: args.sortOrder, if (args.sortOrder) params.append("sortOrder", args.sortOrder);
maxPages: args.maxPages || 5, if (args.maxPages)
priceMin: args.priceMin, params.append("maxPages", args.maxPages.toString());
priceMax: args.priceMax, if (args.priceMin)
}; params.append("priceMin", args.priceMin.toString());
const items = await fetchKijijiItems( if (args.priceMax)
query, params.append("priceMax", args.priceMax.toString());
1, if (args.cookies) params.append("cookies", args.cookies);
"https://www.kijiji.ca",
searchOptions, console.log(
{} `[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
); );
result = items || [];
} else if (name === "search_facebook") { } else if (name === "search_facebook") {
const query = args.query; const query = args.query;
if (!query) { if (!query) {
@@ -124,15 +151,37 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
error: { code: -32602, message: "query parameter is required" }, error: { code: -32602, message: "query parameter is required" },
}); });
} }
const items = await fetchFacebookItems( const params = new URLSearchParams({ q: query });
query, if (args.location) params.append("location", args.location);
1, if (args.maxItems)
args.location || "toronto", params.append("maxItems", args.maxItems.toString());
args.maxItems || 25, if (args.cookiesSource) params.append("cookies", args.cookiesSource);
args.cookiesSource,
undefined console.log(
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] Facebook API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
); );
result = items || [];
} else if (name === "search_ebay") { } else if (name === "search_ebay") {
const query = args.query; const query = args.query;
if (!query) { if (!query) {
@@ -142,18 +191,50 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
error: { code: -32602, message: "query parameter is required" }, error: { code: -32602, message: "query parameter is required" },
}); });
} }
const items = await fetchEbayItems(query, 1, { const params = new URLSearchParams({ q: query });
minPrice: args.minPrice, if (args.minPrice)
maxPrice: args.maxPrice, params.append("minPrice", args.minPrice.toString());
strictMode: args.strictMode || false, if (args.maxPrice)
exclusions: args.exclusions || [], params.append("maxPrice", args.maxPrice.toString());
keywords: args.keywords || [query], if (args.strictMode !== undefined)
buyItNowOnly: args.buyItNowOnly !== false, params.append("strictMode", args.strictMode.toString());
canadaOnly: args.canadaOnly !== false, if (args.exclusions?.length)
}); params.append("exclusions", args.exclusions.join(","));
if (args.keywords?.length)
params.append("keywords", args.keywords.join(","));
if (args.buyItNowOnly !== undefined)
params.append("buyItNowOnly", args.buyItNowOnly.toString());
if (args.canadaOnly !== undefined)
params.append("canadaOnly", args.canadaOnly.toString());
if (args.maxItems)
params.append("maxItems", args.maxItems.toString());
if (args.cookies) params.append("cookies", args.cookies);
const results = args.maxItems ? items.slice(0, args.maxItems) : items; console.log(
result = results || []; `[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] eBay API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else { } else {
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",
@@ -175,11 +256,15 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
}, },
}); });
} catch (error) { } catch (error) {
const errorMessage = error instanceof Error ? error.message : "Unknown error"; const errorMessage =
error instanceof Error ? error.message : "Unknown error";
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",
id, id,
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` }, error: {
code: -32603,
message: `Tool execution failed: ${errorMessage}`,
},
}); });
} }
} }
@@ -191,16 +276,17 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
id, id,
error: { code: -32601, message: `Method not found: ${method}` }, error: { code: -32601, message: `Method not found: ${method}` },
}, },
{ status: 404 } { status: 404 },
); );
} catch (error) { } catch (error) {
const errorMessage = error instanceof Error ? error.message : "Unknown error"; const errorMessage =
error instanceof Error ? error.message : "Unknown error";
return Response.json( return Response.json(
{ {
jsonrpc: "2.0", jsonrpc: "2.0",
error: { code: -32700, message: `Parse error: ${errorMessage}` }, error: { code: -32700, message: `Parse error: ${errorMessage}` },
}, },
{ status: 400 } { status: 400 },
); );
} }
} }

View File

@@ -3,7 +3,8 @@
*/ */
export const serverCard = { export const serverCard = {
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json", $schema:
"https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
version: "1.0", version: "1.0",
protocolVersion: "2025-06-18", protocolVersion: "2025-06-18",
serverInfo: { serverInfo: {
@@ -20,6 +21,7 @@ export const serverCard = {
listChanged: true, listChanged: true,
}, },
}, },
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay", description:
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
tools: "dynamic", tools: "dynamic",
}; };

View File

@@ -15,11 +15,13 @@ export const tools = [
}, },
location: { location: {
type: "string", type: "string",
description: "Location name or ID (e.g., 'toronto', 'gta', 'ontario')", description:
"Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
}, },
category: { category: {
type: "string", type: "string",
description: "Category name or ID (e.g., 'computers', 'furniture', 'bikes')", description:
"Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
}, },
keywords: { keywords: {
type: "string", type: "string",
@@ -50,6 +52,11 @@ export const tools = [
type: "number", type: "number",
description: "Maximum price in cents", description: "Maximum price in cents",
}, },
cookies: {
type: "string",
description:
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
},
}, },
required: ["query"], required: ["query"],
}, },
@@ -84,7 +91,8 @@ export const tools = [
}, },
{ {
name: "search_ebay", name: "search_ebay",
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)", description:
"Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
inputSchema: { inputSchema: {
type: "object", type: "object",
properties: { properties: {
@@ -130,6 +138,11 @@ export const tools = [
description: "Maximum number of items to return", description: "Maximum number of items to return",
default: 5, default: 5,
}, },
cookies: {
type: "string",
description:
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
},
}, },
required: ["query"], required: ["query"],
}, },

26
scripts/biome-symlink.sh Executable file
View File

@@ -0,0 +1,26 @@
#!/usr/bin/env bash
# Replace every biome executable found under node_modules with a symlink to
# the biome binary that is on PATH.
#
# Requires: biome (on PATH), fd.
# Exit status: 0 on success or when nothing needs replacing, 1 on error.

# Get the path to the system biome executable
BIOME_PATH=$(command -v biome)

if [[ -z "$BIOME_PATH" ]]; then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

if ! command -v fd >/dev/null; then
  echo "Error: fd executable not found in PATH" >&2
  exit 1
fi

# Find all biome executables in node_modules. Paths are read NUL-delimited so
# names containing spaces or newlines survive intact.
files=()
while IFS= read -r -d '' file; do
  files+=("$file")
done < <(fd biome node_modules --type executable --no-ignore --follow --print0)

if [[ ${#files[@]} -eq 0 ]]; then
  echo "No biome executables found in node_modules"
  exit 0
fi

# Replace each with a symlink to the system biome
for file in "${files[@]}"; do
  # If PATH resolves biome to this very file (or it already links there),
  # removing it would destroy the link target; leave it alone.
  if [[ "$file" -ef "$BIOME_PATH" ]]; then
    echo "Skipping $file (already the system biome)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  if ! rm -- "$file"; then
    echo "Error: could not remove $file" >&2
    exit 1
  fi
  if ! ln -s -- "$BIOME_PATH" "$file"; then
    echo "Error: could not link $file to $BIOME_PATH" >&2
    exit 1
  fi
done

echo "Done."

30
scripts/remove-eslint.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Delete every line matching PATTERN from all files below the current
# directory. sed keeps the unmodified original next to each edited file as
# "<file>.bak".
#
# Requires: fd, rg (ripgrep), awk, sed.

PATTERN="eslint"

# Paths are streamed NUL-delimited from fd so that file names containing
# spaces or newlines are not split into several words.
while IFS= read -r -d '' file; do
  [[ -f "$file" ]] || continue

  # 1. rg prints "<line-number>:<text>" for every matching line
  # 2. awk keeps only the line number (field 1) and ignores any non-numeric
  #    notice rg may print
  # 3. tr joins the numbers with commas (used for the log message below)
  LINE_NUMBERS=$(
    rg --line-number --no-filename -- "$PATTERN" "$file" \
      | awk -F':' '$1 ~ /^[0-9]+$/ { print $1 }' \
      | tr '\n' ','
  )

  # Remove trailing comma if any
  LINE_NUMBERS=${LINE_NUMBERS%,}

  if [[ -n "$LINE_NUMBERS" ]]; then
    echo "Deleting lines $LINE_NUMBERS from $file..."

    # "3,7d" is a RANGE address in sed and would also delete lines 4-6, so
    # turn the list "3,7,12" into one delete command per line: "3d;7d;12d".
    # -i.bak (suffix attached) is accepted by both GNU sed and BSD sed.
    sed -i.bak "${LINE_NUMBERS//,/d;}d" "$file"

    # Optional: Remove the backup file created by sed -i.bak
    # rm "${file}.bak"
  else
    echo "$file: No lines matching pattern found."
  fi
done < <(fd --print0 .)

25
scripts/start.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Start the API server and the MCP server as background jobs and keep the
# script alive until both have exited. SIGTERM/SIGINT are forwarded to the
# services for a graceful shutdown.
#
# Environment (used for the startup messages only):
#   API_PORT - API server port, shown as 4005 when unset
#   MCP_PORT - MCP server port, shown as 4006 when unset
set -e

#######################################
# Forward a shutdown request to whichever services are already running and
# wait for them to exit.
# Globals: API_PID (read), MCP_PID (read)
#######################################
shutdown() {
  echo "Received shutdown signal, stopping services..."

  # A signal may arrive before one or both services were started, so only
  # signal the PIDs that have actually been recorded.
  local pids=()
  if [[ -n "${API_PID:-}" ]]; then
    pids+=("$API_PID")
  fi
  if [[ -n "${MCP_PID:-}" ]]; then
    pids+=("$MCP_PID")
  fi

  if [[ ${#pids[@]} -gt 0 ]]; then
    # A service may already be gone; under `set -e` a failing kill must not
    # abort the handler before the wait below.
    kill -TERM "${pids[@]}" 2>/dev/null || true
  fi
  wait
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap shutdown TERM INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Start MCP Server in background
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Wait for both processes
wait "$API_PID" "$MCP_PID"