Compare commits
31 Commits
da23ca1c3f
...
update
| Author | SHA1 | Date | |
|---|---|---|---|
| e4ab145d70 | |||
| 1dce0392e3 | |||
| 251fcbb7d9 | |||
| 9bc57d6b54 | |||
| 4a467c9f02 | |||
| f944d319c2 | |||
| cf9784a565 | |||
| df0c528535 | |||
| 2f97d3eafd | |||
| 65eb8d1724 | |||
| f3839aba54 | |||
| 90b98bfb09 | |||
| eb6705df0f | |||
| 72525609ed | |||
| 8b0a65860c | |||
| f9b1c7e096 | |||
| 9edc74cbeb | |||
| ee0fca826d | |||
| f7372612fb | |||
| bce126664e | |||
| 8cbf11538e | |||
| 79f47fdaef | |||
| de5069bf2b | |||
| 637f1a4e75 | |||
| 441ff436c4 | |||
| 1f53ec912a | |||
| 053efd815b | |||
| d619fa5d77 | |||
| 050fd0adba | |||
| 7b106c91ce | |||
| 6e0487f8f3 |
181
.dockerignore
181
.dockerignore
@@ -1,145 +1,84 @@
|
|||||||
# Dependencies
|
# =============================================================================
|
||||||
|
# Dependencies & Build Output
|
||||||
|
# =============================================================================
|
||||||
node_modules/
|
node_modules/
|
||||||
npm-debug.log*
|
dist/
|
||||||
yarn-debug.log*
|
out/
|
||||||
yarn-error.log*
|
|
||||||
bun.sum
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
pids
|
|
||||||
*.pid
|
|
||||||
*.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
coverage/
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
*.tgz
|
*.tgz
|
||||||
|
|
||||||
# Yarn Integrity file
|
# =============================================================================
|
||||||
.yarn-integrity
|
# Sensitive Files
|
||||||
|
# =============================================================================
|
||||||
# dotenv environment variables file
|
|
||||||
.env
|
.env
|
||||||
.env.local
|
.env.*
|
||||||
.env.development.local
|
.envrc
|
||||||
.env.test.local
|
cookies/
|
||||||
.env.production.local
|
*.pem
|
||||||
|
*.key
|
||||||
|
*.cert
|
||||||
|
*secret*
|
||||||
|
*credential*
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
# =============================================================================
|
||||||
.cache
|
# Development Tools & Config
|
||||||
.parcel-cache
|
# =============================================================================
|
||||||
|
# Nix/Devenv
|
||||||
|
.devenv/
|
||||||
|
.devenv.flake.nix
|
||||||
|
devenv.*
|
||||||
|
.direnv/
|
||||||
|
|
||||||
# Next.js build output
|
# Linting/Formatting
|
||||||
.next
|
biome.json
|
||||||
|
.eslintcache
|
||||||
|
.pre-commit-config.yaml
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
# IDE/Editor
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
.cache/
|
|
||||||
public
|
|
||||||
|
|
||||||
# Vuepress build output
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# IDE and editor files
|
|
||||||
.vscode/
|
.vscode/
|
||||||
.idea/
|
.idea/
|
||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
*~
|
*~
|
||||||
|
|
||||||
# OS generated files
|
# AI Assistant Config
|
||||||
.DS_Store
|
.claude/
|
||||||
.DS_Store?
|
CLAUDE.md
|
||||||
._*
|
AGENTS.md
|
||||||
.Spotlight-V100
|
opencode.jsonc
|
||||||
.Trashes
|
|
||||||
ehthumbs.db
|
|
||||||
Thumbs.db
|
|
||||||
|
|
||||||
# Git
|
# =============================================================================
|
||||||
.git
|
# Documentation (not needed at runtime)
|
||||||
|
# =============================================================================
|
||||||
|
README.md
|
||||||
|
*.md
|
||||||
|
docs/
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Git & Docker (avoid recursive inclusion)
|
||||||
|
# =============================================================================
|
||||||
|
.git/
|
||||||
.gitignore
|
.gitignore
|
||||||
|
|
||||||
# Docker
|
|
||||||
Dockerfile*
|
Dockerfile*
|
||||||
.dockerignore
|
.dockerignore
|
||||||
|
|
||||||
# Documentation
|
# =============================================================================
|
||||||
README.md
|
# Testing & Coverage
|
||||||
docs/
|
# =============================================================================
|
||||||
|
|
||||||
# Test files
|
|
||||||
test/
|
test/
|
||||||
tests/
|
tests/
|
||||||
*.test.js
|
|
||||||
*.test.ts
|
*.test.ts
|
||||||
*.spec.js
|
|
||||||
*.spec.ts
|
*.spec.ts
|
||||||
|
coverage/
|
||||||
|
*.lcov
|
||||||
|
.nyc_output/
|
||||||
|
|
||||||
# Development files
|
# =============================================================================
|
||||||
CLAUDE.md
|
# OS & Misc
|
||||||
devenv.*
|
# =============================================================================
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
*.log
|
*.log
|
||||||
|
*.pid
|
||||||
# Runtime cookies/config
|
.cache/
|
||||||
cookies/
|
examples/
|
||||||
|
scripts/
|
||||||
|
|||||||
54
AGENTS.md
54
AGENTS.md
@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
|
|||||||
- `GET /api/status` - Health check
|
- `GET /api/status` - Health check
|
||||||
- `GET /api/kijiji?q={query}` - Search Kijiji
|
- `GET /api/kijiji?q={query}` - Search Kijiji
|
||||||
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
||||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay
|
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
|
||||||
- `GET /api/*` - 404 fallback
|
- `GET /api/*` - 404 fallback
|
||||||
|
|
||||||
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
||||||
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
|
|||||||
**Tools:**
|
**Tools:**
|
||||||
- `search_kijiji` - Search Kijiji (query, maxItems)
|
- `search_kijiji` - Search Kijiji (query, maxItems)
|
||||||
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
||||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems)
|
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
|
||||||
|
|
||||||
## API Response Formats
|
## API Response Formats
|
||||||
|
|
||||||
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
|
|||||||
### eBay-specific fields
|
### eBay-specific fields
|
||||||
Minimal - mainly the common fields
|
Minimal - mainly the common fields
|
||||||
|
|
||||||
|
## Cookie Management
|
||||||
|
|
||||||
|
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
|
||||||
|
|
||||||
|
### Cookie Priority Hierarchy (High → Low)
|
||||||
|
All scrapers follow this loading order:
|
||||||
|
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
|
||||||
|
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||||
|
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
|
||||||
|
|
||||||
|
### Facebook Cookies
|
||||||
|
- **Required for**: Facebook Marketplace scraping
|
||||||
|
- **Format**: JSON array (see `cookies/README.md`)
|
||||||
|
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
```bash
|
||||||
|
# Option 1: File (fallback)
|
||||||
|
# Create cookies/facebook.json with cookie array
|
||||||
|
|
||||||
|
# Option 2: Environment variable
|
||||||
|
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||||
|
|
||||||
|
# Option 3: URL parameter (highest priority)
|
||||||
|
curl -G "http://localhost:4005/api/facebook" --data-urlencode "q=laptop" --data-urlencode 'cookies=[{...}]'
|
||||||
|
```
|
||||||
|
|
||||||
|
### eBay Cookies
|
||||||
|
- **Required for**: Bypassing bot detection
|
||||||
|
- **Format**: Cookie string `"name=value; name2=value2"`
|
||||||
|
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
```bash
|
||||||
|
# Option 1: File (fallback)
|
||||||
|
# Create cookies/ebay.json with cookie string
|
||||||
|
|
||||||
|
# Option 2: Environment variable
|
||||||
|
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||||
|
|
||||||
|
# Option 3: URL parameter (highest priority)
|
||||||
|
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
|
||||||
|
|
||||||
## Technical Details
|
## Technical Details
|
||||||
|
|
||||||
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
||||||
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
|
|||||||
|
|
||||||
## Development Notes
|
## Development Notes
|
||||||
|
|
||||||
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json`
|
- **Cookie files** are git-ignored for security (see `cookies/README.md`)
|
||||||
- eBay uses custom headers to bypass basic bot detection
|
|
||||||
- Kijiji parses Apollo state from Next.js hydration data
|
- Kijiji parses Apollo state from Next.js hydration data
|
||||||
- All scrapers handle retries on 429/5xx errors
|
- All scrapers handle retries on 429/5xx errors
|
||||||
|
- Cookie priority ensures flexibility across different deployment environments
|
||||||
|
|||||||
@@ -1,24 +1,33 @@
|
|||||||
# Facebook Marketplace Cookies Setup
|
# Marketplace Cookies Setup
|
||||||
|
|
||||||
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
|
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
|
||||||
|
|
||||||
## Option 1: Cookies File (`facebook.json`)
|
## Cookie Priority Hierarchy
|
||||||
|
|
||||||
1. Log into Facebook in your browser
|
All scrapers follow this priority order (highest to lowest):
|
||||||
2. Open Developer Tools → Network tab
|
1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
|
||||||
3. Visit facebook.com/marketplace (ensure you're logged in)
|
2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||||
4. Look for any marketplace-related requests in the Network tab
|
3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
|
||||||
5. Export cookies from the browser's Application/Storage → Cookies section
|
|
||||||
6. Save the cookies as a JSON array to `facebook.json`
|
|
||||||
|
|
||||||
The `facebook.json` file should contain Facebook session cookies, particularly:
|
---
|
||||||
|
|
||||||
|
## Facebook Marketplace (`facebook.json`)
|
||||||
|
|
||||||
|
### Required Cookies
|
||||||
- `c_user`: Your Facebook user ID
|
- `c_user`: Your Facebook user ID
|
||||||
- `xs`: Facebook session token
|
- `xs`: Facebook session token
|
||||||
- `fr`: Facebook request token
|
- `fr`: Facebook request token
|
||||||
- `datr`: Data attribution token
|
- `datr`: Data attribution token
|
||||||
- `sb`: Session browser token
|
- `sb`: Session browser token
|
||||||
|
|
||||||
Example structure:
|
### Setup Methods
|
||||||
|
|
||||||
|
**Method 1: Cookie File (Lowest Priority)**
|
||||||
|
1. Log into Facebook in your browser
|
||||||
|
2. Open Developer Tools → Application/Storage → Cookies
|
||||||
|
3. Export cookies as JSON array to `facebook.json`
|
||||||
|
|
||||||
|
Example `facebook.json`:
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
@@ -27,26 +36,59 @@ Example structure:
|
|||||||
"domain": ".facebook.com",
|
"domain": ".facebook.com",
|
||||||
"path": "/",
|
"path": "/",
|
||||||
"secure": true
|
"secure": true
|
||||||
},
|
}
|
||||||
// ... other cookies
|
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Option 2: URL Parameter
|
**Method 2: Environment Variable**
|
||||||
|
```bash
|
||||||
You can pass cookies directly via the `cookies` URL parameter:
|
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||||
|
|
||||||
```
|
```
|
||||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
|
|
||||||
|
**Method 3: URL Parameter (Highest Priority)**
|
||||||
```
|
```
|
||||||
|
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## eBay (`ebay.json`)
|
||||||
|
|
||||||
|
eBay has aggressive bot detection that blocks requests without valid session cookies.
|
||||||
|
|
||||||
|
### Setup Methods
|
||||||
|
|
||||||
|
**Method 1: Cookie File (Lowest Priority)**
|
||||||
|
1. Log into eBay in your browser
|
||||||
|
2. Open Developer Tools → Network tab
|
||||||
|
3. Visit ebay.ca and inspect any request headers
|
||||||
|
4. Copy the full `Cookie` header value
|
||||||
|
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
|
||||||
|
|
||||||
|
Example `ebay.json`:
|
||||||
|
```
|
||||||
|
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
|
||||||
|
```
|
||||||
|
|
||||||
|
**Method 2: Environment Variable**
|
||||||
|
```bash
|
||||||
|
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Method 3: URL Parameter (Highest Priority)**
|
||||||
|
```
|
||||||
|
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Important Notes
|
## Important Notes
|
||||||
|
|
||||||
- Cookies must be from an active Facebook session
|
- Cookies must be from active browser sessions
|
||||||
- Cookies expire, so you may need to refresh them periodically
|
- Cookies expire and need periodic refresh
|
||||||
- Never share real cookies or commit them to version control
|
- **NEVER** commit real cookies to version control
|
||||||
- Facebook may block automated scraping even with valid cookies
|
- Platforms may still block automated scraping despite valid cookies
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
The cookies file is intentionally left out of version control for security reasons.
|
All `*.json` files in this directory are git-ignored for security.
|
||||||
|
|||||||
1
cookies/ebay.json.example
Normal file
1
cookies/ebay.json.example
Normal file
@@ -0,0 +1 @@
|
|||||||
|
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE
|
||||||
9
opencode.jsonc
Normal file
9
opencode.jsonc
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://opencode.ai/config.json",
|
||||||
|
"mcp": {
|
||||||
|
"marketplace-scrape": {
|
||||||
|
"type": "remote",
|
||||||
|
"url": "http://localhost:4006/mcp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
12
package.json
12
package.json
@@ -2,11 +2,19 @@
|
|||||||
"name": "marketplace-scrapers-monorepo",
|
"name": "marketplace-scrapers-monorepo",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"ci": "biome ci"
|
"ci": "biome ci",
|
||||||
|
"clean": "rm -rf dist",
|
||||||
|
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||||
|
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||||
|
"build:all": "bun run build:api && bun run build:mcp",
|
||||||
|
"build": "bun run clean && bun run build:all",
|
||||||
|
"start": "./scripts/start.sh"
|
||||||
},
|
},
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"workspaces": ["packages/*"],
|
"workspaces": [
|
||||||
|
"packages/*"
|
||||||
|
],
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@biomejs/biome": "2.3.11"
|
"@biomejs/biome": "2.3.11"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { statusRoute } from "./routes/status";
|
|
||||||
import { kijijiRoute } from "./routes/kijiji";
|
|
||||||
import { facebookRoute } from "./routes/facebook";
|
|
||||||
import { ebayRoute } from "./routes/ebay";
|
import { ebayRoute } from "./routes/ebay";
|
||||||
|
import { facebookRoute } from "./routes/facebook";
|
||||||
|
import { kijijiRoute } from "./routes/kijiji";
|
||||||
|
import { statusRoute } from "./routes/status";
|
||||||
|
|
||||||
const PORT = process.env.PORT || 4005;
|
const PORT = process.env.PORT || 4005;
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@ const server = Bun.serve({
|
|||||||
},
|
},
|
||||||
|
|
||||||
// Fallback for all other routes
|
// Fallback for all other routes
|
||||||
fetch(req: Request) {
|
fetch(_req: Request) {
|
||||||
return new Response("Not Found", { status: 404 });
|
return new Response("Not Found", { status: 404 });
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
|
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
|
||||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||||
|
* Optional: Pass cookies parameter to bypass bot detection
|
||||||
*/
|
*/
|
||||||
export async function ebayRoute(req: Request): Promise<Response> {
|
export async function ebayRoute(req: Request): Promise<Response> {
|
||||||
|
try {
|
||||||
const reqUrl = new URL(req.url);
|
const reqUrl = new URL(req.url);
|
||||||
|
|
||||||
const SEARCH_QUERY =
|
const SEARCH_QUERY =
|
||||||
@@ -18,23 +20,27 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
|||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
|
|
||||||
// Parse optional parameters with defaults
|
const minPriceParam = reqUrl.searchParams.get("minPrice");
|
||||||
const minPrice = reqUrl.searchParams.get("minPrice")
|
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
|
||||||
? parseInt(reqUrl.searchParams.get("minPrice")!)
|
const maxPriceParam = reqUrl.searchParams.get("maxPrice");
|
||||||
: undefined;
|
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
|
||||||
const maxPrice = reqUrl.searchParams.get("maxPrice")
|
|
||||||
? parseInt(reqUrl.searchParams.get("maxPrice")!)
|
|
||||||
: undefined;
|
|
||||||
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
||||||
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
||||||
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
||||||
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
||||||
const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : [];
|
const exclusions = exclusionsParam
|
||||||
|
? exclusionsParam.split(",").map((s) => s.trim())
|
||||||
|
: [];
|
||||||
const keywordsParam = reqUrl.searchParams.get("keywords");
|
const keywordsParam = reqUrl.searchParams.get("keywords");
|
||||||
const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY];
|
const keywords = keywordsParam
|
||||||
|
? keywordsParam.split(",").map((s) => s.trim())
|
||||||
|
: [SEARCH_QUERY];
|
||||||
|
|
||||||
try {
|
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||||
const items = await fetchEbayItems(SEARCH_QUERY, 5, {
|
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||||
|
const cookies = reqUrl.searchParams.get("cookies") || undefined;
|
||||||
|
|
||||||
|
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||||
minPrice,
|
minPrice,
|
||||||
maxPrice,
|
maxPrice,
|
||||||
strictMode,
|
strictMode,
|
||||||
@@ -42,19 +48,21 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
|||||||
keywords,
|
keywords,
|
||||||
buyItNowOnly,
|
buyItNowOnly,
|
||||||
canadaOnly,
|
canadaOnly,
|
||||||
|
cookies,
|
||||||
});
|
});
|
||||||
if (!items || items.length === 0)
|
|
||||||
|
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||||
|
|
||||||
|
if (!results || results.length === 0)
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{ message: "Search didn't return any results!" },
|
{ message: "Search didn't return any results!" },
|
||||||
{ status: 404 },
|
{ status: 404 },
|
||||||
);
|
);
|
||||||
return Response.json(items, { status: 200 });
|
return Response.json(results, { status: 200 });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("eBay scraping error:", error);
|
console.error("eBay scraping error:", error);
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
const errorMessage =
|
||||||
return Response.json(
|
error instanceof Error ? error.message : "Unknown error occurred";
|
||||||
{ message: errorMessage },
|
return Response.json({ message: errorMessage }, { status: 400 });
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,17 +12,25 @@ export async function facebookRoute(req: Request): Promise<Response> {
|
|||||||
if (!SEARCH_QUERY)
|
if (!SEARCH_QUERY)
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{
|
{
|
||||||
message:
|
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||||
"Request didn't have 'query' header or 'q' search parameter!",
|
|
||||||
},
|
},
|
||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
|
|
||||||
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
|
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
|
||||||
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
|
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
|
||||||
|
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||||
|
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE);
|
const items = await fetchFacebookItems(
|
||||||
|
SEARCH_QUERY,
|
||||||
|
1,
|
||||||
|
LOCATION,
|
||||||
|
maxItems,
|
||||||
|
COOKIES_SOURCE,
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
if (!items || items.length === 0)
|
if (!items || items.length === 0)
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{ message: "Search didn't return any results!" },
|
{ message: "Search didn't return any results!" },
|
||||||
@@ -31,10 +39,8 @@ export async function facebookRoute(req: Request): Promise<Response> {
|
|||||||
return Response.json(items, { status: 200 });
|
return Response.json(items, { status: 200 });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Facebook scraping error:", error);
|
console.error("Facebook scraping error:", error);
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
const errorMessage =
|
||||||
return Response.json(
|
error instanceof Error ? error.message : "Unknown error occurred";
|
||||||
{ message: errorMessage },
|
return Response.json({ message: errorMessage }, { status: 400 });
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,14 +12,46 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
|||||||
if (!SEARCH_QUERY)
|
if (!SEARCH_QUERY)
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{
|
{
|
||||||
message:
|
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||||
"Request didn't have 'query' header or 'q' search parameter!",
|
|
||||||
},
|
},
|
||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const maxPagesParam = reqUrl.searchParams.get("maxPages");
|
||||||
|
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5;
|
||||||
|
const priceMinParam = reqUrl.searchParams.get("priceMin");
|
||||||
|
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined;
|
||||||
|
const priceMaxParam = reqUrl.searchParams.get("priceMax");
|
||||||
|
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined;
|
||||||
|
|
||||||
|
const searchOptions = {
|
||||||
|
location: reqUrl.searchParams.get("location") || undefined,
|
||||||
|
category: reqUrl.searchParams.get("category") || undefined,
|
||||||
|
keywords: reqUrl.searchParams.get("keywords") || undefined,
|
||||||
|
sortBy: reqUrl.searchParams.get("sortBy") as
|
||||||
|
| "relevancy"
|
||||||
|
| "date"
|
||||||
|
| "price"
|
||||||
|
| "distance"
|
||||||
|
| undefined,
|
||||||
|
sortOrder: reqUrl.searchParams.get("sortOrder") as
|
||||||
|
| "desc"
|
||||||
|
| "asc"
|
||||||
|
| undefined,
|
||||||
|
maxPages,
|
||||||
|
priceMin,
|
||||||
|
priceMax,
|
||||||
|
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const items = await fetchKijijiItems(SEARCH_QUERY, 5);
|
const items = await fetchKijijiItems(
|
||||||
|
SEARCH_QUERY,
|
||||||
|
4, // 4 requests per second for faster scraping
|
||||||
|
"https://www.kijiji.ca",
|
||||||
|
searchOptions,
|
||||||
|
{},
|
||||||
|
);
|
||||||
if (!items)
|
if (!items)
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{ message: "Search didn't return any results!" },
|
{ message: "Search didn't return any results!" },
|
||||||
@@ -28,10 +60,8 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
|||||||
return Response.json(items, { status: 200 });
|
return Response.json(items, { status: 200 });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Kijiji scraping error:", error);
|
console.error("Kijiji scraping error:", error);
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
const errorMessage =
|
||||||
return Response.json(
|
error instanceof Error ? error.message : "Unknown error occurred";
|
||||||
{ message: errorMessage },
|
return Response.json({ message: errorMessage }, { status: 400 });
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,45 +1,43 @@
|
|||||||
// Export all scrapers
|
// Export all scrapers
|
||||||
|
|
||||||
|
export type { EbayListingDetails } from "./scrapers/ebay";
|
||||||
|
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||||
|
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||||
|
export {
|
||||||
|
default as fetchFacebookItems,
|
||||||
|
ensureFacebookCookies,
|
||||||
|
extractFacebookItemData,
|
||||||
|
extractFacebookMarketplaceData,
|
||||||
|
fetchFacebookItem,
|
||||||
|
parseFacebookAds,
|
||||||
|
parseFacebookCookieString,
|
||||||
|
parseFacebookItem,
|
||||||
|
} from "./scrapers/facebook";
|
||||||
|
export type {
|
||||||
|
DetailedListing,
|
||||||
|
KijijiListingDetails,
|
||||||
|
ListingFetchOptions,
|
||||||
|
SearchOptions,
|
||||||
|
} from "./scrapers/kijiji";
|
||||||
export {
|
export {
|
||||||
default as fetchKijijiItems,
|
|
||||||
slugify,
|
|
||||||
resolveLocationId,
|
|
||||||
resolveCategoryId,
|
|
||||||
buildSearchUrl,
|
buildSearchUrl,
|
||||||
|
default as fetchKijijiItems,
|
||||||
extractApolloState,
|
extractApolloState,
|
||||||
parseSearch,
|
|
||||||
parseDetailedListing,
|
|
||||||
HttpError,
|
HttpError,
|
||||||
NetworkError,
|
NetworkError,
|
||||||
ParseError,
|
ParseError,
|
||||||
|
parseDetailedListing,
|
||||||
|
parseSearch,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
|
resolveCategoryId,
|
||||||
|
resolveLocationId,
|
||||||
|
slugify,
|
||||||
ValidationError,
|
ValidationError,
|
||||||
} from "./scrapers/kijiji";
|
} from "./scrapers/kijiji";
|
||||||
export type {
|
|
||||||
KijijiListingDetails,
|
|
||||||
DetailedListing,
|
|
||||||
SearchOptions,
|
|
||||||
ListingFetchOptions,
|
|
||||||
} from "./scrapers/kijiji";
|
|
||||||
|
|
||||||
export {
|
|
||||||
default as fetchFacebookItems,
|
|
||||||
fetchFacebookItem,
|
|
||||||
parseFacebookCookieString,
|
|
||||||
ensureFacebookCookies,
|
|
||||||
extractFacebookMarketplaceData,
|
|
||||||
extractFacebookItemData,
|
|
||||||
parseFacebookAds,
|
|
||||||
parseFacebookItem,
|
|
||||||
} from "./scrapers/facebook";
|
|
||||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
|
||||||
|
|
||||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
|
||||||
export type { EbayListingDetails } from "./scrapers/ebay";
|
|
||||||
|
|
||||||
// Export shared utilities
|
|
||||||
export * from "./utils/http";
|
|
||||||
export * from "./utils/delay";
|
|
||||||
export * from "./utils/format";
|
|
||||||
|
|
||||||
// Export shared types
|
// Export shared types
|
||||||
export * from "./types/common";
|
export * from "./types/common";
|
||||||
|
// Export shared utilities
|
||||||
|
export * from "./utils/cookies";
|
||||||
|
export * from "./utils/delay";
|
||||||
|
export * from "./utils/format";
|
||||||
|
export * from "./utils/http";
|
||||||
|
|||||||
@@ -1,9 +1,18 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import { isRecord } from "../utils/http";
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
|
||||||
import type { HTMLString } from "../types/common";
|
// eBay cookie configuration
|
||||||
|
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||||
|
name: "eBay",
|
||||||
|
domain: ".ebay.ca",
|
||||||
|
envVar: "EBAY_COOKIE",
|
||||||
|
filePath: "./cookies/ebay.json",
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
@@ -29,8 +38,10 @@ export interface EbayListingDetails {
|
|||||||
/**
|
/**
|
||||||
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
|
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
|
||||||
*/
|
*/
|
||||||
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null {
|
function parseEbayPrice(
|
||||||
if (!priceText || typeof priceText !== 'string') return null;
|
priceText: string,
|
||||||
|
): { cents: number; currency: string } | null {
|
||||||
|
if (!priceText || typeof priceText !== "string") return null;
|
||||||
|
|
||||||
// Clean up the price text and extract currency and amount
|
// Clean up the price text and extract currency and amount
|
||||||
const cleaned = priceText.trim();
|
const cleaned = priceText.trim();
|
||||||
@@ -39,19 +50,23 @@ function parseEbayPrice(priceText: string): { cents: number; currency: string }
|
|||||||
const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
|
const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
|
||||||
if (!numberMatches) return null;
|
if (!numberMatches) return null;
|
||||||
|
|
||||||
const amountStr = numberMatches[0].replace(/,/g, '');
|
const amountStr = numberMatches[0].replace(/,/g, "");
|
||||||
const dollars = parseFloat(amountStr);
|
const dollars = parseFloat(amountStr);
|
||||||
if (isNaN(dollars)) return null;
|
if (Number.isNaN(dollars)) return null;
|
||||||
|
|
||||||
const cents = Math.round(dollars * 100);
|
const cents = Math.round(dollars * 100);
|
||||||
|
|
||||||
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
|
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
|
||||||
let currency = 'USD'; // Default
|
let currency = "USD"; // Default
|
||||||
|
|
||||||
if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) {
|
if (
|
||||||
currency = 'CAD';
|
cleaned.toUpperCase().includes("CAD") ||
|
||||||
} else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) {
|
cleaned.includes("CA$") ||
|
||||||
currency = 'USD';
|
cleaned.includes("C $")
|
||||||
|
) {
|
||||||
|
currency = "CAD";
|
||||||
|
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
|
||||||
|
currency = "USD";
|
||||||
}
|
}
|
||||||
|
|
||||||
return { cents, currency };
|
return { cents, currency };
|
||||||
@@ -77,7 +92,7 @@ function parseEbayListings(
|
|||||||
htmlString: HTMLString,
|
htmlString: HTMLString,
|
||||||
keywords: string[],
|
keywords: string[],
|
||||||
exclusions: string[],
|
exclusions: string[],
|
||||||
strictMode: boolean
|
strictMode: boolean,
|
||||||
): EbayListingDetails[] {
|
): EbayListingDetails[] {
|
||||||
const { document } = parseHTML(htmlString);
|
const { document } = parseHTML(htmlString);
|
||||||
const results: EbayListingDetails[] = [];
|
const results: EbayListingDetails[] = [];
|
||||||
@@ -85,38 +100,60 @@ function parseEbayListings(
|
|||||||
// Find all listing links by looking for eBay item URLs (/itm/)
|
// Find all listing links by looking for eBay item URLs (/itm/)
|
||||||
const linkElements = document.querySelectorAll('a[href*="itm/"]');
|
const linkElements = document.querySelectorAll('a[href*="itm/"]');
|
||||||
|
|
||||||
|
|
||||||
for (const linkElement of linkElements) {
|
for (const linkElement of linkElements) {
|
||||||
try {
|
try {
|
||||||
// Get href attribute
|
// Get href attribute
|
||||||
let href = linkElement.getAttribute('href');
|
let href = linkElement.getAttribute("href");
|
||||||
if (!href) continue;
|
if (!href) continue;
|
||||||
|
|
||||||
// Make href absolute
|
// Make href absolute
|
||||||
if (!href.startsWith('http')) {
|
if (!href.startsWith("http")) {
|
||||||
href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`;
|
href = href.startsWith("//")
|
||||||
|
? `https:${href}`
|
||||||
|
: `https://www.ebay.com${href}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the container - go up several levels to find the item container
|
// Find the container - go up several levels to find the item container
|
||||||
// Modern eBay uses complex nested structures
|
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||||
let container = linkElement.parentElement?.parentElement?.parentElement;
|
let container: Element | null = linkElement;
|
||||||
if (!container) {
|
let depth = 0;
|
||||||
// Try a different level
|
const maxDepth = 15;
|
||||||
container = linkElement.parentElement?.parentElement;
|
|
||||||
|
// Walk up until we find a list item or results container
|
||||||
|
while (container && depth < maxDepth) {
|
||||||
|
const classes = container.className || "";
|
||||||
|
if (
|
||||||
|
classes.includes("s-item") ||
|
||||||
|
classes.includes("srp-results") ||
|
||||||
|
container.tagName === "LI"
|
||||||
|
) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (!container) continue;
|
container = container.parentElement;
|
||||||
|
depth++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!container || depth >= maxDepth) continue;
|
||||||
|
|
||||||
// Extract title - look for heading or title-related elements near the link
|
// Extract title - look for heading or title-related elements near the link
|
||||||
// Modern eBay often uses h3, span, or div with text content near the link
|
// Modern eBay often uses h3, span, or div with text content near the link
|
||||||
let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span');
|
let titleElement = container.querySelector(
|
||||||
|
'h3, [role="heading"], .s-item__title span',
|
||||||
|
);
|
||||||
|
|
||||||
// If no direct title element, try finding text content around the link
|
// If no direct title element, try finding text content around the link
|
||||||
if (!titleElement) {
|
if (!titleElement) {
|
||||||
// Look for spans or divs with text near this link
|
// Look for spans or divs with text near this link
|
||||||
const nearbySpans = container.querySelectorAll('span, div');
|
const nearbySpans = container.querySelectorAll("span, div");
|
||||||
for (const span of nearbySpans) {
|
for (const span of nearbySpans) {
|
||||||
const text = span.textContent?.trim();
|
const text = span.textContent?.trim();
|
||||||
if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) {
|
if (
|
||||||
|
text &&
|
||||||
|
text.length > 10 &&
|
||||||
|
text.length < 200 &&
|
||||||
|
!text.includes("$") &&
|
||||||
|
!text.includes("item")
|
||||||
|
) {
|
||||||
titleElement = span;
|
titleElement = span;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -129,12 +166,12 @@ function parseEbayListings(
|
|||||||
if (title) {
|
if (title) {
|
||||||
// Remove common eBay UI strings that appear at the end of titles
|
// Remove common eBay UI strings that appear at the end of titles
|
||||||
const uiStrings = [
|
const uiStrings = [
|
||||||
'Opens in a new window',
|
"Opens in a new window",
|
||||||
'Opens in a new tab',
|
"Opens in a new tab",
|
||||||
'Opens in a new window or tab',
|
"Opens in a new window or tab",
|
||||||
'opens in a new window',
|
"opens in a new window",
|
||||||
'opens in a new tab',
|
"opens in a new tab",
|
||||||
'opens in a new window or tab'
|
"opens in a new window or tab",
|
||||||
];
|
];
|
||||||
|
|
||||||
for (const uiString of uiStrings) {
|
for (const uiString of uiStrings) {
|
||||||
@@ -157,17 +194,28 @@ function parseEbayListings(
|
|||||||
if (title === "Shop on eBay" || title.length < 3) continue;
|
if (title === "Shop on eBay" || title.length < 3) continue;
|
||||||
|
|
||||||
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
||||||
let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]');
|
// Updated for 2026 eBay HTML structure
|
||||||
|
let priceElement = container.querySelector(
|
||||||
|
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
|
||||||
|
);
|
||||||
|
|
||||||
// If no direct price class, look for spans containing $ (but not titles)
|
// If no direct price class, look for spans containing $ (but not titles)
|
||||||
if (!priceElement) {
|
if (!priceElement) {
|
||||||
const spansAndElements = container.querySelectorAll('span, div, b, em, strong');
|
const spansAndElements = container.querySelectorAll(
|
||||||
|
"span, div, b, em, strong",
|
||||||
|
);
|
||||||
for (const el of spansAndElements) {
|
for (const el of spansAndElements) {
|
||||||
const text = el.textContent?.trim();
|
const text = el.textContent?.trim();
|
||||||
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
|
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
|
||||||
if (text && text.includes('$') && text.length < 100 &&
|
if (
|
||||||
!text.includes('laptop') && !text.includes('computer') && !text.includes('intel') &&
|
text?.includes("$") &&
|
||||||
!text.includes('core') && !text.includes('ram') && !text.includes('ssd') &&
|
text.length < 100 &&
|
||||||
|
!text.includes("laptop") &&
|
||||||
|
!text.includes("computer") &&
|
||||||
|
!text.includes("intel") &&
|
||||||
|
!text.includes("core") &&
|
||||||
|
!text.includes("ram") &&
|
||||||
|
!text.includes("ssd") &&
|
||||||
!/\d{4}/.test(text) && // Avoid years like "2024"
|
!/\d{4}/.test(text) && // Avoid years like "2024"
|
||||||
!text.includes('"') // Avoid measurements
|
!text.includes('"') // Avoid measurements
|
||||||
) {
|
) {
|
||||||
@@ -181,17 +229,26 @@ function parseEbayListings(
|
|||||||
// Prefer sale/current price over original/strikethrough price
|
// Prefer sale/current price over original/strikethrough price
|
||||||
if (priceElement) {
|
if (priceElement) {
|
||||||
// Check if this element or its parent contains multiple price elements
|
// Check if this element or its parent contains multiple price elements
|
||||||
const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement;
|
const priceContainer =
|
||||||
|
priceElement.closest('[class*="s-item__price"]') ||
|
||||||
|
priceElement.parentElement;
|
||||||
|
|
||||||
if (priceContainer) {
|
if (priceContainer) {
|
||||||
// Look for all price elements within this container, including strikethrough prices
|
// Look for all price elements within this container, including strikethrough prices
|
||||||
const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike');
|
const allPriceElements = priceContainer.querySelectorAll(
|
||||||
|
'[class*="s-item__price"], span, b, em, strong, s, del, strike',
|
||||||
|
);
|
||||||
|
|
||||||
// Filter to only elements that actually contain prices (not labels)
|
// Filter to only elements that actually contain prices (not labels)
|
||||||
const actualPrices: HTMLElement[] = [];
|
const actualPrices: HTMLElement[] = [];
|
||||||
for (const el of allPriceElements) {
|
for (const el of allPriceElements) {
|
||||||
const text = el.textContent?.trim();
|
const text = el.textContent?.trim();
|
||||||
if (text && /^\s*[$£€¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) {
|
if (
|
||||||
|
text &&
|
||||||
|
/^\s*[$£€¥]/u.test(text) &&
|
||||||
|
text.length < 50 &&
|
||||||
|
!/\d{4}/.test(text)
|
||||||
|
) {
|
||||||
actualPrices.push(el);
|
actualPrices.push(el);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -199,11 +256,18 @@ function parseEbayListings(
|
|||||||
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
|
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
|
||||||
if (actualPrices.length > 1) {
|
if (actualPrices.length > 1) {
|
||||||
// First, look for prices that are NOT struck through
|
// First, look for prices that are NOT struck through
|
||||||
const nonStrikethroughPrices = actualPrices.filter(el => {
|
const nonStrikethroughPrices = actualPrices.filter((el) => {
|
||||||
const tagName = el.tagName.toLowerCase();
|
const tagName = el.tagName.toLowerCase();
|
||||||
const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') ||
|
const styles =
|
||||||
el.closest('s, del, strike');
|
el.classList.contains("s-strikethrough") ||
|
||||||
return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles;
|
el.classList.contains("u-flStrike") ||
|
||||||
|
el.closest("s, del, strike");
|
||||||
|
return (
|
||||||
|
tagName !== "s" &&
|
||||||
|
tagName !== "del" &&
|
||||||
|
tagName !== "strike" &&
|
||||||
|
!styles
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (nonStrikethroughPrices.length > 0) {
|
if (nonStrikethroughPrices.length > 0) {
|
||||||
@@ -227,12 +291,22 @@ function parseEbayListings(
|
|||||||
if (!priceInfo) continue;
|
if (!priceInfo) continue;
|
||||||
|
|
||||||
// Apply exclusion filters
|
// Apply exclusion filters
|
||||||
if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) {
|
if (
|
||||||
|
exclusions.some((exclusion) =>
|
||||||
|
title.toLowerCase().includes(exclusion.toLowerCase()),
|
||||||
|
)
|
||||||
|
) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply strict mode filter (title must contain at least one keyword)
|
// Apply strict mode filter (title must contain at least one keyword)
|
||||||
if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) {
|
if (
|
||||||
|
strictMode &&
|
||||||
|
title &&
|
||||||
|
!keywords.some((keyword) =>
|
||||||
|
title.toLowerCase().includes(keyword.toLowerCase()),
|
||||||
|
)
|
||||||
|
) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -258,6 +332,32 @@ function parseEbayListings(
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------- Cookie Loading -----------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load eBay cookies with priority: URL param > ENV var > file
|
||||||
|
* Uses shared cookie utility for consistent handling across all scrapers
|
||||||
|
*/
|
||||||
|
async function loadEbayCookies(
|
||||||
|
cookiesSource?: string,
|
||||||
|
): Promise<string | undefined> {
|
||||||
|
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
|
||||||
|
|
||||||
|
if (cookies.length === 0) {
|
||||||
|
console.warn(
|
||||||
|
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
|
||||||
|
"Provide cookies via (in priority order):\n" +
|
||||||
|
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||||
|
" 2. EBAY_COOKIE environment variable, or\n" +
|
||||||
|
" 3. ./cookies/ebay.json file (lowest priority)\n" +
|
||||||
|
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||||
|
);
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
return formatCookiesForHeader(cookies, "www.ebay.ca");
|
||||||
|
}
|
||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
export default async function fetchEbayItems(
|
export default async function fetchEbayItems(
|
||||||
@@ -271,6 +371,7 @@ export default async function fetchEbayItems(
|
|||||||
keywords?: string[];
|
keywords?: string[];
|
||||||
buyItNowOnly?: boolean;
|
buyItNowOnly?: boolean;
|
||||||
canadaOnly?: boolean;
|
canadaOnly?: boolean;
|
||||||
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
} = {},
|
} = {},
|
||||||
) {
|
) {
|
||||||
const {
|
const {
|
||||||
@@ -281,8 +382,12 @@ export default async function fetchEbayItems(
|
|||||||
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
||||||
buyItNowOnly = true,
|
buyItNowOnly = true,
|
||||||
canadaOnly = true,
|
canadaOnly = true,
|
||||||
|
cookies: cookiesSource,
|
||||||
} = opts;
|
} = opts;
|
||||||
|
|
||||||
|
// Load eBay cookies with priority: URL param > ENV var > file
|
||||||
|
const cookies = await loadEbayCookies(cookiesSource);
|
||||||
|
|
||||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||||
const urlParams = new URLSearchParams({
|
const urlParams = new URLSearchParams({
|
||||||
_nkw: SEARCH_QUERY,
|
_nkw: SEARCH_QUERY,
|
||||||
@@ -307,20 +412,26 @@ export default async function fetchEbayItems(
|
|||||||
try {
|
try {
|
||||||
// Use custom headers modeled after real browser requests to bypass bot detection
|
// Use custom headers modeled after real browser requests to bypass bot detection
|
||||||
const headers: Record<string, string> = {
|
const headers: Record<string, string> = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0',
|
"User-Agent":
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
||||||
'Accept-Language': 'en-US,en;q=0.5',
|
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
'Accept-Encoding': 'gzip, deflate, br',
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
'Referer': 'https://www.ebay.ca/',
|
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||||
'Connection': 'keep-alive',
|
Referer: "https://www.ebay.ca/",
|
||||||
'Upgrade-Insecure-Requests': '1',
|
Connection: "keep-alive",
|
||||||
'Sec-Fetch-Dest': 'document',
|
"Upgrade-Insecure-Requests": "1",
|
||||||
'Sec-Fetch-Mode': 'navigate',
|
"Sec-Fetch-Dest": "document",
|
||||||
'Sec-Fetch-Site': 'same-origin',
|
"Sec-Fetch-Mode": "navigate",
|
||||||
'Sec-Fetch-User': '?1',
|
"Sec-Fetch-Site": "same-origin",
|
||||||
'Priority': 'u=0, i'
|
"Sec-Fetch-User": "?1",
|
||||||
|
Priority: "u=0, i",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Add cookies if available (helps bypass bot detection)
|
||||||
|
if (cookies) {
|
||||||
|
headers.Cookie = cookies;
|
||||||
|
}
|
||||||
|
|
||||||
const res = await fetch(searchUrl, {
|
const res = await fetch(searchUrl, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
headers,
|
headers,
|
||||||
@@ -340,17 +451,21 @@ export default async function fetchEbayItems(
|
|||||||
|
|
||||||
console.log(`\nParsing eBay listings...`);
|
console.log(`\nParsing eBay listings...`);
|
||||||
|
|
||||||
const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode);
|
const listings = parseEbayListings(
|
||||||
|
searchHtml,
|
||||||
|
keywords,
|
||||||
|
exclusions,
|
||||||
|
strictMode,
|
||||||
|
);
|
||||||
|
|
||||||
// Filter by price range (additional safety check)
|
// Filter by price range (additional safety check)
|
||||||
const filteredListings = listings.filter(listing => {
|
const filteredListings = listings.filter((listing) => {
|
||||||
const cents = listing.listingPrice?.cents;
|
const cents = listing.listingPrice?.cents;
|
||||||
return cents && cents >= minPrice && cents <= maxPrice;
|
return cents && cents >= minPrice && cents <= maxPrice;
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||||
return filteredListings;
|
return filteredListings;
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof HttpError) {
|
if (err instanceof HttpError) {
|
||||||
console.error(
|
console.error(
|
||||||
|
|||||||
@@ -1,10 +1,16 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
||||||
import { parseHTML } from "linkedom";
|
|
||||||
import cliProgress from "cli-progress";
|
import cliProgress from "cli-progress";
|
||||||
import { isRecord } from "../utils/http";
|
import { parseHTML } from "linkedom";
|
||||||
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type Cookie,
|
||||||
|
type CookieConfig,
|
||||||
|
ensureCookies,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
parseCookieString,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import type { HTMLString } from "../types/common";
|
import { isRecord } from "../utils/http";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Facebook Marketplace Scraper
|
* Facebook Marketplace Scraper
|
||||||
@@ -14,21 +20,13 @@ import type { HTMLString } from "../types/common";
|
|||||||
* This is by design to respect Facebook's authentication requirements.
|
* This is by design to respect Facebook's authentication requirements.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// Facebook cookie configuration
|
||||||
|
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
|
||||||
interface Cookie {
|
name: "Facebook",
|
||||||
name: string;
|
domain: ".facebook.com",
|
||||||
value: string;
|
envVar: "FACEBOOK_COOKIE",
|
||||||
domain: string;
|
filePath: "./cookies/facebook.json",
|
||||||
path: string;
|
};
|
||||||
secure?: boolean;
|
|
||||||
httpOnly?: boolean;
|
|
||||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
|
||||||
session?: boolean;
|
|
||||||
expirationDate?: number;
|
|
||||||
partitionKey?: Record<string, unknown>;
|
|
||||||
storeId?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface FacebookAdNode {
|
interface FacebookAdNode {
|
||||||
node: {
|
node: {
|
||||||
@@ -204,171 +202,31 @@ export interface FacebookListingDetails {
|
|||||||
|
|
||||||
// ----------------------------- Utilities -----------------------------
|
// ----------------------------- Utilities -----------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Load Facebook cookies from file or string
|
|
||||||
*/
|
|
||||||
async function loadFacebookCookies(
|
|
||||||
cookiesSource?: string,
|
|
||||||
cookiePath = "./cookies/facebook.json"
|
|
||||||
): Promise<Cookie[]> {
|
|
||||||
// First try to load from provided string parameter
|
|
||||||
if (cookiesSource) {
|
|
||||||
try {
|
|
||||||
const cookies = JSON.parse(cookiesSource);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
throw new Error(`Invalid cookies JSON provided: ${e}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to load from specified path
|
|
||||||
try {
|
|
||||||
const cookiesPath = cookiePath;
|
|
||||||
const file = Bun.file(cookiesPath);
|
|
||||||
if (await file.exists()) {
|
|
||||||
const content = await file.text();
|
|
||||||
const cookies = JSON.parse(content);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse Facebook cookie string into Cookie array format
|
* Parse Facebook cookie string into Cookie array format
|
||||||
|
* @deprecated Use parseCookieString from utils/cookies instead
|
||||||
*/
|
*/
|
||||||
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||||
if (!cookieString || !cookieString.trim()) {
|
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
return cookieString
|
|
||||||
.split(";")
|
|
||||||
.map((pair) => pair.trim())
|
|
||||||
.filter((pair) => pair.includes("="))
|
|
||||||
.map((pair) => {
|
|
||||||
const [name, value] = pair.split("=", 2);
|
|
||||||
const trimmedName = name.trim();
|
|
||||||
const trimmedValue = value.trim();
|
|
||||||
|
|
||||||
// Skip empty names or values
|
|
||||||
if (!trimmedName || !trimmedValue) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
name: trimmedName,
|
|
||||||
value: decodeURIComponent(trimmedValue),
|
|
||||||
domain: ".facebook.com",
|
|
||||||
path: "/",
|
|
||||||
secure: true,
|
|
||||||
httpOnly: false,
|
|
||||||
sameSite: "lax" as const,
|
|
||||||
expirationDate: undefined, // Session cookies
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.filter((cookie): cookie is Cookie => cookie !== null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure Facebook cookies are available, parsing from env var if needed
|
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||||
|
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||||
|
* @param _cookiePath - Deprecated, uses default path from config
|
||||||
*/
|
*/
|
||||||
export async function ensureFacebookCookies(
|
export async function ensureFacebookCookies(
|
||||||
cookiePath = "./cookies/facebook.json"
|
cookiesSource?: string,
|
||||||
|
_cookiePath?: string,
|
||||||
): Promise<Cookie[]> {
|
): Promise<Cookie[]> {
|
||||||
// First try to load existing cookies
|
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
|
||||||
try {
|
|
||||||
const existing = await loadFacebookCookies(undefined, cookiePath);
|
|
||||||
if (existing.length > 0) {
|
|
||||||
return existing;
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// File doesn't exist or is invalid, continue to check env var
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to parse from environment variable
|
|
||||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
|
||||||
if (!cookieString || !cookieString.trim()) {
|
|
||||||
throw new Error(
|
|
||||||
"No valid Facebook cookies found. Either:\n" +
|
|
||||||
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
|
|
||||||
" 2. Create ./cookies/facebook.json manually with cookie array"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse the cookie string
|
|
||||||
const cookies = parseFacebookCookieString(cookieString);
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
|
|
||||||
'Expected format: "name1=value1; name2=value2;"'
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save to file for future use
|
|
||||||
try {
|
|
||||||
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
|
|
||||||
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
|
|
||||||
} catch (error) {
|
|
||||||
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
|
|
||||||
// Continue anyway, we have the cookies in memory
|
|
||||||
}
|
|
||||||
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Format cookies array into Cookie header string
|
|
||||||
*/
|
|
||||||
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
|
||||||
const validCookies = cookies
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check if cookie applies to this domain
|
|
||||||
if (cookie.domain.startsWith(".")) {
|
|
||||||
// Domain cookie (applies to subdomains)
|
|
||||||
return (
|
|
||||||
domain.endsWith(cookie.domain.slice(1)) ||
|
|
||||||
domain === cookie.domain.slice(1)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Host-only cookie
|
|
||||||
return cookie.domain === domain;
|
|
||||||
})
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check expiration
|
|
||||||
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
|
|
||||||
return false; // Expired
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
return validCookies
|
|
||||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
|
||||||
.join("; ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class HttpError extends Error {
|
class HttpError extends Error {
|
||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly status: number,
|
public readonly status: number,
|
||||||
public readonly url: string
|
public readonly url: string,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "HttpError";
|
this.name = "HttpError";
|
||||||
@@ -407,7 +265,7 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
|
|||||||
!extractionStats.lastApiChangeDetected
|
!extractionStats.lastApiChangeDetected
|
||||||
) {
|
) {
|
||||||
console.warn(
|
console.warn(
|
||||||
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes."
|
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
|
||||||
);
|
);
|
||||||
extractionStats.lastApiChangeDetected = new Date();
|
extractionStats.lastApiChangeDetected = new Date();
|
||||||
}
|
}
|
||||||
@@ -433,7 +291,7 @@ async function fetchHtml(
|
|||||||
retryBaseMs?: number;
|
retryBaseMs?: number;
|
||||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||||
cookies?: string;
|
cookies?: string;
|
||||||
}
|
},
|
||||||
): Promise<HTMLString> {
|
): Promise<HTMLString> {
|
||||||
const maxRetries = opts?.maxRetries ?? 3;
|
const maxRetries = opts?.maxRetries ?? 3;
|
||||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||||
@@ -487,7 +345,7 @@ async function fetchHtml(
|
|||||||
throw new HttpError(
|
throw new HttpError(
|
||||||
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
|
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
|
||||||
res.status,
|
res.status,
|
||||||
url
|
url,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
// Retry on 5xx
|
// Retry on 5xx
|
||||||
@@ -498,7 +356,7 @@ async function fetchHtml(
|
|||||||
throw new HttpError(
|
throw new HttpError(
|
||||||
`Request failed with status ${res.status}`,
|
`Request failed with status ${res.status}`,
|
||||||
res.status,
|
res.status,
|
||||||
url
|
url,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -521,7 +379,7 @@ async function fetchHtml(
|
|||||||
Extract marketplace search data from Facebook page script tags
|
Extract marketplace search data from Facebook page script tags
|
||||||
*/
|
*/
|
||||||
export function extractFacebookMarketplaceData(
|
export function extractFacebookMarketplaceData(
|
||||||
htmlString: HTMLString
|
htmlString: HTMLString,
|
||||||
): FacebookAdNode[] | null {
|
): FacebookAdNode[] | null {
|
||||||
const { document } = parseHTML(htmlString);
|
const { document } = parseHTML(htmlString);
|
||||||
const scripts = document.querySelectorAll("script");
|
const scripts = document.querySelectorAll("script");
|
||||||
@@ -567,13 +425,12 @@ export function extractFacebookMarketplaceData(
|
|||||||
if (
|
if (
|
||||||
result &&
|
result &&
|
||||||
isRecord(result) &&
|
isRecord(result) &&
|
||||||
(result as any).feed_units?.edges?.length > 0
|
(result as Record<string, unknown>).feed_units?.edges?.length > 0
|
||||||
) {
|
) {
|
||||||
marketplaceData = result as FacebookMarketplaceSearch;
|
marketplaceData = result as FacebookMarketplaceSearch;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (marketplaceData) break;
|
if (marketplaceData) break;
|
||||||
@@ -583,13 +440,13 @@ export function extractFacebookMarketplaceData(
|
|||||||
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
|
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
|
||||||
const searchData =
|
const searchData =
|
||||||
parsed.marketplace_search as FacebookMarketplaceSearch;
|
parsed.marketplace_search as FacebookMarketplaceSearch;
|
||||||
if (searchData.feed_units?.edges?.length ?? 0 > 0) {
|
const feedLength = searchData.feed_units?.edges?.length ?? 0;
|
||||||
|
if (feedLength > 0) {
|
||||||
marketplaceData = searchData;
|
marketplaceData = searchData;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!marketplaceData?.feed_units?.edges?.length) {
|
if (!marketplaceData?.feed_units?.edges?.length) {
|
||||||
@@ -598,7 +455,7 @@ export function extractFacebookMarketplaceData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`
|
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`,
|
||||||
);
|
);
|
||||||
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
|
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
|
||||||
}
|
}
|
||||||
@@ -608,7 +465,7 @@ export function extractFacebookMarketplaceData(
|
|||||||
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
|
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
|
||||||
*/
|
*/
|
||||||
export function extractFacebookItemData(
|
export function extractFacebookItemData(
|
||||||
htmlString: HTMLString
|
htmlString: HTMLString,
|
||||||
): FacebookMarketplaceItem | null {
|
): FacebookMarketplaceItem | null {
|
||||||
const { document } = parseHTML(htmlString);
|
const { document } = parseHTML(htmlString);
|
||||||
const scripts = document.querySelectorAll("script");
|
const scripts = document.querySelectorAll("script");
|
||||||
@@ -657,7 +514,7 @@ export function extractFacebookItemData(
|
|||||||
targetData.__typename === "GroupCommerceProductItem"
|
targetData.__typename === "GroupCommerceProductItem"
|
||||||
) {
|
) {
|
||||||
console.log(
|
console.log(
|
||||||
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`
|
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
|
||||||
);
|
);
|
||||||
return targetData as FacebookMarketplaceItem;
|
return targetData as FacebookMarketplaceItem;
|
||||||
}
|
}
|
||||||
@@ -671,18 +528,19 @@ export function extractFacebookItemData(
|
|||||||
const findMarketplaceData = (
|
const findMarketplaceData = (
|
||||||
obj: unknown,
|
obj: unknown,
|
||||||
depth = 0,
|
depth = 0,
|
||||||
maxDepth = 10
|
maxDepth = 10,
|
||||||
): FacebookMarketplaceItem | null => {
|
): FacebookMarketplaceItem | null => {
|
||||||
if (depth > maxDepth) return null; // Prevent infinite recursion
|
if (depth > maxDepth) return null; // Prevent infinite recursion
|
||||||
if (isRecord(obj)) {
|
if (isRecord(obj)) {
|
||||||
// Check if this object matches the expected marketplace item structure
|
// Check if this object matches the expected marketplace item structure
|
||||||
|
const candidate = obj as Record<string, unknown>;
|
||||||
if (
|
if (
|
||||||
(obj as any).marketplace_listing_title &&
|
candidate.marketplace_listing_title &&
|
||||||
(obj as any).id &&
|
candidate.id &&
|
||||||
(obj as any).__typename === "GroupCommerceProductItem" &&
|
candidate.__typename === "GroupCommerceProductItem" &&
|
||||||
(obj as any).redacted_description
|
candidate.redacted_description
|
||||||
) {
|
) {
|
||||||
return obj as unknown as FacebookMarketplaceItem;
|
return candidate as unknown as FacebookMarketplaceItem;
|
||||||
}
|
}
|
||||||
// Recursively search nested objects and arrays
|
// Recursively search nested objects and arrays
|
||||||
for (const key in obj) {
|
for (const key in obj) {
|
||||||
@@ -706,7 +564,7 @@ export function extractFacebookItemData(
|
|||||||
const recursiveResult = findMarketplaceData(parsed.require);
|
const recursiveResult = findMarketplaceData(parsed.require);
|
||||||
if (recursiveResult) {
|
if (recursiveResult) {
|
||||||
console.log(
|
console.log(
|
||||||
"Successfully extracted Facebook item data using recursive search"
|
"Successfully extracted Facebook item data using recursive search",
|
||||||
);
|
);
|
||||||
return recursiveResult;
|
return recursiveResult;
|
||||||
}
|
}
|
||||||
@@ -727,14 +585,13 @@ export function extractFacebookItemData(
|
|||||||
bboxData.__typename === "GroupCommerceProductItem"
|
bboxData.__typename === "GroupCommerceProductItem"
|
||||||
) {
|
) {
|
||||||
console.log(
|
console.log(
|
||||||
"Successfully extracted Facebook item data from __bbox structure"
|
"Successfully extracted Facebook item data from __bbox structure",
|
||||||
);
|
);
|
||||||
return bboxData as FacebookMarketplaceItem;
|
return bboxData as FacebookMarketplaceItem;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@@ -743,7 +600,9 @@ export function extractFacebookItemData(
|
|||||||
/**
|
/**
|
||||||
Parse Facebook marketplace search results into ListingDetails[]
|
Parse Facebook marketplace search results into ListingDetails[]
|
||||||
*/
|
*/
|
||||||
export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
export function parseFacebookAds(
|
||||||
|
ads: FacebookAdNode[],
|
||||||
|
): FacebookListingDetails[] {
|
||||||
const results: FacebookListingDetails[] = [];
|
const results: FacebookListingDetails[] = [];
|
||||||
|
|
||||||
for (const adJson of ads) {
|
for (const adJson of ads) {
|
||||||
@@ -840,7 +699,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
|
|||||||
title,
|
title,
|
||||||
listingPrice: {
|
listingPrice: {
|
||||||
amountFormatted:
|
amountFormatted:
|
||||||
priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"),
|
priceObj.formatted_amount || formatCentsToCurrency(cents, "en-CA"),
|
||||||
cents,
|
cents,
|
||||||
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
|
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
|
||||||
},
|
},
|
||||||
@@ -856,8 +715,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
|
|||||||
};
|
};
|
||||||
|
|
||||||
results.push(listingDetails);
|
results.push(listingDetails);
|
||||||
} catch {
|
} catch {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
@@ -868,7 +726,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
|
|||||||
Updated for 2026 GroupCommerceProductItem structure
|
Updated for 2026 GroupCommerceProductItem structure
|
||||||
*/
|
*/
|
||||||
export function parseFacebookItem(
|
export function parseFacebookItem(
|
||||||
item: FacebookMarketplaceItem
|
item: FacebookMarketplaceItem,
|
||||||
): FacebookListingDetails | null {
|
): FacebookListingDetails | null {
|
||||||
try {
|
try {
|
||||||
const title = item.marketplace_listing_title || item.custom_title;
|
const title = item.marketplace_listing_title || item.custom_title;
|
||||||
@@ -888,7 +746,7 @@ export function parseFacebookItem(
|
|||||||
if (!Number.isNaN(amount)) {
|
if (!Number.isNaN(amount)) {
|
||||||
cents = Math.round(amount * 100);
|
cents = Math.round(amount * 100);
|
||||||
amountFormatted =
|
amountFormatted =
|
||||||
item.formatted_price?.text || formatCentsToCurrency(cents / 100, "en-CA");
|
item.formatted_price?.text || formatCentsToCurrency(cents, "en-CA");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -963,31 +821,17 @@ export default async function fetchFacebookItems(
|
|||||||
LOCATION = "toronto",
|
LOCATION = "toronto",
|
||||||
MAX_ITEMS = 25,
|
MAX_ITEMS = 25,
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath?: string
|
cookiePath?: string,
|
||||||
) {
|
) {
|
||||||
// Load Facebook cookies - required for Facebook Marketplace access
|
// Load Facebook cookies with priority: URL param > ENV var > file
|
||||||
let cookies: Cookie[];
|
const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
|
||||||
if (cookiesSource) {
|
|
||||||
// Use provided cookie source (backward compatibility)
|
|
||||||
cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
} else {
|
|
||||||
// Auto-load from file or parse from env var
|
|
||||||
cookies = await ensureFacebookCookies(cookiePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"Facebook cookies are required for marketplace access. " +
|
|
||||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format cookies for HTTP header
|
// Format cookies for HTTP header
|
||||||
const domain = "www.facebook.com";
|
const domain = "www.facebook.com";
|
||||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||||
if (!cookiesHeader) {
|
if (!cookiesHeader) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
|
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1009,7 +853,7 @@ export default async function fetchFacebookItems(
|
|||||||
onRateInfo: (remaining, reset) => {
|
onRateInfo: (remaining, reset) => {
|
||||||
if (remaining && reset) {
|
if (remaining && reset) {
|
||||||
console.log(
|
console.log(
|
||||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -1018,11 +862,11 @@ export default async function fetchFacebookItems(
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof HttpError) {
|
if (err instanceof HttpError) {
|
||||||
console.warn(
|
console.warn(
|
||||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`
|
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
|
||||||
);
|
);
|
||||||
if (err.status === 400 || err.status === 401 || err.status === 403) {
|
if (err.status === 400 || err.status === 401 || err.status === 403) {
|
||||||
console.warn(
|
console.warn(
|
||||||
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
|
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return [];
|
return [];
|
||||||
@@ -1040,7 +884,7 @@ export default async function fetchFacebookItems(
|
|||||||
|
|
||||||
const progressBar = new cliProgress.SingleBar(
|
const progressBar = new cliProgress.SingleBar(
|
||||||
{},
|
{},
|
||||||
cliProgress.Presets.shades_classic
|
cliProgress.Presets.shades_classic,
|
||||||
);
|
);
|
||||||
const totalProgress = ads.length;
|
const totalProgress = ads.length;
|
||||||
const currentProgress = 0;
|
const currentProgress = 0;
|
||||||
@@ -1050,7 +894,7 @@ export default async function fetchFacebookItems(
|
|||||||
|
|
||||||
// Filter to only priced items (already done in parseFacebookAds)
|
// Filter to only priced items (already done in parseFacebookAds)
|
||||||
const pricedItems = items.filter(
|
const pricedItems = items.filter(
|
||||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0
|
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
|
||||||
);
|
);
|
||||||
|
|
||||||
progressBar.update(totalProgress);
|
progressBar.update(totalProgress);
|
||||||
@@ -1066,31 +910,16 @@ export default async function fetchFacebookItems(
|
|||||||
export async function fetchFacebookItem(
|
export async function fetchFacebookItem(
|
||||||
itemId: string,
|
itemId: string,
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath?: string
|
_cookiePath?: string,
|
||||||
): Promise<FacebookListingDetails | null> {
|
): Promise<FacebookListingDetails | null> {
|
||||||
// Load Facebook cookies - required for Facebook Marketplace access
|
// Load Facebook cookies - required for Facebook Marketplace access
|
||||||
let cookies: Cookie[];
|
const cookies = await ensureFacebookCookies(cookiesSource);
|
||||||
if (cookiesSource) {
|
|
||||||
// Use provided cookie source (backward compatibility)
|
|
||||||
cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
} else {
|
|
||||||
// Auto-load from file or parse from env var
|
|
||||||
cookies = await ensureFacebookCookies(cookiePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"Facebook cookies are required for marketplace access. " +
|
|
||||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format cookies for HTTP header
|
// Format cookies for HTTP header
|
||||||
const domain = "www.facebook.com";
|
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
|
||||||
if (!cookiesHeader) {
|
if (!cookiesHeader) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
|
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1104,7 +933,7 @@ export async function fetchFacebookItem(
|
|||||||
onRateInfo: (remaining, reset) => {
|
onRateInfo: (remaining, reset) => {
|
||||||
if (remaining && reset) {
|
if (remaining && reset) {
|
||||||
console.log(
|
console.log(
|
||||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -1113,7 +942,7 @@ export async function fetchFacebookItem(
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof HttpError) {
|
if (err instanceof HttpError) {
|
||||||
console.warn(
|
console.warn(
|
||||||
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`
|
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Enhanced error handling based on status codes
|
// Enhanced error handling based on status codes
|
||||||
@@ -1122,27 +951,27 @@ export async function fetchFacebookItem(
|
|||||||
case 401:
|
case 401:
|
||||||
case 403:
|
case 403:
|
||||||
console.warn(
|
console.warn(
|
||||||
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
|
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
|
||||||
);
|
);
|
||||||
console.warn(
|
console.warn(
|
||||||
"Try logging out and back into Facebook, then export fresh cookies."
|
"Try logging out and back into Facebook, then export fresh cookies.",
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
case 404:
|
case 404:
|
||||||
console.warn(
|
console.warn(
|
||||||
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid."
|
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
case 429:
|
case 429:
|
||||||
console.warn(
|
console.warn(
|
||||||
"Rate limited: Too many requests. Facebook is blocking access temporarily."
|
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
case 500:
|
case 500:
|
||||||
case 502:
|
case 502:
|
||||||
case 503:
|
case 503:
|
||||||
console.warn(
|
console.warn(
|
||||||
"Facebook server error: Marketplace may be temporarily unavailable."
|
"Facebook server error: Marketplace may be temporarily unavailable.",
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -1163,7 +992,7 @@ export async function fetchFacebookItem(
|
|||||||
itemHtml.includes("This item has been sold")
|
itemHtml.includes("This item has been sold")
|
||||||
) {
|
) {
|
||||||
console.warn(
|
console.warn(
|
||||||
`Item ${itemId} appears to be sold or removed from marketplace.`
|
`Item ${itemId} appears to be sold or removed from marketplace.`,
|
||||||
);
|
);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -1174,13 +1003,13 @@ export async function fetchFacebookItem(
|
|||||||
itemHtml.includes("authentication required")
|
itemHtml.includes("authentication required")
|
||||||
) {
|
) {
|
||||||
console.warn(
|
console.warn(
|
||||||
`Authentication failed for item ${itemId}. Cookies may be expired.`
|
`Authentication failed for item ${itemId}. Cookies may be expired.`,
|
||||||
);
|
);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.warn(
|
console.warn(
|
||||||
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`
|
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
|
||||||
);
|
);
|
||||||
console.warn(" - The listing was removed or sold");
|
console.warn(" - The listing was removed or sold");
|
||||||
console.warn(" - Authentication issues");
|
console.warn(" - Authentication issues");
|
||||||
|
|||||||
@@ -1,19 +1,30 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
import cliProgress from "cli-progress";
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import unidecode from "unidecode";
|
import unidecode from "unidecode";
|
||||||
import cliProgress from "cli-progress";
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import {
|
import {
|
||||||
fetchHtml,
|
fetchHtml,
|
||||||
isRecord,
|
|
||||||
HttpError,
|
HttpError,
|
||||||
|
isRecord,
|
||||||
NetworkError,
|
NetworkError,
|
||||||
ParseError,
|
ParseError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
ValidationError,
|
ValidationError,
|
||||||
} from "../utils/http";
|
} from "../utils/http";
|
||||||
import { delay } from "../utils/delay";
|
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
// Kijiji cookie configuration
|
||||||
import type { HTMLString } from "../types/common";
|
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
|
||||||
|
name: "Kijiji",
|
||||||
|
domain: ".kijiji.ca",
|
||||||
|
envVar: "KIJIJI_COOKIE",
|
||||||
|
filePath: "./cookies/kijiji.json",
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
@@ -112,6 +123,7 @@ export interface SearchOptions {
|
|||||||
maxPages?: number; // Default: 5
|
maxPages?: number; // Default: 5
|
||||||
priceMin?: number;
|
priceMin?: number;
|
||||||
priceMax?: number;
|
priceMax?: number;
|
||||||
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ListingFetchOptions {
|
export interface ListingFetchOptions {
|
||||||
@@ -219,7 +231,7 @@ export function resolveCategoryId(category?: number | string): number {
|
|||||||
export function buildSearchUrl(
|
export function buildSearchUrl(
|
||||||
keywords: string,
|
keywords: string,
|
||||||
options: SearchOptions & { page?: number },
|
options: SearchOptions & { page?: number },
|
||||||
BASE_URL = "https://www.kijiji.ca"
|
BASE_URL = "https://www.kijiji.ca",
|
||||||
): string {
|
): string {
|
||||||
const locationId = resolveLocationId(options.location);
|
const locationId = resolveLocationId(options.location);
|
||||||
const categoryId = resolveCategoryId(options.category);
|
const categoryId = resolveCategoryId(options.category);
|
||||||
@@ -319,7 +331,7 @@ const GRAPHQL_QUERIES = {
|
|||||||
async function fetchGraphQLData(
|
async function fetchGraphQLData(
|
||||||
query: string,
|
query: string,
|
||||||
variables: Record<string, unknown>,
|
variables: Record<string, unknown>,
|
||||||
BASE_URL = "https://www.kijiji.ca"
|
BASE_URL = "https://www.kijiji.ca",
|
||||||
): Promise<unknown> {
|
): Promise<unknown> {
|
||||||
const endpoint = `${BASE_URL}/anvil/api`;
|
const endpoint = `${BASE_URL}/anvil/api`;
|
||||||
|
|
||||||
@@ -340,7 +352,7 @@ async function fetchGraphQLData(
|
|||||||
throw new HttpError(
|
throw new HttpError(
|
||||||
`GraphQL request failed with status ${response.status}`,
|
`GraphQL request failed with status ${response.status}`,
|
||||||
response.status,
|
response.status,
|
||||||
endpoint
|
endpoint,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -349,7 +361,7 @@ async function fetchGraphQLData(
|
|||||||
if (result.errors) {
|
if (result.errors) {
|
||||||
throw new ParseError(
|
throw new ParseError(
|
||||||
`GraphQL errors: ${JSON.stringify(result.errors)}`,
|
`GraphQL errors: ${JSON.stringify(result.errors)}`,
|
||||||
result.errors
|
result.errors,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -361,7 +373,7 @@ async function fetchGraphQLData(
|
|||||||
throw new NetworkError(
|
throw new NetworkError(
|
||||||
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
|
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
endpoint,
|
endpoint,
|
||||||
err instanceof Error ? err : undefined
|
err instanceof Error ? err : undefined,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -371,7 +383,7 @@ async function fetchGraphQLData(
|
|||||||
*/
|
*/
|
||||||
async function fetchSellerDetails(
|
async function fetchSellerDetails(
|
||||||
posterId: string,
|
posterId: string,
|
||||||
BASE_URL = "https://www.kijiji.ca"
|
BASE_URL = "https://www.kijiji.ca",
|
||||||
): Promise<{
|
): Promise<{
|
||||||
reviewCount?: number;
|
reviewCount?: number;
|
||||||
reviewScore?: number;
|
reviewScore?: number;
|
||||||
@@ -383,12 +395,12 @@ async function fetchSellerDetails(
|
|||||||
fetchGraphQLData(
|
fetchGraphQLData(
|
||||||
GRAPHQL_QUERIES.getReviewSummary,
|
GRAPHQL_QUERIES.getReviewSummary,
|
||||||
{ userId: posterId },
|
{ userId: posterId },
|
||||||
BASE_URL
|
BASE_URL,
|
||||||
),
|
),
|
||||||
fetchGraphQLData(
|
fetchGraphQLData(
|
||||||
GRAPHQL_QUERIES.getProfileMetrics,
|
GRAPHQL_QUERIES.getProfileMetrics,
|
||||||
{ profileId: posterId },
|
{ profileId: posterId },
|
||||||
BASE_URL
|
BASE_URL,
|
||||||
),
|
),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -405,7 +417,7 @@ async function fetchSellerDetails(
|
|||||||
// Silently fail for GraphQL errors - not critical for basic functionality
|
// Silently fail for GraphQL errors - not critical for basic functionality
|
||||||
console.warn(
|
console.warn(
|
||||||
`Failed to fetch seller details for ${posterId}:`,
|
`Failed to fetch seller details for ${posterId}:`,
|
||||||
err instanceof Error ? err.message : String(err)
|
err instanceof Error ? err.message : String(err),
|
||||||
);
|
);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
@@ -416,7 +428,9 @@ async function fetchSellerDetails(
|
|||||||
/**
|
/**
|
||||||
Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML.
|
Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML.
|
||||||
*/
|
*/
|
||||||
export function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
|
export function extractApolloState(
|
||||||
|
htmlString: HTMLString,
|
||||||
|
): ApolloRecord | null {
|
||||||
const { document } = parseHTML(htmlString);
|
const { document } = parseHTML(htmlString);
|
||||||
const nextData = document.getElementById("__NEXT_DATA__");
|
const nextData = document.getElementById("__NEXT_DATA__");
|
||||||
if (!nextData || !nextData.textContent) return null;
|
if (!nextData || !nextData.textContent) return null;
|
||||||
@@ -436,7 +450,7 @@ export function extractApolloState(htmlString: HTMLString): ApolloRecord | null
|
|||||||
*/
|
*/
|
||||||
export function parseSearch(
|
export function parseSearch(
|
||||||
htmlString: HTMLString,
|
htmlString: HTMLString,
|
||||||
BASE_URL: string
|
BASE_URL: string,
|
||||||
): SearchListing[] {
|
): SearchListing[] {
|
||||||
const apolloState = extractApolloState(htmlString);
|
const apolloState = extractApolloState(htmlString);
|
||||||
if (!apolloState) return [];
|
if (!apolloState) return [];
|
||||||
@@ -463,16 +477,16 @@ export function parseSearch(
|
|||||||
/**
|
/**
|
||||||
Parse a listing page into a typed object (backward compatible).
|
Parse a listing page into a typed object (backward compatible).
|
||||||
*/
|
*/
|
||||||
function parseListing(
|
function _parseListing(
|
||||||
htmlString: HTMLString,
|
htmlString: HTMLString,
|
||||||
BASE_URL: string
|
BASE_URL: string,
|
||||||
): KijijiListingDetails | null {
|
): KijijiListingDetails | null {
|
||||||
const apolloState = extractApolloState(htmlString);
|
const apolloState = extractApolloState(htmlString);
|
||||||
if (!apolloState) return null;
|
if (!apolloState) return null;
|
||||||
|
|
||||||
// Find the listing root key
|
// Find the listing root key
|
||||||
const listingKey = Object.keys(apolloState).find((k) =>
|
const listingKey = Object.keys(apolloState).find((k) =>
|
||||||
k.includes("Listing")
|
k.includes("Listing"),
|
||||||
);
|
);
|
||||||
if (!listingKey) return null;
|
if (!listingKey) return null;
|
||||||
|
|
||||||
@@ -494,7 +508,7 @@ function parseListing(
|
|||||||
|
|
||||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||||
const amountFormatted =
|
const amountFormatted =
|
||||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
|
||||||
|
|
||||||
const numberOfViews =
|
const numberOfViews =
|
||||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||||
@@ -515,7 +529,8 @@ function parseListing(
|
|||||||
listingPrice: amountFormatted
|
listingPrice: amountFormatted
|
||||||
? {
|
? {
|
||||||
amountFormatted,
|
amountFormatted,
|
||||||
cents: Number.isFinite(cents!) ? cents : undefined,
|
cents:
|
||||||
|
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
|
||||||
currency: price?.currency,
|
currency: price?.currency,
|
||||||
}
|
}
|
||||||
: undefined,
|
: undefined,
|
||||||
@@ -523,7 +538,10 @@ function parseListing(
|
|||||||
listingStatus: status,
|
listingStatus: status,
|
||||||
creationDate: activationDate,
|
creationDate: activationDate,
|
||||||
endDate,
|
endDate,
|
||||||
numberOfViews: Number.isFinite(numberOfViews!) ? numberOfViews : undefined,
|
numberOfViews:
|
||||||
|
numberOfViews !== undefined && Number.isFinite(numberOfViews)
|
||||||
|
? numberOfViews
|
||||||
|
: undefined,
|
||||||
address: location?.address ?? null,
|
address: location?.address ?? null,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -534,14 +552,14 @@ function parseListing(
|
|||||||
export async function parseDetailedListing(
|
export async function parseDetailedListing(
|
||||||
htmlString: HTMLString,
|
htmlString: HTMLString,
|
||||||
BASE_URL: string,
|
BASE_URL: string,
|
||||||
options: ListingFetchOptions = {}
|
options: ListingFetchOptions = {},
|
||||||
): Promise<DetailedListing | null> {
|
): Promise<DetailedListing | null> {
|
||||||
const apolloState = extractApolloState(htmlString);
|
const apolloState = extractApolloState(htmlString);
|
||||||
if (!apolloState) return null;
|
if (!apolloState) return null;
|
||||||
|
|
||||||
// Find the listing root key
|
// Find the listing root key
|
||||||
const listingKey = Object.keys(apolloState).find((k) =>
|
const listingKey = Object.keys(apolloState).find((k) =>
|
||||||
k.includes("Listing")
|
k.includes("Listing"),
|
||||||
);
|
);
|
||||||
if (!listingKey) return null;
|
if (!listingKey) return null;
|
||||||
|
|
||||||
@@ -569,7 +587,7 @@ export async function parseDetailedListing(
|
|||||||
|
|
||||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||||
const amountFormatted =
|
const amountFormatted =
|
||||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
|
||||||
|
|
||||||
const numberOfViews =
|
const numberOfViews =
|
||||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||||
@@ -621,7 +639,7 @@ export async function parseDetailedListing(
|
|||||||
try {
|
try {
|
||||||
const additionalData = await fetchSellerDetails(
|
const additionalData = await fetchSellerDetails(
|
||||||
posterInfo.posterId,
|
posterInfo.posterId,
|
||||||
BASE_URL
|
BASE_URL,
|
||||||
);
|
);
|
||||||
sellerInfo = {
|
sellerInfo = {
|
||||||
...sellerInfo,
|
...sellerInfo,
|
||||||
@@ -630,7 +648,7 @@ export async function parseDetailedListing(
|
|||||||
} catch {
|
} catch {
|
||||||
// Silently fail - GraphQL data is optional
|
// Silently fail - GraphQL data is optional
|
||||||
console.warn(
|
console.warn(
|
||||||
`Failed to fetch additional seller data for ${posterInfo.posterId}`
|
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -683,10 +701,20 @@ export default async function fetchKijijiItems(
|
|||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
BASE_URL = "https://www.kijiji.ca",
|
BASE_URL = "https://www.kijiji.ca",
|
||||||
searchOptions: SearchOptions = {},
|
searchOptions: SearchOptions = {},
|
||||||
listingOptions: ListingFetchOptions = {}
|
listingOptions: ListingFetchOptions = {},
|
||||||
) {
|
) {
|
||||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||||
|
|
||||||
|
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||||
|
const cookies = await loadCookiesOptional(
|
||||||
|
KIJIJI_COOKIE_CONFIG,
|
||||||
|
searchOptions.cookies,
|
||||||
|
);
|
||||||
|
const cookieHeader =
|
||||||
|
cookies.length > 0
|
||||||
|
? formatCookiesForHeader(cookies, "www.kijiji.ca")
|
||||||
|
: undefined;
|
||||||
|
|
||||||
// Set defaults for configuration
|
// Set defaults for configuration
|
||||||
const finalSearchOptions: Required<SearchOptions> = {
|
const finalSearchOptions: Required<SearchOptions> = {
|
||||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||||
@@ -697,6 +725,7 @@ export default async function fetchKijijiItems(
|
|||||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||||
priceMin: searchOptions.priceMin as number,
|
priceMin: searchOptions.priceMin as number,
|
||||||
priceMax: searchOptions.priceMax as number,
|
priceMax: searchOptions.priceMax as number,
|
||||||
|
cookies: searchOptions.cookies ?? "",
|
||||||
};
|
};
|
||||||
|
|
||||||
const finalListingOptions: Required<ListingFetchOptions> = {
|
const finalListingOptions: Required<ListingFetchOptions> = {
|
||||||
@@ -717,7 +746,7 @@ export default async function fetchKijijiItems(
|
|||||||
// Add page parameter for pagination
|
// Add page parameter for pagination
|
||||||
...(page > 1 && { page }),
|
...(page > 1 && { page }),
|
||||||
},
|
},
|
||||||
BASE_URL
|
BASE_URL,
|
||||||
);
|
);
|
||||||
|
|
||||||
console.log(`Fetching search page ${page}: ${searchUrl}`);
|
console.log(`Fetching search page ${page}: ${searchUrl}`);
|
||||||
@@ -725,16 +754,17 @@ export default async function fetchKijijiItems(
|
|||||||
onRateInfo: (remaining, reset) => {
|
onRateInfo: (remaining, reset) => {
|
||||||
if (remaining && reset) {
|
if (remaining && reset) {
|
||||||
console.log(
|
console.log(
|
||||||
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
const searchResults = parseSearch(searchHtml, BASE_URL);
|
const searchResults = parseSearch(searchHtml, BASE_URL);
|
||||||
if (searchResults.length === 0) {
|
if (searchResults.length === 0) {
|
||||||
console.log(
|
console.log(
|
||||||
`No more results found on page ${page}. Stopping pagination.`
|
`No more results found on page ${page}. Stopping pagination.`,
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -749,54 +779,79 @@ export default async function fetchKijijiItems(
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`
|
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Fetch details for this page's listings
|
// Fetch details for this page's listings with controlled concurrency
|
||||||
const progressBar = new cliProgress.SingleBar(
|
const isTTY = process.stdout?.isTTY ?? false;
|
||||||
{},
|
const progressBar = isTTY
|
||||||
cliProgress.Presets.shades_classic
|
? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
|
||||||
);
|
: null;
|
||||||
const totalProgress = newListingLinks.length;
|
const totalProgress = newListingLinks.length;
|
||||||
let currentProgress = 0;
|
let currentProgress = 0;
|
||||||
progressBar.start(totalProgress, currentProgress);
|
progressBar?.start(totalProgress, currentProgress);
|
||||||
|
|
||||||
for (const link of newListingLinks) {
|
// Process in batches for controlled concurrency
|
||||||
|
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing
|
||||||
|
const results: (DetailedListing | null)[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||||
|
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
||||||
|
const batchPromises = batch.map(async (link) => {
|
||||||
try {
|
try {
|
||||||
const html = await fetchHtml(link, DELAY_MS, {
|
const html = await fetchHtml(link, 0, {
|
||||||
|
// No per-request delay, batch handles rate limit
|
||||||
onRateInfo: (remaining, reset) => {
|
onRateInfo: (remaining, reset) => {
|
||||||
if (remaining && reset) {
|
if (remaining && reset) {
|
||||||
console.log(
|
console.log(
|
||||||
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||||
});
|
});
|
||||||
const parsed = await parseDetailedListing(
|
const parsed = await parseDetailedListing(
|
||||||
html,
|
html,
|
||||||
BASE_URL,
|
BASE_URL,
|
||||||
finalListingOptions
|
finalListingOptions,
|
||||||
);
|
);
|
||||||
if (parsed) {
|
return parsed;
|
||||||
allListings.push(parsed);
|
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof HttpError) {
|
if (err instanceof HttpError) {
|
||||||
console.error(
|
console.error(
|
||||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`
|
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
console.error(
|
console.error(
|
||||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`
|
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
} finally {
|
} finally {
|
||||||
currentProgress++;
|
currentProgress++;
|
||||||
progressBar.update(currentProgress);
|
progressBar?.update(currentProgress);
|
||||||
|
if (!progressBar) {
|
||||||
|
console.log(`Progress: ${currentProgress}/${totalProgress}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const batchResults = await Promise.all(batchPromises);
|
||||||
|
results.push(...batchResults);
|
||||||
|
|
||||||
|
// Wait between batches to respect rate limit
|
||||||
|
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||||
|
await new Promise((resolve) =>
|
||||||
|
setTimeout(resolve, DELAY_MS * batch.length),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
progressBar.stop();
|
allListings.push(
|
||||||
|
...results.filter((r): r is DetailedListing => r !== null),
|
||||||
|
);
|
||||||
|
|
||||||
|
progressBar?.stop();
|
||||||
|
|
||||||
// If we got fewer results than expected (40 per page), we've reached the end
|
// If we got fewer results than expected (40 per page), we've reached the end
|
||||||
if (searchResults.length < 40) {
|
if (searchResults.length < 40) {
|
||||||
@@ -809,10 +864,4 @@ export default async function fetchKijijiItems(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Re-export error classes for convenience
|
// Re-export error classes for convenience
|
||||||
export {
|
export { HttpError, NetworkError, ParseError, RateLimitError, ValidationError };
|
||||||
HttpError,
|
|
||||||
NetworkError,
|
|
||||||
ParseError,
|
|
||||||
RateLimitError,
|
|
||||||
ValidationError,
|
|
||||||
};
|
|
||||||
|
|||||||
227
packages/core/src/utils/cookies.ts
Normal file
227
packages/core/src/utils/cookies.ts
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
/**
|
||||||
|
* Shared cookie handling utilities for marketplace scrapers
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * One cookie record as handled by the helpers in this module.
 * Only `name`, `value`, `domain` and `path` are mandatory.
 */
export interface Cookie {
  // --- identity ---
  name: string;
  value: string;

  // --- scope ---
  /** A leading "." marks a domain cookie that also applies to subdomains. */
  domain: string;
  path: string;

  // --- lifetime ---
  /** Expiry in seconds since the Unix epoch (compared against `Date.now() / 1000`). */
  expirationDate?: number;
  session?: boolean;

  // --- attributes ---
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";

  // --- extra metadata; not read by the helpers in this module ---
  partitionKey?: Record<string, unknown>;
  storeId?: string;
}
|
||||||
|
|
||||||
|
/**
 * Describes where the cookies of one marketplace come from and how that
 * marketplace is referred to in log output.
 */
export interface CookieConfig {
  /** Human-readable label used in log messages (e.g., "Facebook", "Kijiji") */
  name: string;

  /** Domain assigned to cookies parsed from a plain cookie string (e.g., ".facebook.com", ".kijiji.ca") */
  domain: string;

  /** Name of the environment variable consulted for cookies (e.g., "FACEBOOK_COOKIE") */
  envVar: string;

  /** Location of the fallback cookie file (e.g., "./cookies/facebook.json") */
  filePath: string;
}
|
||||||
|
|
||||||
|
/**
 * Parse cookie string format into Cookie array
 * Supports format: "name1=value1; name2=value2"
 *
 * Pairs without "=", with an empty name, or with an empty value are skipped.
 * Values are percent-decoded. A value whose percent-encoding is malformed
 * (for example "100%") is kept verbatim instead of aborting the whole parse
 * with a URIError.
 *
 * @param cookieString - Raw cookie string; empty or whitespace-only input yields []
 * @param domain - Domain assigned to every parsed cookie
 * @returns Parsed cookies, each with path "/", secure true, httpOnly false
 *          and sameSite "lax"
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();

    // Split on the FIRST "=" only: cookie values may themselves contain "=".
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent-encoding: keep the raw value rather than letting
      // one bad cookie discard every other cookie in the string.
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax" as const,
      expirationDate: undefined,
    });
  }

  return cookies;
}
|
||||||
|
|
||||||
|
/**
 * Decode a JSON document that holds an array of cookie objects.
 * Supports format: [{"name": "foo", "value": "bar", ...}]
 *
 * Array entries that lack a string `name` or a string `value` are dropped;
 * all other fields are passed through without validation. A JSON value that
 * is not an array yields []. Errors raised by JSON.parse propagate to the
 * caller.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded = JSON.parse(jsonString);

  if (Array.isArray(decoded)) {
    // Minimal shape check: only `name` and `value` are verified.
    const hasNameAndValue = (entry: any): entry is Cookie =>
      entry &&
      typeof entry.name === "string" &&
      typeof entry.value === "string";

    return decoded.filter(hasNameAndValue);
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * Try to parse cookies from a string (tries JSON first, then cookie string format)
 *
 * JSON entries are only checked for `name` and `value`, so a cookie may
 * arrive without `domain` or `path`. Those are filled in here
 * (`defaultDomain` and "/") so that later domain matching always has a
 * string to work with.
 *
 * @param input - JSON array text or a "name1=value1; name2=value2" string
 * @param defaultDomain - Domain used for cookie-string input and for JSON
 *                        cookies that carry no domain of their own
 * @returns Parsed cookies; [] when neither format yields any
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  // Try JSON array format first
  try {
    const cookies = parseJsonCookies(input);
    if (cookies.length > 0) {
      return cookies.map((cookie) => ({
        ...cookie,
        domain:
          typeof cookie.domain === "string" ? cookie.domain : defaultDomain,
        path: typeof cookie.path === "string" ? cookie.path : "/",
      }));
    }
  } catch {
    // JSON parse failed, try cookie string format
  }

  // Try cookie string format
  return parseCookieString(input, defaultDomain);
}
|
||||||
|
|
||||||
|
/**
 * Read a cookie file and parse its trimmed contents with parseCookiesAuto,
 * so both the JSON array and the cookie string formats are accepted.
 *
 * @param filePath - Path of the cookie file
 * @param defaultDomain - Domain forwarded to parseCookiesAuto
 * @returns The parsed cookies, or [] when the file does not exist
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);
  const isPresent = await cookieFile.exists();

  if (isPresent) {
    const rawText = await cookieFile.text();
    return parseCookiesAuto(rawText.trim(), defaultDomain);
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * Format cookies array into Cookie header string for HTTP requests
 *
 * A cookie is included when it applies to `targetDomain` and has not expired:
 *  - a domain starting with "." matches the bare domain itself and any
 *    subdomain on a label boundary (".kijiji.ca" matches "kijiji.ca" and
 *    "www.kijiji.ca", but not "evilkijiji.ca");
 *  - any other domain is host-only and must equal `targetDomain`;
 *  - cookies without a string domain, or with the degenerate domain ".",
 *    are skipped instead of throwing or matching every host;
 *  - a cookie with a truthy `expirationDate` (epoch seconds) in the past
 *    is dropped.
 * Domain comparison is case-insensitive.
 *
 * @param cookies - Candidate cookies
 * @param targetDomain - Host the request is sent to (e.g. "www.kijiji.ca")
 * @returns "name=value" pairs joined with "; ", or "" when nothing matches
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    // JSON-sourced cookies are not guaranteed to carry a domain.
    if (typeof cookie.domain !== "string") {
      return false;
    }
    const domain = cookie.domain.toLowerCase();

    // Host-only cookie
    if (!domain.startsWith(".")) {
      return domain === host;
    }

    // Domain cookie (applies to subdomains)
    const bareDomain = domain.slice(1);
    if (!bareDomain) {
      return false;
    }
    // `domain` still has its leading ".", so endsWith() only matches on a
    // label boundary and cannot accept look-alike hosts.
    return host === bareDomain || host.endsWith(domain);
  };

  const isUnexpired = (cookie: Cookie): boolean =>
    !(cookie.expirationDate && cookie.expirationDate < nowSeconds);

  return cookies
    .filter((cookie) => appliesToHost(cookie) && isUnexpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||||
|
|
||||||
|
/**
 * Resolve cookies for a marketplace, consulting the sources in order and
 * returning the first one that yields at least one cookie:
 *   1. the `cookiesSource` argument (URL/API parameter),
 *   2. the environment variable named by `config.envVar`,
 *   3. the file at `config.filePath`.
 * Every source accepts both the JSON array and the cookie string format.
 *
 * A source that is present but yields no cookies is reported with
 * console.warn and the next source is tried; a file-loading error is
 * likewise only warned about.
 *
 * @throws Error when no source provides a valid cookie
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Source 1: explicit parameter
  if (cookiesSource) {
    const fromParameter = parseCookiesAuto(cookiesSource, config.domain);
    if (fromParameter.length === 0) {
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(
        `Loaded ${fromParameter.length} ${config.name} cookies from parameter`,
      );
      return fromParameter;
    }
  }

  // Source 2: environment variable (ignored when unset or blank)
  const fromEnvRaw = process.env[config.envVar];
  if (fromEnvRaw?.trim()) {
    const fromEnv = parseCookiesAuto(fromEnvRaw, config.domain);
    if (fromEnv.length === 0) {
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return fromEnv;
    }
  }

  // Source 3: cookie file
  try {
    const fromFile = await loadCookiesFromFile(config.filePath, config.domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${config.name} cookies from ${config.filePath}`,
      );
      return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // Every source came up empty
  const failureMessage =
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
    ` 1. 'cookies' parameter (highest priority), or\n` +
    ` 2. ${config.envVar} environment variable, or\n` +
    ` 3. ${config.filePath} file (lowest priority)\n` +
    'Format: JSON array or cookie string like "name1=value1; name2=value2"';
  throw new Error(failureMessage);
}
|
||||||
|
|
||||||
|
/**
 * Non-throwing wrapper around ensureCookies: any failure, including
 * "no cookies found", is turned into an empty array.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const noCookies = (): Cookie[] => [];
  return ensureCookies(config, cookiesSource).catch(noCookies);
}
|
||||||
@@ -4,7 +4,10 @@
|
|||||||
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
|
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
|
||||||
* @returns Formatted currency string
|
* @returns Formatted currency string
|
||||||
*/
|
*/
|
||||||
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string {
|
export function formatCentsToCurrency(
|
||||||
|
cents: number,
|
||||||
|
locale: string = "en-CA",
|
||||||
|
): string {
|
||||||
try {
|
try {
|
||||||
const formatter = new Intl.NumberFormat(locale, {
|
const formatter = new Intl.NumberFormat(locale, {
|
||||||
style: "currency",
|
style: "currency",
|
||||||
@@ -13,7 +16,7 @@ export function formatCentsToCurrency(cents: number, locale: string = "en-CA"):
|
|||||||
maximumFractionDigits: 2,
|
maximumFractionDigits: 2,
|
||||||
});
|
});
|
||||||
return formatter.format(cents / 100);
|
return formatter.format(cents / 100);
|
||||||
} catch (error) {
|
} catch {
|
||||||
// Fallback if locale is not supported
|
// Fallback if locale is not supported
|
||||||
const dollars = (cents / 100).toFixed(2);
|
const dollars = (cents / 100).toFixed(2);
|
||||||
return `$${dollars}`;
|
return `$${dollars}`;
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ export class HttpError extends Error {
|
|||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly statusCode: number,
|
public readonly statusCode: number,
|
||||||
public readonly url?: string
|
public readonly url?: string,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "HttpError";
|
this.name = "HttpError";
|
||||||
@@ -15,7 +15,7 @@ export class NetworkError extends Error {
|
|||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly url: string,
|
public readonly url: string,
|
||||||
public readonly cause?: Error
|
public readonly cause?: Error,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "NetworkError";
|
this.name = "NetworkError";
|
||||||
@@ -26,7 +26,7 @@ export class NetworkError extends Error {
|
|||||||
export class ParseError extends Error {
|
export class ParseError extends Error {
|
||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly data?: unknown
|
public readonly data?: unknown,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "ParseError";
|
this.name = "ParseError";
|
||||||
@@ -38,7 +38,7 @@ export class RateLimitError extends Error {
|
|||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly url: string,
|
public readonly url: string,
|
||||||
public readonly resetTime?: number
|
public readonly resetTime?: number,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "RateLimitError";
|
this.name = "RateLimitError";
|
||||||
@@ -87,7 +87,7 @@ export interface FetchHtmlOptions {
|
|||||||
export async function fetchHtml(
|
export async function fetchHtml(
|
||||||
url: string,
|
url: string,
|
||||||
delayMs: number,
|
delayMs: number,
|
||||||
opts?: FetchHtmlOptions
|
opts?: FetchHtmlOptions,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const maxRetries = opts?.maxRetries ?? 3;
|
const maxRetries = opts?.maxRetries ?? 3;
|
||||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||||
@@ -137,14 +137,14 @@ export async function fetchHtml(
|
|||||||
throw new RateLimitError(
|
throw new RateLimitError(
|
||||||
`Rate limit exceeded for ${url}`,
|
`Rate limit exceeded for ${url}`,
|
||||||
url,
|
url,
|
||||||
resetSeconds
|
resetSeconds,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retry on server errors
|
// Retry on server errors
|
||||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -152,7 +152,7 @@ export async function fetchHtml(
|
|||||||
throw new HttpError(
|
throw new HttpError(
|
||||||
`Request failed with status ${res.status}`,
|
`Request failed with status ${res.status}`,
|
||||||
res.status,
|
res.status,
|
||||||
url
|
url,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -174,7 +174,7 @@ export async function fetchHtml(
|
|||||||
if (err instanceof Error && err.name === "AbortError") {
|
if (err instanceof Error && err.name === "AbortError") {
|
||||||
if (attempt < maxRetries) {
|
if (attempt < maxRetries) {
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -184,14 +184,14 @@ export async function fetchHtml(
|
|||||||
// Network or other errors
|
// Network or other errors
|
||||||
if (attempt < maxRetries) {
|
if (attempt < maxRetries) {
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw new NetworkError(
|
throw new NetworkError(
|
||||||
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
url,
|
url,
|
||||||
err instanceof Error ? err : undefined
|
err instanceof Error ? err : undefined,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import {
|
|||||||
fetchFacebookItem,
|
fetchFacebookItem,
|
||||||
formatCentsToCurrency,
|
formatCentsToCurrency,
|
||||||
formatCookiesForHeader,
|
formatCookiesForHeader,
|
||||||
loadFacebookCookies,
|
|
||||||
parseFacebookAds,
|
parseFacebookAds,
|
||||||
parseFacebookCookieString,
|
parseFacebookCookieString,
|
||||||
parseFacebookItem,
|
parseFacebookItem,
|
||||||
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
const result = await fetchFacebookItem("123", mockCookies);
|
const _result = await fetchFacebookItem("123", mockCookies);
|
||||||
expect(attempts).toBe(2);
|
expect(attempts).toBe(2);
|
||||||
// Should eventually succeed after retry
|
// Should eventually succeed after retry
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
import { fetchFacebookItems } from "../src/scrapers/facebook";
|
||||||
|
|
||||||
// Mock fetch globally
|
// Mock fetch globally
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
|
|||||||
@@ -1,13 +1,12 @@
|
|||||||
import { describe, expect, test } from "bun:test";
|
import { describe, expect, test } from "bun:test";
|
||||||
import {
|
import {
|
||||||
HttpError,
|
buildSearchUrl,
|
||||||
NetworkError,
|
NetworkError,
|
||||||
ParseError,
|
ParseError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
ValidationError,
|
|
||||||
buildSearchUrl,
|
|
||||||
resolveCategoryId,
|
resolveCategoryId,
|
||||||
resolveLocationId,
|
resolveLocationId,
|
||||||
|
ValidationError,
|
||||||
} from "../src/scrapers/kijiji";
|
} from "../src/scrapers/kijiji";
|
||||||
|
|
||||||
describe("Location and Category Resolution", () => {
|
describe("Location and Category Resolution", () => {
|
||||||
@@ -121,20 +120,12 @@ describe("URL Construction", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe("Error Classes", () => {
|
describe("Error Classes", () => {
|
||||||
test("HttpError should store status and URL", () => {
|
|
||||||
const error = new HttpError("Not found", 404, "https://example.com");
|
|
||||||
expect(error.message).toBe("Not found");
|
|
||||||
expect(error.statusCode).toBe(404);
|
|
||||||
expect(error.url).toBe("https://example.com");
|
|
||||||
expect(error.name).toBe("HttpError");
|
|
||||||
});
|
|
||||||
|
|
||||||
test("NetworkError should store URL and cause", () => {
|
test("NetworkError should store URL and cause", () => {
|
||||||
const cause = new Error("Connection failed");
|
const cause = new Error("Connection failed");
|
||||||
const error = new NetworkError(
|
const error = new NetworkError(
|
||||||
"Network error",
|
"Network error",
|
||||||
"https://example.com",
|
"https://example.com",
|
||||||
cause
|
cause,
|
||||||
);
|
);
|
||||||
expect(error.message).toBe("Network error");
|
expect(error.message).toBe("Network error");
|
||||||
expect(error.url).toBe("https://example.com");
|
expect(error.url).toBe("https://example.com");
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
import { describe, expect, test } from "bun:test";
|
||||||
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
||||||
|
|
||||||
describe("Utility Functions", () => {
|
describe("Utility Functions", () => {
|
||||||
|
|||||||
@@ -5,12 +5,15 @@ const PORT = process.env.MCP_PORT || 4006;
|
|||||||
|
|
||||||
const server = Bun.serve({
|
const server = Bun.serve({
|
||||||
port: PORT as number | string,
|
port: PORT as number | string,
|
||||||
idleTimeout: 0,
|
idleTimeout: 255, // 255 seconds (max allowed)
|
||||||
routes: {
|
routes: {
|
||||||
// MCP metadata discovery endpoint
|
// MCP metadata discovery endpoint
|
||||||
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), {
|
"/.well-known/mcp/server-card.json": new Response(
|
||||||
|
JSON.stringify(serverCard),
|
||||||
|
{
|
||||||
headers: { "Content-Type": "application/json" },
|
headers: { "Content-Type": "application/json" },
|
||||||
}),
|
},
|
||||||
|
),
|
||||||
|
|
||||||
// MCP JSON-RPC 2.0 protocol endpoint
|
// MCP JSON-RPC 2.0 protocol endpoint
|
||||||
"/mcp": async (req: Request) => {
|
"/mcp": async (req: Request) => {
|
||||||
@@ -19,13 +22,13 @@ const server = Bun.serve({
|
|||||||
}
|
}
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{ message: "MCP endpoint requires POST request" },
|
{ message: "MCP endpoint requires POST request" },
|
||||||
{ status: 405 }
|
{ status: 405 },
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
// Fallback for all other routes
|
// Fallback for all other routes
|
||||||
fetch(req: Request) {
|
fetch(_req: Request) {
|
||||||
return new Response("Not Found", { status: 404 });
|
return new Response("Not Found", { status: 404 });
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
|
|
||||||
import { tools } from "./tools";
|
import { tools } from "./tools";
|
||||||
|
|
||||||
|
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
|
||||||
|
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle MCP JSON-RPC 2.0 protocol requests
|
* Handle MCP JSON-RPC 2.0 protocol requests
|
||||||
*/
|
*/
|
||||||
@@ -16,7 +18,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
error: { code: -32600, message: "Invalid Request" },
|
error: { code: -32600, message: "Invalid Request" },
|
||||||
id: body.id,
|
id: body.id,
|
||||||
},
|
},
|
||||||
{ status: 400 }
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -38,7 +40,8 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
name: "marketplace-scrapers",
|
name: "marketplace-scrapers",
|
||||||
version: "1.0.0",
|
version: "1.0.0",
|
||||||
},
|
},
|
||||||
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
instructions:
|
||||||
|
"Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -78,15 +81,18 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
{
|
{
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
error: { code: -32602, message: "Invalid params: name and arguments required" },
|
error: {
|
||||||
|
code: -32602,
|
||||||
|
message: "Invalid params: name and arguments required",
|
||||||
},
|
},
|
||||||
{ status: 400 }
|
},
|
||||||
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Route tool calls to appropriate handlers
|
// Route tool calls to appropriate handlers
|
||||||
try {
|
try {
|
||||||
let result;
|
let result: unknown;
|
||||||
|
|
||||||
if (name === "search_kijiji") {
|
if (name === "search_kijiji") {
|
||||||
const query = args.query;
|
const query = args.query;
|
||||||
@@ -97,8 +103,45 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
error: { code: -32602, message: "query parameter is required" },
|
error: { code: -32602, message: "query parameter is required" },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const items = await fetchKijijiItems(query, args.maxItems || 5);
|
const params = new URLSearchParams({ q: query });
|
||||||
result = items || [];
|
if (args.location) params.append("location", args.location);
|
||||||
|
if (args.category) params.append("category", args.category);
|
||||||
|
if (args.keywords) params.append("keywords", args.keywords);
|
||||||
|
if (args.sortBy) params.append("sortBy", args.sortBy);
|
||||||
|
if (args.sortOrder) params.append("sortOrder", args.sortOrder);
|
||||||
|
if (args.maxPages)
|
||||||
|
params.append("maxPages", args.maxPages.toString());
|
||||||
|
if (args.priceMin)
|
||||||
|
params.append("priceMin", args.priceMin.toString());
|
||||||
|
if (args.priceMax)
|
||||||
|
params.append("priceMax", args.priceMax.toString());
|
||||||
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||||
|
);
|
||||||
|
const response = await Promise.race([
|
||||||
|
fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
|
||||||
|
new Promise<Response>((_, reject) =>
|
||||||
|
setTimeout(
|
||||||
|
() =>
|
||||||
|
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||||
|
API_TIMEOUT,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text();
|
||||||
|
console.error(
|
||||||
|
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
|
||||||
|
);
|
||||||
|
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||||
|
}
|
||||||
|
result = await response.json();
|
||||||
|
console.log(
|
||||||
|
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||||
|
);
|
||||||
} else if (name === "search_facebook") {
|
} else if (name === "search_facebook") {
|
||||||
const query = args.query;
|
const query = args.query;
|
||||||
if (!query) {
|
if (!query) {
|
||||||
@@ -108,14 +151,37 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
error: { code: -32602, message: "query parameter is required" },
|
error: { code: -32602, message: "query parameter is required" },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const items = await fetchFacebookItems(
|
const params = new URLSearchParams({ q: query });
|
||||||
query,
|
if (args.location) params.append("location", args.location);
|
||||||
args.maxItems || 5,
|
if (args.maxItems)
|
||||||
args.location || "toronto",
|
params.append("maxItems", args.maxItems.toString());
|
||||||
25,
|
if (args.cookiesSource) params.append("cookies", args.cookiesSource);
|
||||||
args.cookiesSource
|
|
||||||
|
console.log(
|
||||||
|
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
|
||||||
|
);
|
||||||
|
const response = await Promise.race([
|
||||||
|
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
|
||||||
|
new Promise<Response>((_, reject) =>
|
||||||
|
setTimeout(
|
||||||
|
() =>
|
||||||
|
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||||
|
API_TIMEOUT,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text();
|
||||||
|
console.error(
|
||||||
|
`[MCP] Facebook API error ${response.status}: ${errorText}`,
|
||||||
|
);
|
||||||
|
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||||
|
}
|
||||||
|
result = await response.json();
|
||||||
|
console.log(
|
||||||
|
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||||
);
|
);
|
||||||
result = items || [];
|
|
||||||
} else if (name === "search_ebay") {
|
} else if (name === "search_ebay") {
|
||||||
const query = args.query;
|
const query = args.query;
|
||||||
if (!query) {
|
if (!query) {
|
||||||
@@ -125,16 +191,50 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
error: { code: -32602, message: "query parameter is required" },
|
error: { code: -32602, message: "query parameter is required" },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const items = await fetchEbayItems(query, args.maxItems || 5, {
|
const params = new URLSearchParams({ q: query });
|
||||||
minPrice: args.minPrice,
|
if (args.minPrice)
|
||||||
maxPrice: args.maxPrice,
|
params.append("minPrice", args.minPrice.toString());
|
||||||
strictMode: args.strictMode || false,
|
if (args.maxPrice)
|
||||||
exclusions: args.exclusions || [],
|
params.append("maxPrice", args.maxPrice.toString());
|
||||||
keywords: args.keywords || [query],
|
if (args.strictMode !== undefined)
|
||||||
buyItNowOnly: args.buyItNowOnly !== false,
|
params.append("strictMode", args.strictMode.toString());
|
||||||
canadaOnly: args.canadaOnly !== false,
|
if (args.exclusions?.length)
|
||||||
});
|
params.append("exclusions", args.exclusions.join(","));
|
||||||
result = items || [];
|
if (args.keywords?.length)
|
||||||
|
params.append("keywords", args.keywords.join(","));
|
||||||
|
if (args.buyItNowOnly !== undefined)
|
||||||
|
params.append("buyItNowOnly", args.buyItNowOnly.toString());
|
||||||
|
if (args.canadaOnly !== undefined)
|
||||||
|
params.append("canadaOnly", args.canadaOnly.toString());
|
||||||
|
if (args.maxItems)
|
||||||
|
params.append("maxItems", args.maxItems.toString());
|
||||||
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||||
|
);
|
||||||
|
const response = await Promise.race([
|
||||||
|
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
|
||||||
|
new Promise<Response>((_, reject) =>
|
||||||
|
setTimeout(
|
||||||
|
() =>
|
||||||
|
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||||
|
API_TIMEOUT,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text();
|
||||||
|
console.error(
|
||||||
|
`[MCP] eBay API error ${response.status}: ${errorText}`,
|
||||||
|
);
|
||||||
|
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||||
|
}
|
||||||
|
result = await response.json();
|
||||||
|
console.log(
|
||||||
|
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
return Response.json({
|
return Response.json({
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
@@ -156,11 +256,15 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
const errorMessage =
|
||||||
|
error instanceof Error ? error.message : "Unknown error";
|
||||||
return Response.json({
|
return Response.json({
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` },
|
error: {
|
||||||
|
code: -32603,
|
||||||
|
message: `Tool execution failed: ${errorMessage}`,
|
||||||
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -172,16 +276,17 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
id,
|
id,
|
||||||
error: { code: -32601, message: `Method not found: ${method}` },
|
error: { code: -32601, message: `Method not found: ${method}` },
|
||||||
},
|
},
|
||||||
{ status: 404 }
|
{ status: 404 },
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
const errorMessage =
|
||||||
|
error instanceof Error ? error.message : "Unknown error";
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{
|
{
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
||||||
},
|
},
|
||||||
{ status: 400 }
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
export const serverCard = {
|
export const serverCard = {
|
||||||
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
$schema:
|
||||||
|
"https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
||||||
version: "1.0",
|
version: "1.0",
|
||||||
protocolVersion: "2025-06-18",
|
protocolVersion: "2025-06-18",
|
||||||
serverInfo: {
|
serverInfo: {
|
||||||
@@ -20,6 +21,7 @@ export const serverCard = {
|
|||||||
listChanged: true,
|
listChanged: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
description:
|
||||||
|
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
||||||
tools: "dynamic",
|
tools: "dynamic",
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -13,11 +13,50 @@ export const tools = [
|
|||||||
type: "string",
|
type: "string",
|
||||||
description: "Search query for Kijiji listings",
|
description: "Search query for Kijiji listings",
|
||||||
},
|
},
|
||||||
maxItems: {
|
location: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
|
||||||
|
},
|
||||||
|
category: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
|
||||||
|
},
|
||||||
|
keywords: {
|
||||||
|
type: "string",
|
||||||
|
description: "Additional keywords to filter results",
|
||||||
|
},
|
||||||
|
sortBy: {
|
||||||
|
type: "string",
|
||||||
|
description: "Sort results by field",
|
||||||
|
enum: ["relevancy", "date", "price", "distance"],
|
||||||
|
default: "relevancy",
|
||||||
|
},
|
||||||
|
sortOrder: {
|
||||||
|
type: "string",
|
||||||
|
description: "Sort order",
|
||||||
|
enum: ["asc", "desc"],
|
||||||
|
default: "desc",
|
||||||
|
},
|
||||||
|
maxPages: {
|
||||||
type: "number",
|
type: "number",
|
||||||
description: "Maximum number of items to return",
|
description: "Maximum pages to fetch (~40 items per page)",
|
||||||
default: 5,
|
default: 5,
|
||||||
},
|
},
|
||||||
|
priceMin: {
|
||||||
|
type: "number",
|
||||||
|
description: "Minimum price in cents",
|
||||||
|
},
|
||||||
|
priceMax: {
|
||||||
|
type: "number",
|
||||||
|
description: "Maximum price in cents",
|
||||||
|
},
|
||||||
|
cookies: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: ["query"],
|
required: ["query"],
|
||||||
},
|
},
|
||||||
@@ -52,7 +91,8 @@ export const tools = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "search_ebay",
|
name: "search_ebay",
|
||||||
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
description:
|
||||||
|
"Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: "object",
|
type: "object",
|
||||||
properties: {
|
properties: {
|
||||||
@@ -98,6 +138,11 @@ export const tools = [
|
|||||||
description: "Maximum number of items to return",
|
description: "Maximum number of items to return",
|
||||||
default: 5,
|
default: 5,
|
||||||
},
|
},
|
||||||
|
cookies: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: ["query"],
|
required: ["query"],
|
||||||
},
|
},
|
||||||
|
|||||||
26
scripts/biome-symlink.sh
Executable file
26
scripts/biome-symlink.sh
Executable file
@@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Replace every biome executable found under node_modules with a symlink to
# the biome binary that is first on PATH.
#
# Requires: fd, and a biome executable on PATH.
# Exit status: 0 on success or when nothing needs replacing, 1 when no biome
# is on PATH, non-zero if a replacement step fails.
set -euo pipefail

# Resolve the system biome. `command -v` is the portable replacement for
# `which`; `|| true` keeps `set -e` from aborting before we can print the
# error message below.
BIOME_PATH=$(command -v biome || true)

if [[ -z "$BIOME_PATH" ]]; then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

# Collect all biome executables in node_modules. The list is read
# NUL-delimited so paths containing whitespace or glob characters stay intact.
files=()
while IFS= read -r -d '' found; do
  files+=("$found")
done < <(fd biome node_modules --type executable --no-ignore --follow --print0)

if (( ${#files[@]} == 0 )); then
  echo "No biome executables found in node_modules"
  exit 0
fi

# Replace each with a symlink to the system biome.
for file in "${files[@]}"; do
  # Same inode as the target: either an already-created symlink or the system
  # binary itself reached through node_modules. Removing it would destroy the
  # link target (or create a self-referential link), so leave it alone.
  if [[ "$file" -ef "$BIOME_PATH" ]]; then
    echo "Skipping $file (already resolves to $BIOME_PATH)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  rm -- "$file"
  ln -s -- "$BIOME_PATH" "$file"
done

echo "Done."
|
||||||
30
scripts/remove-eslint.sh
Executable file
30
scripts/remove-eslint.sh
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Delete every line matching PATTERN from the files fd lists under the
# current directory. Each modified file keeps a "<file>.bak" backup created
# by `sed -i.bak`.
#
# Requires: fd, rg, awk, sed.
set -euo pipefail

PATTERN="eslint"

# Snapshot the file list before editing anything, so the .bak files created
# below are never picked up during this run. The list is read NUL-delimited
# so paths containing whitespace survive.
candidates=()
while IFS= read -r -d '' path; do
  candidates+=("$path")
done < <(fd . --print0) # Or use 'find . -print0' to search recursively

if (( ${#candidates[@]} == 0 )); then
  exit 0
fi

for file in "${candidates[@]}"; do
  if [[ ! -f "$file" ]]; then
    continue
  fi

  # This script contains PATTERN itself; rewriting a script while bash is
  # still reading it is unsafe, so never edit our own file.
  if [[ "$file" -ef "${BASH_SOURCE[0]}" ]]; then
    continue
  fi

  # rg prints "N:content" for each matching line. Keep only purely numeric
  # first fields (this drops notices such as rg's "binary file matches"
  # line) and join them with commas for display.
  # `|| true`: rg exits 1 when nothing matches, which is not an error here.
  LINE_NUMBERS=$(
    rg --line-number --no-filename -- "$PATTERN" "$file" \
      | awk -F':' '$1 ~ /^[0-9]+$/ { printf "%s%s", sep, $1; sep = "," }'
  ) || true

  if [[ -n "$LINE_NUMBERS" ]]; then
    echo "Deleting lines $LINE_NUMBERS from $file..."

    # sed addresses are a single line or ONE "first,last" range, so a
    # comma-joined list such as "3,7,12d" is a syntax error (and "3,7d" would
    # delete the whole range 3..7). Emit one delete command per line instead:
    # "3,7,12" -> "3d;7d;12d".
    sed_script="${LINE_NUMBERS//,/d;}d"

    # -i.bak (suffix attached to the flag) is accepted by both GNU and BSD sed.
    sed -i.bak -e "$sed_script" -- "$file"

    # Optional: Remove the backup file created by sed -i.bak
    # rm -- "${file}.bak"
  else
    echo "$file: No lines matching pattern found."
  fi
done
|
||||||
25
scripts/start.sh
Executable file
25
scripts/start.sh
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Start the API server and the MCP server as background jobs and supervise
# them: forward SIGTERM/SIGINT to both, and shut everything down as soon as
# either service exits so a half-dead process group is never left running.
#
# Environment:
#   API_PORT  port shown in the API start-up message (default 4005)
#   MCP_PORT  port shown in the MCP start-up message (default 4006)
#             NOTE(review): assumes the MCP server reads MCP_PORT; confirm.
# Exit status: the status of the first service to exit, or 128+signal when
# stopped by SIGTERM (143) / SIGINT (130).
set -euo pipefail

API_PID=""
MCP_PID=""

# Send SIGTERM to every service that has been started, then reap them.
stop_services() {
  local pid
  for pid in "$API_PID" "$MCP_PID"; do
    if [[ -n "$pid" ]]; then
      # The process may already be gone; that is fine during shutdown.
      kill -TERM "$pid" 2>/dev/null || true
    fi
  done
  wait || true
}

# Signal handler. $1 is the exit status to report (128 + signal number).
on_signal() {
  echo "Received shutdown signal, stopping services..."
  stop_services
  exit "$1"
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap 'on_signal 143' TERM
trap 'on_signal 130' INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Start MCP Server in background
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Block until the first service exits (wait -n needs bash >= 4.3). If a
# trapped signal interrupts the wait, the handler above runs and exits.
status=0
wait -n || status=$?

echo "A service exited with status $status, stopping the remaining service..." >&2
stop_services
exit "$status"
|
||||||
Reference in New Issue
Block a user