Compare commits

..

62 Commits

Author SHA1 Message Date
ec545723bb feat(facebook): add challenge detection and session warming utilities
facebook-challenge.ts: session warmup, header construction, and challenge type detection. Spec document for the anti-bot challenge solver design.
2026-05-02 19:03:00 -04:00
0a246a29bf feat(facebook): add session warming and challenge detection
Facebook Marketplace no longer requires authentication cookies.
Session warming sends proper browser headers. Checkpoint and
login-wall challenges are detected and handled gracefully.
Added marketplace_product_details_page.target extraction path
for current item page structure.
2026-05-02 18:58:53 -04:00
7ab33d0b02 chore: format markdown
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-05-01 11:42:54 -04:00
d2c3c07e7d docs: price filtering schema adjustments
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 23:18:49 -04:00
0470a7bec7 docs(mcp): clarify price filters are dollars 2026-04-30 23:17:59 -04:00
89ad1c521f fix(api): parse price filters as dollars 2026-04-30 23:17:56 -04:00
5c732287c5 test: guard live listing prices 2026-04-30 22:46:48 -04:00
20fb46190a test: add live parser script 2026-04-30 22:46:07 -04:00
e791fc5478 test(facebook): add live parser suite 2026-04-30 22:44:28 -04:00
c1fa5168dc test(kijiji): add live parser suite 2026-04-30 22:43:52 -04:00
ec2a26cedf test(ebay): add live parser suite 2026-04-30 22:42:32 -04:00
5d99e984e0 docs: plan live parser tests 2026-04-30 22:41:41 -04:00
b657ea594a chore: update agents docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:29:01 -04:00
5651a194e9 chore: use biome check instead of biome ci
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:28:02 -04:00
31cc0660bc refactor(ebay): reuse fetchHtml after challenge
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:26:24 -04:00
fc7200777e style: format expected json output in protocol test 2026-04-30 22:25:47 -04:00
f68a5a8d9b feat(linter): enforce correctness on unused imports
Configures the linter to treat unused imports as an error under the
`correctness` rule category. This tightens up code quality standards by
ensuring all imported bindings are utilized.
If the import is unused, there is a high chance refactoring missed this
flow. Review in-depth root causes.
2026-04-30 22:24:06 -04:00
a6b24b318e fix(types): expose argon2 declaration globally 2026-04-30 22:16:48 -04:00
0873df7e82 chore: merge code-smell-cleanup 2026-04-30 21:08:34 -04:00
24e0a8266e Revert "test: preload core fetch guard"
This reverts commit 28b3267b7d.
2026-04-30 20:58:06 -04:00
db173aef1b Revert "chore: add sentinel file for bun test root"
This reverts commit d1cd028f34.
2026-04-30 20:58:06 -04:00
d1cd028f34 chore: add sentinel file for bun test root 2026-04-30 20:56:14 -04:00
28b3267b7d test: preload core fetch guard 2026-04-30 20:53:31 -04:00
c0dda57f64 test: require explicit fetch mocks 2026-04-30 20:51:13 -04:00
31866de787 refactor: clean kijiji scraper internals 2026-04-30 20:48:15 -04:00
9c4c347933 feat: ebay splashui challenge solver
argon2id pow → /challengesvc/answer → chlgref cookie
warm homepage for akamai cookies, detect 307 redirect,
solve + retry transparently in fetchEbayItems flow
2026-04-30 20:44:37 -04:00
53eafe6d4c chore: agent-browser skills path env
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 20:44:05 -04:00
84f17fbdfd chore: ebay parser fix
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 16:56:55 -04:00
3a722a2d11 chore: agent-browser vars
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 16:56:44 -04:00
f95b974c7e fix: harden shared http helper 2026-04-29 21:09:10 -04:00
f5339cadf1 style: format shared http refactor 2026-04-29 21:05:36 -04:00
5d86a4e54d fix: preserve ebay rate-limit fallback 2026-04-29 14:52:08 -04:00
82e7abc057 fix: keep shared http refactor in scope 2026-04-29 14:48:47 -04:00
6e50ebf901 refactor: share scraper http fetching 2026-04-29 13:14:20 -04:00
5ecb645ee3 docs: smell cleanup plan
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-29 13:09:38 -04:00
82e12283de docs: surface Kijiji AND-matching behavior in tool, API, and MCP responses
Kijiji zero-result queries (e.g. 'macbook air m1 apple silicon') are
confusing because the failure mode is non-obvious. Surface the root
cause everywhere the caller can see it:
- MCP tool description warns about AND-matching and gives a concrete
  before/after example
- API 404 body includes the actionable hint via emptySearchResponse(hint)
- Core scraper logs the built URL and tip on page-1 zero results
- MCP handler unwraps the API message field so the hint reaches the LLM
2026-04-29 13:06:31 -04:00
22eb65d4a2 refactor: share mcp api calls 2026-04-29 05:37:24 -04:00
abdd39d65c fix: complete ebay integer validation test coverage 2026-04-29 00:56:37 -04:00
3e4e35c9ae fix: tighten route integer parsing and test coverage 2026-04-29 00:32:23 -04:00
3ea6ee3938 fix: strictly parse route integers 2026-04-29 00:12:26 -04:00
d178f9c9cb fix: remove cookie query forwarding 2026-04-28 23:52:45 -04:00
9cbba9ba13 chore: ignore local worktrees 2026-04-28 23:08:04 -04:00
b6aaec0b65 chore: update ruler docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 22:29:12 -04:00
11dce39428 fix(core): parse Kijiji StandardListing records 2026-04-28 21:57:10 -04:00
2a5701aeb9 test: quiet and speed up test runs 2026-04-28 21:45:06 -04:00
c6c44a0914 fix(api): preserve unstable buckets 2026-04-28 21:34:47 -04:00
3fe5fdb63f fix(core): handle partial listing data 2026-04-28 21:34:45 -04:00
7966073bf8 fix(core): prefer explicit cookie source 2026-04-28 21:34:40 -04:00
df2635d92f chore: prepend typecheck command before biome ci
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 20:11:43 -04:00
ddadc7d5ae chore: add bun types to global tsconfig
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 20:07:05 -04:00
d77a006ded chore: ignore .turbo cache dirs 2026-04-28 19:56:39 -04:00
56b2198df1 chore: fix turbo build outputs path to match actual dist location 2026-04-28 19:56:29 -04:00
63716272c5 chore: slim per-package tsconfigs to extend root 2026-04-28 19:55:59 -04:00
1d21c66945 chore: use exports field and catalog refs in all packages 2026-04-28 19:55:37 -04:00
f2f78225f3 chore: add workspace catalog and turbo to root package.json 2026-04-28 19:54:46 -04:00
43d15fce5f chore: add shared root tsconfig.json 2026-04-28 19:53:58 -04:00
fef2f1968a chore: add bunfig.toml and turbo.json 2026-04-28 19:53:47 -04:00
01081f6b2e docs: add opencode monorepo config adoption implementation plan 2026-04-28 19:52:28 -04:00
d10d5305a3 docs: add opencode monorepo config adoption design spec 2026-04-28 19:50:51 -04:00
bf393eacae chore: setup typecheck scripts for each package
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 19:37:18 -04:00
79bb249603 chore: replace any cast by asserting tool schema property types
Tightens the type assertion for the `unstableFilter` schema property in tests to ensure correct structural checking of its `type` and `description` fields.
2026-04-28 19:24:39 -04:00
957e0f137b chore: biome lint and formatting
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 19:21:16 -04:00
73 changed files with 6459 additions and 1601 deletions

4
.envrc
View File

@@ -1,4 +1,8 @@
export DIRENV_WARN_TIMEOUT=20s
export AGENT_BROWSER_EXECUTABLE_PATH=/run/current-system/sw/bin/google-chrome-unstable
export AGENT_BROWSER_ENGINE=chrome
export AGENT_BROWSER_HEADED=0
export AGENT_BROWSER_SKILLS_DIR=.claude/skills
export OPENCODE_CONFIG_CONTENT="{\"plugin\":[\"superpowers@git+https://github.com/obra/superpowers.git\"]}"
eval "$(devenv direnvrc)"

2
.gitignore vendored
View File

@@ -33,6 +33,8 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
.eslintcache
.cache
*.tsbuildinfo
.turbo
.worktrees/
# IntelliJ based IDEs
.idea

View File

@@ -1,52 +1,9 @@
## Bun Guidelines
## Bun Guide
**CRITICAL**: Do not assume you know full Bun APIs. For **ANY** Bun API you use, confirm them by using `bun-docs` MCP tools.
Default to using Bun instead of Node.js.
- Use `bun <file>` instead of `node <file>` or `ts-node <file>`
- Use `bun test` instead of `jest` or `vitest`
- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild`
- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install`
- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>`
- Use `bunx <package> <command>` instead of `npx <package> <command>`
- Bun automatically loads .env, so don't use dotenv.
### APIs
- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`.
- `bun:sqlite` for SQLite. Don't use `better-sqlite3`.
- `Bun.redis` for Redis. Don't use `ioredis`.
- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`.
- `WebSocket` is built-in. Don't use `ws`.
- Prefer `Bun.file` over `node:fs`'s readFile/writeFile
- Bun.$`ls` instead of execa.
### Testing
#### Quick Start
- Run tests: `bun test`
- Write tests in `tests/` folder
#### Test Structure
- Use `describe` blocks to group related tests
- Use `test` for individual test cases
- Use `beforeEach`/`afterEach` for setup/teardown
#### Assertions
- Import: `import { test, expect, describe, beforeEach, afterEach, mock } from "bun:test";`
- Common: `expect(value).toBe(expected)`, `expect(fn).rejects.toThrow()`
- Async: `await expect(asyncFn()).resolves.toBe(expected)`
#### Mocking
- Mock functions: `mock(fn)`
- Mock globals: `global.fetch = mock(...)`
- Restore mocks in `afterEach` or `finally`
#### Best Practices
- Mock external APIs (fetch, file I/O)
- Test error cases and edge conditions
- Use descriptive test names
- Clean up resources in `afterEach`
For more information, read the Bun API docs in `node_modules/bun-types/docs/**.mdx`.
- Package manager/runtime/test runner is Bun `1.3.13`.
- Use `bun install`, `bun run <script>`, `bun test`, and `bun build`; do not add npm/yarn/pnpm scripts.
- Prefer Bun-native runtime APIs already used in repo: `Bun.serve`, built-in `fetch`, Web APIs, and `bun:test`.
- Keep servers framework-free. Do not introduce Express/Koa/Fastify for the adapters.
- Bun auto-loads `.env`; do not add `dotenv`.
- For tests, import from `bun:test` and restore mocked globals/env in `afterEach` or `finally`.
- Root `bun test` is misleading because `bunfig.toml` sets a dummy root. Run package test paths explicitly.

View File

@@ -2,37 +2,47 @@
## Repo Shape
- Bun workspace monorepo.
- `packages/core`: scraper logic, parsing, shared cookie/http/format helpers, and the only checked-in tests.
- `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes.
- `packages/mcp-server`: MCP JSON-RPC adapter that proxies to the API server.
- `dist/`: build output. Do not edit generated files here.
- `cookies/`: local cookie examples and docs. Never commit real session cookies.
- Bun workspace monorepo with packages under `packages/*`.
- `packages/core`: scraper behavior, parsing, result types, cookie handling, HTTP helpers.
- `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes over core.
- `packages/mcp-server`: MCP/JSON-RPC adapter that proxies to the API server.
- `cookies/`: local cookie docs/examples only. Treat real cookie files as secrets.
- `dist/`, `node_modules/`, `.turbo/`, `.direnv/`, `.devenv/`: generated/vendor/cache. Do not edit.
## Commands
- Install: `bun install`
- Lint/format check: `bun run ci`
- Build everything: `bun run build`
- Run tests: `bun test`
- Lint/format/typecheck: `bun run ci`
- Build all packages: `bun run build`
- Build bundled runtime output: `bun run build:all`
- Run tests: `bun test packages/core/test packages/api-server/test packages/mcp-server/test`
- API dev server: `bun run --cwd packages/api-server dev`
- MCP dev server: `bun run --cwd packages/mcp-server dev`
## Repo Conventions
## Boundaries
- Keep marketplace scraping behavior in `packages/core`. `api-server` and `mcp-server` stay thin adapters.
- Preserve cookie precedence everywhere: request parameter > environment variable > cookie file.
- Shared public surface for scraper code is `packages/core/src/index.ts`. Update exports deliberately.
- Tests should stay deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints.
- Use Bun and Bun-native APIs in this repo. Do not introduce Node-specific tooling unless already required.
- Biome and strict TypeScript are part of the contract. Fix code to satisfy them; do not relax config.
- Marketplace behavior belongs in `packages/core`, not adapter packages.
- HTTP route code should parse request input, call core, and map status/errors.
- MCP code should define tools, validate JSON-RPC flow, and map tool args to API URLs.
- Keep API query params and MCP tool args in sync.
- Shared public surface for scraper code is `packages/core/src/index.ts`; update exports deliberately.
## Invariants
- Cookie precedence in core helpers: explicit/request cookie string before environment variable.
- Tests must be deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints.
- Use Bun and Bun-native APIs. Do not add Node-specific tooling unless already required.
- Biome and strict TypeScript are contract. Fix code; do not relax config.
## Verification
- Core changes: `bun test && bun run ci`
- Cross-package contract changes: `bun test && bun run ci && bun run build`
- Adapter-only changes: run the relevant package build plus `bun run ci`
- Core changes: `bun test packages/core/test && bun run ci`
- Adapter-only changes: relevant package build plus `bun run ci`
- Cross-package contract changes: `bun test packages/core/test packages/api-server/test packages/mcp-server/test && bun run ci && bun run build`
## Gotchas
- The root `build` script emits separate bundles to `dist/api` and `dist/mcp`, then `scripts/start.sh` launches both.
- `bunfig.toml` points test root at `./do-not-run-tests-from-root`; pass package test paths explicitly.
- Root `build` cleans `dist`, then Turbo emits bundles for API and MCP.
- `scripts/start.sh` launches `dist/api/index.js` and `dist/mcp/index.js`.
- Package `tsconfig.json` files override root `include`; shared ambient declarations under root `types/` must be included from each package that typechecks cross-package source.

View File

@@ -1,44 +1,56 @@
# Facebook Marketplace API Reverse Engineering
## Overview
This document tracks findings from reverse-engineering Facebook Marketplace APIs for listing details.
This document tracks findings from reverse-engineering Facebook Marketplace APIs for
listing details.
## Current Implementation Status
- Search functionality: Implemented in `src/facebook.ts`
- Individual listing details: Not yet implemented
## Findings
### Step 1: Initial Setup
- Using Chrome DevTools to inspect Facebook Marketplace
- Need to authenticate with Facebook account to access marketplace data
- Cookies required for full access
- Current status: Successfully logged in and accessed marketplace data
### Step 2: Individual Listing Details Analysis - COMPLETED
- **Data Location**: Embedded in HTML script tags within `require` array structure
- **Path**: `require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Path**:
`require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Authentication**: Required for full data access
- **Current Status**: Successfully reverse-engineered the API structure and data extraction method
- **Current Status**: Successfully reverse-engineered the API structure and data
extraction method
### API Endpoints Discovered
#### Search Endpoint
- URL: `https://www.facebook.com/marketplace/{location}/search`
- Parameters: `query`, `sortBy`, `exact`
- Data embedded in HTML script tags with `require` structure
- Authentication: Required (cookies)
#### Listing Details Endpoint
- **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/`
- **Data Source**: Server-side rendered HTML with embedded JSON data in script tags
- **Data Structure**: Relay/GraphQL style data structure under `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Extraction Method**: Parse JSON from script tags containing marketplace data, navigate to the target object
- **Data Structure**: Relay/GraphQL style data structure under
`require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Extraction Method**: Parse JSON from script tags containing marketplace data,
navigate to the target object
- **Authentication**: Required (cookies)
### Listing Data Structure Discovered (Current - 2026)
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem` object with the following key properties:
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem`
object with the following key properties:
```typescript
interface FacebookMarketplaceItem {
@@ -151,6 +163,7 @@ interface FacebookMarketplaceItem {
```
### Example Data Extracted (Current Structure)
```json
{
"__typename": "GroupCommerceProductItem",
@@ -228,36 +241,47 @@ interface FacebookMarketplaceItem {
## Data Extraction Method
### Current Method (2026)
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data by searching for key fields like `marketplace_listing_title`, `redacted_description`, and `formatted_price`.
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML
response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data
by searching for key fields like `marketplace_listing_title`, `redacted_description`,
and `formatted_price`.
2. **Parse JSON Structure**: The data is nested within a `require` array structure:
```
require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target
```
3. **Navigate to Target Object**: The actual listing data is a `GroupCommerceProductItem` object containing comprehensive information about the listing, seller, and vehicle details.
3. **Navigate to Target Object**: The actual listing data is a
`GroupCommerceProductItem` object containing comprehensive information about the
listing, seller, and vehicle details.
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust extraction should search for the target object recursively within the parsed JSON.
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust
extraction should search for the target object recursively within the parsed JSON.
### Authentication Requirements
- Valid Facebook session cookies are required
- User must be logged in to Facebook
- Marketplace access may be location-restricted
## Tools Used
- Chrome DevTools Protocol
- Network monitoring
- HTML/script parsing
- JSON structure analysis
## Implementation Status
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Identified current data structure and extraction method (2026)
- ✅ Documented comprehensive GroupCommerceProductItem interface
- ✅ Implemented `extractFacebookItemData()` function with script parsing logic
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to ListingDetails
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to
ListingDetails
- ✅ Implemented `fetchFacebookItem()` function with authentication and error handling
- ✅ Updated TypeScript interfaces to match current API structure
- ✅ Added robust extraction with fallback methods for changing API paths
@@ -266,12 +290,15 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
### Core Functions Implemented
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from HTML-embedded JSON in script tags
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from
HTML-embedded JSON in script tags
- Searches for scripts containing marketplace listing data
- Uses primary path: `require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
- Uses primary path:
`require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
- Falls back to recursive search for GroupCommerceProductItem objects
2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to unified ListingDetails format
2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to
unified ListingDetails format
- Handles pricing (FREE listings, CAD currency)
- Extracts seller information, location, and status
- Supports vehicle-specific metadata
@@ -284,25 +311,31 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
- Returns parsed ListingDetails or null on failure
### Authentication Requirements
- Facebook session cookies required in `./cookies/facebook.json` or provided as parameter
- Facebook session cookies required in `./cookies/facebook.json` or provided as
parameter
- Cookies must include valid authentication tokens for marketplace access
- Handles cookie expiration and domain validation
## Current Implementation Status - 2026 Verification
### Step 3: API Verification and Current Structure Analysis (January 2026)
- **Verification Date**: January 22, 2026
- **Status**: Successfully verified current Facebook Marketplace API structure
- **Data Source**: Embedded JSON in HTML script tags (server-side rendered)
- **Extraction Path**: `require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Extraction Path**:
`require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
#### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon)
- **Listing ID**: 1226468515995685
- **Title**: "2006 Hyundai Tiburon"
- **Title**: "2006 Hyundai Tiburon"
- **Price**: CA$3,000 (formatted_price.text)
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount": "3000.00"}
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount":
"3000.00"}
- **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812)
- **Description**: "As is" (redacted_description.text)
- **Description**: "As is" (redacted_description.text)
- **Vehicle Details**:
- Make: Hyundai
- Model: Tiburon
@@ -323,41 +356,54 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
- **Messaging**: Enabled
#### Current API Characteristics
- **Authentication**: Still requires valid Facebook session cookies
- **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON
- **Structure Stability**: Primary extraction path remains functional
- **Additional Features**: Includes marketplace ratings, seller verification badges, cross-posting info
- **Additional Features**: Includes marketplace ratings, seller verification badges,
cross-posting info
### API Changes Observed Since 2024 Documentation
- **Minimal Changes**: Core data structure largely unchanged
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile information
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile
information
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure
- **Security Features**: Additional integrity checks and reporting mechanisms
### Multi-Category Testing Results (January 2026)
Successfully tested extraction across different listing categories:
#### 1. Vehicle Listings (Automotive)
- **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685)
- **Status**: ✅ Fully functional
- **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data, vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color, vehicle_fuel_type
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data,
vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color,
vehicle_fuel_type
#### 2. Electronics Listings
- **Example**: Nintendo Switch (ID: 3903865769914262)
- **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used - like new), seller (Yitao Hou)
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used -
like new), seller (Yitao Hou)
- **Category**: Electronics (category_id: 479353692612078)
- **Notes**: Standard GroupCommerceProductItem structure applies
#### 3. Home Goods/Furniture Listings
- **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709)
- **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used - like new), seller (Rohit Rehan)
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used -
like new), seller (Rohit Rehan)
- **Category**: Home Goods (category_id: 1569171756675761)
- **Notes**: Includes detailed description and delivery options
#### Testing Summary
- **Extraction Method**: Consistent across all categories
- **Data Structure**: GroupCommerceProductItem interface works for all listing types
- **Authentication**: Required for all categories
@@ -365,18 +411,22 @@ Successfully tested extraction across different listing categories:
- **Edge Cases**: All tested listings were active/in-person pickup
## Implementation Status - COMPLETED (January 2026)
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Verified current API structure and extraction method (January 2026)
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home goods)
- ✅ Implemented comprehensive error handling for sold/removed listings and authentication failures
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home
goods)
- ✅ Implemented comprehensive error handling for sold/removed listings and
authentication failures
- ✅ Enhanced rate limiting and retry logic (already robust)
- ✅ Added monitoring and metrics for API stability detection
- ✅ Updated all scraper functions to use verified extraction methods
- ✅ Documented comprehensive GroupCommerceProductItem interface with real examples
## Next Steps (Future Maintenance)
1. Monitor extraction success rates for API change detection
2. Update extraction paths if Facebook changes their API structure
3. Add support for additional marketplace features as they become available
4. Implement caching mechanisms for improved performance
5. Add support for marketplace messaging and negotiation features
5. Add support for marketplace messaging and negotiation features

145
KIJIJI.md
View File

@@ -1,9 +1,13 @@
# Kijiji API Findings
## Overview
Kijiji is a Canadian classifieds marketplace that uses a modern web application built with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with client-side state management.
Kijiji is a Canadian classifieds marketplace that uses a modern web application built
with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with
client-side state management.
## Initial Page Load (Homepage)
- **URL**: https://www.kijiji.ca/
- **Architecture**: Server-side rendered React application with Next.js
- **Data Sources**:
@@ -12,18 +16,27 @@ Kijiji is a Canadian classifieds marketplace that uses a modern web application
- No initial API calls for listings - data appears to be embedded in HTML
## Search Results Page
- **URL Pattern**: `https://www.kijiji.ca/b-[location]/[keywords]/k0l0`
- **Example**: `https://www.kijiji.ca/b-canada/iphone/k0l0`
- **Technology Stack**: Next.js with Apollo GraphQL client
- **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized GraphQL cache
- **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized
GraphQL cache
### GraphQL Data Structure
#### Data Location
Search results data is embedded in the Next.js page props under `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server and sent to the client. Each page (including pagination) has its own pre-rendered data.
Search results data is embedded in the Next.js page props under
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server
and sent to the client.
Each page (including pagination) has its own pre-rendered data.
#### Search Results Container
The search results are stored directly in the Apollo ROOT_QUERY with keys following the pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination parameters.
The search results are stored directly in the Apollo ROOT_QUERY with keys following the
pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination
parameters.
```json
{
@@ -33,17 +46,20 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
```
#### Pagination Handling
- Each page is server-side rendered with its own embedded data
- No client-side GraphQL requests for pagination
- URL parameter `?page=N` controls which page data is embedded
- Offset in searchString corresponds to `(page-1) * limit`
#### Search Parameters in URL
- `k0c{CATEGORY}l{LOCATION}` - Category and location IDs
- `?page=N` - Page number (1-based)
- Data contains `offset` and `limit` for API-style pagination
#### Individual Listing Structure
```json
{
"id": "1732061412",
@@ -90,6 +106,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
```
### URL Parameters
- `sort=MATCH` - Sort by relevance
- `order=DESC` - Descending order
- `type=OFFER` - Show offerings (not wanted ads)
@@ -102,6 +119,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
- `eaTopAdPosition=1` - ?
### Image API
- **Endpoint**: `https://media.kijiji.ca/api/v1/`
- **Pattern**: `/ca-prod-fsbo-ads/images/{uuid}?rule=kijijica-{size}-jpg`
- **Sizes**: 200, 300, 400, 500 pixels
@@ -109,10 +127,12 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
### Categories and Locations
#### Category Structure
Categories are hierarchical with parent-child relationships. The main categories under "Buy & Sell" include:
Categories are hierarchical with parent-child relationships.
The main categories under “Buy & Sell” include:
| ID | Name | Total Results (iPhone search) |
|----|------|------------------------------|
| --- | --- | --- |
| 10 | Buy & Sell | 19956 |
| 12 | Arts & Collectibles | 149 |
| 767 | Audio | 481 |
@@ -145,10 +165,11 @@ Categories are hierarchical with parent-child relationships. The main categories
| 26 | Other | 286 |
#### Location Structure
Locations are also hierarchical, with provinces/states under the main "Canada" location:
Locations are also hierarchical, with provinces/states under the main “Canada” location:
| ID | Name | Total Results (iPhone search) |
|----|------|------------------------------|
| --- | --- | --- |
| 0 | Canada | - |
| 9001 | Québec | 2516 |
| 9002 | Nova Scotia | 875 |
@@ -163,16 +184,20 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
| 9011 | Prince Edward Island | 31 |
#### URL Patterns
- Categories: `/b-{category-slug}/canada/{keywords}/k0c{CATEGORY_ID}l0`
- Locations: `/b-buy-sell/{location-slug}/iphone/k0c10l{LOCATION_ID}`
- Combined: `/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}`
- Combined:
`/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}`
### Pagination
- Uses offset-based pagination
- 40 results per page
- Total count provided in pagination metadata
## Authentication & User Management
- **Authentication System**: OAuth2-based using CIS (Customer Identity Service)
- **Identity Provider**: `id.kijiji.ca`
- **OAuth2 Flow**:
@@ -184,24 +209,30 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
- **User Features**: Saved searches, messaging, flagging require authentication
## Posting API
- **Posting Flow**: Requires authentication, redirects to login if not authenticated
- **Posting URL**: `https://www.kijiji.ca/p-post-ad.html`
- **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated users
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in anonymous browsing)
- **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated
users
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in
anonymous browsing)
## GraphQL API Endpoint
- **URL**: `https://www.kijiji.ca/anvil/api`
- **Method**: POST
- **Content-Type**: application/json
- **Headers**:
- `apollo-require-preflight: true`
- Standard CORS headers
- **Authentication**: No authentication required for basic queries (uses cookies for session tracking)
- **Authentication**: No authentication required for basic queries (uses cookies for
session tracking)
- **Technology**: Apollo GraphQL server
### Sample GraphQL Queries Discovered
#### Get Search Categories
```graphql
query getSearchCategories($locale: String!) {
searchCategories {
@@ -218,6 +249,7 @@ Variables: `{"locale": "en-CA"}`
Response includes hierarchical category structure with IDs and localized names.
#### Get Geocode from IP (fails for current IP)
```graphql
query GetGeocodeReverseFromIp {
geocodeReverseFromIp {
@@ -229,9 +261,11 @@ query GetGeocodeReverseFromIp {
}
```
This query fails for the current IP address, suggesting geolocation-based features may not work or require different IP ranges.
This query fails for the current IP address, suggesting geolocation-based features may
not work or require different IP ranges.
#### Get Category Path
```graphql
query GetCategoryPath($categoryId: Int!, $locale: String, $locationId: Int) {
category(id: $categoryId) {
@@ -256,25 +290,33 @@ Variables: `{"categoryId": 10, "locationId": 0, "locale": "en-CA"}`
## Latest Findings (2026-01-21)
### Client-Side GraphQL Queries Observed
- **getSearchCategories**: Retrieves category hierarchy for search filters
- **GetGeocodeReverseFromIp**: Attempts to geolocate user (fails for current IP)
### GraphQL Schema Insights
Testing direct GraphQL queries revealed:
- Field "searchResults" does not exist on Query type
- Suggested alternatives: "searchResultsPage" or "searchUrl"
- This suggests the search functionality may use different GraphQL operations than direct queries
The embedded Apollo state approach appears to be the primary method for accessing search data, with GraphQL used for auxiliary operations like categories and geolocation.
Testing direct GraphQL queries revealed:
- Field “searchResults” does not exist on Query type
- Suggested alternatives: “searchResultsPage” or “searchUrl”
- This suggests the search functionality may use different GraphQL operations than
direct queries
The embedded Apollo state approach appears to be the primary method for accessing search
data, with GraphQL used for auxiliary operations like categories and geolocation.
### Server-Side Rendering Architecture
Search results are fully server-side rendered with data embedded in HTML. Each page (including pagination) contains its own pre-rendered data. No client-side GraphQL requests are made for:
Search results are fully server-side rendered with data embedded in HTML. Each page
(including pagination) contains its own pre-rendered data.
No client-side GraphQL requests are made for:
- Initial search results
- Pagination navigation
- Search result data
### Network Analysis Findings
- GraphQL endpoint: `https://www.kijiji.ca/anvil/api`
- Method: POST
- Content-Type: application/json
@@ -282,7 +324,10 @@ Search results are fully server-side rendered with data embedded in HTML. Each p
- Cookies required for session tracking
### Embedded Data Structure
Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object. The data includes:
Search results data is embedded in the HTML within Next.js
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object.
The data includes:
- Individual ad listings with complete metadata
- Pagination information
@@ -290,20 +335,24 @@ Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.
- Category/location hierarchies
### Current Scraper Implementation
The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo state:
- Uses `extractApolloState()` to parse `__NEXT_DATA__` from HTML
- Filters Apollo keys containing "Listing" to find ad data
- Filters Apollo keys containing "Listing" to find ad data
- Extracts `url`, `title`, and other metadata from each listing
- Successfully scrapes listings without needing API authentication
### Authentication Status
- **Search functionality**: No authentication required - all search and listing data accessible anonymously
- **Search functionality**: No authentication required - all search and listing data
accessible anonymously
- **Posting functionality**: Requires authentication (redirects to login)
- **User features**: Saved searches, messaging require authentication
- **Rate limiting**: May apply but not observed in anonymous browsing
### Pagination Implementation
- Each page is a separate server-rendered route
- URL pattern: `/b-{location}/{keywords}/page-{number}/k0{category}l{location_id}`
- No client-side pagination API calls
@@ -313,20 +362,24 @@ The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo
## URL Pattern Analysis
### Search URL Structure
`https://www.kijiji.ca/b-{category_slug}/{location_slug}/{keywords}/k0c{category_id}l{location_id}`
#### Examples Observed:
- All categories, Canada: `/b-canada/iphone/k0l0` (c0 = All Categories, l0 = Canada)
- Cell phones category: `/b-cell-phones/canada/iphone/k0c132l0` (c132 = Cell Phones)
- With pagination: `/b-canada/iphone/page-2/k0l0`
#### URL Components:
- `c{CATEGORY_ID}`: Category ID (0 = All Categories, 132 = Cell Phones, etc.)
- `l{LOCATION_ID}`: Location ID (0 = Canada, 1700272 = GTA, etc.)
- `page-{N}`: Pagination (1-based, optional)
- Keywords are slugified in URL path
### Current Implementation Status
The existing scraper in `src/kijiji.ts` successfully implements the approach:
- Parses embedded Apollo state from HTML responses
- Handles rate limiting and retries
@@ -336,14 +389,22 @@ The existing scraper in `src/kijiji.ts` successfully implements the approach:
## Listing Details Page
### Overview
Similar to search results, listing details pages use server-side rendering with embedded Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing data - all information is pre-rendered on the server.
Similar to search results, listing details pages use server-side rendering with embedded
Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing
data - all information is pre-rendered on the server.
### Data Architecture
- **Server-Side Rendering**: Each listing page is fully server-rendered with data embedded in HTML
- **Embedded Apollo State**: Listing data is stored in `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings, user profiles) fetched via GraphQL API
- **Server-Side Rendering**: Each listing page is fully server-rendered with data
embedded in HTML
- **Embedded Apollo State**: Listing data is stored in
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings,
user profiles) fetched via GraphQL API
### Listing Data Structure
The main listing data follows the same pattern as search results:
```json
@@ -385,40 +446,50 @@ The main listing data follows the same pattern as search results:
```
### Client-Side GraphQL Queries
When loading a listing details page, the following GraphQL queries are executed:
#### 1. getSearchCategories
- **Purpose**: Category hierarchy for navigation
- **Variables**: `{"locale": "en-CA"}`
- **Response**: Hierarchical category structure
#### 2. getCampaignsForVip
- **Purpose**: Advertisement targeting data
- **Variables**: `{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}`
- **Variables**:
`{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}`
- **Response**: Campaign/ads data (usually null)
#### 3. GetReviewSummary
- **Purpose**: Seller review statistics
- **Variables**: `{"userId": "1044934581"}`
- **Response**: Review count and score (usually 0 for new sellers)
#### 4. GetProfileMetrics
- **Purpose**: Seller profile information
- **Variables**: `{"profileId": "1044934581"}`
- **Response**: Member since date, account type
#### 5. GetListingsSimilar
- **Purpose**: Similar listings for cross-selling
- **Variables**: `{"listingId": "1705585530", "limit": 10, "isExternalId": false}`
- **Response**: Array of similar listings with basic metadata
#### 6. GetGeocodeReverseFromIp
- **Purpose**: Geolocation-based features
- **Variables**: `{}`
- **Response**: Fails with 404 for most IPs
### Implementation Status
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing details from embedded Apollo state:
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing
details from embedded Apollo state:
- ✅ Extracts title, description, price, location
- ✅ Handles contact-based pricing ("Please Contact")
@@ -427,22 +498,30 @@ The existing `parseListing()` function in `src/kijiji.ts` successfully extracts
- ✅ Works without authentication or API keys
### Key Findings
1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL query for individual listing data
2. **Complete Data Available**: All listing information is embedded in the initial HTML response
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data (reviews, similar listings)
1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL
query for individual listing data
2. **Complete Data Available**: All listing information is embedded in the initial HTML
response
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data
(reviews, similar listings)
4. **Consistent Architecture**: Same Apollo state embedding pattern as search pages
### Current Scraper Implementation
The scraper successfully extracts listing details by:
1. Fetching the listing URL HTML
2. Parsing embedded `__NEXT_DATA__` Apollo state
3. Extracting the `Listing:{id}` object from Apollo cache
4. Mapping fields to typed `ListingDetails` interface
This approach works reliably without requiring authentication or dealing with rate limiting on individual listing fetches.
This approach works reliably without requiring authentication or dealing with rate
limiting on individual listing fetches.
## Next Steps
- Explore posting/authentication APIs (requires user login)
- Investigate if GraphQL API can be used for programmatic access with proper authentication
- Investigate if GraphQL API can be used for programmatic access with proper
authentication
- Test rate limiting patterns and optimal scraping strategies
- Document additional category and location ID mappings

View File

@@ -1 +1,2 @@
# ca-marketplace-scraper

View File

@@ -15,7 +15,10 @@
"linter": {
"enabled": true,
"rules": {
"recommended": true
"recommended": true,
"correctness": {
"noUnusedImports": "error"
}
}
},
"javascript": {

View File

@@ -4,8 +4,13 @@
"workspaces": {
"": {
"name": "marketplace-scrapers-monorepo",
"dependencies": {
"@types/bun": "1.3.13",
},
"devDependencies": {
"@biomejs/biome": "2.3.11",
"@tsconfig/bun": "catalog:",
"turbo": "2.5.4",
},
},
"packages/api-server": {
@@ -13,9 +18,10 @@
"version": "1.0.0",
"dependencies": {
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:",
},
"devDependencies": {
"@types/bun": "latest",
"@types/bun": "catalog:",
},
"peerDependencies": {
"typescript": "^5",
@@ -25,14 +31,16 @@
"name": "@marketplace-scrapers/core",
"version": "1.0.0",
"dependencies": {
"@typescript/native-preview": "catalog:",
"argon2-wasm-pro": "1.1.0",
"cli-progress": "^3.12.0",
"linkedom": "^0.18.12",
"unidecode": "^1.1.0",
},
"devDependencies": {
"@types/bun": "latest",
"@types/cli-progress": "^3.11.6",
"@types/unidecode": "^1.1.0",
"@types/bun": "catalog:",
"@types/cli-progress": "catalog:",
"@types/unidecode": "catalog:",
},
"peerDependencies": {
"typescript": "^5",
@@ -43,15 +51,23 @@
"version": "1.0.0",
"dependencies": {
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:",
},
"devDependencies": {
"@types/bun": "latest",
"@types/bun": "catalog:",
},
"peerDependencies": {
"typescript": "^5",
},
},
},
"catalog": {
"@tsconfig/bun": "1.0.9",
"@types/bun": "1.3.13",
"@types/cli-progress": "3.11.6",
"@types/unidecode": "1.1.0",
"@typescript/native-preview": "7.0.0-dev.20260428.1",
},
"packages": {
"@biomejs/biome": ["@biomejs/biome@2.3.11", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.3.11", "@biomejs/cli-darwin-x64": "2.3.11", "@biomejs/cli-linux-arm64": "2.3.11", "@biomejs/cli-linux-arm64-musl": "2.3.11", "@biomejs/cli-linux-x64": "2.3.11", "@biomejs/cli-linux-x64-musl": "2.3.11", "@biomejs/cli-win32-arm64": "2.3.11", "@biomejs/cli-win32-x64": "2.3.11" }, "bin": { "biome": "bin/biome" } }, "sha512-/zt+6qazBWguPG6+eWmiELqO+9jRsMZ/DBU3lfuU2ngtIQYzymocHhKiZRyrbra4aCOoyTg/BmY+6WH5mv9xmQ=="],
@@ -77,7 +93,9 @@
"@marketplace-scrapers/mcp-server": ["@marketplace-scrapers/mcp-server@workspace:packages/mcp-server"],
"@types/bun": ["@types/bun@1.3.4", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="],
"@tsconfig/bun": ["@tsconfig/bun@1.0.9", "", {}, "sha512-4M0/Ivfwcpz325z6CwSifOBZYji3DFOEpY6zEUt0+Xi2qRhzwvmqQN9XAHJh3OVvRJuAqVTLU2abdCplvp6mwQ=="],
"@types/bun": ["@types/bun@1.3.13", "", { "dependencies": { "bun-types": "1.3.13" } }, "sha512-9fqXWk5YIHGGnUau9TEi+qdlTYDAnOj+xLCmSTwXfAIqXr2x4tytJb43E9uCvt09zJURKXwAtkoH4nLQfzeTXw=="],
"@types/cli-progress": ["@types/cli-progress@3.11.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA=="],
@@ -85,11 +103,29 @@
"@types/unidecode": ["@types/unidecode@1.1.0", "", {}, "sha512-NTIsFsTe9WRek39/8DDj7KiQ0nU33DHMrKwNHcD1rKlUvn4N0Rc4Di8q/Xavs8bsDZmBa4MMtQA8+HNgwfxC/A=="],
"@typescript/native-preview": ["@typescript/native-preview@7.0.0-dev.20260428.1", "", { "optionalDependencies": { "@typescript/native-preview-darwin-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-darwin-x64": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-arm": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-x64": "7.0.0-dev.20260428.1", "@typescript/native-preview-win32-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-win32-x64": "7.0.0-dev.20260428.1" }, "bin": { "tsgo": "bin/tsgo.js" } }, "sha512-JiM4PYWDGs57TT0mV2KArmaW7BnTkk3XRid79NdG17tfvDbRyg4hBCpKI7vARiQPtxjKrHlxyzxOGDpv5W5T7Q=="],
"@typescript/native-preview-darwin-arm64": ["@typescript/native-preview-darwin-arm64@7.0.0-dev.20260428.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Lll6WmXfgTEj1G3QBIoHlabQwUtJiyhlRgSLksa06QFL5BoA7V+Lu1waa9PtPNZbGsXLDMHodtk/bRQABKuPiw=="],
"@typescript/native-preview-darwin-x64": ["@typescript/native-preview-darwin-x64@7.0.0-dev.20260428.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-WbsBNSHlo+4sGrTxDWdmI7r8x48tCtSCuKdmK62FvVOq58UWAs6sL13Z4Rev4ohLcGHdXC5E/8AIdpLPqDYQpw=="],
"@typescript/native-preview-linux-arm": ["@typescript/native-preview-linux-arm@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "arm" }, "sha512-/d/NnZFvEJU67L5mHh+cO3gsfwNCvJ9HGtxGq1KGz1VwTabOIcwLdpTpfsAR39WXzzfh9GJHL28n6GSGZInPow=="],
"@typescript/native-preview-linux-arm64": ["@typescript/native-preview-linux-arm64@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-cgcBX/ZBMdepkamLT8g8jQdHe7DZS/s6zTZRof6mvcrnJHlMeUnKoC9UO8/c22IrUMV3n0XPh7R8FYjUP0ll+Q=="],
"@typescript/native-preview-linux-x64": ["@typescript/native-preview-linux-x64@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "x64" }, "sha512-4gJCE7wzenx1BH2Vtx2uKWUo8rFxnhGkxNEH1zxbYy/6ASwo+PnOPYmKHAzNE1C3yB5lzw71/vR5p5zyO57Y4A=="],
"@typescript/native-preview-win32-arm64": ["@typescript/native-preview-win32-arm64@7.0.0-dev.20260428.1", "", { "os": "win32", "cpu": "arm64" }, "sha512-yn6Rzbn62L4QTWrp0QgG8al6l/VG7PCPRdbE0vuGDSlKhInlC+Flo4QSc1qA8KHTbpHgl+nEsq9DymiitI4G4g=="],
"@typescript/native-preview-win32-x64": ["@typescript/native-preview-win32-x64@7.0.0-dev.20260428.1", "", { "os": "win32", "cpu": "x64" }, "sha512-T9z13mcMowXmwGjprA2FIR2EEdYZxgqH8+qk7dFZVBlo5vfk41AN/qJfAdN7IsAhEb640MJ8cMN/aiczweZKmA=="],
"ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
"argon2-wasm-pro": ["argon2-wasm-pro@1.1.0", "", {}, "sha512-ApZAKEgbWQILckY+IdjrETB0oTC8L9YHT3JVQhdun77tilExkXNyM/T/qbkvX+Uv68+IQmVwewQwg6yJnSwVxQ=="],
"boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="],
"bun-types": ["bun-types@1.3.4", "", { "dependencies": { "@types/node": "*" } }, "sha512-5ua817+BZPZOlNaRgGBpZJOSAQ9RQ17pkwPD0yR7CfJg+r8DgIILByFifDTa+IPDDxzf5VNhtNlcKqFzDgJvlQ=="],
"bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="],
"cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="],
@@ -125,6 +161,20 @@
"strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
"turbo": ["turbo@2.5.4", "", { "optionalDependencies": { "turbo-darwin-64": "2.5.4", "turbo-darwin-arm64": "2.5.4", "turbo-linux-64": "2.5.4", "turbo-linux-arm64": "2.5.4", "turbo-windows-64": "2.5.4", "turbo-windows-arm64": "2.5.4" }, "bin": { "turbo": "bin/turbo" } }, "sha512-kc8ZibdRcuWUG1pbYSBFWqmIjynlD8Lp7IB6U3vIzvOv9VG+6Sp8bzyeBWE3Oi8XV5KsQrznyRTBPvrf99E4mA=="],
"turbo-darwin-64": ["turbo-darwin-64@2.5.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-ah6YnH2dErojhFooxEzmvsoZQTMImaruZhFPfMKPBq8sb+hALRdvBNLqfc8NWlZq576FkfRZ/MSi4SHvVFT9PQ=="],
"turbo-darwin-arm64": ["turbo-darwin-arm64@2.5.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-2+Nx6LAyuXw2MdXb7pxqle3MYignLvS7OwtsP9SgtSBaMlnNlxl9BovzqdYAgkUW3AsYiQMJ/wBRb7d+xemM5A=="],
"turbo-linux-64": ["turbo-linux-64@2.5.4", "", { "os": "linux", "cpu": "x64" }, "sha512-5May2kjWbc8w4XxswGAl74GZ5eM4Gr6IiroqdLhXeXyfvWEdm2mFYCSWOzz0/z5cAgqyGidF1jt1qzUR8hTmOA=="],
"turbo-linux-arm64": ["turbo-linux-arm64@2.5.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-/2yqFaS3TbfxV3P5yG2JUI79P7OUQKOUvAnx4MV9Bdz6jqHsHwc9WZPpO4QseQm+NvmgY6ICORnoVPODxGUiJg=="],
"turbo-windows-64": ["turbo-windows-64@2.5.4", "", { "os": "win32", "cpu": "x64" }, "sha512-EQUO4SmaCDhO6zYohxIjJpOKRN3wlfU7jMAj3CgcyTPvQR/UFLEKAYHqJOnJtymbQmiiM/ihX6c6W6Uq0yC7mA=="],
"turbo-windows-arm64": ["turbo-windows-arm64@2.5.4", "", { "os": "win32", "cpu": "arm64" }, "sha512-oQ8RrK1VS8lrxkLriotFq+PiF7iiGgkZtfLKF4DDKsmdbPo0O9R2mQxm7jHLuXraRCuIQDWMIw6dpcr7Iykf4A=="],
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
"uhyphen": ["uhyphen@0.2.0", "", {}, "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA=="],

5
bunfig.toml Normal file
View File

@@ -0,0 +1,5 @@
[install]
exact = true
[test]
root = "./do-not-run-tests-from-root"

View File

@@ -1,55 +1,18 @@
# Marketplace Cookies Setup
# cookies
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
## Scope
## Cookie Configuration
- This directory is for cookie setup docs and local examples only.
- Treat any real browser cookie export as a secret, even if already present locally.
Authenticated scrapers now read cookies only from environment variables:
1. `FACEBOOK_COOKIE`
2. `EBAY_COOKIE`
## Runtime Sources
---
- Authenticated scrapers read raw `Cookie` header strings from environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`.
- Some core entrypoints also accept explicit cookie strings from request/options; explicit input takes precedence over environment values.
## Facebook Marketplace
## Safety Rules
### Required Cookies
- `c_user`: Your Facebook user ID
- `xs`: Facebook session token
- `fr`: Facebook request token
- `datr`: Data attribution token
- `sb`: Session browser token
### Setup
```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
```
Use the raw `Cookie` header string copied from an authenticated browser session.
---
## eBay
eBay has aggressive bot detection that blocks requests without valid session cookies.
### Setup
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
Use the raw `Cookie` header string copied from an authenticated browser session.
---
## Important Notes
- Cookies must be from active browser sessions
- Cookies expire and need periodic refresh
- **NEVER** commit real cookies to version control
- Platforms may still block automated scraping despite valid cookies
## Security
Do not commit real cookie values or store them in tracked files.
- Never commit real cookie values, browser exports, or session files.
- Use placeholder values in docs: `c_user=123; xs=token; fr=request`.
- Do not paste cookie values into logs, tests, fixtures, or generated agent docs.
- If editing this directory, verify diffs do not contain real `c_user`, `xs`, `fr`, `datr`, `sb`, `s`, `ds2`, or `ebay` values.

View File

@@ -0,0 +1,511 @@
# opencode Monorepo Config Adoption Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Adopt opencode-style monorepo config: Turbo task orchestration, workspace dep
catalog, shared root tsconfig, bunfig.toml, and `exports` field in all packages.
**Architecture:** Pure config changes across 10 files — no source code touched.
Root config files are added/updated first, then per-package files updated to reference
them. Changes are independent within each task and safe to commit atomically.
**Tech Stack:** Bun workspaces, Turbo 2.x, @tsconfig/bun, TypeScript (tsgo /
@typescript/native-preview)
* * *
## File Map
| File | Action | Responsible for |
| --- | --- | --- |
| `package.json` | Modify | Workspace catalog, turbo devDep, @tsconfig/bun devDep, updated scripts |
| `turbo.json` | Create | Task graph: typecheck, build, test |
| `tsconfig.json` | Create | Shared TS compiler options for all packages |
| `bunfig.toml` | Create | Exact installs, root test guard |
| `packages/core/package.json` | Modify | exports field, catalog refs, script rename |
| `packages/api-server/package.json` | Modify | exports field, catalog refs, script rename |
| `packages/mcp-server/package.json` | Modify | exports field, catalog refs, script rename |
| `packages/core/tsconfig.json` | Modify | Slim — extends root, paths only |
| `packages/api-server/tsconfig.json` | Modify | Slim — extends root, paths only |
| `packages/mcp-server/tsconfig.json` | Modify | Slim — extends root, paths only |
* * *
### Task 1: Add `bunfig.toml` and `turbo.json`
Two new root config files with no dependencies on other tasks.
**Files:**
- Create: `bunfig.toml`
- Create: `turbo.json`
- [ ] **Step 1: Create `bunfig.toml`**
Write this file at repo root (`/path/to/ca-marketplace-scraper/bunfig.toml`):
```toml
[install]
exact = true
[test]
root = "./do-not-run-tests-from-root"
```
- [ ] **Step 2: Create `turbo.json`**
Write this file at repo root:
```json
{
"$schema": "https://turbo.build/schema.json",
"tasks": {
"typecheck": {},
"build": {
"dependsOn": ["^build"],
"outputs": ["dist/**"]
},
"test": {
"dependsOn": ["^build"],
"outputs": []
}
}
}
```
- [ ] **Step 3: Verify files exist**
Run:
```bash
ls bunfig.toml turbo.json
```
Expected: both files listed, no errors.
- [ ] **Step 4: Commit**
```bash
git add bunfig.toml turbo.json
git commit -m "chore: add bunfig.toml and turbo.json"
```
* * *
### Task 2: Create root `tsconfig.json`
Shared base tsconfig all packages will extend.
Extracts the common options currently duplicated in all 3 per-package tsconfigs.
**Files:**
- Create: `tsconfig.json`
- [ ] **Step 1: Create root `tsconfig.json`**
Write this file at repo root:
```json
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["dom", "ESNext"],
"target": "ESNext",
"module": "preserve",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
}
}
```
- [ ] **Step 2: Commit**
```bash
git add tsconfig.json
git commit -m "chore: add shared root tsconfig.json"
```
* * *
### Task 3: Update root `package.json`
Add workspace catalog, `turbo` + `@tsconfig/bun` devDependencies, and update scripts to
use `turbo run`.
**Files:**
- Modify: `package.json`
- [ ] **Step 1: Replace root `package.json`**
Write this complete file:
```json
{
"name": "marketplace-scrapers-monorepo",
"version": "1.0.0",
"private": true,
"type": "module",
"packageManager": "bun@1.3.13",
"scripts": {
"typecheck": "turbo run typecheck",
"build": "bun run clean && turbo run build",
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp",
"ci": "biome ci",
"clean": "rm -rf dist",
"start": "./scripts/start.sh"
},
"workspaces": {
"packages": [
"packages/*"
],
"catalog": {
"@tsconfig/bun": "1.0.9",
"@typescript/native-preview": "7.0.0-dev.20260428.1",
"@types/bun": "1.2.18",
"@types/cli-progress": "3.11.6",
"@types/unidecode": "1.1.0"
}
},
"devDependencies": {
"@biomejs/biome": "2.3.11",
"@tsconfig/bun": "catalog:",
"turbo": "2.5.4"
}
}
```
> **Note on catalog versions:** The catalog pins exact versions.
> The values above are taken from the current package installs.
> If `@types/bun` was `latest`, check `node_modules/@types/bun/package.json` for the
> actual installed version and use that.
> Same for `@typescript/native-preview`.
- [ ] **Step 2: Check actual installed versions**
Run:
```bash
cat node_modules/@types/bun/package.json | grep '"version"'
cat node_modules/@typescript/native-preview/package.json | grep '"version"'
cat node_modules/@types/cli-progress/package.json | grep '"version"'
cat node_modules/@types/unidecode/package.json | grep '"version"'
```
Update the catalog values in `package.json` to match the exact installed versions.
- [ ] **Step 3: Install turbo and @tsconfig/bun**
```bash
bun install
```
Expected: lock file updated, `turbo` and `@tsconfig/bun` appear in `node_modules`.
- [ ] **Step 4: Verify turbo works**
```bash
bunx turbo run typecheck --dry
```
Expected: output lists the `typecheck` task for each package (even if no `typecheck`
script exists yet — turbo will note them as skipped/missing).
- [ ] **Step 5: Commit**
```bash
git add package.json bun.lock
git commit -m "chore: add workspace catalog and turbo to root package.json"
```
* * *
### Task 4: Update per-package `package.json` files
Rename `type:check` → `typecheck`, replace `main`/`module` with `exports`, swap pinned
dep versions for `catalog:` references.
**Files:**
- Modify: `packages/core/package.json`
- Modify: `packages/api-server/package.json`
- Modify: `packages/mcp-server/package.json`
- [ ] **Step 1: Replace `packages/core/package.json`**
```json
{
"name": "@marketplace-scrapers/core",
"version": "1.0.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"typecheck": "bun tsgo"
},
"dependencies": {
"@typescript/native-preview": "catalog:",
"cli-progress": "^3.12.0",
"linkedom": "^0.18.12",
"unidecode": "^1.1.0"
},
"devDependencies": {
"@types/bun": "catalog:",
"@types/cli-progress": "catalog:",
"@types/unidecode": "catalog:"
},
"peerDependencies": {
"typescript": "^5"
}
}
```
- [ ] **Step 2: Replace `packages/api-server/package.json`**
```json
{
"name": "@marketplace-scrapers/api-server",
"version": "1.0.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api",
"typecheck": "bun tsgo"
},
"dependencies": {
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
},
"devDependencies": {
"@types/bun": "catalog:"
},
"peerDependencies": {
"typescript": "^5"
}
}
```
- [ ] **Step 3: Replace `packages/mcp-server/package.json`**
```json
{
"name": "@marketplace-scrapers/mcp-server",
"version": "1.0.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp",
"typecheck": "bun tsgo"
},
"dependencies": {
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
},
"devDependencies": {
"@types/bun": "catalog:"
},
"peerDependencies": {
"typescript": "^5"
}
}
```
- [ ] **Step 4: Run `bun install` to sync lockfile**
```bash
bun install
```
Expected: no errors.
Catalog refs resolved.
`bun.lock` updated.
- [ ] **Step 5: Verify typecheck still works per-package**
```bash
cd packages/core && bun run typecheck
cd ../api-server && bun run typecheck
cd ../mcp-server && bun run typecheck
cd ../..
```
Expected: each exits 0 (or same errors as before — no new errors introduced).
- [ ] **Step 6: Commit**
```bash
git add packages/core/package.json packages/api-server/package.json packages/mcp-server/package.json bun.lock
git commit -m "chore: use exports field and catalog refs in all packages"
```
* * *
### Task 5: Slim per-package `tsconfig.json` files
Replace the duplicated full tsconfig in each package with a slim `extends`-based one
pointing to root.
**Files:**
- Modify: `packages/core/tsconfig.json`
- Modify: `packages/api-server/tsconfig.json`
- Modify: `packages/mcp-server/tsconfig.json`
- [ ] **Step 1: Replace `packages/core/tsconfig.json`**
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["./src", "./test"]
}
```
- [ ] **Step 2: Replace `packages/api-server/tsconfig.json`**
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["./src", "./test"]
}
```
- [ ] **Step 3: Replace `packages/mcp-server/tsconfig.json`**
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["./src", "./test"]
}
```
- [ ] **Step 4: Verify `@tsconfig/bun` is resolvable**
The root tsconfig extends `@tsconfig/bun/tsconfig.json`. Confirm the package is
installed:
```bash
ls node_modules/@tsconfig/bun/tsconfig.json
```
Expected: file exists.
- [ ] **Step 5: Run typecheck via Turbo**
```bash
bun run typecheck
```
Expected: Turbo runs `typecheck` for all 3 packages in parallel, all pass (or same
pre-existing errors — no new ones).
- [ ] **Step 6: Commit**
```bash
git add packages/core/tsconfig.json packages/api-server/tsconfig.json packages/mcp-server/tsconfig.json
git commit -m "chore: slim per-package tsconfigs to extend root"
```
* * *
### Task 6: Smoke test full build pipeline
Verify everything works end-to-end.
**Files:** none (verification only)
- [ ] **Step 1: Run turbo typecheck**
```bash
bun run typecheck
```
Expected: Turbo runs `typecheck` across all packages.
Exit 0.
- [ ] **Step 2: Run full build**
```bash
bun run build
```
Expected: `dist/` cleaned, Turbo runs `build` (core first, then api-server and
mcp-server in parallel), build artifacts appear in `dist/api/` and `dist/mcp/`.
- [ ] **Step 3: Verify dist artifacts**
```bash
ls dist/api/ dist/mcp/
```
Expected: compiled output files in both directories.
- [ ] **Step 4: Verify `bun install` is exact**
```bash
grep -c '\^' bun.lock | head -5
```
With `exact = true` in bunfig.toml, new installs won't add `^` ranges.
Existing `^` ranges in `bun.lock` from before are fine — they'll be resolved to exact on
next fresh install.
- [ ] **Step 5: Final commit if any loose files**
```bash
git status
```
If clean: done. If any files modified by `bun install` (e.g. `bun.lock`):
```bash
git add bun.lock
git commit -m "chore: sync lockfile after monorepo config adoption"
```

View File

@@ -1,53 +1,64 @@
# Cookie Env-Only Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove cookie files and request-provided cookie overrides so all authenticated marketplace scraping reads raw `Cookie` header strings only from environment variables.
**Goal:** Remove cookie files and request-provided cookie overrides so all authenticated
marketplace scraping reads raw `Cookie` header strings only from environment variables.
**Architecture:** Collapse shared cookie loading to a single env-var reader in `packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to stop accepting request/file cookie inputs. Update the API and MCP adapters so they no longer advertise or forward cookie parameters, and rewrite docs/tests to match the env-only contract.
**Architecture:** Collapse shared cookie loading to a single env-var reader in
`packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to
stop accepting request/file cookie inputs.
Update the API and MCP adapters so they no longer advertise or forward cookie
parameters, and rewrite docs/tests to match the env-only contract.
**Tech Stack:** Bun, TypeScript, Bun test, Biome, workspace package exports
---
* * *
## File Map
- Modify: `packages/core/src/utils/cookies.ts`
Purpose: remove JSON/file/request-source loading and keep env-only cookie parsing/formatting.
- Modify: `packages/core/src/scrapers/facebook.ts`
Purpose: drop `cookiesSource` / `cookiePath` arguments and env-only error text.
- Modify: `packages/core/src/scrapers/ebay.ts`
Purpose: remove `opts.cookies` request override and use env-only cookie loading.
- Modify: `packages/core/src/index.ts`
Purpose: keep exports aligned with tightened core signatures.
- Modify: `packages/core/test/facebook-core.test.ts`
Purpose: replace missing-file coverage with env-only auth tests.
- Create: `packages/core/test/ebay-core.test.ts`
Purpose: add dedicated eBay auth regression coverage instead of mixing it into Facebook tests.
- Modify: `packages/api-server/src/routes/facebook.ts`
Purpose: stop parsing/forwarding `cookies` query params.
- Modify: `packages/api-server/src/routes/ebay.ts`
Purpose: stop parsing/forwarding `cookies` query params.
- Create: `packages/api-server/test/routes.test.ts`
Purpose: verify Facebook/eBay routes ignore cookie query params and still call core correctly.
- Modify: `packages/mcp-server/src/protocol/tools.ts`
Purpose: remove Facebook/eBay cookie tool inputs and descriptions.
- Modify: `packages/mcp-server/src/protocol/handler.ts`
Purpose: stop mapping removed cookie tool inputs into API URLs.
- Create: `packages/mcp-server/test/protocol.test.ts`
Purpose: verify tool schemas and handler URL building no longer include Facebook/eBay cookie fields.
- Modify: `cookies/AGENTS.md`
Purpose: document env vars as the only supported cookie input.
- Modify: `packages/core/src/utils/cookies.ts` Purpose: remove JSON/file/request-source
loading and keep env-only cookie parsing/formatting.
- Modify: `packages/core/src/scrapers/facebook.ts` Purpose: drop `cookiesSource` /
`cookiePath` arguments and switch to env-only error text.
- Modify: `packages/core/src/scrapers/ebay.ts` Purpose: remove `opts.cookies` request
override and use env-only cookie loading.
- Modify: `packages/core/src/index.ts` Purpose: keep exports aligned with tightened core
signatures.
- Modify: `packages/core/test/facebook-core.test.ts` Purpose: replace missing-file
coverage with env-only auth tests.
- Create: `packages/core/test/ebay-core.test.ts` Purpose: add dedicated eBay auth
regression coverage instead of mixing it into Facebook tests.
- Modify: `packages/api-server/src/routes/facebook.ts` Purpose: stop parsing/forwarding
`cookies` query params.
- Modify: `packages/api-server/src/routes/ebay.ts` Purpose: stop parsing/forwarding
`cookies` query params.
- Create: `packages/api-server/test/routes.test.ts` Purpose: verify Facebook/eBay routes
ignore cookie query params and still call core correctly.
- Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: remove Facebook/eBay
cookie tool inputs and descriptions.
- Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: stop mapping removed
cookie tool inputs into API URLs.
- Create: `packages/mcp-server/test/protocol.test.ts` Purpose: verify tool schemas and
handler URL building no longer include Facebook/eBay cookie fields.
- Modify: `cookies/AGENTS.md` Purpose: document env vars as the only supported cookie
input.
### Task 1: Lock core cookie utilities to env-only loading
**Files:**
- Modify: `packages/core/src/utils/cookies.ts:19-227`
- Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing test**
Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts` with env-only expectations:
Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts`
with env-only expectations:
```ts
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
@@ -85,12 +96,14 @@ test("should reject missing Facebook auth env var", async () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts`
Expected: FAIL because the current implementation still allows missing env values to fall through to file/request-based behavior and does not emit the new env-only error.
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
current implementation still allows missing env values to fall through to
file/request-based behavior and does not emit the new env-only error.
- [ ] **Step 3: Write minimal implementation**
Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only loader. The target shape is:
Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only
loader. The target shape is:
```ts
export interface CookieConfig {
@@ -129,8 +142,8 @@ Delete the now-dead helpers and types that exist only for JSON/file/request load
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts`
Expected: PASS for the new env-only tests.
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for the new
env-only tests.
- [ ] **Step 5: Commit**
@@ -142,10 +155,15 @@ git commit -m "refactor: make cookie loading env-only"
### Task 2: Tighten Facebook core APIs to the new contract
**Files:**
- Modify: `packages/core/src/scrapers/facebook.ts:23-29`
- Modify: `packages/core/src/scrapers/facebook.ts:214-228`
- Modify: `packages/core/src/scrapers/facebook.ts:823-929`
- Modify: `packages/core/src/index.ts:5-15`
- Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing test**
@@ -171,8 +189,9 @@ test("should fail Facebook item fetch when FACEBOOK_COOKIE is unset", async () =
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts`
Expected: FAIL because the current function signatures and error text still mention parameter/file-based auth paths.
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
current function signatures and error text still mention parameter/file-based auth
paths.
- [ ] **Step 3: Write minimal implementation**
@@ -206,12 +225,14 @@ console.warn(
);
```
Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep `packages/core/src/index.ts` exporting the tightened functions without the old parameter contract.
Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep
`packages/core/src/index.ts` exporting the tightened functions without the old parameter
contract.
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts`
Expected: PASS with the new env-only Facebook API surface.
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS with the new
env-only Facebook API surface.
- [ ] **Step 5: Commit**
@@ -223,8 +244,11 @@ git commit -m "refactor: remove facebook cookie overrides"
### Task 3: Tighten eBay core APIs to env-only auth
**Files:**
- Modify: `packages/core/src/scrapers/ebay.ts:9-15`
- Modify: `packages/core/src/scrapers/ebay.ts:337-389`
- Create: `packages/core/test/ebay-core.test.ts`
- [ ] **Step 1: Write the failing test**
@@ -249,8 +273,8 @@ test("should warn and continue without eBay cookies when EBAY_COOKIE is unset",
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/ebay-core.test.ts`
Expected: FAIL because `loadEbayCookies` still accepts request overrides and mentions file/json sources.
Run: `bun test packages/core/test/ebay-core.test.ts` Expected: FAIL because
`loadEbayCookies` still accepts request overrides and mentions file/json sources.
- [ ] **Step 3: Write minimal implementation**
@@ -276,12 +300,13 @@ async function loadEbayCookies(): Promise<string | undefined> {
}
```
Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds it into `loadEbayCookies()`.
Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds
it into `loadEbayCookies()`.
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/ebay-core.test.ts`
Expected: PASS for the eBay env-only regression coverage.
Run: `bun test packages/core/test/ebay-core.test.ts` Expected: PASS for the eBay
env-only regression coverage.
- [ ] **Step 5: Commit**
@@ -293,13 +318,17 @@ git commit -m "refactor: make ebay auth env-only"
### Task 4: Remove cookie query parameters from the API adapter
**Files:**
- Modify: `packages/api-server/src/routes/facebook.ts:3-33`
- Modify: `packages/api-server/src/routes/ebay.ts:3-52`
- Create: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core` so the route contract is explicit:
Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core`
so the route contract is explicit:
```ts
import { afterEach, describe, expect, mock, test } from "bun:test";
@@ -347,8 +376,9 @@ test("ebayRoute ignores cookies query parameter", async () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: FAIL because the current routes still parse `reqUrl.searchParams.get("cookies")` and forward it downstream.
Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
current routes still parse `reqUrl.searchParams.get("cookies")` and forward it
downstream.
- [ ] **Step 3: Write minimal implementation**
@@ -383,8 +413,8 @@ const items = await fetchEbayItems(SEARCH_QUERY, 1, {
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: PASS for route coverage and no remaining adapter references to `cookies` for Facebook/eBay.
Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS for route
coverage and no remaining adapter references to `cookies` for Facebook/eBay.
- [ ] **Step 5: Commit**
@@ -396,13 +426,17 @@ git commit -m "refactor: remove api cookie query overrides"
### Task 5: Remove cookie inputs from MCP tool schemas and request mapping
**Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts:65-148`
- Modify: `packages/mcp-server/src/protocol/handler.ts:154-211`
- Create: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building assertions:
Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building
assertions:
```ts
import { expect, mock, test } from "bun:test";
@@ -445,8 +479,8 @@ expect(calledUrl).not.toContain("cookies=");
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: FAIL because the current MCP schema and handler still expose and forward those inputs.
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
current MCP schema and handler still expose and forward those inputs.
- [ ] **Step 3: Write minimal implementation**
@@ -465,12 +499,13 @@ Delete the Facebook/eBay cookie tool properties and handler mapping:
// if (args.cookies) params.append("cookies", args.cookies);
```
Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by the approved spec.
Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by
the approved spec.
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: PASS with MCP definitions and handler mapping in sync.
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS with MCP
definitions and handler mapping in sync.
- [ ] **Step 5: Commit**
@@ -482,12 +517,16 @@ git commit -m "refactor: remove mcp cookie parameters"
### Task 6: Rewrite cookie documentation and run full verification
**Files:**
- Modify: `cookies/AGENTS.md:9-85`
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if implementation reveals a spec mismatch
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if
implementation reveals a spec mismatch
- [ ] **Step 1: Write the failing test**
Treat docs drift as a contract failure. Capture the required state before editing:
Treat docs drift as a contract failure.
Capture the required state before editing:
```md
- Cookie setup docs mention env vars only for Facebook and eBay
@@ -497,14 +536,14 @@ Treat docs drift as a contract failure. Capture the required state before editin
- [ ] **Step 2: Run verification to prove current docs are stale**
Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md`
Expected: matches found
Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md` Expected: matches
found
- [ ] **Step 3: Write minimal implementation**
Rewrite the cookie setup doc so Facebook and eBay each show only env-var setup:
```md
````md
## Cookie Configuration
All supported authenticated scrapers read cookies only from environment variables.
@@ -513,14 +552,14 @@ All supported authenticated scrapers read cookies only from environment variable
```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
```
````
### eBay
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
```
````
Remove the file-based and request-parameter sections entirely.
@@ -534,10 +573,14 @@ Expected: all commands pass
```bash
git add cookies/AGENTS.md docs/superpowers/specs/2026-04-21-cookie-env-only-design.md
git commit -m "docs: align cookie setup with env-only auth"
```
````
## Self-Review
- Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter, tests, and docs each have explicit tasks.
- Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP protocol coverage.
- Type consistency check: `ensureCookies(config)` is the single shared loader name used across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core changes.
- Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter,
tests, and docs each have explicit tasks.
- Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP
protocol coverage.
- Type consistency check: `ensureCookies(config)` is the single shared loader name used
across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core
changes.

View File

@@ -1,34 +1,49 @@
# Facebook Comet Rewrite Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid Comet-bootstrap parser for both search and item routes.
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid
Comet-bootstrap parser for both search and item routes.
**Architecture:** Keep authenticated direct HTTP fetches as the transport. Classify each Facebook response first, then parse route-specific Comet bootstrap/state candidates, and fall back to rendered-HTML extraction only when bootstrap decoding cannot produce the expected search or item shape.
**Architecture:** Keep authenticated direct HTTP fetches as the transport.
Classify each Facebook response first, then parse route-specific Comet bootstrap/state
candidates, and fall back to rendered-HTML extraction only when bootstrap decoding
cannot produce the expected search or item shape.
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http helpers
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http
helpers
---
* * *
## File Structure
- Modify: `packages/core/src/scrapers/facebook.ts`
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction, search parsing, item parsing, and HTML fallbacks.
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction,
search parsing, item parsing, and HTML fallbacks.
- Modify: `packages/core/test/facebook-core.test.ts`
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing, and route-aware item/search extraction behavior.
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing,
and route-aware item/search extraction behavior.
- Modify: `packages/core/test/facebook-integration.test.ts`
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping for search/item entrypoints.
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping
for search/item entrypoints.
### Task 1: Add Route Classification Coverage
**Files:**
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests**
Add these tests near the Facebook parser tests in `packages/core/test/facebook-core.test.ts`:
Add these tests near the Facebook parser tests in
`packages/core/test/facebook-core.test.ts`:
```ts
test("classifies Comet search responses", () => {
@@ -89,12 +104,14 @@ test("classifies unavailable item responses", () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Expected: FAIL because `classifyFacebookResponse` does not exist yet.
- [ ] **Step 3: Write minimal implementation**
Add this type and function near the parsing section in `packages/core/src/scrapers/facebook.ts`:
Add this type and function near the parsing section in
`packages/core/src/scrapers/facebook.ts`:
```ts
type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown";
@@ -128,7 +145,8 @@ export function classifyFacebookResponse(htmlString: HTMLString, responseUrl: st
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Expected: PASS
- [ ] **Step 5: Commit**
@@ -141,8 +159,11 @@ git commit -m "refactor: add facebook response classification"
### Task 2: Add Bootstrap Candidate Extraction
**Files:**
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests**
@@ -185,7 +206,8 @@ test("keeps candidate order stable for later scoring", () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist.
- [ ] **Step 3: Write minimal implementation**
@@ -218,7 +240,8 @@ export function extractFacebookBootstrapCandidates(htmlString: HTMLString): Reco
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Expected: PASS
- [ ] **Step 5: Commit**
@@ -231,10 +254,15 @@ git commit -m "refactor: add facebook bootstrap candidate extraction"
### Task 3: Replace Search Parsing With Candidate Scoring
**Files:**
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/test/facebook-integration.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts`
- Test: `packages/core/test/facebook-integration.test.ts`
- [ ] **Step 1: Write the failing tests**
@@ -323,12 +351,15 @@ const mockSearchHtml = `
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"`
Expected: FAIL because the current search extractor only understands legacy `marketplace_search` shapes.
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"`
Expected: FAIL because the current search extractor only understands legacy
`marketplace_search` shapes.
- [ ] **Step 3: Write minimal implementation**
Replace the search extraction internals in `extractFacebookMarketplaceData()` with candidate scoring like this:
Replace the search extraction internals in `extractFacebookMarketplaceData()` with
candidate scoring like this:
```ts
function findSearchEdges(candidate: unknown): FacebookEdge[] | null {
@@ -383,7 +414,8 @@ export function extractFacebookMarketplaceData(htmlString: HTMLString): Facebook
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Expected: PASS for the rewritten search fixtures and existing unaffected tests.
- [ ] **Step 5: Commit**
@@ -396,8 +428,11 @@ git commit -m "refactor: rewrite facebook search parser for comet bootstrap"
### Task 4: Replace Item Parsing With Candidate Scoring
**Files:**
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests**
@@ -438,7 +473,8 @@ test("extracts item details from Comet permalink bootstrap candidates", () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
Expected: FAIL because the current item extractor depends on legacy permalink markers.
- [ ] **Step 3: Write minimal implementation**
@@ -491,8 +527,8 @@ export function extractFacebookItemData(htmlString: HTMLString): FacebookMarketp
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts`
Expected: PASS for current-shape item tests and remaining parser tests.
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for
current-shape item tests and remaining parser tests.
- [ ] **Step 5: Commit**
@@ -504,8 +540,11 @@ git commit -m "refactor: rewrite facebook item parser for comet bootstrap"
### Task 5: Add HTML Fallback Extraction
**Files:**
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests**
@@ -549,8 +588,10 @@ test("falls back to rendered item HTML when bootstrap payloads are undecodable",
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: FAIL because the extractor currently returns `null` without a structured candidate.
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: FAIL because the extractor currently returns `null` without a structured
candidate.
- [ ] **Step 3: Write minimal implementation**
@@ -607,11 +648,13 @@ function extractItemFallback(htmlString: HTMLString): FacebookMarketplaceItem |
}
```
Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()` and `extractFacebookItemData()`.
Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()`
and `extractFacebookItemData()`.
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: PASS
- [ ] **Step 5: Commit**
@@ -624,8 +667,11 @@ git commit -m "refactor: add facebook html fallbacks"
### Task 6: Wire Route-Aware Failures Into Entry Points
**Files:**
- Modify: `packages/core/test/facebook-integration.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-integration.test.ts`
- [ ] **Step 1: Write the failing tests**
@@ -664,8 +710,10 @@ test("returns null for unavailable item responses", async () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by route/auth state.
Run:
`bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by
route/auth state.
- [ ] **Step 3: Write minimal implementation**
@@ -690,12 +738,13 @@ if (itemResponseClass.kind === "unavailable") {
}
```
Use the actual response URL from `fetchHtml` plumbing if that helper is extended to return both HTML and final URL; otherwise start by threading final URL support through the fetch helper in the same task.
Use the actual response URL from `fetchHtml` plumbing if that helper is extended to
return both HTML and final URL; otherwise start by threading final URL support through
the fetch helper in the same task.
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-integration.test.ts`
Expected: PASS
Run: `bun test packages/core/test/facebook-integration.test.ts` Expected: PASS
- [ ] **Step 5: Commit**
@@ -707,19 +756,22 @@ git commit -m "refactor: handle facebook route-aware failure states"
### Task 7: Run Full Verification And Live Probe
**Files:**
- Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required
- Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required
- Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required
- [ ] **Step 1: Run focused Facebook tests**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Expected: PASS
- [ ] **Step 2: Run broader core tests**
Run: `bun test packages/core/test`
Expected: PASS
Run: `bun test packages/core/test` Expected: PASS
- [ ] **Step 3: Run live authenticated Facebook probe**
@@ -742,11 +794,14 @@ if (results[0]?.url) {
Expected:
- search returns at least one result
- item fetch returns non-null for the first live result when the route is not stale/unavailable
- item fetch returns non-null for the first live result when the route is not
stale/unavailable
- [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green**
If cleanup is needed, keep it limited to naming, dead-code removal caused by the rewrite, or small parser corrections directly exposed by the verification commands.
If cleanup is needed, keep it limited to naming, dead-code removal caused by the
rewrite, or small parser corrections directly exposed by the verification commands.
- [ ] **Step 5: Re-run verification**
@@ -767,6 +822,11 @@ git commit -m "refactor: complete facebook comet scraper rewrite"
## Self-Review
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and live verification.
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps remain.
- Type consistency: all planned functions and types use the same names across tasks: `classifyFacebookResponse`, `extractFacebookBootstrapCandidates`, `extractFacebookMarketplaceData`, and `extractFacebookItemData`.
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware
item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and
live verification.
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps
remain.
- Type consistency: all planned functions and types use the same names across tasks:
`classifyFacebookResponse`, `extractFacebookBootstrapCandidates`,
`extractFacebookMarketplaceData`, and `extractFacebookItemData`.

View File

@@ -1,63 +1,75 @@
# Unstable Listing Mode Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add an optional shared mode across Facebook, eBay, and Kijiji that moves listings priced below 80% of the median into `unstableResults`, while preserving current default response shapes.
**Goal:** Add an optional shared mode across Facebook, eBay, and Kijiji that moves
listings priced below 80% of the median into `unstableResults`, while preserving current
default response shapes.
**Architecture:** Introduce a shared generic classifier in `packages/core` that splits any listing array into `results` and `unstableResults` using the same median-based rule. Then thread one opt-in flag through the scraper entrypoints, API routes, and MCP tool definitions so all surfaces expose the same behavior without changing existing defaults.
**Architecture:** Introduce a shared generic classifier in `packages/core` that splits
any listing array into `results` and `unstableResults` using the same median-based rule.
Then thread one opt-in flag through the scraper entrypoints, API routes, and MCP tool
definitions so all surfaces expose the same behavior without changing existing defaults.
**Tech Stack:** Bun, TypeScript, Bun test, workspace packages, JSON-RPC MCP server
---
* * *
## File Map
- Create: `packages/core/src/utils/unstable.ts`
Purpose: shared generic median/cutoff classifier for listing arrays.
- Modify: `packages/core/src/types/common.ts`
Purpose: add shared mode types used by scrapers and adapters.
- Modify: `packages/core/src/index.ts`
Purpose: export the new shared classifier/types.
- Modify: `packages/core/src/scrapers/facebook.ts`
Purpose: add the optional mode flag and return bucketed results when enabled.
- Modify: `packages/core/src/scrapers/ebay.ts`
Purpose: add the optional mode flag and return bucketed results when enabled.
- Modify: `packages/core/src/scrapers/kijiji.ts`
Purpose: add the optional mode flag and return bucketed results when enabled.
- Create: `packages/core/test/unstable-listing-mode.test.ts`
Purpose: lock the shared classifier behavior with direct unit tests.
- Modify: `packages/core/test/facebook-core.test.ts`
Purpose: prove Facebook preserves default arrays and returns buckets when enabled.
- Modify: `packages/core/test/ebay-core.test.ts`
Purpose: prove eBay preserves default arrays and returns buckets when enabled.
- Modify: `packages/core/test/kijiji-core.test.ts`
Purpose: prove Kijiji preserves default arrays and returns buckets when enabled.
- Modify: `packages/api-server/src/routes/facebook.ts`
Purpose: expose a shared opt-in query parameter and preserve default response shape.
- Modify: `packages/api-server/src/routes/ebay.ts`
Purpose: expose the same query parameter and preserve default response shape.
- Modify: `packages/api-server/src/routes/kijiji.ts`
Purpose: expose the same query parameter and preserve default response shape.
- Modify: `packages/api-server/test/routes.test.ts`
Purpose: verify route forwarding and route response-shape switching.
- Modify: `packages/mcp-server/src/protocol/tools.ts`
Purpose: document the optional unstable mode in all search tools.
- Modify: `packages/mcp-server/src/protocol/handler.ts`
Purpose: forward the optional mode to API routes for all search tools.
- Modify: `packages/mcp-server/test/protocol.test.ts`
Purpose: verify MCP tool metadata and forwarded URLs include the new option.
- Create: `packages/core/src/utils/unstable.ts` Purpose: shared generic median/cutoff
classifier for listing arrays.
- Modify: `packages/core/src/types/common.ts` Purpose: add shared mode types used by
scrapers and adapters.
- Modify: `packages/core/src/index.ts` Purpose: export the new shared classifier/types.
- Modify: `packages/core/src/scrapers/facebook.ts` Purpose: add the optional mode flag
and return bucketed results when enabled.
- Modify: `packages/core/src/scrapers/ebay.ts` Purpose: add the optional mode flag and
return bucketed results when enabled.
- Modify: `packages/core/src/scrapers/kijiji.ts` Purpose: add the optional mode flag and
return bucketed results when enabled.
- Create: `packages/core/test/unstable-listing-mode.test.ts` Purpose: lock the shared
classifier behavior with direct unit tests.
- Modify: `packages/core/test/facebook-core.test.ts` Purpose: prove Facebook preserves
default arrays and returns buckets when enabled.
- Modify: `packages/core/test/ebay-core.test.ts` Purpose: prove eBay preserves default
arrays and returns buckets when enabled.
- Modify: `packages/core/test/kijiji-core.test.ts` Purpose: prove Kijiji preserves
default arrays and returns buckets when enabled.
- Modify: `packages/api-server/src/routes/facebook.ts` Purpose: expose a shared opt-in
query parameter and preserve default response shape.
- Modify: `packages/api-server/src/routes/ebay.ts` Purpose: expose the same query
parameter and preserve default response shape.
- Modify: `packages/api-server/src/routes/kijiji.ts` Purpose: expose the same query
parameter and preserve default response shape.
- Modify: `packages/api-server/test/routes.test.ts` Purpose: verify route forwarding and
route response-shape switching.
- Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: document the optional
unstable mode in all search tools.
- Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: forward the optional
mode to API routes for all search tools.
- Modify: `packages/mcp-server/test/protocol.test.ts` Purpose: verify MCP tool metadata
and forwarded URLs include the new option.
### Task 1: Add the shared unstable-listing classifier
**Files:**
- Create: `packages/core/src/utils/unstable.ts`
- Modify: `packages/core/src/types/common.ts`
- Modify: `packages/core/src/index.ts`
- Test: `packages/core/test/unstable-listing-mode.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/core/test/unstable-listing-mode.test.ts` with focused shared-behavior coverage:
Create `packages/core/test/unstable-listing-mode.test.ts` with focused shared-behavior
coverage:
```ts
import { describe, expect, test } from "bun:test";
@@ -127,8 +139,8 @@ describe("classifyUnstableListings", () => {
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts`
Expected: FAIL because `classifyUnstableListings` and the shared mode types do not exist yet.
Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Expected: FAIL because
`classifyUnstableListings` and the shared mode types do not exist yet.
- [ ] **Step 3: Write minimal implementation**
@@ -202,8 +214,8 @@ export { classifyUnstableListings } from "./utils/unstable";
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts`
Expected: PASS with 4 passing tests.
Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Expected: PASS with 4
passing tests.
- [ ] **Step 5: Commit**
@@ -215,16 +227,24 @@ git commit -m "feat: add shared unstable listing classifier"
### Task 2: Thread the optional mode through all core scrapers
**Files:**
- Modify: `packages/core/src/scrapers/facebook.ts`
- Modify: `packages/core/src/scrapers/ebay.ts`
- Modify: `packages/core/src/scrapers/kijiji.ts`
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/test/ebay-core.test.ts`
- Modify: `packages/core/test/kijiji-core.test.ts`
- [ ] **Step 1: Write the failing tests**
Add one focused opt-in test per scraper. Use the new shared classifier through the public scraper entrypoints instead of testing internal helpers.
Add one focused opt-in test per scraper.
Use the new shared classifier through the public scraper entrypoints instead of testing
internal helpers.
In `packages/core/test/facebook-core.test.ts`, add:
@@ -286,7 +306,8 @@ test("fetchKijijiItems returns stable and unstable buckets when unstable mode is
});
```
Also add one default-mode assertion in one existing scraper test file, for example in `packages/core/test/facebook-core.test.ts`:
Also add one default-mode assertion in one existing scraper test file, for example in
`packages/core/test/facebook-core.test.ts`:
```ts
test("fetchFacebookItems keeps returning an array by default", async () => {
@@ -307,8 +328,10 @@ test("fetchFacebookItems keeps returning an array by default", async () => {
- [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: FAIL because the scraper signatures do not yet accept the new option and still always return arrays.
Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: FAIL because the scraper signatures do not yet accept the new option and still
always return arrays.
- [ ] **Step 3: Write minimal implementation**
@@ -322,7 +345,8 @@ import {
} from "../index";
```
In `packages/core/src/scrapers/facebook.ts`, extend the default export signature and branch at the end:
In `packages/core/src/scrapers/facebook.ts`, extend the default export signature and
branch at the end:
```ts
export default async function fetchFacebookItems(
@@ -371,7 +395,8 @@ export default async function fetchEbayItems(
}
```
In `packages/core/src/scrapers/kijiji.ts`, add the same final argument after `listingOptions`:
In `packages/core/src/scrapers/kijiji.ts`, add the same final argument after
`listingOptions`:
```ts
export default async function fetchKijijiItems(
@@ -392,12 +417,15 @@ export default async function fetchKijijiItems(
}
```
Keep the default branch untouched in all three files so existing callers still receive arrays.
Keep the default branch untouched in all three files so existing callers still receive
arrays.
- [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: PASS, including the new opt-in bucket assertions and the default-array regression assertion.
Run:
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: PASS, including the new opt-in bucket assertions and the default-array
regression assertion.
- [ ] **Step 5: Commit**
@@ -409,14 +437,19 @@ git commit -m "feat: add unstable mode to scraper results"
### Task 3: Expose unstable mode in API routes
**Files:**
- Modify: `packages/api-server/src/routes/facebook.ts`
- Modify: `packages/api-server/src/routes/ebay.ts`
- Modify: `packages/api-server/src/routes/kijiji.ts`
- Modify: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Write the failing tests**
Extend `packages/api-server/test/routes.test.ts` with route-forwarding coverage for the new query parameter:
Extend `packages/api-server/test/routes.test.ts` with route-forwarding coverage for the
new query parameter:
```ts
test("facebookRoute forwards unstableFilter=true to core", async () => {
@@ -480,8 +513,8 @@ test("kijijiRoute forwards unstableFilter=true to core", async () => {
- [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: FAIL because the routes do not yet parse or forward `unstableFilter`.
Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
routes do not yet parse or forward `unstableFilter`.
- [ ] **Step 3: Write minimal implementation**
@@ -533,12 +566,14 @@ const items = await fetchKijijiItems(
);
```
Do not add any response wrapper logic in the routes; simply return whatever the core scraper returns so the default array path remains unchanged.
Do not add any response wrapper logic in the routes; simply return whatever the core
scraper returns so the default array path remains unchanged.
- [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: PASS, including existing cookie-parameter regression tests and the new unstable-mode forwarding assertions.
Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS, including
existing cookie-parameter regression tests and the new unstable-mode forwarding
assertions.
- [ ] **Step 5: Commit**
@@ -550,13 +585,17 @@ git commit -m "feat: expose unstable mode in api routes"
### Task 4: Document and forward unstable mode in MCP tools
**Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts`
- Modify: `packages/mcp-server/src/protocol/handler.ts`
- Modify: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Write the failing tests**
Extend `packages/mcp-server/test/protocol.test.ts` with metadata and forwarding coverage:
Extend `packages/mcp-server/test/protocol.test.ts` with metadata and forwarding
coverage:
```ts
test("search tools document unstable listing mode", () => {
@@ -601,12 +640,14 @@ Mirror the forwarding assertion for `search_kijiji` and `search_ebay` in the sam
- [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: FAIL because the tools do not yet describe `unstableFilter` and the handler does not append it to API URLs.
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
tools do not yet describe `unstableFilter` and the handler does not append it to API
URLs.
- [ ] **Step 3: Write minimal implementation**
In `packages/mcp-server/src/protocol/tools.ts`, add the same optional property to all three tools:
In `packages/mcp-server/src/protocol/tools.ts`, add the same optional property to all
three tools:
```ts
unstableFilter: {
@@ -617,7 +658,8 @@ unstableFilter: {
},
```
In `packages/mcp-server/src/protocol/handler.ts`, append the shared flag in each search branch:
In `packages/mcp-server/src/protocol/handler.ts`, append the shared flag in each search
branch:
```ts
if (args.unstableFilter !== undefined) {
@@ -629,8 +671,8 @@ Add that snippet to the `search_kijiji`, `search_facebook`, and `search_ebay` br
- [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: PASS, including the new tool-schema assertions and URL-forwarding assertions.
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS, including the
new tool-schema assertions and URL-forwarding assertions.
- [ ] **Step 5: Commit**
@@ -642,21 +684,23 @@ git commit -m "docs: expose unstable mode in mcp tools"
### Task 5: Verify the full cross-package feature end to end
**Files:**
- No code changes expected.
- [ ] **Step 1: Run the focused package tests**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts packages/api-server/test/routes.test.ts packages/mcp-server/test/protocol.test.ts`
Run:
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts packages/api-server/test/routes.test.ts packages/mcp-server/test/protocol.test.ts`
Expected: PASS with zero failing tests.
- [ ] **Step 2: Run the broader workspace verification**
Run: `bun run ci`
Expected: PASS with clean workspace validation.
Run: `bun run ci` Expected: PASS with clean workspace validation.
- [ ] **Step 3: Commit verification-only follow-ups if needed**
If verification forced any tiny fixes, commit them immediately after the fix with a focused message, for example:
If verification forced any tiny fixes, commit them immediately after the fix with a
focused message, for example:
```bash
git add <exact files changed>
@@ -667,6 +711,8 @@ If no files changed during verification, skip this commit step.
## Self-Review
- Spec coverage: shared classifier, all three scrapers, API exposure, MCP documentation, and tests are each mapped to a task.
- Placeholder scan: no `TODO`, `TBD`, or "write tests later" placeholders remain.
- Type consistency: the plan uses one shared flag name, `unstableFilter`, and one shared core option, `hideUnstableResults`, across all tasks.
- Spec coverage: shared classifier, all three scrapers, API exposure, MCP documentation,
and tests are each mapped to a task.
- Placeholder scan: no `TODO`, `TBD`, or “write tests later” placeholders remain.
- Type consistency: the plan uses one shared flag name, `unstableFilter`, and one shared
core option, `hideUnstableResults`, across all tasks.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,110 @@
# Marketplace Dollar Price Inputs Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to
> implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make public marketplace price inputs use dollars while preserving core scraper
cent-based filtering.
**Architecture:** API server owns HTTP query parsing and converts dollar amounts to
cents before calling core.
MCP server keeps forwarding numeric dollar values as query params.
Core scraper internals remain unchanged because parsed listing prices already use cents.
This applies to eBay `minPrice`/`maxPrice` and Kijiji `priceMin`/`priceMax`; Facebook
exposes no price filter inputs.
**Tech Stack:** Bun, TypeScript, `bun:test`, MCP JSON-RPC adapter, framework-free Bun
HTTP routes.
* * *
### Task 1: API Dollar Parsing
**Files:**
- Modify: `packages/api-server/src/routes/helpers.ts`
- Modify: `packages/api-server/src/routes/ebay.ts`
- Modify: `packages/api-server/src/routes/kijiji.ts`
- Test: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Add failing API route tests**
Add tests proving eBay `minPrice=999.99` / `maxPrice=1000` and Kijiji `priceMin=999.99`
/ `priceMax=1000` are forwarded to core as `99999` and `100000` cents.
Add validation tests for empty, whitespace, negative, hex, mixed text, and malformed
decimal price values.
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: new forwarding tests fail because route currently rejects decimals and
forwards integer dollars unchanged.
- [ ] **Step 2: Implement dollar parser helper**
Add `parseDollarPriceParam(searchParams, name)` in
`packages/api-server/src/routes/helpers.ts`. Accept `0`, `1000`, `999.99`, and `0.99`.
Reject values that do not match `^\d+(?:\.\d{1,2})?$`. Convert to cents with
`Math.round(Number(rawValue) * 100)`.
- [ ] **Step 3: Use dollar parser in eBay and Kijiji routes**
Replace `parseNonNegativeIntegerParam` calls for eBay `minPrice`/`maxPrice` and Kijiji
`priceMin`/`priceMax` with `parseDollarPriceParam`. Keep pagination/count params on
integer parsing.
- [ ] **Step 4: Verify API tests**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: all API route tests pass.
### Task 2: MCP Schema Contract
**Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts`
- Test: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Add MCP schema/forwarding tests**
Add tests that `search_ebay` describes `minPrice` and `maxPrice` as dollar filters and
forwards numeric dollar values unchanged in API query params.
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: description test fails until schema text changes; forwarding behavior should
already pass or reveal mapping gaps.
- [ ] **Step 2: Update tool descriptions**
Change eBay `minPrice` and Kijiji `priceMin` descriptions to `Minimum price in dollars`.
Change eBay `maxPrice` and Kijiji `priceMax` descriptions to `Maximum price in dollars`.
- [ ] **Step 3: Verify MCP tests**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: all MCP protocol tests pass.
### Task 3: Cross-Package Verification
**Files:**
- No additional edits expected.
- [ ] **Step 1: Run relevant package tests**
Run: `bun test packages/api-server/test packages/mcp-server/test`
Expected: all tests pass.
- [ ] **Step 2: Run CI**
Run: `bun run ci`
Expected: typecheck and Biome pass without changing lint config.

View File

@@ -0,0 +1,187 @@
# Live Parser Tests Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add explicit live endpoint test suites for each core marketplace scraper,
excluded from default tests and runnable through one script.
**Architecture:** Live tests live under `packages/core/test/live/` and import public
scraper entry points directly.
Normal package tests remain offline because the new files are outside current explicit
test commands and run only through `bun run test:live`.
**Tech Stack:** Bun `1.3.13`, `bun:test`, TypeScript, existing core scraper APIs.
* * *
## File Structure
- Create `packages/core/test/live/ebay.live.test.ts`: live eBay search smoke test
against `fetchEbayItems`.
- Create `packages/core/test/live/kijiji.live.test.ts`: live Kijiji search smoke test
against `fetchKijijiItems`.
- Create `packages/core/test/live/facebook.live.test.ts`: strict live Facebook search
smoke test against `fetchFacebookItems`; requires `FACEBOOK_COOKIE` to be set.
- Modify `package.json`: add root script `test:live` running all files under
`packages/core/test/live`.
### Task 1: Add eBay Live Suite
**Files:**
- Create: `packages/core/test/live/ebay.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchEbayItems from "../../src/scrapers/ebay";
describe("eBay live parser", () => {
test("scrapes live search results into listing details", async () => {
const results = await fetchEbayItems("iphone", 1, { maxItems: 3 });
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run eBay live test**
Run: `bun test packages/core/test/live/ebay.live.test.ts` Expected: PASS when eBay
returns parseable search results; FAIL on endpoint/rate-limit/parser breakage.
### Task 2: Add Kijiji Live Suite
**Files:**
- Create: `packages/core/test/live/kijiji.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchKijijiItems from "../../src/scrapers/kijiji";
describe("Kijiji live parser", () => {
test("scrapes live search results into detailed listings", async () => {
const results = await fetchKijijiItems(
"iphone",
1,
"https://www.kijiji.ca",
{ maxPages: 1 },
{ includeImages: false, sellerDataDepth: "basic" },
);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://www.kijiji.ca/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run Kijiji live test**
Run: `bun test packages/core/test/live/kijiji.live.test.ts` Expected: PASS when Kijiji
returns parseable search and detail pages; FAIL on endpoint/parser breakage.
### Task 3: Add Facebook Live Suite
**Files:**
- Create: `packages/core/test/live/facebook.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchFacebookItems from "../../src/scrapers/facebook";
describe("Facebook live parser", () => {
test("requires FACEBOOK_COOKIE for strict live testing", () => {
expect(process.env.FACEBOOK_COOKIE?.trim().length ?? 0).toBeGreaterThan(0);
});
test("scrapes live marketplace search results into listing details", async () => {
const results = await fetchFacebookItems("iphone", 1, "toronto", 3);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://www.facebook.com/marketplace/item/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run Facebook live test**
Run: `bun test packages/core/test/live/facebook.live.test.ts` Expected: PASS with valid
`FACEBOOK_COOKIE`; FAIL when `FACEBOOK_COOKIE` is missing, expired, or parser output is
empty.
### Task 4: Add Root Live Test Script
**Files:**
- Modify: `package.json`
- [ ] **Step 1: Add script**
Change root `scripts` to include:
```json
{
"test:live": "bun test packages/core/test/live"
}
```
- [ ] **Step 2: Run all live tests through script**
Run: `bun run test:live` Expected: runs eBay, Kijiji, and Facebook live suites.
Facebook fails if `FACEBOOK_COOKIE` is unset.
### Task 5: Verify Default Suite Exclusion
**Files:**
- No code files modified.
- [ ] **Step 1: Run existing core tests**
Run: `bun test packages/core/test` Expected: existing mocked tests run.
If Bun discovers `packages/core/test/live`, change normal verification command to
explicit glob `bun test packages/core/test/*.test.ts` and document that in final notes.
- [ ] **Step 2: Run static checks**
Run: `bun run ci` Expected: typecheck and Biome pass.
Fix code issues without changing lint or TypeScript rules.
## Commit Note
Do not commit during execution unless user explicitly requests a commit.
This repo session policy overrides generic plan commit steps.
## Self-Review
- Spec coverage: eBay, Kijiji, Facebook live suites; explicit script; strict Facebook
auth; excluded from default flow.
- Placeholder scan: no `TBD`, `TODO`, or underspecified implementation steps.
- Type consistency: tests use current exported scraper signatures and shared listing
fields from `ListingDetails`.

View File

@@ -0,0 +1,140 @@
# Design: Adopt opencode Monorepo Config
**Date:** 2025-07-14\
**Status:** Approved\
**Approach:** Full adoption (A)
## Context
Current repo (`marketplace-scrapers-monorepo`) has basic bun workspaces with 3 packages
(`core`, `api-server`, `mcp-server`). Reference: `anomalyco/opencode` monorepo patterns.
**Gaps vs opencode:**
- No Turbo (task orchestration, caching, dep graph)
- No workspace catalog (shared dep versions duplicated across packages)
- No root tsconfig (identical tsconfigs duplicated in all 3 packages)
- No `bunfig.toml` (no exact installs, no root test guard)
- `main`/`module` fields instead of `exports` field
## Changes
### 1. Root `package.json`
- Add `workspaces.catalog` block with shared deps:
- `@typescript/native-preview`, `@types/bun`, `@types/unidecode`,
`@types/cli-progress`
- Add `turbo` to `devDependencies`
- Add `@tsconfig/bun` to `devDependencies` + catalog
- Update root scripts: `typecheck` and `build` delegate to `turbo run`
- Keep `build:api`, `build:mcp`, `build:all`, `start` as-is (deployment-specific)
- Rename `type:check` → `typecheck` in all packages (Turbo convention)
### 2. `turbo.json` (new file)
Tasks:
```json
{
"tasks": {
"typecheck": {},
"build": { "dependsOn": ["^build"], "outputs": ["dist/**"] },
"test": { "dependsOn": ["^build"], "outputs": [] }
}
}
```
`core` builds before `api-server`/`mcp-server` due to `^build` dep.
### 3. Root `tsconfig.json` (new file)
```json
{
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["dom", "ESNext"],
"target": "ESNext",
"module": "preserve",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
}
}
```
### 4. Per-package `tsconfig.json` (slim)
All 3 packages slim to:
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": { "@/*": ["./src/*"] }
},
"include": ["./src", "./test"]
}
```
### 5. `bunfig.toml` (new file)
```toml
[install]
exact = true
[test]
root = "./do-not-run-tests-from-root"
```
Exact installs = reproducible.
Root test guard prevents accidental root-level test runs.
### 6. Package `exports` field
Replace `main`/`module` with `exports` in all 3 packages:
```json
"exports": { ".": "./src/index.ts" }
```
Remove `main` and `module` fields.
Bun resolves `.ts` directly.
### 7. Catalog references in per-package `package.json`
Replace pinned versions with `"catalog:"` for shared deps:
- `@typescript/native-preview: "catalog:"`
- `@types/bun: "catalog:"`
- `@types/unidecode: "catalog:"` (core only)
- `@types/cli-progress: "catalog:"` (core only)
## Files Changed
| File | Action |
| --- | --- |
| `package.json` | Update (catalog, turbo dep, scripts) |
| `turbo.json` | Create |
| `tsconfig.json` | Create |
| `bunfig.toml` | Create |
| `packages/core/package.json` | Update (exports, catalog refs, script rename) |
| `packages/api-server/package.json` | Update (exports, catalog refs, script rename) |
| `packages/mcp-server/package.json` | Update (exports, catalog refs, script rename) |
| `packages/core/tsconfig.json` | Update (slim, extends root) |
| `packages/api-server/tsconfig.json` | Update (slim, extends root) |
| `packages/mcp-server/tsconfig.json` | Update (slim, extends root) |
## Non-Goals
- No Husky/git hooks (not needed yet)
- No SST/cloud infra (not applicable)
- No prettier (keep biome as formatter)
- No patches mechanism
- No `postinstall` scripts

View File

@@ -3,7 +3,9 @@
## Summary
Remove all file-based and request-provided cookie inputs across the repo.
The only supported authentication input becomes a raw `Cookie` header string supplied through scraper-specific environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`.
The only supported authentication input becomes a raw `Cookie` header string supplied
through scraper-specific environment variables such as `FACEBOOK_COOKIE` and
`EBAY_COOKIE`.
## Goals
@@ -17,7 +19,8 @@ The only supported authentication input becomes a raw `Cookie` header string sup
- Changing scraper behavior unrelated to authentication input.
- Adding new cookie formats or migration helpers.
- Preserving backward compatibility for cookie files, JSON cookie arrays, or request overrides.
- Preserving backward compatibility for cookie files, JSON cookie arrays, or request
overrides.
## Current State
@@ -27,27 +30,33 @@ The current shared cookie utilities support three sources in priority order:
2. Environment variable
3. Cookie file
`packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and auto-detection between JSON and header-string formats.
Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading logic.
Docs in `cookies/AGENTS.md` still describe file-based setup and request-level overrides.
`packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and
auto-detection between JSON and header-string formats.
Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading
logic. Docs in `cookies/AGENTS.md` still describe file-based setup and request-level
overrides.
## Chosen Approach
Use the hard-reset approach.
Delete the shared multi-source cookie-loading model and reduce the cookie surface to env-header parsing only.
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions that imply unsupported inputs still exist.
Delete the shared multi-source cookie-loading model and reduce the cookie surface to
env-header parsing only.
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions
that imply unsupported inputs still exist.
## Design
### Shared Cookie Utilities
`packages/core/src/utils/cookies.ts` will keep only the pieces needed for env-header-based auth:
`packages/core/src/utils/cookies.ts` will keep only the pieces needed for
env-header-based auth:
- `Cookie` type
- A reduced cookie config shape containing only `name`, `domain`, and `envVar`
- `parseCookieString()` for raw `Cookie` header strings
- `formatCookiesForHeader()` for domain filtering and request formatting
- An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a targeted error when missing or invalid
- An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a
targeted error when missing or invalid
The following shared utilities will be removed:
@@ -68,15 +77,18 @@ For Facebook this means:
For eBay this means:
- Remove any remaining fallback/file-oriented behavior from shared calls and error strings
- Remove any remaining fallback/file-oriented behavior from shared calls and error
strings
- Keep the existing env-var auth path, but make it the only path
### Public API Surface
Exports from `packages/core/src/index.ts` should reflect the new contract.
If exported functions currently advertise cookie-source or cookie-path arguments, their signatures will be tightened so callers cannot pass unsupported inputs.
If exported functions currently advertise cookie-source or cookie-path arguments, their
signatures will be tightened so callers cannot pass unsupported inputs.
Downstream adapter packages should continue calling core through the simplified signatures without adding their own cookie-loading behavior.
Downstream adapter packages should continue calling core through the simplified
signatures without adding their own cookie-loading behavior.
### Error Handling
@@ -93,8 +105,8 @@ Errors should be blunt and specific:
### Testing Strategy
Follow TDD.
Start by changing or adding core tests so the old file/request behavior is no longer accepted.
Follow TDD. Start by changing or adding core tests so the old file/request behavior is
no longer accepted.
Coverage targets:
@@ -102,7 +114,8 @@ Coverage targets:
2. Missing env vars fail with the new env-only error.
3. Invalid env strings fail without falling back to files or request data.
4. Facebook APIs no longer expose or honor cookie-path/request-cookie behavior.
5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to the env-only contract.
5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to
the env-only contract.
Verification target after implementation:
@@ -121,11 +134,15 @@ Update cookie-related docs to match the new contract:
## Risks
- External callers using request cookie overrides will break at compile time or runtime, depending on how they consume the package.
- Recent work added support for custom Facebook cookie paths, so removing that path intentionally reverses a newly introduced behavior.
- Tests that currently model missing-file behavior must be rewritten rather than preserved.
- External callers using request cookie overrides will break at compile time or runtime,
depending on how they consume the package.
- Recent work added support for custom Facebook cookie paths, so removing that path
intentionally reverses a newly introduced behavior.
- Tests that currently model missing-file behavior must be rewritten rather than
preserved.
## Rollout Notes
This is an intentional contract break.
The code, tests, and docs should all land together so there is no mixed messaging about supported cookie sources.
The code, tests, and docs should all land together so there is no mixed messaging about
supported cookie sources.

View File

@@ -2,35 +2,46 @@
## Summary
Replace the legacy Facebook Marketplace scraper with a route-aware implementation built around current Comet bootstrap markers and route-specific extraction.
The new scraper will keep authenticated direct HTTP fetches as the primary transport, but it will stop treating legacy `require`, `__bbox`, and `marketplace_product_details_page` structures as the main parsing contract.
Replace the legacy Facebook Marketplace scraper with a route-aware implementation built
around current Comet bootstrap markers and route-specific extraction.
The new scraper will keep authenticated direct HTTP fetches as the primary transport,
but it will stop treating legacy `require`, `__bbox`, and
`marketplace_product_details_page` structures as the main parsing contract.
## Goals
- Replace both Facebook search and item-detail extraction with a current-shape parser.
- Keep authenticated direct HTTP requests as the primary fetch strategy.
- Parse route-specific Comet bootstrap/state payloads before falling back to rendered-HTML extraction.
- Parse route-specific Comet bootstrap/state payloads before falling back to
rendered-HTML extraction.
- Detect auth-gated, unavailable, and unknown responses explicitly.
- Update tests so they model current route markers and failure modes instead of legacy page objects.
- Update tests so they model current route markers and failure modes instead of legacy
page objects.
## Non-Goals
- Reworking non-Facebook scrapers.
- Converting the scraper to browser-only automation.
- Preserving old parser behavior for `marketplace_product_details_page` or `__bbox`-driven item extraction.
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively before implementation.
- Preserving old parser behavior for `marketplace_product_details_page` or
`__bbox`-driven item extraction.
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively
before implementation.
## Current State
The current implementation in `packages/core/src/scrapers/facebook.ts` still uses authenticated HTTP requests, which remains correct.
The search path parses embedded script JSON and looks for `marketplace_search.feed_units.edges`.
The item-detail path is centered on legacy extraction paths such as:
The current implementation in `packages/core/src/scrapers/facebook.ts` still uses
authenticated HTTP requests, which remains correct.
The search path parses embedded script JSON and looks for
`marketplace_search.feed_units.edges`. The item-detail path is centered on legacy
extraction paths such as:
- `parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- nested `__bbox.require[...]` variations
- recursive search through `parsed.require`
Live evidence gathered earlier in this session and by the isolated research subagent shows that current Facebook Marketplace pages are Comet route-driven and expose markers such as:
Live evidence gathered earlier in this session and by the isolated research subagent
shows that current Facebook Marketplace pages are Comet route-driven and expose markers
such as:
- `XCometMarketplaceSearchController`
- `XCometMarketplacePermalinkController`
@@ -41,7 +52,9 @@ Live evidence gathered earlier in this session and by the isolated research suba
- `data-sjs`
- `data-btmanifest`
The same live investigation also showed that authenticated item pages no longer expose the old `marketplace_product_details_page` marker reliably, while live search still returns usable results.
The same live investigation also showed that authenticated item pages no longer expose
the old `marketplace_product_details_page` marker reliably, while live search still
returns usable results.
## Chosen Approach
@@ -52,9 +65,11 @@ The scraper will:
1. Fetch authenticated HTML directly.
2. Classify the response using current route and auth markers.
3. Parse inline bootstrap/state payloads using route-specific probes.
4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the payload cannot be decoded into the expected search or item shape.
4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the
payload cannot be decoded into the expected search or item shape.
This keeps the cheaper direct-HTTP transport while shifting the parser contract from legacy page-object names to current Comet route structure.
This keeps the cheaper direct-HTTP transport while shifting the parser contract from
legacy page-object names to current Comet route structure.
## Design
@@ -88,7 +103,8 @@ Primary behavior:
- fetch the Marketplace search HTML with auth cookies
- confirm the response class is `search`
- extract inline bootstrap/state blobs from script tags and page attributes
- probe for route-specific search payloads associated with `XCometMarketplaceSearchController`
- probe for route-specific search payloads associated with
`XCometMarketplaceSearchController`
- map decoded search results into summary listing records
Search summary fields should remain aligned with the current public output shape:
@@ -102,7 +118,8 @@ Search summary fields should remain aligned with the current public output shape
Fallback behavior:
- if search route markers are present but structured payload decoding fails, extract listing summaries from rendered HTML anchors and text patterns
- if search route markers are present but structured payload decoding fails, extract
listing summaries from rendered HTML anchors and text patterns
- use item links matching `/marketplace/item/<id>` as the anchor for fallback extraction
- treat fallback results as summary-only data, not rich detail data
@@ -132,9 +149,12 @@ Priority item fields:
Fallback behavior:
- if permalink route markers are present but no stable payload object is decodable, extract data from rendered HTML text structure
- prioritize title, price, condition, description, location text, and seller module content
- return partial item data when core user-facing fields are present rather than failing solely because deeper commerce metadata is missing
- if permalink route markers are present but no stable payload object is decodable,
extract data from rendered HTML text structure
- prioritize title, price, condition, description, location text, and seller module
content
- return partial item data when core user-facing fields are present rather than failing
solely because deeper commerce metadata is missing
### Bootstrap Parsing Strategy
@@ -151,11 +171,14 @@ Candidate discovery inputs:
- `ServerJS` / `Bootloader` inline blobs
- route controller names
Candidate scoring for search should favor objects that contain repeated result-card semantics, item IDs, listing links, titles, prices, or location summaries.
Candidate scoring for item pages should favor objects that contain singular listing semantics, title, price, condition, description, location, seller, or permalink context.
Candidate scoring for search should favor objects that contain repeated result-card
semantics, item IDs, listing links, titles, prices, or location summaries.
Candidate scoring for item pages should favor objects that contain singular listing
semantics, title, price, condition, description, location, seller, or permalink context.
The parser should not depend on one hard-coded object name surviving forever.
Instead, it should look for route-specific semantic clusters and choose the strongest candidate.
Instead, it should look for route-specific semantic clusters and choose the strongest
candidate.
### Legacy Removal
@@ -166,7 +189,9 @@ Specifically:
- delete legacy-first `require` / `__bbox` navigation tables
- delete tests whose only purpose is to preserve those legacy paths
If a minimal legacy compatibility branch remains, it must be a last-resort fallback behind the new route-aware parser and should not shape test fixtures or design decisions.
If a minimal legacy compatibility branch remains, it must be a last-resort fallback
behind the new route-aware parser and should not shape test fixtures or design
decisions.
### Error Handling
@@ -178,7 +203,8 @@ Facebook responses should now fail with explicit route-aware outcomes:
4. Search or item route detected, but no decodable data found.
5. Unknown response shape.
Error messages should name the actual class of failure instead of implying that every parse miss is caused by expired cookies.
Error messages should name the actual class of failure instead of implying that every
parse miss is caused by expired cookies.
### Testing Strategy
@@ -190,11 +216,15 @@ Coverage targets:
1. Search responses classify correctly from current Comet controller markers.
2. Item responses classify correctly from current Comet controller markers.
3. Login-gated and unavailable responses are detected before parsing.
4. Search bootstrap parsing produces summary listing results from current-shape fixtures.
4. Search bootstrap parsing produces summary listing results from current-shape
fixtures.
5. Item bootstrap parsing produces rich listing details from current-shape fixtures.
6. Search fallback extraction works when route markers exist but structured payload decoding fails.
7. Item fallback extraction works when route markers exist but structured payload decoding fails.
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the contract.
6. Search fallback extraction works when route markers exist but structured payload
decoding fails.
7. Item fallback extraction works when route markers exist but structured payload
decoding fails.
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the
contract.
Verification target after implementation:
@@ -204,23 +234,30 @@ Verification target after implementation:
## Public API Surface
Keep the current public function names unless the rewrite proves that a signature change is required:
Keep the current public function names unless the rewrite proves that a signature change
is required:
- `fetchFacebookItems(...)`
- `fetchFacebookItem(...)`
- `extractFacebookMarketplaceData(...)`
- `extractFacebookItemData(...)`
The internals should change substantially, but callers should not need a new integration surface for this rewrite.
The internals should change substantially, but callers should not need a new integration
surface for this rewrite.
## Risks
- Facebook may change bootstrap payload naming again, so route/controller markers are more stable than exact nested object paths but still not guaranteed.
- Search and item pages may each contain multiple partial payloads, making candidate ranking important.
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs clear precedence rules.
- Live fixtures can drift from production quickly, so tests must model route semantics rather than exact one-off payloads where possible.
- Facebook may change bootstrap payload naming again, so route/controller markers are
more stable than exact nested object paths but still not guaranteed.
- Search and item pages may each contain multiple partial payloads, making candidate
ranking important.
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs
clear precedence rules.
- Live fixtures can drift from production quickly, so tests must model route semantics
rather than exact one-off payloads where possible.
## Rollout Notes
The code, fixtures, and tests should change together.
There should be no mixed state where the implementation is Comet-aware but the tests still encode `marketplace_product_details_page` as the primary contract.
There should be no mixed state where the implementation is Comet-aware but the tests
still encode `marketplace_product_details_page` as the primary contract.

View File

@@ -2,15 +2,18 @@
## Summary
Add an optional shared result mode across Facebook, eBay, and Kijiji that moves suspiciously cheap listings out of the main results into a separate `unstableResults` bucket.
Listings are considered unstable when their price is more than 20% below the median price of the scraper's priced search results.
Add an optional shared result mode across Facebook, eBay, and Kijiji that moves
suspiciously cheap listings out of the main results into a separate `unstableResults`
bucket. Listings are considered unstable when their price is more than 20% below the
median price of the scraper's priced search results.
## Goals
- Support the same optional unstable-listing mode across all scrapers.
- Keep current default scraper and route behavior unchanged unless the mode is enabled.
- Hide unstable listings from the main results while still returning them separately.
- Implement the rule once in shared core code instead of duplicating marketplace-specific logic.
- Implement the rule once in shared core code instead of duplicating
marketplace-specific logic.
- Document the option in MCP tool descriptions so callers can discover it.
## Non-Goals
@@ -24,7 +27,8 @@ Listings are considered unstable when their price is more than 20% below the med
`packages/core` currently returns plain arrays from scraper search functions.
`packages/api-server` forwards those scraper results directly from marketplace routes.
`packages/mcp-server` documents search tools per marketplace, but does not expose or describe any result-stability mode.
`packages/mcp-server` documents search tools per marketplace, but does not expose or
describe any result-stability mode.
There is no shared result-classification utility today.
Price filtering exists in some scrapers, but not a cross-marketplace median-based split.
@@ -33,11 +37,14 @@ Price filtering exists in some scrapers, but not a cross-marketplace median-base
Use a shared core utility plus per-route and per-tool opt-in.
The shared utility will accept parsed listings, compute the median from valid positive prices, and split the data into `results` and `unstableResults`.
Each scraper will opt into that utility when the caller enables unstable-listing mode.
API routes and MCP tools will expose the same optional mode so the feature is consistently available everywhere scraper search is surfaced.
The shared utility will accept parsed listings, compute the median from valid positive
prices, and split the data into `results` and `unstableResults`. Each scraper will opt
into that utility when the caller enables unstable-listing mode.
API routes and MCP tools will expose the same optional mode so the feature is
consistently available everywhere scraper search is surfaced.
This keeps the heuristic centralized, minimizes duplicated logic, and preserves existing consumers by leaving the default path unchanged.
This keeps the heuristic centralized, minimizes duplicated logic, and preserves existing
consumers by leaving the default path unchanged.
## Design
@@ -48,14 +55,16 @@ Add a shared utility in `packages/core` for listing stability classification.
Responsibilities:
- accept parsed listing arrays with `listingPrice.cents`
- ignore listings whose price is missing, non-numeric, or non-positive when computing the median
- ignore listings whose price is missing, non-numeric, or non-positive when computing
the median
- compute the median price from valid priced listings
- classify listings as unstable when `listingPrice.cents < median * 0.8`
- return an object with:
- `results`: listings that remain in the main bucket
- `unstableResults`: listings moved out of the main bucket
Listings excluded from median computation because their price is missing or non-positive remain in `results` unchanged.
Listings excluded from median computation because their price is missing or non-positive
remain in `results` unchanged.
### Scraper Integration
@@ -68,7 +77,8 @@ Default behavior:
Opt-in behavior:
- run the shared classification utility after parsing search results
- classify before final result limiting so unstable items do not consume main-result slots
- classify before final result limiting so unstable items do not consume main-result
slots
- return an object shaped like:
```ts
@@ -82,7 +92,8 @@ Each scraper will use its existing concrete listing subtype for these arrays.
### API Surface
Marketplace API routes will expose an optional query parameter for unstable-listing mode.
Marketplace API routes will expose an optional query parameter for unstable-listing
mode.
Requirements:
@@ -90,7 +101,8 @@ Requirements:
- when enabled, return the object payload with `results` and `unstableResults`
- use the same semantics across Facebook, eBay, and Kijiji routes
The exact parameter name should be consistent across routes and intentionally describe the behavior, for example `unstableFilter=true`.
The exact parameter name should be consistent across routes and intentionally describe
the behavior, for example `unstableFilter=true`.
### MCP Surface
@@ -100,34 +112,43 @@ Tool descriptions should explicitly document:
- that the option is optional
- that it moves listings priced more than 20% below the median into `unstableResults`
- that enabling it changes the response shape from a plain list to an object with `results` and `unstableResults`
- that enabling it changes the response shape from a plain list to an object with
`results` and `unstableResults`
- that the behavior is available for Facebook, eBay, and Kijiji search tools
The wording should be aligned across all three tools so the feature reads as one shared capability.
The wording should be aligned across all three tools so the feature reads as one shared
capability.
### Error Handling
The unstable-listing mode should be best-effort and non-failing.
- If there are no valid positive prices, return all listings in `results` and an empty `unstableResults` array.
- If there are no valid positive prices, return all listings in `results` and an empty
`unstableResults` array.
- If there is only one valid priced listing, do not classify it as unstable.
- Parsing failures remain governed by existing scraper behavior; the classification layer should not introduce new scraper-specific errors.
- Parsing failures remain governed by existing scraper behavior; the classification
layer should not introduce new scraper-specific errors.
### Testing Strategy
Follow TDD.
Start with shared utility tests, then wire the option through scraper and route tests.
Follow TDD. Start with shared utility tests, then wire the option through scraper and
route tests.
Coverage targets:
1. Median calculation for odd-sized valid price sets.
2. Median calculation for even-sized valid price sets.
3. Strict cutoff behavior where only listings with `price < median * 0.8` move to `unstableResults`.
4. Missing, invalid, zero, or negative prices are excluded from median computation and remain in `results`.
3. Strict cutoff behavior where only listings with `price < median * 0.8` move to
`unstableResults`.
4. Missing, invalid, zero, or negative prices are excluded from median computation and
remain in `results`.
5. Default scraper behavior still returns plain arrays when the option is disabled.
6. Enabled scraper behavior returns `{ results, unstableResults }` for Facebook, eBay, and Kijiji.
7. API routes preserve existing response shapes by default and switch to the object payload only when enabled.
8. MCP tool metadata documents the new optional mode for all three marketplace search tools.
6. Enabled scraper behavior returns `{ results, unstableResults }` for Facebook, eBay,
and Kijiji.
7. API routes preserve existing response shapes by default and switch to the object
payload only when enabled.
8. MCP tool metadata documents the new optional mode for all three marketplace search
tools.
Verification target after implementation:
@@ -138,11 +159,15 @@ Verification target after implementation:
## Risks
- The optional mode introduces a union return shape for scraper callers, which can ripple into downstream TypeScript signatures.
- Applying classification before final limiting changes which items appear in the main bucket compared with a naive post-limit split.
- Kijiji and eBay may have different mixes of priced and unpriced results, so excluding non-positive prices from the median must remain explicit and tested.
- The optional mode introduces a union return shape for scraper callers, which can
ripple into downstream TypeScript signatures.
- Applying classification before final limiting changes which items appear in the main
bucket compared with a naive post-limit split.
- Kijiji and eBay may have different mixes of priced and unpriced results, so excluding
non-positive prices from the median must remain explicit and tested.
## Rollout Notes
Land the shared classifier, scraper wiring, route wiring, tests, and MCP description updates together.
That avoids a partial rollout where the feature exists in one surface but is undocumented or inconsistent elsewhere.
Land the shared classifier, scraper wiring, route wiring, tests, and MCP description
updates together. That avoids a partial rollout where the feature exists in one surface
but is undocumented or inconsistent elsewhere.

View File

@@ -0,0 +1,44 @@
# Live Parser Tests Design
## Summary
Add explicit live endpoint tests for each core scraper parser path.
These tests are excluded from normal deterministic test commands and run only through a
dedicated package script.
## Scope
- Add one live suite per parser: eBay, Kijiji, Facebook.
- Place suites under `packages/core/test/live/` so normal
`bun test packages/core/test/*.test.ts` patterns do not include them accidentally.
- Add a root `test:live` script that runs all live suites together.
- Keep existing mocked tests unchanged.
## Behavior
- Each suite calls the public scraper entry point for that marketplace with a narrow
query and low max item count.
- Assertions verify scrape output shape and parser viability, not exact listing
identity.
- eBay and Kijiji require live network access and fail on endpoint/parser breakage.
- Facebook is strict: missing or expired `FACEBOOK_COOKIE` fails the live suite instead
of skipping.
## Test Data
- Use stable broad Canadian queries such as `iphone` or `laptop` to reduce empty-result
risk.
- Use low limits to avoid unnecessary load and rate-limit pressure.
- Avoid exact prices, titles, listing IDs, or ordering assumptions.
## Failure Meaning
- Empty result arrays fail because live parser logic did not produce usable listings.
- Missing required fields fail because adapter contracts depend on those fields.
- Authentication failures fail for Facebook because the Facebook suite is intentionally strict.
## Verification
- Normal suite remains offline: `bun test packages/core/test/*.test.ts` (does not match `test/live/`).
- Live suite runs by explicit script: `bun run test:live`.
- Full static checks remain via `bun run ci`.

View File

@@ -0,0 +1,173 @@
# Facebook Marketplace Anti-Bot Challenge Solver Design
## Summary
Add a challenge-detection and challenge-solving layer to the Facebook Marketplace
scraper so it can handle anti-bot gates (checkpoint pages, token rotation, cookie
requirements) programmatically.
Build the solver in pure Bun — no browser automation in production.
Use `agent-browser` only for one-time debug reconnaissance.
## Goals
- Identify which anti-bot challenge(s) Facebook Marketplace triggers against
programmatic HTTP requests.
- Implement detection + solving for each discovered challenge type.
- Wire the solver into `fetchFacebookItems` and `fetchFacebookItem` so challenges are
handled transparently.
- Follow the same pattern as the existing `ebay-challenge.ts` (detect → solve → retry
with clearance).
- Zero browser automation at runtime.
Pure `fetch` + `Bun` APIs + npm packages only.
## Non-Goals
- Solving login/auth-wall challenges (those require fresh cookies — not solvable
programmatically).
- Full account login automation (cookies must be provided by the user).
- Browser-based scraping or Puppeteer/Playwright integration.
- Solving challenges for non-Marketplace Facebook endpoints.
## Current State
The Facebook scraper (`packages/core/src/scrapers/facebook.ts`) fetches Marketplace
search and item pages via authenticated `fetch` with cookies from the `FACEBOOK_COOKIE`
env var. It:
- Sends a browser-like header set (`sec-ch-ua`, `user-agent`, etc.)
- Parses SSR HTML for embedded JSON in script tags
- Has no challenge detection — if Facebook returns a challenge page, the scraper
silently fails (no listings parsed, classifies as “unknown”)
- Depends entirely on cookie freshness
The eBay scraper already follows the challenge-solver pattern in this codebase:
`ebay.ts` uses `warmEbaySession()`, `isChallengeRedirect()`, `isChallengeHtml()`, and
`solveEbayChallenge()` from `ebay-challenge.ts`.
## Chosen Approach
**Reconnaissance-first development:**
1. Use `agent-browser` (debug only) to capture a real Facebook Marketplace browsing
session via HAR.
2. Probe programmatic `fetch` to see what Facebook returns without a browser.
3. Diff the two to identify the gap (missing headers? missing cookies? missing JS
execution?).
4. Build a modular solver in `packages/core/src/utils/facebook-challenge.ts` that
detects each challenge type and applies the appropriate fix.
5. Wire it into `facebook.ts` following the eBay pattern.
## Design
### File Plan
| File | Purpose |
| --- | --- |
| `packages/core/src/utils/facebook-challenge.ts` | Challenge detection, solving, and cookie/session utilities |
| `packages/core/src/scrapers/facebook.ts` | Modified: warmup, challenge detection before parsing, retry loop |
| `packages/core/test/facebook-challenge.test.ts` | Unit tests with mock challenge HTML fixtures |
### Flow
```
fetchFacebookItems(searchUrl)
├── warmFacebookSession() → GET facebook.com/ (collect datr + Akamai cookies)
├── fetchHtml(searchUrl) → receives response
├── detectFacebookChallenge(response)
│ ├── checkpoint/challenge HTML → solveCheckpointChallenge()
│ ├── redirect to /login → fail (cookies expired)
│ ├── missing required cookies → regenerate session
│ ├── 429 rate limit → backoff + retry (existing http.ts handles this)
│ └── no challenge → proceed to parsing
├── if solveCheckpointChallenge succeeds → retry fetchHtml with clearance cookie
└── parse results
```
### Challenge Types (to be confirmed by reconnaissance)
| Type | Expected Signal | Solving Strategy |
| --- | --- | --- |
| Login wall | Redirect to `/login` or HTML `"You must log in"` | Fail — user must provide fresh cookies |
| Checkpoint page | HTML contains `checkpoint` or `challenge` path | Parse hidden form fields, compute proof-of-work if present, submit answer endpoint |
| `datr` cookie missing | No `datr` in cookie jar → request fails | Fetch homepage first to obtain `datr` (session warmup) |
| DTSG token needed | Form submissions fail with CSRF error | Extract `fb_dtsg` from page HTML, include in request body |
| GraphQL header check | Request blocked without internal headers | Extract `x-fb-friendly-name` from browser HAR, replicate |
| Akamai/bot-manager | Redirect loops or blank pages without Akamai cookies | Homepage warmup to collect `bm_sv`, `bm_mi`, etc. |
### Key Modules
**`facebook-challenge.ts`:**
```
// Session warmup — fetch homepage to prime cookies
warmFacebookSession(): Promise<Record<string, string>>
// Challenge detection
detectFacebookChallenge(html, status, url, headers): ChallengeType | null
// Checkpoint solver
solveCheckpointChallenge(html, cookies): Promise<ChallengeResult>
// DTSG token extraction
extractDtsg(html): string | null
// Cookie jar management (shared with ebay.ts pattern)
mergeCookies(...): Record<string, string>
```
**`ChallengeResult` type:**
```ts
interface ChallengeResult {
solved: boolean;
cookies?: Record<string, string>; // clearance cookies to replay
token?: string; // challenge response token
error?: string; // why it failed
}
```
### Error Handling
- Solver failure → return `ChallengeResult { solved: false, error: "..." }`, scraper
logs warning and returns empty results (never throws).
- Unrecognized challenge → log the response URL and HTML snippet for future analysis.
- Rate limits → handled by existing `http.ts` exponential backoff (no change needed).
- Solver timeout → 30s cap on any challenge computation, fall back to `solved: false`.
### Testing
| Test | What It Verifies |
| --- | --- |
| `detectFacebookChallenge` with sample checkpoint HTML | Correctly identifies checkpoint challenge |
| `detectFacebookChallenge` with normal search HTML | Returns null (no false positives) |
| `detectFacebookChallenge` with login redirect | Identifies auth-gated |
| `solveCheckpointChallenge` with known PoW params | Produces correct answer |
| `warmFacebookSession` with mocked fetch | Collects expected cookies |
| `extractDtsg` with sample page HTML | Extracts the DTSG token |
| Integration: fetch → challenge → solve → retry → results | End-to-end mock flow |
| Solver throws → scraper returns empty, no crash | Graceful fallback |
| Unrecognized challenge → scraper logs warning, returns empty | No unhandled challenge crashes |
Test data will use anonymized HTML fixtures (no real user data).
## Reconnaissance Steps (debug-only, one-time)
1. **Probe programmatically:** `fetch` Marketplace search with/without cookies, record
status code and HTML.
2. **Browser session:** `agent-browser` → log into Facebook → navigate Marketplace →
record HAR.
3. **Diff analysis:** Compare browser request headers vs.
our programmatic headers.
4. **Cookie inventory:** List all cookies from browser session, identify which are
essential.
5. **Challenge trigger:** Identify what change in request signature triggers a
challenge.
6. **Replay test:** Replay the browser's exact request via `fetch` to confirm
headers/cookies are the differentiator.
All reconnaissance artifacts saved under `docs/facebook-challenge/`.
## Decisions Deferred to Post-Reconnaissance
- Exact challenge types and solving strategies (depends on what Facebook actually uses).
- Whether a PoW solver, CAPTCHA solver, or token-extraction approach is needed.
- npm package dependencies (only add what the reconnaissance proves necessary).

View File

@@ -1,21 +1,39 @@
{
"$schema": "https://json.schemastore.org/package.json",
"name": "marketplace-scrapers-monorepo",
"version": "1.0.0",
"private": true,
"type": "module",
"packageManager": "bun@1.3.13",
"scripts": {
"ci": "biome ci",
"clean": "rm -rf dist",
"typecheck": "turbo run typecheck",
"build": "bun run clean && turbo run build",
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp",
"build": "bun run clean && bun run build:all",
"ci": "bun run typecheck && biome check --write",
"test:live": "bun test --cwd packages/core test/live",
"clean": "rm -rf dist",
"start": "./scripts/start.sh"
},
"private": true,
"type": "module",
"workspaces": [
"packages/*"
],
"workspaces": {
"packages": [
"packages/*"
],
"catalog": {
"@tsconfig/bun": "1.0.9",
"@typescript/native-preview": "7.0.0-dev.20260428.1",
"@types/bun": "1.3.13",
"@types/cli-progress": "3.11.6",
"@types/unidecode": "1.1.0"
}
},
"devDependencies": {
"@biomejs/biome": "2.3.11"
"@biomejs/biome": "2.3.11",
"@tsconfig/bun": "catalog:",
"turbo": "2.5.4"
},
"dependencies": {
"@types/bun": "1.3.13"
}
}

View File

@@ -19,5 +19,6 @@
## Verify
- `bun test packages/api-server/test`
- `bun run --cwd packages/api-server build`
- `bun run ci`

View File

@@ -2,18 +2,22 @@
"name": "@marketplace-scrapers/api-server",
"version": "1.0.0",
"type": "module",
"module": "./src/index.ts",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api"
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api",
"typecheck": "bun tsgo"
},
"dependencies": {
"@marketplace-scrapers/core": "workspace:*"
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
},
"devDependencies": {
"@types/bun": "latest"
"@types/bun": "catalog:"
},
"peerDependencies": {
"typescript": "^5"

View File

@@ -1,3 +1,4 @@
import { logger } from "./logger";
import { ebayRoute } from "./routes/ebay";
import { facebookRoute } from "./routes/facebook";
import { kijijiRoute } from "./routes/kijiji";
@@ -27,4 +28,4 @@ const server = Bun.serve({
},
});
console.log(`API Server running on ${server.hostname}:${server.port}`);
logger.log(`API Server running on ${server.hostname}:${server.port}`);

View File

@@ -0,0 +1,10 @@
/** Reports whether the process is running with NODE_ENV set to "test". */
function isTest(): boolean {
  return process.env.NODE_ENV === "test";
}

/**
 * Thin console wrapper used by the API server.
 *
 * Both methods forward their arguments unchanged to the matching `console`
 * method, except when NODE_ENV is "test", in which case they do nothing.
 * NODE_ENV is re-read on every call, so toggling it at runtime takes effect
 * immediately.
 */
export const logger = {
  /** Forwards to `console.log` unless running under test. */
  log(...args: Parameters<typeof console.log>): void {
    if (isTest()) return;
    console.log(...args);
  },
  /** Forwards to `console.error` unless running under test. */
  error(...args: Parameters<typeof console.error>): void {
    if (isTest()) return;
    console.error(...args);
  },
};

View File

@@ -1,91 +1,84 @@
import { fetchEbayItems } from "@marketplace-scrapers/core";
import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseDollarPriceParam,
parseNonNegativeIntegerParam,
} from "./helpers";
/**
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&maxItems={maxItems}&unstableFilter={unstableFilter}
* Search eBay for listings (default: Buy It Now only, Canada only). minPrice/maxPrice are dollar amounts.
*/
export async function ebayRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url);
const SEARCH_QUERY = getRequiredSearchQuery(req);
if (SEARCH_QUERY instanceof Response) {
return SEARCH_QUERY;
}
const minPrice = parseDollarPriceParam(reqUrl.searchParams, "minPrice");
if (minPrice instanceof Response) {
return minPrice;
}
const maxPrice = parseDollarPriceParam(reqUrl.searchParams, "maxPrice");
if (maxPrice instanceof Response) {
return maxPrice;
}
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
const maxItems = parseNonNegativeIntegerParam(
reqUrl.searchParams,
"maxItems",
);
if (maxItems instanceof Response) {
return maxItems;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
const opts = {
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
maxItems,
};
try {
const reqUrl = new URL(req.url);
const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY)
return Response.json(
{
message:
"Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const minPriceParam = reqUrl.searchParams.get("minPrice");
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
if (minPriceParam && (Number.isNaN(minPrice) || minPrice < 0)) {
return Response.json(
{ message: "Invalid minPrice parameter" },
{ status: 400 },
);
if (hideUnstableResults) {
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts, {
hideUnstableResults: true,
});
if (items.results.length === 0 && items.unstableResults.length === 0) {
return emptySearchResponse();
}
return Response.json(items, { status: 200 });
}
const maxPriceParam = reqUrl.searchParams.get("maxPrice");
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
if (maxPriceParam && (Number.isNaN(maxPrice) || maxPrice < 0)) {
return Response.json(
{ message: "Invalid maxPrice parameter" },
{ status: 400 },
);
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts);
const isEmpty = !items || items.length === 0;
if (isEmpty) {
return emptySearchResponse();
}
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
if (maxItemsParam && (Number.isNaN(maxItems) || maxItems < 0)) {
return Response.json(
{ message: "Invalid maxItems parameter" },
{ status: 400 },
);
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
const opts = {
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
maxItems,
};
const items = hideUnstableResults
? await fetchEbayItems(SEARCH_QUERY, 1, opts, {
hideUnstableResults: true,
})
: await fetchEbayItems(SEARCH_QUERY, 1, opts);
const isEmpty = hideUnstableResults
? items.results.length === 0 && items.unstableResults.length === 0
: !items || items.length === 0;
if (isEmpty)
return Response.json(
{ message: "Search didn't return any results!" },
{ status: 404 },
);
return Response.json(items, { status: 200 });
} catch (error) {
console.error("eBay scraping error:", error);
logger.error("eBay scraping error:", error);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });

View File

@@ -1,4 +1,10 @@
import { fetchFacebookItems } from "@marketplace-scrapers/core";
import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseNonNegativeIntegerParam,
} from "./helpers";
/**
* GET /api/facebook?q={query}&location={location}
@@ -7,47 +13,47 @@ import { fetchFacebookItems } from "@marketplace-scrapers/core";
export async function facebookRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url);
const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY)
return Response.json(
{
message: "Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const SEARCH_QUERY = getRequiredSearchQuery(req);
if (SEARCH_QUERY instanceof Response) {
return SEARCH_QUERY;
}
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25;
if (maxItemsParam && (Number.isNaN(maxItems) || maxItems < 0)) {
return Response.json(
{ message: "Invalid maxItems parameter" },
{ status: 400 },
);
const maxItems = parseNonNegativeIntegerParam(
reqUrl.searchParams,
"maxItems",
25,
);
if (maxItems instanceof Response) {
return maxItems;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
try {
const items = hideUnstableResults
? await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems, {
if (hideUnstableResults) {
const items = await fetchFacebookItems(
SEARCH_QUERY,
1,
LOCATION,
maxItems,
{
hideUnstableResults: true,
})
: await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems);
const isEmpty = hideUnstableResults
? items.results.length === 0 && items.unstableResults.length === 0
: !items || items.length === 0;
if (isEmpty)
return Response.json(
{ message: "Search didn't return any results!" },
{ status: 404 },
},
);
if (items.results.length === 0 && items.unstableResults.length === 0) {
return emptySearchResponse();
}
return Response.json(items, { status: 200 });
}
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems);
if (!items || items.length === 0) {
return emptySearchResponse();
}
return Response.json(items, { status: 200 });
} catch (error) {
console.error("Facebook scraping error:", error);
logger.error("Facebook scraping error:", error);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });

View File

@@ -0,0 +1,64 @@
/**
 * Resolves the mandatory search query for a route.
 *
 * The `query` request header takes precedence; when it is absent or empty the
 * `q` URL search parameter is used instead.
 *
 * @param req - Incoming request.
 * @returns The query string, or a 400 JSON `Response` when neither source
 *   provides a non-empty value. Callers distinguish the two with
 *   `instanceof Response`.
 */
export function getRequiredSearchQuery(req: Request): string | Response {
  const fromHeader = req.headers.get("query");
  if (fromHeader) {
    return fromHeader;
  }

  const fromParam = new URL(req.url).searchParams.get("q");
  if (fromParam) {
    return fromParam;
  }

  return Response.json(
    {
      message: "Request didn't have 'query' header or 'q' search parameter!",
    },
    { status: 400 },
  );
}
/**
 * Parses an optional non-negative integer search parameter.
 *
 * Only plain decimal digit strings are accepted: signs, decimals, hex
 * notation, whitespace and the empty string are all rejected. Digit strings
 * whose numeric value is not a safe integer (beyond
 * `Number.MAX_SAFE_INTEGER`) are rejected as well, since `Number()` would
 * otherwise silently round them or produce `Infinity`.
 *
 * @param searchParams - Parsed query string of the request.
 * @param name - Parameter name; also interpolated into the error message.
 * @param defaultValue - Value returned when the parameter is absent.
 * @returns The parsed integer, `defaultValue` (possibly `undefined`) when the
 *   parameter is absent, or a 400 JSON `Response` when the value is invalid.
 */
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue: number,
): number | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue?: number,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return defaultValue;
  }

  // The strict regex is required: Number() alone would accept "", "  ",
  // "0x10" and "1e3".
  const parsed = /^\d+$/.test(rawValue) ? Number(rawValue) : Number.NaN;

  // isSafeInteger also rejects NaN and Infinity, covering both malformed
  // input and digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }
  return parsed;
}
/**
 * Parses an optional dollar-denominated price parameter into integer cents.
 *
 * Accepted input is a non-negative decimal with at most two fractional digits
 * (e.g. `1000`, `999.99`, `0.5`). Anything else is rejected, including the
 * empty string, whitespace, hex notation and more than two decimals. Amounts
 * whose cent value is not a safe integer are rejected too, so an oversized
 * digit string can never reach the caller as an imprecise number or
 * `Infinity`.
 *
 * @param searchParams - Parsed query string of the request.
 * @param name - Parameter name; also interpolated into the error message.
 * @returns The price in cents, `undefined` when the parameter is absent, or a
 *   400 JSON `Response` when the value is invalid.
 */
export function parseDollarPriceParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return undefined;
  }

  // Math.round absorbs binary floating-point error such as
  // 999.99 * 100 === 99998.99999999999.
  const cents = /^\d+(?:\.\d{1,2})?$/.test(rawValue)
    ? Math.round(Number(rawValue) * 100)
    : Number.NaN;

  // isSafeInteger also rejects NaN and Infinity, covering both malformed
  // input and amounts too large to represent exactly.
  if (!Number.isSafeInteger(cents)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }
  return cents;
}
/**
 * Builds the 404 JSON response returned when a search yields no listings.
 *
 * @param hint - Optional guidance appended after the base message, separated
 *   by a single space. An empty or omitted hint leaves the base message as-is.
 * @returns A `Response` with status 404 and a `{ message }` JSON body.
 */
export function emptySearchResponse(hint?: string): Response {
  const baseMessage = "Search didn't return any results!";
  let message = baseMessage;
  if (hint) {
    message = `${baseMessage} ${hint}`;
  }
  return Response.json({ message }, { status: 404 });
}

View File

@@ -1,4 +1,11 @@
import { fetchKijijiItems } from "@marketplace-scrapers/core";
import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseDollarPriceParam,
parseNonNegativeIntegerParam,
} from "./helpers";
/**
* GET /api/kijiji?q={query}
@@ -7,39 +14,26 @@ import { fetchKijijiItems } from "@marketplace-scrapers/core";
export async function kijijiRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url);
const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY)
return Response.json(
{
message: "Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const SEARCH_QUERY = getRequiredSearchQuery(req);
if (SEARCH_QUERY instanceof Response) {
return SEARCH_QUERY;
}
const maxPagesParam = reqUrl.searchParams.get("maxPages");
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5;
if (maxPagesParam && (Number.isNaN(maxPages) || maxPages < 0)) {
return Response.json(
{ message: "Invalid maxPages parameter" },
{ status: 400 },
);
const maxPages = parseNonNegativeIntegerParam(
reqUrl.searchParams,
"maxPages",
5,
);
if (maxPages instanceof Response) {
return maxPages;
}
const priceMinParam = reqUrl.searchParams.get("priceMin");
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined;
if (priceMinParam && (Number.isNaN(priceMin) || priceMin < 0)) {
return Response.json(
{ message: "Invalid priceMin parameter" },
{ status: 400 },
);
const priceMin = parseDollarPriceParam(reqUrl.searchParams, "priceMin");
if (priceMin instanceof Response) {
return priceMin;
}
const priceMaxParam = reqUrl.searchParams.get("priceMax");
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined;
if (priceMaxParam && (Number.isNaN(priceMax) || priceMax < 0)) {
return Response.json(
{ message: "Invalid priceMax parameter" },
{ status: 400 },
);
const priceMax = parseDollarPriceParam(reqUrl.searchParams, "priceMax");
if (priceMax instanceof Response) {
return priceMax;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
@@ -48,52 +42,56 @@ export async function kijijiRoute(req: Request): Promise<Response> {
location: reqUrl.searchParams.get("location") || undefined,
category: reqUrl.searchParams.get("category") || undefined,
keywords: reqUrl.searchParams.get("keywords") || undefined,
sortBy: (reqUrl.searchParams.get("sortBy") as
| "relevancy"
| "date"
| "price"
| "distance"
| undefined) || undefined,
sortOrder: (reqUrl.searchParams.get("sortOrder") as
| "desc"
| "asc"
| undefined) || undefined,
sortBy:
(reqUrl.searchParams.get("sortBy") as
| "relevancy"
| "date"
| "price"
| "distance"
| undefined) || undefined,
sortOrder:
(reqUrl.searchParams.get("sortOrder") as "desc" | "asc" | undefined) ||
undefined,
maxPages,
priceMin,
priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
};
try {
const items = hideUnstableResults
? await fetchKijijiItems(
SEARCH_QUERY,
4, // 4 requests per second for faster scraping
"https://www.kijiji.ca",
searchOptions,
{},
{ hideUnstableResults: true },
)
: await fetchKijijiItems(
SEARCH_QUERY,
4, // 4 requests per second for faster scraping
"https://www.kijiji.ca",
searchOptions,
{},
);
const isEmpty = hideUnstableResults
? items.results.length === 0 && items.unstableResults.length === 0
: !items || items.length === 0;
if (isEmpty)
return Response.json(
{ message: "Search didn't return any results!" },
{ status: 404 },
if (hideUnstableResults) {
const items = await fetchKijijiItems(
SEARCH_QUERY,
4, // 4 requests per second for faster scraping
"https://www.kijiji.ca",
searchOptions,
{},
{ hideUnstableResults: true },
);
if (items.results.length === 0 && items.unstableResults.length === 0) {
return emptySearchResponse(
`Kijiji matches ALL words in the query against listing titles. ` +
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
);
}
return Response.json(items, { status: 200 });
}
const items = await fetchKijijiItems(
SEARCH_QUERY,
4, // 4 requests per second for faster scraping
"https://www.kijiji.ca",
searchOptions,
{},
);
if (!items || items.length === 0) {
return emptySearchResponse(
`Kijiji matches ALL words in the query against listing titles. ` +
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
);
}
return Response.json(items, { status: 200 });
} catch (error) {
console.error("Kijiji scraping error:", error);
logger.error("Kijiji scraping error:", error);
const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 });

View File

@@ -1,8 +1,23 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import { beforeEach, describe, expect, mock, test } from "bun:test";
const fetchFacebookItems = mock(() => Promise.resolve([{ title: "item" }]));
const fetchEbayItems = mock(() => Promise.resolve([{ title: "item" }]));
const fetchKijijiItems = mock(() => Promise.resolve([{ title: "item" }]));
const fetchFacebookItems = mock(
(): Promise<
| { title: string }[]
| { results: { title: string }[]; unstableResults: { title: string }[] }
> => Promise.resolve([{ title: "item" }]),
);
const fetchEbayItems = mock(
(): Promise<
| { title: string }[]
| { results: { title: string }[]; unstableResults: { title: string }[] }
> => Promise.resolve([{ title: "item" }]),
);
const fetchKijijiItems = mock(
(): Promise<
| { title: string }[]
| { results: { title: string }[]; unstableResults: { title: string }[] }
> => Promise.resolve([{ title: "item" }]),
);
mock.module("@marketplace-scrapers/core", () => ({
fetchFacebookItems,
@@ -13,16 +28,19 @@ mock.module("@marketplace-scrapers/core", () => ({
describe("API routes", () => {
beforeEach(() => {
fetchFacebookItems.mockReset();
fetchFacebookItems.mockImplementation(() =>
Promise.resolve([{ title: "item" }]),
fetchFacebookItems.mockImplementation(
() =>
Promise.resolve([{ title: "item" }]) as Promise<{ title: string }[]>,
);
fetchEbayItems.mockReset();
fetchEbayItems.mockImplementation(() =>
Promise.resolve([{ title: "item" }]),
fetchEbayItems.mockImplementation(
() =>
Promise.resolve([{ title: "item" }]) as Promise<{ title: string }[]>,
);
fetchKijijiItems.mockReset();
fetchKijijiItems.mockImplementation(() =>
Promise.resolve([{ title: "item" }]),
fetchKijijiItems.mockImplementation(
() =>
Promise.resolve([{ title: "item" }]) as Promise<{ title: string }[]>,
);
});
@@ -58,7 +76,7 @@ describe("API routes", () => {
});
});
test("kijijiRoute passes cookies query parameter", async () => {
test("kijijiRoute ignores cookies query parameter", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
@@ -80,7 +98,6 @@ describe("API routes", () => {
maxPages: 3,
priceMin: undefined,
priceMax: undefined,
cookies: "s=1",
},
{},
);
@@ -123,17 +140,22 @@ describe("API routes", () => {
),
);
expect(fetchEbayItems).toHaveBeenCalledWith("laptop", 1, {
minPrice: undefined,
maxPrice: undefined,
strictMode: false,
exclusions: [],
keywords: ["laptop"],
buyItNowOnly: true,
canadaOnly: true,
}, {
hideUnstableResults: true,
});
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
{
minPrice: undefined,
maxPrice: undefined,
strictMode: false,
exclusions: [],
keywords: ["laptop"],
buyItNowOnly: true,
canadaOnly: true,
},
{
hideUnstableResults: true,
},
);
});
test("kijijiRoute forwards unstableFilter=true to core", async () => {
@@ -165,7 +187,6 @@ describe("API routes", () => {
maxPages: 5,
priceMin: undefined,
priceMax: undefined,
cookies: undefined,
},
{},
{
@@ -202,9 +223,7 @@ describe("API routes", () => {
const { ebayRoute } = await import("../src/routes/ebay");
await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&buyItNowOnly=true",
),
new Request("http://localhost/api/ebay?q=laptop&buyItNowOnly=true"),
);
expect(fetchEbayItems).toHaveBeenCalledWith("laptop", 1, {
@@ -242,9 +261,7 @@ describe("API routes", () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&maxPages=5",
),
new Request("http://localhost/api/kijiji?q=laptop&maxPages=5"),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
@@ -260,12 +277,29 @@ describe("API routes", () => {
maxPages: 5,
priceMin: undefined,
priceMax: undefined,
cookies: undefined,
},
{},
);
});
test("kijijiRoute forwards dollar price filters to core as cents", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&priceMin=999.99&priceMax=1000",
),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.objectContaining({ priceMin: 99_999, priceMax: 100_000 }),
{},
);
});
test("kijijiRoute does not forward unstableFilter when false", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
@@ -288,7 +322,6 @@ describe("API routes", () => {
maxPages: 5,
priceMin: undefined,
priceMax: undefined,
cookies: undefined,
},
{},
);
@@ -379,23 +412,42 @@ describe("API routes", () => {
expect(response.status).toBe(404);
const body = await response.json();
expect(body.message).toBe("Search didn't return any results!");
expect(body.message).toStartWith("Search didn't return any results!");
expect(body.message).toContain("Kijiji matches ALL words");
});
test("ebayRoute forwards maxItems to core in default mode", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
fetchEbayItems.mockImplementation(() =>
Promise.resolve([{ title: "a" }]),
fetchEbayItems.mockImplementation(() => Promise.resolve([{ title: "a" }]));
await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=2"),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ maxItems: 2 }),
);
});
test("ebayRoute forwards dollar price filters to core as cents", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
fetchEbayItems.mockImplementation(() => Promise.resolve([{ title: "a" }]));
await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&maxItems=2",
"http://localhost/api/ebay?q=macbook&minPrice=999.99&maxPrice=1000",
),
);
expect(fetchEbayItems).toHaveBeenCalledWith("laptop", 1, expect.objectContaining({ maxItems: 2 }));
expect(fetchEbayItems).toHaveBeenCalledWith(
"macbook",
1,
expect.objectContaining({ minPrice: 99_999, maxPrice: 100_000 }),
);
});
test("ebayRoute passes through scraper payload unchanged in unstable mode", async () => {
@@ -419,9 +471,14 @@ describe("API routes", () => {
expect(body.unstableResults).toHaveLength(2);
expect(body.results[0].title).toBe("a");
expect(body.unstableResults[0].title).toBe("d");
expect(fetchEbayItems).toHaveBeenCalledWith("laptop", 1, expect.objectContaining({ maxItems: 4 }), {
hideUnstableResults: true,
});
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ maxItems: 4 }),
{
hideUnstableResults: true,
},
);
});
test("ebayRoute forwards maxItems to core in unstable mode", async () => {
@@ -440,9 +497,14 @@ describe("API routes", () => {
),
);
expect(fetchEbayItems).toHaveBeenCalledWith("laptop", 1, expect.objectContaining({ maxItems: 2 }), {
hideUnstableResults: true,
});
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ maxItems: 2 }),
{
hideUnstableResults: true,
},
);
});
test("ebayRoute returns 404 when unstable results are empty", async () => {
@@ -456,9 +518,7 @@ describe("API routes", () => {
);
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&unstableFilter=true",
),
new Request("http://localhost/api/ebay?q=laptop&unstableFilter=true"),
);
expect(response.status).toBe(404);
@@ -470,9 +530,67 @@ describe("API routes", () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&maxItems=abc",
),
new Request("http://localhost/api/ebay?q=laptop&maxItems=abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for non-integer maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for decimal maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=1.5"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for empty maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for whitespace maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for hex maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=0x10"),
);
expect(response.status).toBe(400);
@@ -484,9 +602,7 @@ describe("API routes", () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request(
"http://localhost/api/facebook?q=laptop&maxItems=abc",
),
new Request("http://localhost/api/facebook?q=laptop&maxItems=abc"),
);
expect(response.status).toBe(400);
@@ -494,13 +610,71 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for invalid minPrice", async () => {
test("facebookRoute returns 400 for non-integer maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for decimal maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=1.5"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for empty maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for whitespace maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for hex maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for empty minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&minPrice=abc",
),
new Request("http://localhost/api/ebay?q=laptop&minPrice="),
);
expect(response.status).toBe(400);
@@ -508,13 +682,35 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for invalid maxPrice", async () => {
test("ebayRoute returns 400 for whitespace minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&maxPrice=abc",
),
new Request("http://localhost/api/ebay?q=laptop&minPrice=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for hex minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for empty maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice="),
);
expect(response.status).toBe(400);
@@ -522,13 +718,171 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for whitespace maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for hex maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for non-integer minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for invalid minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute accepts decimal minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=1.5"),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ minPrice: 150 }),
);
});
test("ebayRoute returns 400 for non-integer maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for invalid maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute accepts decimal maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=1.5"),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ maxPrice: 150 }),
);
});
test("kijijiRoute returns 400 for decimal maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=1.5"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for invalid maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&maxPages=abc",
),
new Request("http://localhost/api/kijiji?q=laptop&maxPages=abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for non-integer maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for empty maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for whitespace maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for hex maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=0x10"),
);
expect(response.status).toBe(400);
@@ -540,9 +894,71 @@ describe("API routes", () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&priceMin=abc",
),
new Request("http://localhost/api/kijiji?q=laptop&priceMin=abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute accepts decimal priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=1.5"),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.objectContaining({ priceMin: 150 }),
{},
);
});
test("kijijiRoute returns 400 for non-integer priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for empty priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for whitespace priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for hex priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=0x10"),
);
expect(response.status).toBe(400);
@@ -554,9 +970,71 @@ describe("API routes", () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&priceMax=abc",
),
new Request("http://localhost/api/kijiji?q=laptop&priceMax=abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute accepts decimal priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=1.5"),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.objectContaining({ priceMax: 150 }),
{},
);
});
test("kijijiRoute returns 400 for non-integer priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute returns 400 for empty priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute returns 400 for whitespace priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute returns 400 for hex priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=0x10"),
);
expect(response.status).toBe(400);
@@ -568,9 +1046,7 @@ describe("API routes", () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request(
"http://localhost/api/facebook?q=laptop&maxItems=-1",
),
new Request("http://localhost/api/facebook?q=laptop&maxItems=-1"),
);
expect(response.status).toBe(400);
@@ -582,9 +1058,7 @@ describe("API routes", () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&maxItems=-1",
),
new Request("http://localhost/api/ebay?q=laptop&maxItems=-1"),
);
expect(response.status).toBe(400);
@@ -596,9 +1070,7 @@ describe("API routes", () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&minPrice=-5",
),
new Request("http://localhost/api/ebay?q=laptop&minPrice=-5"),
);
expect(response.status).toBe(400);
@@ -610,9 +1082,7 @@ describe("API routes", () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request(
"http://localhost/api/ebay?q=laptop&maxPrice=-10",
),
new Request("http://localhost/api/ebay?q=laptop&maxPrice=-10"),
);
expect(response.status).toBe(400);
@@ -624,9 +1094,7 @@ describe("API routes", () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&maxPages=-2",
),
new Request("http://localhost/api/kijiji?q=laptop&maxPages=-2"),
);
expect(response.status).toBe(400);
@@ -638,9 +1106,7 @@ describe("API routes", () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&priceMin=-5",
),
new Request("http://localhost/api/kijiji?q=laptop&priceMin=-5"),
);
expect(response.status).toBe(400);
@@ -652,9 +1118,7 @@ describe("API routes", () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&priceMax=-10",
),
new Request("http://localhost/api/kijiji?q=laptop&priceMax=-10"),
);
expect(response.status).toBe(400);

View File

@@ -1,13 +1,9 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"lib": ["dom"],
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"paths": {
"@/*": ["./src/*"]
},
"strict": true,
"noEmit": true
}
}
},
"include": ["./src", "./test", "../../types/**/*.d.ts"]
}

View File

@@ -18,6 +18,7 @@
- Isolate marketplace-specific hacks/selectors inside the owning scraper file unless they are genuinely shared.
- If a new helper is scraper-local, keep it local. Do not promote it into `utils` early.
- If you change shared types or exports, check downstream imports in both adapter packages.
- eBay SplashUI challenge handling needs raw `fetch` for manual redirects and `getSetCookie()`; switch to `fetchHtml` only after the flow needs nothing but the final HTML.
## Tests

View File

@@ -2,18 +2,24 @@
"name": "@marketplace-scrapers/core",
"version": "1.0.0",
"type": "module",
"main": "./src/index.ts",
"module": "./src/index.ts",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"typecheck": "bun tsgo"
},
"dependencies": {
"@typescript/native-preview": "catalog:",
"argon2-wasm-pro": "1.1.0",
"cli-progress": "^3.12.0",
"linkedom": "^0.18.12",
"unidecode": "^1.1.0"
},
"devDependencies": {
"@types/bun": "latest",
"@types/unidecode": "^1.1.0",
"@types/cli-progress": "^3.11.6"
"@types/bun": "catalog:",
"@types/cli-progress": "catalog:",
"@types/unidecode": "catalog:"
},
"peerDependencies": {
"typescript": "^5"

View File

@@ -39,6 +39,7 @@ export * from "./types/common";
// Export shared utilities
export * from "./utils/cookies";
export * from "./utils/delay";
export * from "./utils/ebay-challenge";
export * from "./utils/format";
export * from "./utils/http";
export * from "./utils/unstable";

View File

@@ -4,13 +4,16 @@ import type {
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable";
import {
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { solveEbayChallenge } from "../utils/ebay-challenge";
import { fetchHtml, HttpError, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
// eBay cookie configuration
const EBAY_COOKIE_CONFIG: CookieConfig = {
@@ -39,12 +42,237 @@ export interface EbayListingDetails {
}
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C|US)\s*\$|\s*[$£¥])/u;
const EBAY_ITEM_URL_RE = /^https?:\/\/(?:www\.)?ebay\.(?:ca|com)\/itm\//u;
/**
 * Decode the small set of HTML entities handled by this scraper
 * (`&amp;`, `&quot;`, `&#39;`, `&lt;`, `&gt;`) and trim the result.
 *
 * All entities are resolved in a single pass so that already-decoded output
 * is never scanned again: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * @param value - Raw text that may contain HTML entities.
 * @returns The decoded string with leading/trailing whitespace removed.
 */
function decodeHtmlEntities(value: string): string {
  const entities: Record<string, string> = {
    "&amp;": "&",
    "&quot;": '"',
    "&#39;": "'",
    "&lt;": "<",
    "&gt;": ">",
  };
  return value
    .replace(
      /&(?:amp|quot|#39|lt|gt);/g,
      // Unknown matches cannot occur given the pattern, but fall back to the
      // original text rather than emitting "undefined".
      (entity) => entities[entity] ?? entity,
    )
    .trim();
}
/**
 * Reduce an HTML fragment to plain text: every tag is replaced by a space,
 * whitespace runs are collapsed to a single space, and the result is passed
 * through `decodeHtmlEntities` (which also trims it).
 */
function stripHtml(value: string): string {
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const collapsed = withoutTags.replace(/\s+/g, " ");
  return decodeHtmlEntities(collapsed);
}
/**
 * Extract the value of one attribute from the text of a single HTML tag.
 *
 * Supports double-quoted, single-quoted and unquoted values; the attribute
 * name is matched case-insensitively and must be preceded by whitespace.
 *
 * @param tag - Raw tag text, e.g. `<a class="x" href="/itm/1">`.
 * @param attrName - Attribute name to look up. It is matched literally:
 *   regex metacharacters in the name are escaped before the pattern is built.
 * @returns The attribute value (possibly an empty string for `attr=""`), or
 *   `null` when the attribute is not present.
 */
function getHtmlAttr(tag: string, attrName: string): string | null {
  // Escape the name so characters such as "." or "(" cannot change (or break)
  // the generated pattern.
  const escapedName = attrName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const attrMatch = tag.match(
    new RegExp(`\\s${escapedName}=(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`, "iu"),
  );
  return attrMatch?.[1] ?? attrMatch?.[2] ?? attrMatch?.[3] ?? null;
}
/**
 * Turn a (possibly relative, possibly entity-encoded) link into an absolute
 * eBay item URL.
 *
 * The input is entity-decoded, resolved against `https://www.ebay.ca`, and
 * accepted only when the resulting href matches `EBAY_ITEM_URL_RE`.
 *
 * @returns The absolute href, or `null` when the URL cannot be parsed or is
 *   not an item URL.
 */
function normalizeEbayUrl(url: string): string | null {
  const candidate = decodeHtmlEntities(url);

  let absoluteHref: string;
  try {
    absoluteHref = new URL(candidate, "https://www.ebay.ca").href;
  } catch {
    // Unparseable URL.
    return null;
  }

  if (!EBAY_ITEM_URL_RE.test(absoluteHref)) {
    return null;
  }
  return absoluteHref;
}
/**
 * Build an `EbayListingDetails` record from raw url/title/price fragments.
 *
 * Title and price are stripped of markup first; the price text is then parsed
 * with `parseEbayPrice`.
 *
 * @returns The listing, or `null` when the URL is not a valid item URL, the
 *   title is empty or the "Shop on eBay" placeholder, or the price cannot be
 *   parsed.
 */
function toEbayListing(
  url: string,
  title: string,
  priceText: string,
): EbayListingDetails | null {
  const itemUrl = normalizeEbayUrl(url);
  const plainTitle = stripHtml(title);
  const amountFormatted = stripHtml(priceText);
  const parsedPrice = parseEbayPrice(amountFormatted);

  const titleIsPlaceholder = plainTitle === "Shop on eBay";
  if (!itemUrl || !plainTitle || titleIsPlaceholder || !parsedPrice) {
    return null;
  }

  const { cents, currency } = parsedPrice;
  return {
    url: itemUrl,
    title: plainTitle,
    listingPrice: { amountFormatted, cents, currency },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    address: null,
  };
}
/**
 * Return the first non-blank string found under any of `keys`, trimmed.
 *
 * Keys are tried in the given order; values that are not strings, or that are
 * empty after trimming, are skipped.
 *
 * @param value - Object to read from.
 * @param keys - Property names to try, in priority order.
 * @returns The trimmed string, or `null` when no key holds a usable string.
 */
function readObjectString(
  value: Record<string, unknown>,
  keys: string[],
): string | null {
  for (const key of keys) {
    const raw = value[key];
    if (typeof raw !== "string") continue;

    const trimmed = raw.trim();
    if (trimmed.length > 0) return trimmed;
  }
  return null;
}
/**
 * Find a price string on a payload object.
 *
 * Lookup order:
 * 1. A plain string under `price`, `currentPrice` or `displayPrice`.
 * 2. For each of `price`, `currentPrice`, `displayPrice`, `priceInfo` that
 *    holds a non-array object: a string under `amount`, `formatted` or
 *    `text`; otherwise its `value` (string or number), prefixed with
 *    `currency`/`currencyCode` and a space when one is present.
 *
 * @returns The price text, or `null` when nothing usable is found.
 */
function readPayloadPrice(value: Record<string, unknown>): string | null {
  const flatPrice = readObjectString(value, [
    "price",
    "currentPrice",
    "displayPrice",
  ]);
  if (flatPrice) return flatPrice;

  const nestedKeys = ["price", "currentPrice", "displayPrice", "priceInfo"];
  for (const nestedKey of nestedKeys) {
    const nested = value[nestedKey];
    const isPlainObject =
      Boolean(nested) && typeof nested === "object" && !Array.isArray(nested);
    if (!isPlainObject) continue;

    const priceFields = nested as Record<string, unknown>;

    const displayText = readObjectString(priceFields, [
      "amount",
      "formatted",
      "text",
    ]);
    if (displayText) return displayText;

    const rawAmount = priceFields.value;
    const currencyCode = readObjectString(priceFields, [
      "currency",
      "currencyCode",
    ]);
    const prefix = currencyCode ? `${currencyCode} ` : "";

    // String amounts are returned untrimmed, exactly as found.
    if (typeof rawAmount === "string" && rawAmount.trim()) {
      return `${prefix}${rawAmount}`;
    }
    if (typeof rawAmount === "number") {
      return `${prefix}${rawAmount}`;
    }
  }

  return null;
}
/**
 * Recursively walk a parsed JSON payload and append every listing found to
 * `results`.
 *
 * An object counts as a listing when it exposes a URL, a title and a price
 * and `toEbayListing` accepts them; such an object is not descended into any
 * further. Every other object/array is searched depth-first.
 *
 * @param value - Any JSON value; non-objects are ignored.
 * @param results - Output array, mutated in place.
 */
function collectPayloadListings(
  value: unknown,
  results: EbayListingDetails[],
): void {
  if (typeof value !== "object" || value === null) return;

  if (Array.isArray(value)) {
    value.forEach((entry) => {
      collectPayloadListings(entry, results);
    });
    return;
  }

  const node = value as Record<string, unknown>;
  const url = readObjectString(node, [
    "itemWebUrl",
    "itemUrl",
    "url",
    "webUrl",
  ]);
  const title = readObjectString(node, ["title", "itemTitle", "name"]);
  const priceText = readPayloadPrice(node);

  const listing =
    url && title && priceText ? toEbayListing(url, title, priceText) : null;
  if (listing) {
    results.push(listing);
    return;
  }

  // Not a listing itself: keep looking inside its properties.
  Object.values(node).forEach((child) => {
    collectPayloadListings(child, results);
  });
}
/**
 * Extract listings from `data-inlinepayload` attributes embedded in the page.
 *
 * Each attribute value is entity-decoded, URI-decoded and parsed as JSON; the
 * resulting tree is searched with `collectPayloadListings`. Payloads that
 * fail to decode or parse are skipped.
 *
 * @returns All listings found across every payload (not de-duplicated).
 */
function parseEmbeddedEbayListings(
  htmlString: HTMLString,
): EbayListingDetails[] {
  const listings: EbayListingDetails[] = [];
  const payloadAttrRe =
    /data-inlinepayload=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/giu;

  for (const [, doubleQuoted, singleQuoted, bare] of htmlString.matchAll(
    payloadAttrRe,
  )) {
    const encoded = doubleQuoted ?? singleQuoted ?? bare;
    if (!encoded) continue;

    try {
      const json = decodeURIComponent(decodeHtmlEntities(encoded));
      collectPayloadListings(JSON.parse(json), listings);
    } catch {
      // eBay inline payloads vary by module; non-JSON payloads are ignored.
    }
  }

  return listings;
}
/**
 * Regex-based fallback parser for eBay "s-card" result markup.
 *
 * The HTML is split into chunks that each start at a `<div>` whose class
 * contains `s-card`. Within each chunk the first `/itm/` link, the first
 * `s-card__title` element and the first `s-card__price` element are read and
 * handed to `toEbayListing`. Chunks missing any of the three, or rejected by
 * `toEbayListing`, are skipped.
 *
 * @returns Listings in document order (not de-duplicated).
 */
function parseSCardHtmlListings(htmlString: HTMLString): EbayListingDetails[] {
const results: EbayListingDetails[] = [];
// A chunk runs lazily from one s-card <div> up to (but not including) the
// next s-card <div>, or `</body>` / `</html>`. The lookahead is mandatory, so
// a trailing card followed by none of those terminators is not matched.
const cardMatches = htmlString.matchAll(
/<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)[\s\S]*?(?=<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)|<\/body>|<\/html>)/giu,
);
for (const cardMatch of cardMatches) {
const cardHtml = cardMatch[0];
// Opening <a> tag whose href contains "/itm/".
const linkTag = cardHtml.match(
/<a\b[^>]*\bhref=(?:"[^"]*\/itm\/[^"]*"|'[^']*\/itm\/[^']*'|[^\s>]*\/itm\/[^\s>]*)[^>]*>/iu,
)?.[0];
// Inner HTML of the title element, up to the first closing tag of any kind.
const titleMatch = cardHtml.match(
/<[^>]*\bclass=(?:"[^"]*\bs-card__title\b[^"]*"|'[^']*\bs-card__title\b[^']*'|[^\s>]*\bs-card__title\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu,
);
// Inner HTML of the price element, up to the first closing tag of any kind.
const priceMatch = cardHtml.match(
/<[^>]*\bclass=(?:"[^"]*\bs-card__price\b[^"]*"|'[^']*\bs-card__price\b[^']*'|[^\s>]*\bs-card__price\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu,
);
if (!linkTag || !titleMatch?.[1] || !priceMatch?.[1]) continue;
const href = getHtmlAttr(linkTag, "href");
if (!href) continue;
// toEbayListing strips markup and validates the url/title/price.
const listing = toEbayListing(href, titleMatch[1], priceMatch[1]);
if (listing) results.push(listing);
}
return results;
}
/**
 * Drop listings whose canonical item URL (per `canonicalizeEbayItemUrl`) has
 * already been seen, keeping the first occurrence and the original order.
 *
 * @returns A new array; the input is not modified.
 */
function dedupeEbayListings(
  listings: EbayListingDetails[],
): EbayListingDetails[] {
  const seenCanonicalUrls = new Set<string>();

  return listings.filter((listing) => {
    const key = canonicalizeEbayItemUrl(listing.url);
    if (seenCanonicalUrls.has(key)) return false;
    seenCanonicalUrls.add(key);
    return true;
  });
}
function canonicalizeEbayItemUrl(url: string): string {
try {
const parsed = new URL(url, "https://www.ebay.ca");
const match = parsed.pathname.match(/\/itm\/(?:[^/?#]+\/)?\d+/);
return match ? `${parsed.origin}${match[0]}` : `${parsed.origin}${parsed.pathname}`;
return match
? `${parsed.origin}${match[0]}`
: `${parsed.origin}${parsed.pathname}`;
} catch {
return url;
}
@@ -99,17 +327,6 @@ function parseEbayPrice(
return { cents, currency };
}
/**
 * Error carrying the HTTP status code and the URL of the request it relates
 * to. `name` is set to `"HttpError"`.
 */
class HttpError extends Error {
  /** HTTP status code associated with the error. */
  public readonly status: number;
  /** URL associated with the error. */
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
// ----------------------------- Parsing -----------------------------
/**
@@ -121,6 +338,11 @@ function parseEbayListings(
exclusions: string[],
strictMode: boolean,
): EbayListingDetails[] {
const embeddedListings = parseEmbeddedEbayListings(htmlString);
if (embeddedListings.length > 0) {
return dedupeEbayListings(embeddedListings);
}
const { document } = parseHTML(htmlString);
const results: EbayListingDetails[] = [];
const seenUrls = new Set<string>();
@@ -272,14 +494,10 @@ function parseEbayListings(
);
// Filter to only elements that actually contain prices (not labels)
const actualPrices: HTMLElement[] = [];
const actualPrices: Element[] = [];
for (const el of allPriceElements) {
const text = el.textContent?.trim();
if (
text &&
EBAY_PRICE_TEXT_RE.test(text) &&
text.length < 50
) {
if (text && EBAY_PRICE_TEXT_RE.test(text) && text.length < 50) {
actualPrices.push(el);
}
}
@@ -303,11 +521,10 @@ function parseEbayListings(
if (nonStrikethroughPrices.length > 0) {
// Use the first non-strikethrough price (sale price)
priceElement = nonStrikethroughPrices[0];
priceElement = nonStrikethroughPrices[0] ?? null;
} else {
// Fallback: use the last price (likely the most current)
const lastPrice = actualPrices[actualPrices.length - 1];
priceElement = lastPrice;
priceElement = actualPrices[actualPrices.length - 1] ?? null;
}
}
}
@@ -357,45 +574,154 @@ function parseEbayListings(
results.push(listing);
seenUrls.add(canonicalUrl);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
logger.warn(`Error parsing eBay listing: ${err}`);
}
}
return results;
if (results.length > 0) {
return results;
}
return dedupeEbayListings(
parseSCardHtmlListings(htmlString).filter((listing) => {
if (
exclusions.some((exclusion) =>
listing.title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
return false;
}
return (
!strictMode ||
keywords.some((keyword) =>
listing.title.toLowerCase().includes(keyword.toLowerCase()),
)
);
}),
);
}
// ----------------------------- Cookie Loading -----------------------------
// ----------------------------- Session & Challenge -----------------------------
/**
* Load eBay cookies from EBAY_COOKIE
* Load eBay cookies from EBAY_COOKIE env var
*/
async function loadEbayCookies(): Promise<string | undefined> {
try {
const cookies = await ensureCookies(EBAY_COOKIE_CONFIG);
return formatCookiesForHeader(cookies, "www.ebay.ca");
} catch {
console.warn(
logger.warn(
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
);
return undefined;
}
}
const EBAY_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
/**
 * Request the eBay homepage with browser-like headers and return the cookies
 * it sets, formatted as a `Cookie` header value (`name=value; name2=value2`).
 *
 * Intended to pick up the edge-layer (Akamai) cookies before any search
 * request is made. Redirects are not followed.
 *
 * @returns The cookie string, or `undefined` when the response is not OK, no
 *   cookies were set, or the request throws.
 */
async function warmEbaySession(): Promise<string | undefined> {
  try {
    const homepage = await fetch("https://www.ebay.ca", {
      headers: {
        "User-Agent": EBAY_UA,
        Accept:
          "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
      },
      redirect: "manual",
    });
    if (!homepage.ok) return undefined;

    // Keep only the leading name=value pair of each Set-Cookie header; a
    // later header with the same name overwrites an earlier one.
    const jar: Record<string, string> = {};
    for (const rawHeader of homepage.headers.getSetCookie?.() ?? []) {
      const parsed = /^([^=]+)=([^;]+)/.exec(rawHeader);
      const name = parsed?.[1];
      const cookieValue = parsed?.[2];
      if (name && cookieValue) jar[name] = cookieValue;
    }

    const pairs = Object.keys(jar).map((name) => `${name}=${jar[name] ?? ""}`);
    return pairs.length === 0 ? undefined : pairs.join("; ");
  } catch {
    return undefined;
  }
}
/**
 * Merge several `Cookie` header strings into one.
 *
 * Each input is split on `;`; segments without a name before the first `=`
 * are ignored. When the same cookie name appears more than once, the last
 * value wins.
 *
 * @param base - Primary cookie string (may be empty).
 * @param additions - Further cookie strings; empty/undefined ones are skipped.
 * @returns The merged cookies as `name=value` pairs joined by `"; "`.
 */
function mergeCookies(
  base: string,
  ...additions: (string | undefined)[]
): string {
  const sources: string[] = [base];
  for (const extra of additions) {
    if (extra) sources.push(extra);
  }

  const merged: Record<string, string> = {};
  for (const source of sources) {
    source.split(";").forEach((segment) => {
      const separator = segment.indexOf("=");
      if (separator <= 0) return;
      const name = segment.substring(0, separator).trim();
      merged[name] = segment.substring(separator + 1).trim();
    });
  }

  return Object.keys(merged)
    .map((name) => `${name}=${merged[name]}`)
    .join("; ");
}
/**
 * Copy the cookies set by a response into `jar`.
 *
 * Only the leading `name=value` pair of each `Set-Cookie` header is kept
 * (attributes such as `Path` are dropped); headers without a non-empty name
 * and value are ignored. Existing entries with the same name are overwritten.
 *
 * @param res - Response whose `Set-Cookie` headers are read.
 * @param jar - Cookie map, mutated in place.
 */
function collectResponseCookies(res: Response, jar: Record<string, string>) {
  const setCookieHeaders = res.headers.getSetCookie?.() ?? [];
  setCookieHeaders.forEach((rawHeader) => {
    const parsed = /^([^=]+)=([^;]+)/.exec(rawHeader);
    if (!parsed) return;
    const [, name, cookieValue] = parsed;
    if (name && cookieValue) jar[name] = cookieValue;
  });
}
/**
 * Serialize a cookie map as a `Cookie` header value: `name=value` pairs
 * joined by `"; "`. An empty map yields an empty string.
 */
function cookiesToString(jar: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [name, cookieValue] of Object.entries(jar)) {
    pairs.push(`${name}=${cookieValue}`);
  }
  return pairs.join("; ");
}
const CHALLENGE_REDIRECT = 307;
const CHALLENGE_MARKER = "splashui/challenge";
/**
 * Tell whether a response is a redirect to the eBay challenge page: its
 * status equals `CHALLENGE_REDIRECT` and its `Location` header contains
 * `CHALLENGE_MARKER`.
 */
function isChallengeRedirect(res: Response): boolean {
  if (res.status !== CHALLENGE_REDIRECT) return false;
  const location = res.headers.get("location") ?? "";
  return location.includes(CHALLENGE_MARKER);
}
/**
 * Heuristic check for a challenge page body: the document is shorter than
 * 50000 characters and contains either the `_crefId` or `_cdetail` marker.
 */
function isChallengeHtml(html: string): boolean {
  if (html.length >= 50000) return false;
  return ["_crefId", "_cdetail"].some((marker) => html.includes(marker));
}
// ----------------------------- Main -----------------------------
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined,
opts: {
minPrice?: number;
maxPrice?: number;
strictMode?: boolean;
exclusions?: string[];
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
maxItems?: number;
} | undefined,
opts:
| {
minPrice?: number;
maxPrice?: number;
strictMode?: boolean;
exclusions?: string[];
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
maxItems?: number;
}
| undefined,
unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<EbayListingDetails>>;
export default async function fetchEbayItems(
@@ -454,7 +780,10 @@ export default async function fetchEbayItems(
return classifyUnstableListings(limitedListings);
};
const cookies = await loadEbayCookies();
// Collect cookies from env var + warm-up session
const envCookies = await loadEbayCookies();
const warmCookies = await warmEbaySession();
const baseCookies = mergeCookies(envCookies ?? "", warmCookies);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({
@@ -475,36 +804,107 @@ export default async function fetchEbayItems(
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
console.log(`Fetching eBay search: ${searchUrl}`);
logger.log(`Fetching eBay search: ${searchUrl}`);
try {
// Use custom headers modeled after real browser requests to bypass bot detection
const headers: Record<string, string> = {
"User-Agent":
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
const searchHeaders: Record<string, string> = {
"User-Agent": EBAY_UA,
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
Priority: "u=0, i",
};
// Add cookies if available (helps bypass bot detection)
if (cookies) {
headers.Cookie = cookies;
if (baseCookies) {
searchHeaders.Cookie = baseCookies;
}
const res = await fetch(searchUrl, {
// Step 1: Make search request (follow redirects for challenge flow)
let res = await fetch(searchUrl, {
method: "GET",
headers,
headers: searchHeaders,
redirect: "manual",
});
const cookieJar: Record<string, string> = {};
// Collect cookies from homepage warm-up
if (baseCookies) {
for (const pair of baseCookies.split(";")) {
const eq = pair.indexOf("=");
if (eq > 0) {
cookieJar[pair.substring(0, eq).trim()] = pair
.substring(eq + 1)
.trim();
}
}
}
// Step 2: Follow challenge redirect if present
if (isChallengeRedirect(res)) {
const chalUrl = res.headers.get("location") ?? "";
collectResponseCookies(res, cookieJar);
logger.log("Challenge detected, fetching challenge page...");
res = await fetch(chalUrl, {
headers: { ...searchHeaders, Cookie: cookiesToString(cookieJar) },
redirect: "manual",
});
collectResponseCookies(res, cookieJar);
}
// Step 3: If response is challenge HTML, solve and submit
const responseHtml = await res.text();
if (isChallengeHtml(responseHtml)) {
logger.log("Solving challenge...");
const result = await solveEbayChallenge(
responseHtml,
cookiesToString(cookieJar),
);
if (result) {
// Merge answer cookies into jar
if (baseCookies) {
searchHeaders.Cookie = mergeCookies(baseCookies, result.cookies);
} else {
searchHeaders.Cookie = result.cookies;
}
logger.log("Challenge solved, retrying search...");
// Delay briefly before retry
await delay(DELAY_MS);
const retryHtml = await fetchHtml(searchUrl, DELAY_MS, {
headers: searchHeaders,
});
const listings = parseEbayListings(
retryHtml,
keywords,
exclusions,
strictMode,
);
const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents;
return (
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
);
});
logger.log(
`Parsed ${filteredListings.length} eBay listings (after challenge).`,
);
return finalizeResults(filteredListings);
}
logger.warn("Challenge solve failed, returning empty results.");
return finalizeResults([]);
}
// Step 4: Normal flow — no challenge
if (!res.ok) {
throw new HttpError(
`Request failed with status ${res.status}`,
@@ -513,31 +913,30 @@ export default async function fetchEbayItems(
);
}
const searchHtml = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
console.log(`\nParsing eBay listings...`);
logger.log(`\nParsing eBay listings...`);
const listings = parseEbayListings(
searchHtml,
responseHtml,
keywords,
exclusions,
strictMode,
);
// Filter by price range (additional safety check)
const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents;
return typeof cents === "number" && cents >= minPrice && cents <= maxPrice;
return (
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
);
});
console.log(`Parsed ${filteredListings.length} eBay listings.`);
logger.log(`Parsed ${filteredListings.length} eBay listings.`);
return finalizeResults(filteredListings);
} catch (err) {
if (err instanceof HttpError) {
console.error(
`Failed to fetch eBay search (${err.status}): ${err.message}`,
if (err instanceof HttpError || err instanceof RateLimitError) {
logger.warn(
`Failed to fetch eBay search (${err instanceof HttpError ? err.statusCode : 429}): ${err.message}`,
);
return finalizeResults([]);
}

View File

@@ -5,24 +5,31 @@ import type {
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable";
import {
type Cookie,
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
loadCookiesOptional,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import {
buildFacebookHeaders,
detectFacebookChallenge,
warmFacebookSession,
} from "../utils/facebook-challenge";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";
import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
/**
* Facebook Marketplace Scraper
*
* Note: Facebook Marketplace requires authentication cookies for full access.
* This implementation will return limited or no results without proper authentication.
* This is by design to respect Facebook's authentication requirements.
* Facebook Marketplace returns search results without authentication when
* proper browser headers are sent. Prices and seller details are hidden on
* search results but are available on individual item pages even without
* auth cookies. For full-price search results, provide FACEBOOK_COOKIE.
*/
// Facebook cookie configuration
@@ -86,7 +93,7 @@ interface FacebookMarketplaceItem {
__typename: "GroupCommerceProductItem";
// Listing content
marketplace_listing_title: string;
marketplace_listing_title?: string;
redacted_description?: {
text: string;
};
@@ -99,7 +106,7 @@ interface FacebookMarketplaceItem {
listing_price?: {
amount: string;
currency: string;
amount_with_offset: string;
amount_with_offset?: string;
};
// Location
@@ -127,9 +134,9 @@ interface FacebookMarketplaceItem {
// Seller information
marketplace_listing_seller?: {
__typename: "User";
id: string;
name: string;
__typename?: "User";
id?: string;
name?: string;
profile_picture?: {
uri: string;
};
@@ -218,17 +225,6 @@ export async function ensureFacebookCookies(): Promise<Cookie[]> {
return ensureCookies(FACEBOOK_COOKIE_CONFIG);
}
/**
 * Error describing a failed HTTP request.
 *
 * Besides the usual message it records the numeric status and the URL that
 * were supplied when the error was created.
 */
class HttpError extends Error {
  /** Numeric status supplied by the code that raised the error. */
  public readonly status: number;

  /** URL supplied by the code that raised the error. */
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    // Override the default "Error" name so logs identify the error kind.
    this.name = "HttpError";
  }
}
// ----------------------------- Extraction Metrics -----------------------------
/**
@@ -260,125 +256,28 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
successRate < 0.8 &&
!extractionStats.lastApiChangeDetected
) {
console.warn(
logger.warn(
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
);
extractionStats.lastApiChangeDetected = new Date();
}
if (!success && itemId) {
console.warn(`Facebook API extraction failed for item ${itemId}`);
logger.warn(`Facebook API extraction failed for item ${itemId}`);
}
}
// ----------------------------- HTTP Client -----------------------------
/**
Fetch HTML with a basic retry strategy and simple rate-limit delay between calls.
- Retries on 429 and 5xx
- Respects X-RateLimit-Reset when present (seconds)
- Supports custom cookies for Facebook authentication
*/
async function fetchHtml(
url: string,
DELAY_MS: number,
opts?: {
maxRetries?: number;
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
cookies?: string;
},
): Promise<{ html: HTMLString; responseUrl: string }> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
let lastRateLimitError: HttpError | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const headers: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"accept-encoding": "gzip, deflate, br",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
};
// Add cookies if provided
if (opts?.cookies) {
headers.cookie = opts.cookies;
}
const res = await fetch(url, {
method: "GET",
headers,
});
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
lastRateLimitError = new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
if (attempt >= maxRetries) {
throw lastRateLimitError;
}
await delay(waitMs);
continue;
}
// For Facebook, 400 often means authentication required
// Don't retry 4xx client errors except 429
if (res.status >= 400 && res.status < 500 && res.status !== 429) {
throw new HttpError(
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
res.status,
url,
);
}
// Retry on 5xx
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await delay((attempt + 1) * retryBaseMs);
continue;
}
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
}
const html = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
return { html, responseUrl: res.url || url };
} catch (err) {
if (err instanceof HttpError) {
throw err;
}
if (attempt >= maxRetries) throw err;
await delay((attempt + 1) * retryBaseMs);
function createFacebookHeaders(cookies: string): Record<string, string> {
const jar: Record<string, string> = {};
if (cookies) {
for (const pair of cookies.split(";")) {
const [name, ...rest] = pair.trim().split("=");
if (name && rest.length > 0) jar[name.trim()] = rest.join("=").trim();
}
}
throw lastRateLimitError ?? new Error("Exhausted retries without response");
return buildFacebookHeaders(jar);
}
// ----------------------------- Parsing -----------------------------
@@ -388,13 +287,29 @@ export type FacebookResponseKind =
| "item"
| "auth_gated"
| "unavailable"
| "checkpoint"
| "unknown";
export function classifyFacebookResponse(
htmlString: HTMLString,
responseUrl: string,
status = 200,
) {
const challengeType = detectFacebookChallenge(
status,
htmlString,
responseUrl,
);
if (challengeType === "checkpoint") {
return {
kind: "checkpoint" as const,
authGated: false,
unavailable: false,
};
}
const authGated =
challengeType === "login_wall" ||
responseUrl.includes("/login/") ||
htmlString.includes("You must log in") ||
htmlString.includes("log in to continue");
@@ -408,7 +323,11 @@ export function classifyFacebookResponse(
htmlString.includes("This listing is no longer available") ||
htmlString.includes("listing has been removed");
if (unavailable) {
return { kind: "unavailable" as const, authGated: false, unavailable: true };
return {
kind: "unavailable" as const,
authGated: false,
unavailable: true,
};
}
if (responseUrl.includes("/marketplace/item/")) {
@@ -455,7 +374,8 @@ function isFacebookSearchEdgeArray(value: unknown): value is FacebookEdge[] {
Array.isArray(value) &&
value.length > 0 &&
value.every(
(edge) => isRecord(edge) && isRecord(edge.node) && isRecord(edge.node.listing),
(edge) =>
isRecord(edge) && isRecord(edge.node) && isRecord(edge.node.listing),
)
);
}
@@ -552,8 +472,7 @@ function scoreMarketplaceItemPath(path: string[]): number {
if (
path.some(
(segment) =>
segment.includes("recommend") || segment.includes("related"),
(segment) => segment.includes("recommend") || segment.includes("related"),
)
) {
score -= 10;
@@ -567,7 +486,9 @@ function collectMarketplaceItemCandidates(
path: string[] = [],
): FacebookMarketplaceItemMatch[] {
if (Array.isArray(candidate)) {
return candidate.flatMap((item) => collectMarketplaceItemCandidates(item, path));
return candidate.flatMap((item) =>
collectMarketplaceItemCandidates(item, path),
);
}
if (!isRecord(candidate)) {
@@ -628,7 +549,9 @@ function extractRenderedText(node: ParentNode, selector: string): string[] {
.filter((text): text is string => Boolean(text));
}
function extractMarketplaceItemIdFromElement(element: Element | null): string | null {
function extractMarketplaceItemIdFromElement(
element: Element | null,
): string | null {
const href = element?.getAttribute("href") || "";
return href.match(FACEBOOK_ITEM_HREF_RE)?.[1] ?? null;
}
@@ -666,7 +589,9 @@ function extractFacebookPermalinkItemId(document: Document): string | null {
return extractMarketplaceItemIdFromElement(itemLinks.at(-1) ?? null);
}
function extractFacebookDescriptionText(document: Document): string | undefined {
function extractFacebookDescriptionText(
document: Document,
): string | undefined {
const labels = Array.from(document.querySelectorAll("div, span, h2, h3, p"));
for (const label of labels) {
@@ -759,7 +684,10 @@ function extractFacebookItemHtmlFallback(
const priceText = texts.find((text) => FACEBOOK_PRICE_TEXT_RE.test(text));
const parsedPrice = priceText ? parseFacebookRenderedPrice(priceText) : null;
const location = texts.find(
(text) => text !== title && text !== priceText && FACEBOOK_LOCATION_TEXT_RE.test(text),
(text) =>
text !== title &&
text !== priceText &&
FACEBOOK_LOCATION_TEXT_RE.test(text),
);
const description = extractFacebookDescriptionText(document);
@@ -807,18 +735,18 @@ export function extractFacebookMarketplaceData(
if (htmlString.includes("XCometMarketplaceSearchController")) {
const htmlFallback = extractFacebookMarketplaceHtmlFallback(htmlString);
if (htmlFallback?.length) {
console.log(
logger.log(
`Successfully parsed ${htmlFallback.length} Facebook marketplace listings from rendered HTML fallback`,
);
return htmlFallback;
}
}
console.warn("No marketplace data found in HTML response");
logger.warn("No marketplace data found in HTML response");
return null;
}
console.log(
logger.log(
`Successfully parsed ${bestEdges.length} Facebook marketplace listings`,
);
return bestEdges.map((edge) => ({ node: edge.node }));
@@ -841,7 +769,8 @@ export function extractFacebookItemData(
if (
!bestMatch ||
match.score > bestMatch.score ||
(match.score === bestMatch.score && match.path.length < bestMatch.path.length)
(match.score === bestMatch.score &&
match.path.length < bestMatch.path.length)
) {
bestMatch = match;
}
@@ -852,6 +781,22 @@ export function extractFacebookItemData(
return bestMatch.item;
}
// Try marketplace_product_details_page.target path (current item page structure)
for (const candidate of candidates) {
const detailsPage = findKeyInObject(
candidate,
"marketplace_product_details_page",
) as Record<string, unknown> | undefined;
const target = detailsPage?.target as Record<string, unknown> | undefined;
if (
target &&
typeof target.id === "string" &&
typeof target.marketplace_listing_title === "string"
) {
return target as unknown as FacebookMarketplaceItem;
}
}
if (htmlString.includes("XCometMarketplacePermalinkController")) {
return extractFacebookItemHtmlFallback(htmlString);
}
@@ -859,6 +804,25 @@ export function extractFacebookItemData(
return null;
}
/**
 * Depth-first search through nested objects and arrays for the first
 * property named `targetKey`.
 *
 * Only a value's OWN properties are considered, so names that exist on
 * `Object.prototype` (e.g. "toString", "constructor") match only when the
 * data itself contains them.
 *
 * The search recurses without cycle detection, so the input must be an
 * acyclic tree.
 *
 * @param obj - Value to search; anything that is not an object or array yields `undefined`.
 * @param targetKey - Property name to look for.
 * @returns The value stored under the first matching key, or `undefined` when
 *   the key is absent. A key that is present but holds `undefined` cannot be
 *   told apart from a missing key.
 */
function findKeyInObject(obj: unknown, targetKey: string): unknown {
  // Covers null, undefined and every primitive in one guard.
  if (obj === null || typeof obj !== "object") return undefined;

  if (Array.isArray(obj)) {
    for (const item of obj) {
      const found = findKeyInObject(item, targetKey);
      if (found !== undefined) return found;
    }
    return undefined;
  }

  const record = obj as Record<string, unknown>;
  // Own-property check: the `in` operator would also match inherited members.
  if (Object.prototype.hasOwnProperty.call(record, targetKey)) {
    return record[targetKey];
  }

  for (const value of Object.values(record)) {
    const found = findKeyInObject(value, targetKey);
    if (found !== undefined) return found;
  }
  return undefined;
}
/**
Parse Facebook marketplace search results into ListingDetails[]
*/
@@ -968,7 +932,7 @@ export function parseFacebookAds(
results.push(listingDetails);
} catch (error) {
console.warn("Failed to parse Facebook ad:", error);
logger.warn("Failed to parse Facebook ad:", error);
}
}
@@ -1069,7 +1033,7 @@ export function parseFacebookItem(
return listingDetails;
} catch (error) {
console.warn(`Failed to parse Facebook item ${item.id}:`, error);
logger.warn(`Failed to parse Facebook item ${item.id}:`, error);
return null;
}
}
@@ -1101,7 +1065,9 @@ export default async function fetchFacebookItems(
const finalizeResults = (
listings: FacebookListingDetails[],
): FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails> => {
):
| FacebookListingDetails[]
| UnstableListingBuckets<FacebookListingDetails> => {
if (!unstableMode.hideUnstableResults) {
return listings.slice(0, MAX_ITEMS);
}
@@ -1113,16 +1079,18 @@ export default async function fetchFacebookItems(
};
};
const cookies = await ensureFacebookCookies();
const warmupCookies = await warmFacebookSession();
const warmupHeader = Object.entries(warmupCookies)
.map(([k, v]) => `${k}=${v}`)
.join("; ");
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
// Format cookies for HTTP header
const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain);
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
);
}
const userCookiesHeader = formatCookiesForHeader(userCookies, domain);
const cookiesHeader = [warmupHeader, userCookiesHeader]
.filter(Boolean)
.join("; ");
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
@@ -1132,53 +1100,76 @@ export default async function fetchFacebookItems(
// Facebook marketplace URL structure
const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;
console.log(`Fetching Facebook marketplace: ${searchUrl}`);
console.log(`Using ${cookies.length} cookies for authentication`);
logger.log(`Fetching Facebook marketplace: ${searchUrl}`);
if (userCookies.length > 0) {
logger.log(`Using ${userCookies.length} cookies for authentication`);
}
let searchHtml: string;
let searchResponseUrl = searchUrl;
try {
const response = await fetchHtml(searchUrl, DELAY_MS, {
maxRetries: 3,
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
logger.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
cookies: cookiesHeader,
});
searchHtml = response.html;
searchResponseUrl = response.responseUrl;
} catch (err) {
if (err instanceof HttpError) {
console.warn(
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
logger.warn(
`\nFacebook marketplace access failed (${err.statusCode}): ${err.message}`,
);
if (err.status === 400 || err.status === 401 || err.status === 403) {
console.warn(
if (
err.statusCode === 400 ||
err.statusCode === 401 ||
err.statusCode === 403
) {
logger.warn(
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
);
}
return finalizeResults([]);
}
if (err instanceof RateLimitError) {
logger.warn(`\nFacebook marketplace access rate limited: ${err.message}`);
return finalizeResults([]);
}
throw err;
}
const classification = classifyFacebookResponse(searchHtml, searchResponseUrl);
const classification = classifyFacebookResponse(
searchHtml,
searchResponseUrl,
);
if (classification.authGated) {
console.warn("Facebook marketplace search redirected to login. Cookies may be expired.");
logger.warn(
"Facebook marketplace search redirected to login. Cookies may be expired.",
);
return finalizeResults([]);
}
if (classification.kind === "checkpoint") {
logger.warn(
"Facebook marketplace returned a checkpoint challenge. This may require manual verification.",
);
return finalizeResults([]);
}
if (classification.unavailable) {
console.warn("Facebook marketplace search returned an unavailable route.");
logger.warn("Facebook marketplace search returned an unavailable route.");
return finalizeResults([]);
}
if (classification.kind !== "search") {
console.warn(
logger.warn(
`Facebook marketplace search returned unexpected route kind: ${classification.kind}.`,
);
return finalizeResults([]);
@@ -1186,11 +1177,11 @@ export default async function fetchFacebookItems(
const ads = extractFacebookMarketplaceData(searchHtml);
if (!ads || ads.length === 0) {
console.warn("No ads parsed from Facebook marketplace page.");
logger.warn("No ads parsed from Facebook marketplace page.");
return finalizeResults([]);
}
console.log(`\nFound ${ads.length} raw ads. Processing...`);
logger.log(`\nFound ${ads.length} raw ads. Processing...`);
const isTTY = process.stdout?.isTTY ?? false;
const progressBar = isTTY
@@ -1204,13 +1195,14 @@ export default async function fetchFacebookItems(
// Filter to only priced items (already done in parseFacebookAds)
const pricedItems = items.filter(
(item) =>
typeof item.listingPrice?.cents === "number" && item.listingPrice.cents >= 0,
typeof item.listingPrice?.cents === "number" &&
item.listingPrice.cents >= 0,
);
progressBar?.update(totalProgress);
progressBar?.stop();
console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
logger.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
return finalizeResults(pricedItems);
}
@@ -1220,80 +1212,101 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem(
itemId: string,
): Promise<FacebookListingDetails | null> {
const cookies = await ensureFacebookCookies();
// Format cookies for HTTP header
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
);
}
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
const cookiesHeader = formatCookiesForHeader(userCookies, "www.facebook.com");
const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`;
console.log(`Fetching Facebook marketplace item: ${itemUrl}`);
logger.log(`Fetching Facebook marketplace item: ${itemUrl}`);
let itemHtml: string;
let itemResponseUrl = itemUrl;
try {
const response = await fetchHtml(itemUrl, 1000, {
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
logger.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
},
cookies: cookiesHeader,
});
itemHtml = response.html;
itemResponseUrl = response.responseUrl;
} catch (err) {
if (err instanceof HttpError) {
console.warn(
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
logger.warn(
`\nFacebook marketplace item access failed (${err.statusCode}): ${err.message}`,
);
// Enhanced error handling based on status codes
switch (err.status) {
switch (err.statusCode) {
case 400:
case 401:
case 403:
console.warn(
logger.warn(
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
);
break;
case 404:
console.warn(
logger.warn(
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
);
break;
case 429:
console.warn(
logger.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
);
break;
case 500:
case 502:
case 503:
console.warn(
logger.warn(
"Facebook server error: Marketplace may be temporarily unavailable.",
);
break;
default:
console.warn(`Unexpected error status: ${err.status}`);
logger.warn(`Unexpected error status: ${err.statusCode}`);
}
return null;
}
if (err instanceof RateLimitError) {
logger.warn(
`\nFacebook marketplace item rate limited for item ${itemId}: ${err.message}`,
);
logger.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
);
return null;
}
throw err;
}
const classification = classifyFacebookResponse(itemHtml, itemResponseUrl);
if (classification.kind === "checkpoint") {
logExtractionMetrics(false, itemId);
logger.warn(
`Checkpoint challenge detected for item ${itemId}. Facebook may be limiting access.`,
);
return null;
}
if (classification.authGated) {
logExtractionMetrics(false, itemId);
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`);
logger.warn(
`Authentication failed for item ${itemId}. Cookies may be expired.`,
);
return null;
}
if (itemResponseUrl.includes("unavailable_product=1")) {
logExtractionMetrics(false, itemId);
logger.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
@@ -1301,13 +1314,15 @@ export async function fetchFacebookItem(
if (classification.unavailable && !itemData) {
logExtractionMetrics(false, itemId);
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
logger.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
if (classification.kind !== "item" && !itemData) {
logExtractionMetrics(false, itemId);
console.warn(
logger.warn(
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
);
return null;
@@ -1317,36 +1332,38 @@ export async function fetchFacebookItem(
logExtractionMetrics(false, itemId);
if (itemHtml.includes("This item has been sold")) {
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
logger.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
console.warn(
logger.warn(
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
);
console.warn(" - The listing was removed or sold");
console.warn(" - Authentication issues");
console.warn(" - Facebook changed their API structure");
console.warn(" - Network or parsing issues");
logger.warn(" - The listing was removed or sold");
logger.warn(" - Authentication issues");
logger.warn(" - Facebook changed their API structure");
logger.warn(" - Network or parsing issues");
return null;
}
logExtractionMetrics(true, itemId);
console.log(`Successfully extracted data for item ${itemId}`);
logger.log(`Successfully extracted data for item ${itemId}`);
const parsedItem = parseFacebookItem(itemData);
if (!parsedItem) {
console.warn(`Failed to parse item ${itemId}: Invalid data structure`);
logger.warn(`Failed to parse item ${itemId}: Invalid data structure`);
return null;
}
// Check for sold/removed status in the parsed data with proper precedence
if (itemData.is_sold) {
console.warn(`Item ${itemId} is marked as sold in the marketplace.`);
logger.warn(`Item ${itemId} is marked as sold in the marketplace.`);
// Still return the data but mark it as sold
parsedItem.listingStatus = "SOLD";
} else if (!itemData.is_live) {
console.warn(`Item ${itemId} is not live/active in the marketplace.`);
logger.warn(`Item ${itemId} is not live/active in the marketplace.`);
parsedItem.listingStatus = itemData.is_hidden
? "HIDDEN"
: itemData.is_pending

View File

@@ -6,12 +6,12 @@ import type {
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable";
import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import {
fetchHtml,
@@ -22,13 +22,14 @@ import {
RateLimitError,
ValidationError,
} from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
// Kijiji cookie configuration, typed as the CookieConfig exported by
// ../utils/cookies.
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
// Marketplace label.
name: "Kijiji",
// Cookie domain for Kijiji.
domain: ".kijiji.ca",
// Name of the environment variable that may carry a raw cookie string.
envVar: "KIJIJI_COOKIE",
// NOTE(review): presumably a JSON cookie file used as an alternative source — confirm in ../utils/cookies.
filePath: "./cookies/kijiji.json",
};
// ----------------------------- Types -----------------------------
@@ -46,6 +47,17 @@ interface ApolloSearchItem {
[k: string]: unknown;
}
/** A single listing attribute entry; both fields may be absent. */
type ListingAttribute = {
  canonicalName?: string;
  canonicalValues?: Array<string>;
};

/**
 * Attribute collection of a listing: either a bare list of entries or an
 * object holding the list under `all`.
 */
type ListingAttributes =
  | Array<ListingAttribute>
  | { all?: Array<ListingAttribute> };
interface ApolloListingRoot {
url?: string;
title?: string;
@@ -68,7 +80,7 @@ interface ApolloListingRoot {
adSource?: string;
flags?: { topAd?: boolean; priceDrop?: boolean };
posterInfo?: { posterId?: string; rating?: number };
attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>;
attributes?: ListingAttributes;
[k: string]: unknown;
}
@@ -203,11 +215,17 @@ const SORT_MAPPINGS: Record<string, string> = {
};
const LOCATION_SLUGS = Object.fromEntries(
Object.entries(LOCATION_MAPPINGS).map(([slug, id]) => [id, slug.replace(/\s+/g, "-")]),
Object.entries(LOCATION_MAPPINGS).map(([slug, id]) => [
id,
slug.replace(/\s+/g, "-"),
]),
) as Record<number, string>;
const CATEGORY_SLUGS = Object.fromEntries(
Object.entries(CATEGORY_MAPPINGS).map(([slug, id]) => [id, slug.replace(/\s+/g, "-")]),
Object.entries(CATEGORY_MAPPINGS).map(([slug, id]) => [
id,
slug.replace(/\s+/g, "-"),
]),
) as Record<number, string>;
// ----------------------------- Utilities -----------------------------
@@ -317,13 +335,22 @@ function findApolloListingKey(
predicate: (value: Record<string, unknown>) => boolean,
): string | undefined {
return Object.keys(apolloState).find((key) => {
if (!key.startsWith("Listing:")) return false;
if (!isListingRecordKey(key)) return false;
const value = apolloState[key];
return isRecord(value) && predicate(value);
});
}
/**
 * Tells whether an Apollo state key names a listing record, i.e. starts with
 * "Listing:" or "StandardListing:".
 */
function isListingRecordKey(key: string): boolean {
  const listingPrefixes = ["Listing:", "StandardListing:"];
  return listingPrefixes.some((prefix) => key.startsWith(prefix));
}
/**
 * Normalizes listing attributes to a plain array.
 *
 * An array input is returned as-is; otherwise the `all` member is returned,
 * falling back to an empty array when the input or `all` is missing.
 */
function getListingAttributes(attributes: ListingAttributes | undefined) {
  return Array.isArray(attributes) ? attributes : (attributes?.all ?? []);
}
/**
* Slugifies a string for Kijiji search URLs
*/
@@ -484,7 +511,7 @@ async function fetchSellerDetails(
};
} catch (err) {
// Silently fail for GraphQL errors - not critical for basic functionality
console.warn(
logger.warn(
`Failed to fetch seller details for ${posterId}:`,
err instanceof Error ? err.message : String(err),
);
@@ -526,7 +553,7 @@ export function parseSearch(
const results: SearchListing[] = [];
for (const [key, value] of Object.entries(apolloState)) {
if (!key.startsWith("Listing:")) continue;
if (!isListingRecordKey(key)) continue;
if (!isRecord(value)) continue;
const item = value as ApolloSearchItem;
@@ -542,78 +569,6 @@ export function parseSearch(
return results;
}
/**
 * Parse a listing page into a typed object (backward compatible).
 *
 * Reads the Apollo state embedded in the page, picks a listing record that
 * has string `url` and `title` fields, and maps it onto the result shape.
 *
 * @param htmlString - HTML of the listing page.
 * @param BASE_URL - Prefix applied to listing URLs that do not start with "http".
 * @returns The parsed listing, or `null` when the Apollo state, a matching
 *   listing record, the URL or the title is missing.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const state = extractApolloState(htmlString);
  if (!state) {
    return null;
  }

  const key = findApolloListingKey(state, (entry) => {
    return typeof entry.url === "string" && typeof entry.title === "string";
  });
  if (!key) {
    return null;
  }

  const record = state[key];
  if (!isRecord(record)) {
    return null;
  }

  const listing = record as ApolloListingRoot;
  const { url, title, price, metrics } = listing;

  // Numeric conversions and price formatting run before URL/title validation.
  const rawAmount = price?.amount;
  const cents = rawAmount != null ? Number(rawAmount) : undefined;
  const amountFormatted =
    cents === undefined ? undefined : formatCentsToCurrency(cents, "en-CA");

  const rawViews = metrics?.views;
  const numberOfViews = rawViews != null ? Number(rawViews) : undefined;

  // Absolute URLs pass through; anything else is treated as a path on BASE_URL.
  let listingUrl = "";
  if (typeof url === "string") {
    listingUrl = url.startsWith("http") ? url : `${BASE_URL}${url}`;
  }

  if (!listingUrl || !title) {
    return null;
  }

  // Non-finite numbers (e.g. NaN from unparsable input) are reported as undefined.
  const hasFiniteCents = cents !== undefined && Number.isFinite(cents);
  const hasFiniteViews =
    numberOfViews !== undefined && Number.isFinite(numberOfViews);

  return {
    url: listingUrl,
    title,
    description: listing.description,
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents: hasFiniteCents ? cents : undefined,
          currency: price?.currency,
        }
      : undefined,
    listingType: listing.type,
    listingStatus: listing.status,
    creationDate: listing.activationDate,
    endDate: listing.endDate,
    numberOfViews: hasFiniteViews ? numberOfViews : undefined,
    address: listing.location?.address ?? null,
  };
}
/**
* Parse a listing page into a detailed object with all available fields
*/
@@ -683,11 +638,9 @@ export async function parseDetailedListing(
// Extract attributes as key-value pairs
const attributeMap: Record<string, string[]> = {};
if (Array.isArray(attributes)) {
for (const attr of attributes) {
if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) {
attributeMap[attr.canonicalName] = attr.canonicalValues;
}
for (const attr of getListingAttributes(attributes)) {
if (attr.canonicalName && Array.isArray(attr.canonicalValues)) {
attributeMap[attr.canonicalName] = attr.canonicalValues;
}
}
@@ -718,7 +671,7 @@ export async function parseDetailedListing(
};
} catch {
// Silently fail - GraphQL data is optional
console.warn(
logger.warn(
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
);
}
@@ -816,7 +769,10 @@ export default async function fetchKijijiItems(
: undefined;
// Set defaults for configuration
const finalSearchOptions: Omit<Required<SearchOptions>, "priceMin" | "priceMax"> & {
const finalSearchOptions: Omit<
Required<SearchOptions>,
"priceMin" | "priceMax"
> & {
priceMin?: number;
priceMax?: number;
} = {
@@ -852,11 +808,11 @@ export default async function fetchKijijiItems(
BASE_URL,
);
console.log(`Fetching search page ${page}: ${searchUrl}`);
logger.log(`Fetching search page ${page}: ${searchUrl}`);
const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
logger.log(
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
@@ -866,9 +822,17 @@ export default async function fetchKijijiItems(
const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) {
console.log(
`No more results found on page ${page}. Stopping pagination.`,
);
if (page === 1) {
logger.log(
`No results found on page 1. The search URL was: ${searchUrl}\n` +
`Tip: Kijiji matches ALL words in the query against listing titles. ` +
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
);
} else {
logger.log(
`No more results found on page ${page}. Stopping pagination.`,
);
}
break;
}
@@ -881,7 +845,7 @@ export default async function fetchKijijiItems(
seenUrls.add(link);
}
console.log(
logger.log(
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
);
@@ -903,14 +867,14 @@ export default async function fetchKijijiItems(
const batchPromises = batch.map(async (link, batchIndex) => {
try {
if (batchIndex > 0) {
await new Promise((resolve) => setTimeout(resolve, DELAY_MS * batchIndex));
await delay(DELAY_MS * batchIndex);
}
const html = await fetchHtml(link, 0, {
// Staggered starts keep request pacing within REQUESTS_PER_SECOND.
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
console.log(
logger.log(
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
);
}
@@ -925,11 +889,11 @@ export default async function fetchKijijiItems(
return parsed;
} catch (err) {
if (err instanceof HttpError) {
console.error(
logger.warn(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
);
} else {
console.error(
logger.warn(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
);
}
@@ -938,7 +902,7 @@ export default async function fetchKijijiItems(
currentProgress++;
progressBar?.update(currentProgress);
if (!progressBar) {
console.log(`Progress: ${currentProgress}/${totalProgress}`);
logger.log(`Progress: ${currentProgress}/${totalProgress}`);
}
}
});
@@ -947,9 +911,8 @@ export default async function fetchKijijiItems(
results.push(...batchResults);
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
await delay(DELAY_MS);
}
}
allListings.push(
@@ -968,9 +931,7 @@ export default async function fetchKijijiItems(
matchesPriceFilters(listing, finalSearchOptions),
);
console.log(
`\nParsed ${filteredListings.length} detailed listings.`,
);
logger.log(`\nParsed ${filteredListings.length} detailed listings.`);
return finalizeResults(filteredListings);
}

View File

@@ -2,9 +2,12 @@
* Shared cookie handling utilities for marketplace scrapers
*/
import { logger } from "./logger";
export interface Cookie {
name: string;
value: string;
rawValue?: string;
domain: string;
path: string;
secure?: boolean;
@@ -41,9 +44,9 @@ export function parseCookieString(
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
.map((pair): Cookie | null => {
const [name, ...valueParts] = pair.split("=");
const trimmedName = name.trim();
const trimmedName = name?.trim();
const trimmedValue = valueParts.join("=").trim();
if (!trimmedName || !trimmedValue) {
@@ -53,6 +56,7 @@ export function parseCookieString(
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
rawValue: trimmedValue,
domain,
path: "/",
secure: true,
@@ -93,19 +97,30 @@ export function formatCookiesForHeader(
});
return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`)
.map((cookie) => `${cookie.name}=${cookie.rawValue ?? cookie.value}`)
.join("; ");
}
/**
* Load cookies from the configured environment variable
* Load cookies from the configured environment variable or explicit cookie string
*/
export async function ensureCookies(config: CookieConfig): Promise<Cookie[]> {
export async function ensureCookies(
config: CookieConfig,
cookiesSource?: string,
): Promise<Cookie[]> {
// Explicit cookie string takes priority
if (cookiesSource) {
const cookies = parseCookieString(cookiesSource, config.domain);
if (cookies.length > 0) {
return cookies;
}
}
const envValue = process.env[config.envVar];
const cookies = parseCookieString(envValue ?? "", config.domain);
if (cookies.length > 0) {
console.log(
logger.log(
`Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
);
return cookies;

View File

@@ -4,5 +4,7 @@
* @returns A promise that resolves after the specified delay
*/
export function delay(ms: number): Promise<void> {
  // Under NODE_ENV=test no timer is scheduled; the promise is already resolved.
  const skipTimer = process.env.NODE_ENV === "test";
  return skipTimer
    ? Promise.resolve()
    : new Promise((wake) => setTimeout(wake, ms));
}

View File

@@ -0,0 +1,239 @@
import argon2 from "argon2-wasm-pro";
// ------------------ Types ------------------
/**
 * Proof-of-work parameters read from the `details` object of the page's
 * `_cdetail` JSON. Field names mirror the keys used by the page.
 */
interface ChallengeDetails {
/** Passed to argon2 as `time`. */
p2: number;
/** Length in bytes of each nonce the solver generates. */
p6: number;
/** Passed to argon2 as `hashLen`. */
p7: number;
/** Base64 string; its decoded bytes are passed to argon2 as `salt`. */
p9: string;
/** Base64 string; its decoded bytes are the target the hash is compared against. */
p11: string;
/** Passed to argon2 as `parallelism`. */
p12: number;
/** Passed to argon2 as `mem`. */
p13: number;
/** Number of accepted nonces (answers) the solver collects before returning. */
p15: number;
}
/** Everything parsed from a challenge page that is needed to solve and submit it. */
interface ChallengeParams {
/** Value of the `_crefId` hidden field; submitted as `referenceId`. */
crefId: string;
/** Proof-of-work parameters decoded from the `_cdetail` hidden field. */
cdetail: ChallengeDetails;
/** Value of the `_iid` hidden field. */
iid: string;
/** Value of `_chlghost`; used as the origin of the answer endpoint. */
chlghost: string;
/** Value of the `_appName` hidden field. */
appName: string;
/** Value of the `_p` hidden field. */
p: string;
/** `action` attribute of the `destForm` form; submitted as `ru`. */
destUrl: string;
}
/** Outcome of a successfully answered challenge. */
interface ChallengeResult {
/** `name=value` pairs from the answer response's Set-Cookie headers, joined with "; ". */
cookies: string;
}
// ------------------ Helpers ------------------
/**
 * Compares the first `len` positions of `a` and `b`, treating missing entries
 * as 0.
 *
 * @returns 0 when no position differs; otherwise the difference between the
 *   low bytes of the two values at the first differing position.
 */
function memcmp(a: Uint8Array, b: number[], len: number): number {
  let position = 0;
  while (position < len) {
    const left = a[position] ?? 0;
    const right = b[position] ?? 0;
    if (left !== right) {
      return (left & 0xff) - (right & 0xff);
    }
    position += 1;
  }
  return 0;
}
/**
 * Writes `val` as four big-endian bytes into `arr`, starting at `offset`.
 * Each stored element keeps only the low 8 bits of the shifted value.
 */
function intToBytes(val: number, arr: Uint8Array, offset: number) {
  for (let byteIndex = 0; byteIndex < 4; byteIndex++) {
    const shift = 24 - 8 * byteIndex;
    arr[offset + byteIndex] = val >>> shift;
  }
}
/** Returns the UTF-16 code unit of every position in `str`, in order. */
function string2Bin(str: string): number[] {
  return Array.from({ length: str.length }, (_slot, position) =>
    str.charCodeAt(position),
  );
}
/**
 * Encodes raw bytes as a base64 string.
 *
 * The bytes are converted to a binary string in fixed-size chunks: spreading
 * the whole buffer into a single `String.fromCharCode` call passes one
 * argument per byte and overflows the call stack for large buffers.
 *
 * @param buf - Bytes to encode.
 * @returns The base64 encoding of `buf` ("" for an empty buffer).
 */
function bufferToBase64(buf: Uint8Array): string {
  const CHUNK_SIZE = 0x8000;
  let binary = "";
  for (let start = 0; start < buf.length; start += CHUNK_SIZE) {
    binary += String.fromCharCode(...buf.subarray(start, start + CHUNK_SIZE));
  }
  return btoa(binary);
}
/**
 * Collects the leading `name=value` pair of each Set-Cookie header line into
 * a name → value map. Lines without a non-empty name and value are skipped;
 * a later line with the same name overwrites an earlier one.
 */
function parseCookiesFromSetCookie(cookies: string[]): Record<string, string> {
  const leadingPair = /^([^=]+)=([^;]+)/;
  return cookies.reduce<Record<string, string>>((jar, setCookieLine) => {
    const captured = leadingPair.exec(setCookieLine);
    const cookieName = captured?.[1];
    const cookieValue = captured?.[2];
    if (cookieName && cookieValue) {
      jar[cookieName] = cookieValue;
    }
    return jar;
  }, {});
}
// ------------------ Default headers ------------------
// User-agent string sent with the challenge answer request (desktop Chrome 131 on Linux).
const BROWSER_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
// Browser-like default request headers.
// NOTE(review): not referenced anywhere in the visible part of this module; the
// leading underscore presumably marks it as intentionally unused — confirm.
const _EBAY_HEADERS: Record<string, string> = {
"User-Agent": BROWSER_UA,
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
};
// ------------------ Parser ------------------
/**
 * Extracts the challenge parameters from a challenge page.
 *
 * Hidden fields are located by `id=<name> value=<value>` (single-quoted,
 * double-quoted, or unquoted value); the destination URL is the `action`
 * attribute of the `destForm` form.
 *
 * @param html - Raw HTML of the challenge page.
 * @returns The parsed parameters, or `null` when `_crefId` or `_cdetail` is
 *   missing, when `_cdetail` is not JSON with a `details` object, or when any
 *   detail field is unusable (a numeric field that is not a finite number,
 *   or `p9`/`p11` not being strings). Rejecting these here keeps `NaN` sizes
 *   and non-base64 inputs away from the proof-of-work solver.
 */
export function parseChallengePage(html: string): ChallengeParams | null {
  const getHidden = (id: string): string => {
    const re = new RegExp(
      `id=${id}\\s+value='([^']*)'` +
        `|id=${id}\\s+value="([^"]*)"` +
        `|id=${id}\\s+value=([^\\s>]+)`,
      "i",
    );
    const m = html.match(re);
    if (!m) return "";
    return m[1] ?? m[2] ?? m[3] ?? "";
  };

  const crefId = getHidden("_crefId");
  const cdetailRaw = getHidden("_cdetail");
  const iid = getHidden("_iid");
  const chlghost = getHidden("_chlghost");
  const appName = getHidden("_appName");
  const p = getHidden("_p");
  const formActionMatch = html.match(
    /<form\s+id=destForm\s+[^>]*action=([^\s>]+)/i,
  );
  const destUrl = formActionMatch?.[1]?.trim() ?? "";

  if (!crefId || !cdetailRaw) return null;

  let rawDetails: unknown;
  try {
    const parsed: unknown = JSON.parse(cdetailRaw);
    rawDetails = (parsed as { details?: unknown } | null)?.details;
  } catch {
    return null;
  }
  if (typeof rawDetails !== "object" || rawDetails === null) return null;

  const d = rawDetails as Record<string, unknown>;
  const { p9, p11 } = d;
  // Both values are base64-decoded by the solver, so they must be strings.
  if (typeof p9 !== "string" || typeof p11 !== "string") return null;

  const cdetail: ChallengeDetails = {
    p2: Number(d.p2),
    p6: Number(d.p6),
    p7: Number(d.p7),
    p9,
    p11,
    p12: Number(d.p12),
    p13: Number(d.p13),
    p15: Number(d.p15),
  };
  const numericFields = [
    cdetail.p2,
    cdetail.p6,
    cdetail.p7,
    cdetail.p12,
    cdetail.p13,
    cdetail.p15,
  ];
  // A missing or non-numeric field converts to NaN; refuse it instead of
  // handing it to the solver.
  if (!numericFields.every((value) => Number.isFinite(value))) return null;

  return {
    crefId,
    cdetail,
    iid,
    chlghost: chlghost || "https://www.ebay.ca",
    appName: appName || "orch",
    p,
    destUrl,
  };
}
// ------------------ Solver ------------------
/**
 * Searches for nonces whose argon2 hash compares less than or equal to the
 * challenge target, and returns them base64-encoded.
 *
 * Each candidate nonce is `p6` random bytes whose last four bytes are
 * overwritten with a big-endian counter. The counter is incremented after
 * every rejected hash; after an accepted hash a fresh random nonce is drawn
 * and the counter restarts at 0.
 *
 * NOTE(review): the loop has no iteration cap, so it runs until `p15` answers
 * are found. If `p6` were smaller than 4 the counter writes would target
 * negative indices, which typed arrays ignore, so the candidate would never
 * change — confirm the page never sends such a value.
 *
 * @param cdetail - Proof-of-work parameters from the challenge page.
 * @returns `p15` accepted nonces, each base64-encoded.
 */
async function solveArgon2Challenge(
cdetail: ChallengeDetails,
): Promise<string[]> {
// Target bytes: character codes of the base64-decoded `p11`.
const targetBytes = string2Bin(atob(cdetail.p11));
const targetLen = targetBytes.length;
const nonceLen = cdetail.p6;
const answerCount = cdetail.p15;
// Salt bytes: base64-decoded `p9`.
const salt = new Uint8Array(
Uint8Array.from(atob(cdetail.p9), (c) => c.charCodeAt(0)),
);
const answers: string[] = [];
// First candidate: random bytes with the trailing 4-byte counter reset to 0.
let nonce = new Uint8Array(nonceLen);
crypto.getRandomValues(nonce);
intToBytes(0, nonce, nonce.length - 4);
let counter = 0;
while (answers.length < answerCount) {
const result = await argon2.hash({
pass: nonce,
salt,
time: cdetail.p2,
mem: cdetail.p13,
hashLen: cdetail.p7,
parallelism: cdetail.p12,
// NOTE(review): presumably selects the Argon2id variant — confirm against the argon2-wasm-pro type enum.
type: 2,
});
const hashBytes = result.hash as Uint8Array;
// Accept when the first `targetLen` hash bytes do not exceed the target.
if (memcmp(hashBytes, targetBytes, targetLen) <= 0) {
answers.push(bufferToBase64(nonce));
// Start the next answer from a fresh random nonce.
nonce = new Uint8Array(nonceLen);
crypto.getRandomValues(nonce);
intToBytes(0, nonce, nonce.length - 4);
counter = 0;
} else {
// Rejected: bump the counter stored in the nonce's last four bytes.
counter++;
intToBytes(counter, nonce, nonce.length - 4);
}
}
return answers;
}
// ------------------ Public API ------------------
/**
 * Solves the challenge embedded in `html` and submits the answers.
 *
 * @param html - Raw HTML of the challenge page.
 * @param cookieHeader - Optional Cookie header value to send with the answer.
 * @returns The cookies set by the answer response as a single
 *   `name=value; name=value` string, or `null` when the page cannot be
 *   parsed, the answer request is not OK, or no cookies are returned.
 */
export async function solveEbayChallenge(
  html: string,
  cookieHeader?: string,
): Promise<ChallengeResult | null> {
  const challenge = parseChallengePage(html);
  if (!challenge) {
    return null;
  }

  const solvedNonces = await solveArgon2Challenge(challenge.cdetail);
  const encodedAnswers = encodeURIComponent(solvedNonces.join(","));
  const payload = {
    iid: challenge.iid,
    appName: challenge.appName,
    referenceId: challenge.crefId,
    pvt: Date.now().toString(),
    crt: Date.now().toString(),
    encodedAnswers,
    p: challenge.p,
    ru: challenge.destUrl,
  };

  const requestHeaders: Record<string, string> = {
    "content-type": "application/json",
    accept: "application/json, text/plain, */*",
    "user-agent": BROWSER_UA,
    // Forward the caller's cookies only when some were supplied.
    ...(cookieHeader ? { cookie: cookieHeader } : {}),
  };

  const answerUrl = `${challenge.chlghost}/splashui/challengesvc/answer`;
  const response = await fetch(answerUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    return null;
  }

  // Collect cookies from answer response
  const setCookieLines = response.headers.getSetCookie?.() ?? [];
  const jar = parseCookiesFromSetCookie(setCookieLines);
  const pairs = Object.entries(jar).map(([name, value]) => `${name}=${value}`);
  if (pairs.length === 0) {
    return null;
  }
  return { cookies: pairs.join("; ") };
}

View File

@@ -0,0 +1,128 @@
// Facebook Marketplace session & challenge utilities
// ------------------ Types ------------------
/** Classification of a Facebook response as produced by `detectFacebookChallenge`. */
export type ChallengeType =
// Response URL contains "/login/" or the HTML carries a log-in prompt.
| "login_wall"
// Response URL contains "/checkpoint/" or the HTML mentions both "checkpoint" and "challenge".
| "checkpoint"
// HTTP status 400.
| "bad_headers"
// HTTP status 429.
| "rate_limited"
// None of the above matched.
| "none";
// ------------------ Constants ------------------
// Browser-like request headers (Chrome 131 on Linux, top-level document navigation).
// Sent as-is by the session warm-up request and used as the base set when
// building headers for other requests.
const FACEBOOK_BROWSER_HEADERS: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"sec-ch-ua":
'"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Linux"',
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
};
// ------------------ Cookie Management ------------------
/**
 * Builds a name → value cookie map from Set-Cookie header lines.
 *
 * Only the part before the first ";" of each line is used; the name and the
 * value are trimmed, and entries with an empty name or value are dropped.
 * A later line with the same name overwrites an earlier one.
 */
function parseSetCookies(setCookieHeaders: string[]): Record<string, string> {
  const jar: Record<string, string> = {};
  setCookieHeaders.forEach((rawHeader) => {
    const [leading = ""] = rawHeader.split(";");
    const pair = leading.trim();
    const separator = pair.indexOf("=");
    if (!pair || separator === -1) {
      return;
    }
    const cookieName = pair.slice(0, separator).trim();
    const cookieValue = pair.slice(separator + 1).trim();
    if (cookieName && cookieValue) {
      jar[cookieName] = cookieValue;
    }
  });
  return jar;
}
/** Serializes a cookie map as a Cookie header value: `name=value` pairs joined by "; ". */
function cookiesToHeader(cookies: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [cookieName, cookieValue] of Object.entries(cookies)) {
    pairs.push(`${cookieName}=${cookieValue}`);
  }
  return pairs.join("; ");
}
// ------------------ Session Warmup ------------------
/**
 * Requests the Facebook home page with browser-like headers and returns the
 * cookies it sets.
 *
 * Redirects are not followed and the request is aborted after 10 seconds.
 * This is best-effort: any failure yields an empty cookie map.
 */
export async function warmFacebookSession(): Promise<Record<string, string>> {
  const homepageUrl = "https://www.facebook.com/";
  try {
    const response = await fetch(homepageUrl, {
      method: "GET",
      headers: FACEBOOK_BROWSER_HEADERS,
      redirect: "manual",
      signal: AbortSignal.timeout(10000),
    });
    const setCookieLines = response.headers.getSetCookie?.() ?? [];
    const jar = parseSetCookies(setCookieLines);
    return jar;
  } catch {
    return {};
  }
}
// ------------------ Challenge Detection ------------------
/**
 * Classifies a Facebook response.
 *
 * Checks run in this order: HTTP status (400, then 429), log-in wall (by
 * response URL or page text), checkpoint (by response URL or page text).
 *
 * @param status - HTTP status code of the response.
 * @param html - Response body.
 * @param responseUrl - URL the response was served from.
 * @returns The first matching challenge type, or "none".
 */
export function detectFacebookChallenge(
  status: number,
  html: string,
  responseUrl: string,
): ChallengeType {
  switch (status) {
    case 400:
      return "bad_headers";
    case 429:
      return "rate_limited";
    default:
      break;
  }

  const sentToLogin = responseUrl.includes("/login/");
  const showsLoginPrompt =
    html.includes("You must log in") || html.includes("log in to continue");
  if (sentToLogin || showsLoginPrompt) {
    return "login_wall";
  }

  const sentToCheckpoint = responseUrl.includes("/checkpoint/");
  const mentionsCheckpoint =
    html.includes("checkpoint") && html.includes("challenge");
  return sentToCheckpoint || mentionsCheckpoint ? "checkpoint" : "none";
}
// ------------------ Header Construction ------------------
/**
 * Builds request headers for Facebook: the browser-like base headers, then a
 * `cookie` header when the jar is non-empty, then any extra headers.
 *
 * @param cookieJar - Cookie name → value map to send.
 * @param extraHeaders - Optional headers applied last, overriding earlier
 *   entries with the same key.
 * @returns A new headers object.
 */
export function buildFacebookHeaders(
  cookieJar: Record<string, string>,
  extraHeaders?: Record<string, string>,
): Record<string, string> {
  const cookieString = cookiesToHeader(cookieJar);
  return {
    ...FACEBOOK_BROWSER_HEADERS,
    ...(cookieString ? { cookie: cookieString } : {}),
    ...(extraHeaders ?? {}),
  };
}

View File

@@ -1,56 +1,56 @@
import type { HTMLString } from "../types/common";
import { delay } from "./delay";
/** Custom error class for HTTP-related failures */
export class HttpError extends Error {
override name = "HttpError";
constructor(
message: string,
public readonly statusCode: number,
public readonly url?: string,
) {
super(message);
this.name = "HttpError";
}
}
/** Error class for network failures (timeouts, connection issues) */
export class NetworkError extends Error {
override name = "NetworkError";
constructor(
message: string,
public readonly url: string,
public readonly cause?: Error,
public override readonly cause?: Error,
) {
super(message);
this.name = "NetworkError";
}
}
/** Error class for parsing failures */
export class ParseError extends Error {
override name = "ParseError";
constructor(
message: string,
public readonly data?: unknown,
) {
super(message);
this.name = "ParseError";
}
}
/** Error class for rate limiting */
export class RateLimitError extends Error {
override name = "RateLimitError";
constructor(
message: string,
public readonly url: string,
public readonly resetTime?: number,
) {
super(message);
this.name = "RateLimitError";
}
}
/** Error class for validation failures */
export class ValidationError extends Error {
constructor(message: string) {
super(message);
this.name = "ValidationError";
}
override name = "ValidationError";
}
/** Type guard to check if a value is a record (object) */
@@ -61,10 +61,57 @@ export function isRecord(value: unknown): value is Record<string, unknown> {
/**
* Calculate exponential backoff delay with jitter
*/
function calculateBackoffDelay(attempt: number, baseMs: number): number {
function calculateBackoffDelay(
attempt: number,
baseMs: number,
jitter: () => number = Math.random,
): number {
const exponentialDelay = baseMs * 2 ** attempt;
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
const jitterDelay = jitter() * 0.1 * exponentialDelay; // 10% jitter
return Math.min(exponentialDelay + jitterDelay, 30000); // Cap at 30 seconds
}
// Upper bound, in milliseconds, on the wait derived from a rate-limit reset header.
const MAX_RATE_LIMIT_WAIT_MS = 30_000;
// Reset header values at or below this are treated as seconds from now;
// larger values are treated as an epoch timestamp in seconds.
const MAX_DELTA_RESET_SECONDS = 86_400;
/**
 * Merges two header maps under lower-cased names so that a custom header
 * replaces a default one regardless of how either is capitalized.
 *
 * @param defaultHeaders - Base headers.
 * @param customHeaders - Optional overrides, applied after the defaults.
 * @returns A new map keyed by lower-cased header name.
 */
function mergeHeaders(
  defaultHeaders: Record<string, string>,
  customHeaders?: Record<string, string>,
): Record<string, string> {
  const merged: Record<string, string> = {};
  for (const layer of [defaultHeaders, customHeaders ?? {}]) {
    Object.entries(layer).forEach(([headerName, headerValue]) => {
      merged[headerName.toLowerCase()] = headerValue;
    });
  }
  return merged;
}
/**
 * Converts a rate-limit reset header into a wait time in milliseconds.
 *
 * @param resetHeader - Raw header value, or `null` when absent.
 * @param fallbackWaitMs - Wait to use when the header is missing, empty, or
 *   not a finite number; returned unchanged in that case.
 * @returns The wait, clamped to the range 0..MAX_RATE_LIMIT_WAIT_MS when
 *   derived from the header.
 */
function calculateRateLimitWaitMs(
  resetHeader: string | null,
  fallbackWaitMs: number,
): number {
  const resetValue = resetHeader ? Number(resetHeader) : Number.NaN;
  if (!Number.isFinite(resetValue)) {
    return fallbackWaitMs;
  }

  const resetMs = resetValue * 1000;
  // Small values are a delay in seconds; large ones are an epoch timestamp.
  const isRelative = resetValue <= MAX_DELTA_RESET_SECONDS;
  const waitMs = isRelative ? resetMs : resetMs - Date.now();
  const nonNegativeWait = Math.max(0, waitMs);
  return Math.min(nonNegativeWait, MAX_RATE_LIMIT_WAIT_MS);
}
/** Result type when includeResponseUrl is true */
export interface FetchHtmlResult {
/** Response body text. */
html: HTMLString;
/** URL reported by the response, or the requested URL when the response reports none. */
responseUrl: string;
}
/** Options for fetchHtml */
@@ -74,6 +121,8 @@ export interface FetchHtmlOptions {
timeoutMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
headers?: Record<string, string>;
includeResponseUrl?: boolean;
jitter?: () => number;
}
/**
@@ -81,14 +130,24 @@ export interface FetchHtmlOptions {
* @param url - The URL to fetch
* @param delayMs - Delay in milliseconds between requests (rate limiting)
* @param opts - Optional fetch options
* @returns The HTML content as a string
* @returns The HTML content as a string, or an object with html and responseUrl
* @throws HttpError, NetworkError, or RateLimitError on failure
*/
export async function fetchHtml(
url: string,
delayMs: number,
opts: FetchHtmlOptions & { includeResponseUrl: true },
): Promise<FetchHtmlResult>;
export async function fetchHtml(
url: string,
delayMs: number,
opts?: FetchHtmlOptions,
): Promise<string> {
): Promise<HTMLString>;
export async function fetchHtml(
url: string,
delayMs: number,
opts?: FetchHtmlOptions,
): Promise<HTMLString | FetchHtmlResult> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 1000;
const timeoutMs = opts?.timeoutMs ?? 30000;
@@ -119,13 +178,17 @@ export async function fetchHtml(
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
const res = await fetch(url, {
method: "GET",
headers: { ...defaultHeaders, ...opts?.headers },
signal: controller.signal,
});
clearTimeout(timeoutId);
const res = await (async () => {
try {
return await fetch(url, {
method: "GET",
headers: mergeHeaders(defaultHeaders, opts?.headers),
signal: controller.signal,
});
} finally {
clearTimeout(timeoutId);
}
})();
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
@@ -137,12 +200,17 @@ export async function fetchHtml(
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: calculateBackoffDelay(attempt, retryBaseMs);
const waitMs = calculateRateLimitWaitMs(
rateLimitReset,
calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
);
if (attempt < maxRetries) {
await new Promise((resolve) => setTimeout(resolve, waitMs));
await delay(waitMs);
continue;
}
throw new RateLimitError(
@@ -154,8 +222,12 @@ export async function fetchHtml(
// Retry on server errors
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
await delay(
calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
);
continue;
}
@@ -170,8 +242,10 @@ export async function fetchHtml(
const html = await res.text();
// Respect per-request delay to maintain rate limiting
await new Promise((resolve) => setTimeout(resolve, delayMs));
return html;
await delay(delayMs);
return opts?.includeResponseUrl
? { html, responseUrl: res.url || url }
: html;
} catch (err) {
// Re-throw known errors
if (
@@ -184,8 +258,12 @@ export async function fetchHtml(
if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) {
await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
await delay(
calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
);
continue;
}
@@ -194,8 +272,12 @@ export async function fetchHtml(
// Network or other errors
if (attempt < maxRetries) {
await new Promise((resolve) =>
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
await delay(
calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
);
continue;
}

View File

@@ -0,0 +1,10 @@
/** True while the process runs with NODE_ENV set to "test". */
const isTest = () => process.env.NODE_ENV === "test";

/**
 * Console wrapper that stays silent under NODE_ENV=test. The environment is
 * checked on every call, so it can change between calls.
 */
export const logger = {
  /** Forwards to `console.log` unless running under test. */
  log: (...args: Parameters<typeof console.log>) => {
    if (isTest()) {
      return;
    }
    console.log(...args);
  },
  /** Forwards to `console.warn` unless running under test. */
  warn: (...args: Parameters<typeof console.warn>) => {
    if (isTest()) {
      return;
    }
    console.warn(...args);
  },
};

View File

@@ -1,21 +1,29 @@
import type { ListingDetails, UnstableListingBuckets } from "../types/common";
import type { UnstableListingBuckets } from "../types/common";
interface HasListingPrice {
listingPrice?: { cents?: number } | null;
}
function getMedian(values: number[]): number {
const middleIndex = Math.floor(values.length / 2);
if (values.length % 2 === 0) {
return (values[middleIndex - 1] + values[middleIndex]) / 2;
const left = values[middleIndex - 1] ?? 0;
const right = values[middleIndex] ?? 0;
return (left + right) / 2;
}
return values[middleIndex];
return values[middleIndex] ?? 0;
}
export function classifyUnstableListings<T extends ListingDetails>(
export function classifyUnstableListings<T extends HasListingPrice>(
listings: T[],
): UnstableListingBuckets<T> {
const validPrices = listings
.map((listing) => listing.listingPrice.cents)
.filter((price) => Number.isFinite(price) && price > 0)
.map((listing) => listing.listingPrice?.cents)
.filter(
(price): price is number => Number.isFinite(price) && (price ?? 0) > 0,
)
.sort((left, right) => left - right);
if (validPrices.length < 2) {
@@ -32,9 +40,13 @@ export function classifyUnstableListings<T extends ListingDetails>(
};
for (const listing of listings) {
const price = listing.listingPrice.cents;
const price = listing.listingPrice?.cents;
if (Number.isFinite(price) && price > 0 && price < threshold) {
if (
Number.isFinite(price) &&
(price ?? 0) > 0 &&
(price ?? 0) < threshold
) {
buckets.unstableResults.push(listing);
continue;
}

View File

@@ -0,0 +1,24 @@
import { afterEach, describe, expect, mock, test } from "bun:test";
import { delay } from "../src/utils/delay";
// Verifies that delay() resolves without scheduling a timer when NODE_ENV is "test".
describe("delay", () => {
// Captured once so each test can override them and afterEach can restore them.
const originalNodeEnv = process.env.NODE_ENV;
const originalSetTimeout = globalThis.setTimeout;
afterEach(() => {
process.env.NODE_ENV = originalNodeEnv;
globalThis.setTimeout = originalSetTimeout;
});
test("does not schedule throttle timers during tests", async () => {
process.env.NODE_ENV = "test";
// Replacement setTimeout that fails loudly if delay() tries to schedule a timer.
const setTimeoutMock = mock(() => {
throw new Error("setTimeout should not be called during tests");
});
globalThis.setTimeout = setTimeoutMock as unknown as typeof setTimeout;
await delay(1000);
expect(setTimeoutMock).not.toHaveBeenCalled();
});
});

View File

@@ -29,12 +29,14 @@ const originalWarn = console.warn;
describe("eBay Scraper Cookie Handling", () => {
beforeEach(() => {
delete process.env.EBAY_COOKIE;
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () => Promise.resolve("<html><body></body></html>"),
}),
) as typeof fetch;
) as unknown as typeof fetch;
});
afterEach(() => {
@@ -44,26 +46,32 @@ describe("eBay Scraper Cookie Handling", () => {
});
test("should ignore request cookie overrides and rely on EBAY_COOKIE", async () => {
const warnMock = mock(() => {});
console.warn = warnMock;
await fetchEbayItems("laptop", 1000);
expect(global.fetch).toHaveBeenCalledTimes(1);
// First call is homepage warm-up, second is search
expect(global.fetch).toHaveBeenCalledTimes(2);
const [, init] = (global.fetch as ReturnType<typeof mock>).mock.calls[0];
// The search request is the second call
const secondFetchCall = (global.fetch as unknown as ReturnType<typeof mock>)
.mock.calls[1];
if (!secondFetchCall) {
throw new Error("Expected search fetch to be called");
}
const [searchUrl, init] = secondFetchCall;
const headers = (init as RequestInit).headers as Record<string, string>;
expect(headers.Cookie).toBeUndefined();
expect(warnMock).toHaveBeenCalledWith(
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
expect(searchUrl).toBe(
"https://www.ebay.ca/sch/i.html?_nkw=laptop&_sacat=0&_from=R40&LH_BIN=1&LH_PrefLoc=1",
);
expect(headers.Cookie).toBeUndefined();
});
test("keeps relative item links on the ebay.ca host", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -75,7 +83,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -84,10 +92,26 @@ describe("eBay Scraper Cookie Handling", () => {
]);
});
test("returns empty results when eBay rate-limits the request", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: false,
status: 429,
headers: { get: () => "0" },
text: () => Promise.resolve(""),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([]);
});
test("deduplicates repeated item links from the same card", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -100,7 +124,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -114,6 +138,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -130,13 +155,15 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toHaveLength(1);
expect(results[0]).toEqual(
expect.objectContaining({ url: "https://www.ebay.ca/itm/123?_trkparms=foo" }),
expect.objectContaining({
url: "https://www.ebay.ca/itm/123?_trkparms=foo",
}),
);
});
@@ -144,6 +171,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -165,7 +193,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -186,6 +214,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -197,7 +226,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -208,10 +237,86 @@ describe("eBay Scraper Cookie Handling", () => {
]);
});
test("parses current eBay s-card markup with unquoted item links", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<div class="s-card s-card--horizontal">
<div class=su-card-container__header>
<a class=s-card__link href=https://ebay.com/itm/1234567890?itmmeta=abc>
<div role=heading aria-level=3 class=s-card__title>
<span class="su-styled-text primary default">Apple MacBook Air M1 2020 8GB 256GB</span>
</div>
</a>
</div>
<div class=su-card-container__attributes>
<span class="su-styled-text primary bold large-1 s-card__price">CA $599.00</span>
</div>
</div>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("macbook", 1000);
expect(results).toEqual([
expect.objectContaining({
title: "Apple MacBook Air M1 2020 8GB 256GB",
url: "https://ebay.com/itm/1234567890?itmmeta=abc",
listingPrice: expect.objectContaining({ cents: 59_900 }),
}),
]);
});
test("parses embedded eBay payload listings before HTML fallback", async () => {
const payload = encodeURIComponent(
JSON.stringify({
searchResults: [
{
title: "Apple MacBook Air M1 API Result",
itemWebUrl: "https://www.ebay.ca/itm/9876543210?hash=item987",
price: { value: "550.00", currency: "CAD" },
},
],
}),
);
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<script data-inlinepayload="${payload}"></script>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("macbook", 1000);
expect(results).toEqual([
expect.objectContaining({
title: "Apple MacBook Air M1 API Result",
url: "https://www.ebay.ca/itm/9876543210?hash=item987",
listingPrice: expect.objectContaining({
amountFormatted: "CAD 550.00",
cents: 55_000,
currency: "CAD",
}),
}),
]);
});
test("treats US dollar prices as USD", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -223,13 +328,16 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ currency: "USD", cents: 12345 }),
listingPrice: expect.objectContaining({
currency: "USD",
cents: 12345,
}),
}),
]);
});
@@ -238,6 +346,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -249,13 +358,16 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ currency: "USD", cents: 12345 }),
listingPrice: expect.objectContaining({
currency: "USD",
cents: 12345,
}),
}),
]);
});
@@ -264,6 +376,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -275,13 +388,16 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ currency: "GBP", cents: 12345 }),
listingPrice: expect.objectContaining({
currency: "GBP",
cents: 12345,
}),
}),
]);
});
@@ -290,6 +406,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -306,7 +423,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("bundle", 1000, {
keywords: ["bundle"],
@@ -314,10 +431,16 @@ describe("eBay Scraper Cookie Handling", () => {
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ currency: "EUR", cents: 12345 }),
listingPrice: expect.objectContaining({
currency: "EUR",
cents: 12345,
}),
}),
expect.objectContaining({
listingPrice: expect.objectContaining({ currency: "JPY", cents: 12300 }),
listingPrice: expect.objectContaining({
currency: "JPY",
cents: 12300,
}),
}),
]);
});
@@ -326,6 +449,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -340,7 +464,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -358,6 +482,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -372,7 +497,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -390,6 +515,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -404,7 +530,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
@@ -423,6 +549,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -434,7 +561,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("bike", 1000);
@@ -450,6 +577,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -461,7 +589,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("microphone", 1000, {
keywords: ["microphone"],
@@ -482,6 +610,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -493,7 +622,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000, {
minPrice: 0,
@@ -512,6 +641,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -533,7 +663,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems(
"laptop",
@@ -557,6 +687,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -578,7 +709,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000, { maxItems: 2 });
@@ -595,6 +726,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
@@ -616,7 +748,7 @@ describe("eBay Scraper Cookie Handling", () => {
</body></html>
`),
}),
) as typeof fetch;
) as unknown as typeof fetch;
const results = await fetchEbayItems(
"laptop",

View File

@@ -2,13 +2,13 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import cliProgress from "cli-progress";
import {
classifyFacebookResponse,
type FacebookListingDetails,
ensureFacebookCookies,
extractFacebookBootstrapCandidates,
extractFacebookItemData,
extractFacebookMarketplaceData,
default as fetchFacebookItems,
type FacebookListingDetails,
fetchFacebookItem,
default as fetchFacebookItems,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
@@ -30,9 +30,13 @@ type IsExact<T, U> =
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
const getUnstableFacebookItems = async (): Promise<
UnstableListingBuckets<FacebookListingDetails>
> => fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
> =>
fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
type _FacebookDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
IsExact<
Awaited<ReturnType<typeof getDefaultFacebookItems>>,
FacebookListingDetails[]
>
>;
type _FacebookUnstableReturn = Assert<
IsExact<
@@ -48,7 +52,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
beforeEach(() => {
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
}) as unknown as typeof fetch;
});
afterEach(() => {
@@ -66,6 +70,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[0]).toEqual({
name: "c_user",
value: "123456789",
rawValue: "123456789",
domain: ".facebook.com",
path: "/",
secure: true,
@@ -76,6 +81,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[1]).toEqual({
name: "xs",
value: "abcdef123456",
rawValue: "abcdef123456",
domain: ".facebook.com",
path: "/",
secure: true,
@@ -89,8 +95,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = parseFacebookCookieString(cookieString);
expect(result[0].value).toBe("123+456");
expect(result[1].value).toBe("abc=def");
expect(result[0]?.value).toBe("123+456");
expect(result[1]?.value).toBe("abc=def");
});
test("should preserve raw encoded values when formatting cookie headers", () => {
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = formatCookiesForHeader(
parseFacebookCookieString(cookieString),
"www.facebook.com",
);
expect(result).toBe(cookieString);
});
test("should filter out malformed cookies", () => {
@@ -111,10 +127,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(2);
expect(result[0].name).toBe("c_user");
expect(result[0].value).toBe("123");
expect(result[1].name).toBe("xs");
expect(result[1].value).toBe("abc");
expect(result[0]?.name).toBe("c_user");
expect(result[0]?.value).toBe("123");
expect(result[1]?.name).toBe("xs");
expect(result[1]?.value).toBe("abc");
});
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
@@ -173,10 +189,6 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("should handle authentication errors", async () => {
const originalWarn = console.warn;
const warnMock = mock(() => {});
console.warn = warnMock;
global.fetch = mock(() =>
Promise.resolve({
ok: false,
@@ -186,18 +198,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
try {
const result = await fetchFacebookItem("123");
expect(result).toBeNull();
expect(global.fetch).toHaveBeenCalledTimes(1);
expect(warnMock).toHaveBeenCalledWith(
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
);
} finally {
console.warn = originalWarn;
}
const result = await fetchFacebookItem("123");
expect(result).toBeNull();
expect(global.fetch).toHaveBeenCalledTimes(1);
});
test("should handle item not found", async () => {
@@ -210,7 +215,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("nonexistent");
expect(result).toBeNull();
@@ -270,7 +275,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
});
});
}) as unknown as typeof fetch;
const _result = await fetchFacebookItem("123");
expect(attempts).toBe(2);
@@ -293,7 +298,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
},
text: () => Promise.resolve("Rate limited"),
});
});
}) as unknown as typeof fetch;
const result = await fetchFacebookItem("429-loop");
@@ -342,7 +347,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("456");
expect(result?.listingStatus).toBe("SOLD");
@@ -384,7 +389,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("457");
@@ -431,7 +436,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("458");
@@ -489,7 +494,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("789");
expect(result).not.toBeNull();
@@ -508,7 +513,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("error");
expect(result).toBeNull();
@@ -533,30 +538,32 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("returns an array by default", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Stable Chair Listing",
listing_price: {
amount: "120.00",
formatted_amount: "CA$120",
currency: "CAD",
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
{
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Stable Chair Listing",
listing_price: {
amount: "120.00",
formatted_amount: "CA$120",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
],
},
],
],
},
],
},
},
})}</script></body></html>`;
)}</script></body></html>`;
global.fetch = mock(() =>
Promise.resolve({
@@ -567,7 +574,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
@@ -576,30 +583,32 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("preserves free listings through the public fetch entrypoint", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "free-1",
marketplace_listing_title: "Free Chair",
listing_price: {
amount: "0.00",
formatted_amount: "FREE",
currency: "CAD",
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
{
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "free-1",
marketplace_listing_title: "Free Chair",
listing_price: {
amount: "0.00",
formatted_amount: "FREE",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
],
},
],
],
},
],
},
},
})}</script></body></html>`;
)}</script></body></html>`;
global.fetch = mock(() =>
Promise.resolve({
@@ -610,7 +619,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
@@ -626,30 +635,32 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("does not start a progress bar when stdout is not a TTY", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Chair Listing",
listing_price: {
amount: "120.00",
formatted_amount: "CA$120",
currency: "CAD",
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
{
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Chair Listing",
listing_price: {
amount: "120.00",
formatted_amount: "CA$120",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
],
},
],
],
},
],
},
},
})}</script></body></html>`;
)}</script></body></html>`;
process.stdout.isTTY = false;
const startSpy = mock(() => {});
@@ -672,7 +683,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
@@ -688,58 +699,60 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("returns results and unstableResults when unstable mode is enabled", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Stable Chair Listing",
listing_price: {
amount: "100.00",
formatted_amount: "CA$100",
currency: "CAD",
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
{
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Stable Chair Listing",
listing_price: {
amount: "100.00",
formatted_amount: "CA$100",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Another Stable Chair",
listing_price: {
amount: "110.00",
formatted_amount: "CA$110",
currency: "CAD",
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Another Stable Chair",
listing_price: {
amount: "110.00",
formatted_amount: "CA$110",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "3",
marketplace_listing_title: "Suspiciously Cheap Chair",
listing_price: {
amount: "70.00",
formatted_amount: "CA$70",
currency: "CAD",
{
node: {
listing: {
id: "3",
marketplace_listing_title: "Suspiciously Cheap Chair",
listing_price: {
amount: "70.00",
formatted_amount: "CA$70",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
],
},
],
],
},
],
},
},
})}</script></body></html>`;
)}</script></body></html>`;
global.fetch = mock(() =>
Promise.resolve({
@@ -750,7 +763,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("chair", 1, "toronto", 25, {
hideUnstableResults: true,
@@ -768,58 +781,61 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("unstable mode classifies before the final MAX_ITEMS limit", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Boundary Stable Chair",
listing_price: {
amount: "100.00",
formatted_amount: "CA$100",
currency: "CAD",
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
{
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Boundary Stable Chair",
listing_price: {
amount: "100.00",
formatted_amount: "CA$100",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Second Boundary Stable Chair",
listing_price: {
amount: "110.00",
formatted_amount: "CA$110",
currency: "CAD",
{
node: {
listing: {
id: "2",
marketplace_listing_title:
"Second Boundary Stable Chair",
listing_price: {
amount: "110.00",
formatted_amount: "CA$110",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "3",
marketplace_listing_title: "Past Boundary Cheap Chair",
listing_price: {
amount: "70.00",
formatted_amount: "CA$70",
currency: "CAD",
{
node: {
listing: {
id: "3",
marketplace_listing_title: "Past Boundary Cheap Chair",
listing_price: {
amount: "70.00",
formatted_amount: "CA$70",
currency: "CAD",
},
is_live: true,
},
is_live: true,
},
},
},
],
},
],
],
},
],
},
},
})}</script></body></html>`;
)}</script></body></html>`;
global.fetch = mock(() =>
Promise.resolve({
@@ -830,7 +846,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("chair", 1, "toronto", 2, {
hideUnstableResults: true,
@@ -869,7 +885,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
},
redacted_description: { text: "Solid wood chair" },
location_text: { text: "Toronto, ON" },
marketplace_listing_seller: { id: "seller-1", name: "Alex" },
marketplace_listing_seller: {
id: "seller-1",
name: "Alex",
},
condition: "USED",
is_live: true,
},
@@ -1114,7 +1133,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = extractFacebookMarketplaceData(html);
expect(result).not.toBeNull();
expect(result).toHaveLength(2);
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
expect(result?.[0]?.node.listing.marketplace_listing_title).toBe(
"Item 1",
);
});
@@ -1135,11 +1154,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = extractFacebookMarketplaceData(html);
expect(result).not.toBeNull();
expect(result).toHaveLength(1);
expect(result?.[0].node.listing.id).toBe("987654321");
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
expect(result?.[0]?.node.listing.id).toBe("987654321");
expect(result?.[0]?.node.listing.marketplace_listing_title).toBe(
"Vintage Bike",
);
expect(result?.[0].node.listing.listing_price).toEqual({
expect(result?.[0]?.node.listing.listing_price).toEqual({
amount: "120.00",
formatted_amount: "CA$120",
currency: "CAD",
@@ -1367,7 +1386,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const ads = extractFacebookMarketplaceData(html);
expect(ads).toHaveLength(1);
expect(ads?.[0].node.listing.marketplace_listing_title).toBe("Bike");
expect(ads?.[0]?.node.listing.marketplace_listing_title).toBe("Bike");
});
test("prefers the strongest marketplace edge set when multiple edges arrays exist", () => {
@@ -1425,7 +1444,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const ads = extractFacebookMarketplaceData(html);
expect(ads).toHaveLength(1);
expect(ads?.[0].node.listing.id).toBe("right-1");
expect(ads?.[0]?.node.listing.id).toBe("right-1");
});
test("rejects mixed edge arrays that contain non-listing entries", () => {
@@ -1650,11 +1669,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const results = parseFacebookAds(ads);
expect(results).toHaveLength(2);
expect(results[0].title).toBe("Ad 1");
expect(results[0].listingPrice?.cents).toBe(5000);
expect(results[0].address).toBe("Toronto");
expect(results[1].title).toBe("Ad 2");
expect(results[1].address).toBe("Ottawa");
expect(results[0]?.title).toBe("Ad 1");
expect(results[0]?.listingPrice?.cents).toBe(5000);
expect(results[0]?.address).toBe("Toronto");
expect(results[1]?.title).toBe("Ad 2");
expect(results[1]?.address).toBe("Ottawa");
});
test("should filter out ads without price", () => {
@@ -1686,14 +1705,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const results = parseFacebookAds(ads);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("With Price");
expect(results[0]?.title).toBe("With Price");
});
test("should handle malformed ads gracefully", () => {
const originalWarn = console.warn;
const warnMock = mock(() => {});
console.warn = warnMock;
const ads = [
{
node: {
@@ -1713,15 +1728,14 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
node: {
// Missing listing
},
} as { node: { listing?: unknown } },
} as unknown as { node: { listing?: unknown } },
];
const results = parseFacebookAds(ads);
const results = parseFacebookAds(
ads as unknown as Parameters<typeof parseFacebookAds>[0],
);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Valid Ad");
expect(warnMock).toHaveBeenCalledTimes(1);
console.warn = originalWarn;
expect(results[0]?.title).toBe("Valid Ad");
});
test("parses formatted fallback prices with multiple commas", () => {

View File

@@ -1,5 +1,7 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
import fetchFacebookItems, {
fetchFacebookItem,
} from "../src/scrapers/facebook";
// Mock fetch globally
const originalFetch = global.fetch;
@@ -13,7 +15,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
process.env.FACEBOOK_COOKIE = facebookCookie;
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
}) as unknown as typeof fetch;
});
afterEach(() => {
@@ -27,35 +29,37 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Main Search Function", () => {
test("should successfully fetch search results", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "iPhone 13",
listing_price: {
amount: "500.00",
formatted_amount: "CA$500",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
{
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "iPhone 13",
listing_price: {
amount: "500.00",
formatted_amount: "CA$500",
currency: "CAD",
},
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
},
},
is_live: true,
},
is_live: true,
},
},
},
],
},
],
],
},
],
},
},
})}</script></body></html>`;
)}</script></body></html>`;
global.fetch = mock(() =>
Promise.resolve({
@@ -65,11 +69,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("iPhone", 1, "toronto", 25);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("iPhone 13");
expect(results[0]?.title).toBe("iPhone 13");
});
test("should filter out items without price", async () => {
@@ -131,11 +135,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("With Price");
expect(results[0]?.title).toBe("With Price");
});
test("should respect MAX_ITEMS parameter", async () => {
@@ -186,7 +190,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 5);
expect(results).toHaveLength(5);
@@ -227,7 +231,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems(
"nonexistent query",
@@ -248,7 +252,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]);
@@ -277,7 +281,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("lamp", 1, "toronto", 25);
expect(results).toEqual([]);
@@ -318,14 +322,16 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("lamp", 1, "toronto", 25);
expect(results).toEqual([]);
});
test("should handle network errors", async () => {
global.fetch = mock(() => Promise.reject(new Error("Network error")));
global.fetch = mock(() =>
Promise.reject(new Error("Network error")),
) as unknown as typeof fetch;
await expect(
fetchFacebookItems("test", 1, "toronto", 25),
@@ -396,7 +402,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
});
});
}) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(attempts).toBe(2);
@@ -469,13 +475,13 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("cars", 1, "toronto", 25);
expect(results).toHaveLength(2);
// Both should be classified as "item" type in search results (vehicle detection is for item details)
expect(results[0].title).toBe("2006 Honda Civic");
expect(results[1].title).toBe("iPhone 13");
expect(results[0]?.title).toBe("2006 Honda Civic");
expect(results[1]?.title).toBe("iPhone 13");
});
});
@@ -538,7 +544,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems(
"nintendo switch",
@@ -547,8 +553,8 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
25,
);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Nintendo Switch");
expect(results[0].categoryId).toBe("479353692612078");
expect(results[0]?.title).toBe("Nintendo Switch");
expect(results[0]?.categoryId).toBe("479353692612078");
});
test("should handle home goods/furniture listings", async () => {
@@ -609,12 +615,12 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("table", 1, "toronto", 25);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Dining Table");
expect(results[0].categoryId).toBe("1569171756675761");
expect(results[0]?.title).toBe("Dining Table");
expect(results[0]?.categoryId).toBe("1569171756675761");
});
});
@@ -631,7 +637,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]);
@@ -647,7 +653,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]);
@@ -663,7 +669,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]);
@@ -704,7 +710,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null,
},
}),
);
) as unknown as typeof fetch;
const result = await fetchFacebookItem("123");
expect(result).toBeNull();

View File

@@ -0,0 +1,124 @@
import { afterEach, describe, expect, mock, test } from "bun:test";
import { fetchHtml } from "../src/utils/http";
/**
 * Unit tests for `fetchHtml`.
 *
 * Every test swaps `global.fetch` (and, where timing matters, the global
 * timer functions) for bun mocks, so no real network request or delay happens.
 * `afterEach` puts the original globals and `NODE_ENV` back.
 */
describe("fetchHtml", () => {
  const originalFetch = global.fetch;
  const originalNodeEnv = process.env.NODE_ENV;
  const originalSetTimeout = globalThis.setTimeout;
  const originalClearTimeout = globalThis.clearTimeout;

  afterEach(() => {
    global.fetch = originalFetch;
    // Assigning `undefined` to a process.env key stores the string
    // "undefined" instead of unsetting it, so delete the key when it was
    // originally absent.
    if (originalNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = originalNodeEnv;
    }
    globalThis.setTimeout = originalSetTimeout;
    globalThis.clearTimeout = originalClearTimeout;
  });

  test("does not schedule throttle timers during tests", async () => {
    process.env.NODE_ENV = "test";
    const scheduledDelays: number[] = [];

    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        headers: { get: () => null },
        text: () => Promise.resolve("<html></html>"),
      }),
    ) as unknown as typeof fetch;

    // Record every requested delay. Handlers run immediately, except for the
    // 30_000 ms timer, which matches the `timeoutMs` passed below
    // (presumably the request-timeout timer; it must not fire).
    globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
      scheduledDelays.push(Number(timeout));
      if (timeout !== 30_000 && typeof handler === "function") {
        handler();
      }
      return 0 as unknown as ReturnType<typeof setTimeout>;
    }) as unknown as typeof setTimeout;
    globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;

    await fetchHtml("https://example.com", 1000, { timeoutMs: 30_000 });

    // The 1000 passed as the second argument must never reach setTimeout.
    expect(scheduledDelays).not.toContain(1000);
  });

  test("fetchHtml returns responseUrl when includeResponseUrl is true", async () => {
    process.env.NODE_ENV = "test";

    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        status: 200,
        url: "https://example.test/final",
        headers: { get: () => null },
        text: () => Promise.resolve("<html></html>"),
      }),
    ) as unknown as typeof fetch;

    const result = await fetchHtml("https://example.test", 0, {
      includeResponseUrl: true,
    });

    expect(result.html).toBe("<html></html>");
    // The URL comes from the mocked response, not from the request argument.
    expect(result.responseUrl).toBe("https://example.test/final");
  });

  test("rate limit epoch reset uses bounded wait", async () => {
    process.env.NODE_ENV = "production";
    const scheduledDelays: number[] = [];
    // 315_360_000 s is 3650 days, so the advertised reset is far in the future.
    const farFutureEpochSeconds = Math.floor(Date.now() / 1000) + 315_360_000;
    let calls = 0;

    // The first call answers 429 with an X-RateLimit-Reset header; every later
    // call answers 200.
    global.fetch = mock(() => {
      calls += 1;
      return Promise.resolve({
        ok: calls > 1,
        status: calls > 1 ? 200 : 429,
        url: "https://example.test",
        headers: {
          get: (name: string) =>
            name === "X-RateLimit-Reset" ? String(farFutureEpochSeconds) : null,
        },
        text: () => Promise.resolve("<html></html>"),
      });
    }) as unknown as typeof fetch;

    // Run every timer immediately, except the one whose delay equals the
    // distinctive `timeoutMs` value passed below.
    globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
      scheduledDelays.push(Number(timeout));
      if (timeout !== 1_234_567 && typeof handler === "function") {
        handler();
      }
      return 0 as unknown as ReturnType<typeof setTimeout>;
    }) as unknown as typeof setTimeout;
    globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;

    await fetchHtml("https://example.test", 0, {
      maxRetries: 1,
      timeoutMs: 1_234_567,
    });

    // A 30_000 ms wait is expected, and the raw header value must never be
    // used as a delay.
    expect(scheduledDelays).toContain(30_000);
    expect(scheduledDelays).not.toContain(farFutureEpochSeconds * 1000);
  });

  test("custom Accept header overrides default accept without duplicate casing", async () => {
    process.env.NODE_ENV = "test";
    const customAccept = "text/plain";
    let requestHeaders: HeadersInit | undefined;

    // Capture the headers that fetchHtml hands to fetch.
    global.fetch = mock((_url: string | URL | Request, init?: RequestInit) => {
      requestHeaders = init?.headers;
      return Promise.resolve({
        ok: true,
        status: 200,
        url: "https://example.test",
        headers: { get: () => null },
        text: () => Promise.resolve("<html></html>"),
      });
    }) as unknown as typeof fetch;

    await fetchHtml("https://example.test", 0, {
      headers: { Accept: customAccept },
    });

    expect(requestHeaders).toBeDefined();
    // The caller's value must appear under the lowercase key only.
    expect((requestHeaders as Record<string, string>).accept).toBe(
      customAccept,
    );
    expect((requestHeaders as Record<string, string>).Accept).toBeUndefined();
  });
});

View File

@@ -1,12 +1,12 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
buildSearchUrl,
default as fetchKijijiItems,
type DetailedListing,
default as fetchKijijiItems,
NetworkError,
parseSearch,
parseDetailedListing,
ParseError,
parseDetailedListing,
parseSearch,
RateLimitError,
resolveCategoryId,
resolveLocationId,
@@ -49,7 +49,7 @@ const originalFetch = global.fetch;
beforeEach(() => {
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
}) as unknown as typeof fetch;
});
afterEach(() => {
@@ -282,7 +282,8 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-low/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Low Listing", 7000, "v-low/k0l0")),
text: () =>
Promise.resolve(listingHtml("Low Listing", 7000, "v-low/k0l0")),
headers: { get: () => null },
url,
});
@@ -291,7 +292,8 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-mid/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Mid Listing", 9000, "v-mid/k0l0")),
text: () =>
Promise.resolve(listingHtml("Mid Listing", 9000, "v-mid/k0l0")),
headers: { get: () => null },
url,
});
@@ -300,14 +302,15 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-high/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("High Listing", 12000, "v-high/k0l0")),
text: () =>
Promise.resolve(listingHtml("High Listing", 12000, "v-high/k0l0")),
headers: { get: () => null },
url,
});
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
const results = await fetchKijijiItems(
"phone",
@@ -415,7 +418,7 @@ describe("fetchKijijiItems", () => {
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
const results = await fetchKijijiItems(
"phone",
@@ -512,7 +515,7 @@ describe("fetchKijijiItems", () => {
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
const results = await fetchKijijiItems(
"phone",
@@ -534,9 +537,18 @@ describe("fetchKijijiItems", () => {
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
"Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
"Listing:1": {
url: "/v-stable-one/k0l0",
title: "Stable Listing One",
},
"Listing:2": {
url: "/v-stable-two/k0l0",
title: "Stable Listing Two",
},
"Listing:3": {
url: "/v-unstable/k0l0",
title: "Unstable Listing",
},
},
},
},
@@ -582,7 +594,10 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-stable-one/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0")),
text: () =>
Promise.resolve(
listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0"),
),
headers: { get: () => null },
url,
});
@@ -591,7 +606,10 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-stable-two/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0")),
text: () =>
Promise.resolve(
listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0"),
),
headers: { get: () => null },
url,
});
@@ -600,14 +618,17 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-unstable/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
text: () =>
Promise.resolve(
listingHtml("Unstable Listing", 7000, "v-unstable/k0l0"),
),
headers: { get: () => null },
url,
});
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
const results = await fetchKijijiItems(
"phone",
@@ -635,10 +656,22 @@ describe("fetchKijijiItems", () => {
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
"Listing:3": { url: "/v-out-of-range-high/k0l0", title: "Out Of Range High" },
"Listing:4": { url: "/v-out-of-range-low/k0l0", title: "Out Of Range Low" },
"Listing:1": {
url: "/v-stable-one/k0l0",
title: "Stable Listing One",
},
"Listing:2": {
url: "/v-stable-two/k0l0",
title: "Stable Listing Two",
},
"Listing:3": {
url: "/v-out-of-range-high/k0l0",
title: "Out Of Range High",
},
"Listing:4": {
url: "/v-out-of-range-low/k0l0",
title: "Out Of Range Low",
},
},
},
},
@@ -672,7 +705,11 @@ describe("fetchKijijiItems", () => {
global.fetch = mock((input: string | URL | Request) => {
const url = typeof input === "string" ? input : input.toString();
if (url.includes("/k0c0l1700272") && url.includes("priceMin=80") && url.includes("priceMax=150")) {
if (
url.includes("/k0c0l1700272") &&
url.includes("priceMin=80") &&
url.includes("priceMax=150")
) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(searchHtml),
@@ -684,7 +721,10 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-stable-one/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0")),
text: () =>
Promise.resolve(
listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0"),
),
headers: { get: () => null },
url,
});
@@ -693,7 +733,10 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-stable-two/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0")),
text: () =>
Promise.resolve(
listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0"),
),
headers: { get: () => null },
url,
});
@@ -702,7 +745,14 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-out-of-range-high/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Out Of Range High", 20000, "v-out-of-range-high/k0l0")),
text: () =>
Promise.resolve(
listingHtml(
"Out Of Range High",
20000,
"v-out-of-range-high/k0l0",
),
),
headers: { get: () => null },
url,
});
@@ -711,14 +761,17 @@ describe("fetchKijijiItems", () => {
if (url.endsWith("/v-out-of-range-low/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Out Of Range Low", 7000, "v-out-of-range-low/k0l0")),
text: () =>
Promise.resolve(
listingHtml("Out Of Range Low", 7000, "v-out-of-range-low/k0l0"),
),
headers: { get: () => null },
url,
});
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
const results = await fetchKijijiItems(
"phone",
@@ -819,7 +872,7 @@ describe("fetchKijijiItems", () => {
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
await parseDetailedListing(html, "https://www.kijiji.ca", {
includeClientSideData: true,
@@ -928,7 +981,7 @@ describe("fetchKijijiItems", () => {
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
}) as unknown as typeof fetch;
const results = await fetchKijijiItems(
"phone",

View File

@@ -13,7 +13,7 @@ describe("HTML Parsing Integration", () => {
// Mock fetch for all tests
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
}) as unknown as typeof fetch;
});
afterEach(() => {
@@ -111,7 +111,7 @@ describe("HTML Parsing Integration", () => {
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results[0].listingLink).toBe(
expect(results[0]?.listingLink).toBe(
"https://www.kijiji.ca/v-iphone/k0l0",
);
});
@@ -146,7 +146,49 @@ describe("HTML Parsing Integration", () => {
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toHaveLength(1);
expect(results[0].name).toBe("iPhone 13 Pro");
expect(results[0]?.name).toBe("iPhone 13 Pro");
});
test("should parse current StandardListing search records", () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
ROOT_QUERY: { test: "value" },
"StandardListing:123": {
__typename: "StandardListing",
url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
title: "iPhone 13",
},
"StandardListing:456": {
__typename: "StandardListing",
url: "/v-cell-phone/city-of-toronto/iphone-14/456",
title: "iPhone 14",
},
},
},
},
})}
</script>
</html>
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toEqual([
{
name: "iPhone 13",
listingLink:
"https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
},
{
name: "iPhone 14",
listingLink:
"https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-14/456",
},
]);
});
test("should return empty array for invalid HTML", () => {
@@ -303,6 +345,118 @@ describe("HTML Parsing Integration", () => {
expect(result).toBeNull();
});
test("should parse current StandardListing detail records", async () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"StandardListing:123": {
__typename: "StandardListing",
url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
title: "iPhone 13",
description: "Lightly used iPhone 13",
price: {
__typename: "AmountPrice",
amount: 45000,
currency: "CAD",
type: "FIXED",
},
type: "OFFER",
status: "ACTIVE",
activationDate: "2026-04-20T10:00:00.000Z",
metrics: { views: "12" },
location: {
id: 1700273,
name: "City of Toronto",
address: "Toronto, ON",
coordinates: {
latitude: 43.6532,
longitude: -79.3832,
},
},
imageUrls: ["https://media.kijiji.ca/api/v1/image1.jpg"],
categoryId: 760,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: false,
},
posterInfo: {
posterId: "user123",
rating: 4.5,
},
attributes: {
__typename: "StandardListingAttributes",
all: [
{
__typename: "ListingAttributeV2",
canonicalName: "forsaleby",
canonicalValues: ["ownr"],
},
{
__typename: "ListingAttributeV2",
canonicalName: "phonebrand",
canonicalValues: ["apple"],
},
],
},
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({
url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
title: "iPhone 13",
description: "Lightly used iPhone 13",
listingPrice: {
amountFormatted: "$450.00",
cents: 45000,
currency: "CAD",
},
listingType: "OFFER",
listingStatus: "ACTIVE",
creationDate: "2026-04-20T10:00:00.000Z",
endDate: undefined,
numberOfViews: 12,
address: "Toronto, ON",
images: ["https://media.kijiji.ca/api/v1/image1.jpg"],
categoryId: 760,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: false,
},
attributes: {
forsaleby: ["ownr"],
phonebrand: ["apple"],
},
location: {
id: 1700273,
name: "City of Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832,
},
},
sellerInfo: {
posterId: "user123",
rating: 4.5,
},
});
});
test("should handle missing optional fields", async () => {
const mockHtml = `
<html>

View File

@@ -0,0 +1,35 @@
import { describe, expect, test } from "bun:test";
import fetchEbayItems from "../../src/scrapers/ebay";
const LIVE_RESULT_LIMIT = 3;
const LIVE_TEST_TIMEOUT_MS = 30_000;
// Live smoke test: calls fetchEbayItems directly (no fetch mock is installed
// in this file) and checks the basic shape of every returned listing.
describe("eBay live parser", () => {
  test(
    "scrapes live search results into listing details",
    async () => {
      const listings = await fetchEbayItems("iphone", 1, {
        maxItems: LIVE_RESULT_LIMIT,
      });

      // An empty result set means the scrape or the parse produced nothing.
      expect(listings.length).toBeGreaterThan(0);

      for (const { url, title, listingPrice } of listings) {
        // Explicit throws (rather than expect) so the failing URL is in the
        // message and the price fields are narrowed for the checks below.
        if (!listingPrice) {
          throw new Error(`Expected listing price for ${url}`);
        }
        if (typeof listingPrice.cents !== "number") {
          throw new Error(`Expected listing cents for ${url}`);
        }
        if (!listingPrice.currency) {
          throw new Error(`Expected listing currency for ${url}`);
        }

        expect(url).toStartWith("https://");
        expect(title.length).toBeGreaterThan(0);
        expect(listingPrice.cents).toBeGreaterThanOrEqual(0);
        expect(listingPrice.currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,44 @@
import { describe, expect, test } from "bun:test";
import fetchFacebookItems from "../../src/scrapers/facebook";
const LIVE_RESULT_LIMIT = 3;
const LIVE_TEST_TIMEOUT_MS = 30_000;
// Live smoke test: calls fetchFacebookItems directly (no fetch mock is
// installed in this file) and checks the basic shape of every returned listing.
describe("Facebook live parser", () => {
  test(
    "scrapes live marketplace search results into listing details",
    async () => {
      // Fail fast with a clear message when the cookie env var is absent or blank.
      const cookie = process.env.FACEBOOK_COOKIE?.trim();
      if (!cookie) {
        throw new Error("FACEBOOK_COOKIE is required for Facebook live tests");
      }

      const listings = await fetchFacebookItems(
        "iphone",
        1,
        "toronto",
        LIVE_RESULT_LIMIT,
      );

      // An empty result set means the scrape or the parse produced nothing.
      expect(listings.length).toBeGreaterThan(0);

      for (const { url, title, listingPrice } of listings) {
        // Explicit throws (rather than expect) so the failing URL is in the
        // message and the price fields are narrowed for the checks below.
        if (!listingPrice) {
          throw new Error(`Expected listing price for ${url}`);
        }
        if (typeof listingPrice.cents !== "number") {
          throw new Error(`Expected listing cents for ${url}`);
        }
        if (!listingPrice.currency) {
          throw new Error(`Expected listing currency for ${url}`);
        }

        expect(url).toStartWith("https://www.facebook.com/marketplace/item/");
        expect(title.length).toBeGreaterThan(0);
        expect(listingPrice.cents).toBeGreaterThanOrEqual(0);
        expect(listingPrice.currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,38 @@
import { describe, expect, test } from "bun:test";
import fetchKijijiItems from "../../src/scrapers/kijiji";
const LIVE_TEST_TIMEOUT_MS = 30_000;
// Live smoke test: calls fetchKijijiItems directly (no fetch mock is installed
// in this file) and checks the basic shape of every returned listing.
describe("Kijiji live parser", () => {
  test(
    "scrapes live search results into detailed listings",
    async () => {
      const searchOptions = { maxPages: 1 };
      const listingOptions = {
        includeImages: false,
        sellerDataDepth: "basic",
      } as const;

      const listings = await fetchKijijiItems(
        "iphone",
        1,
        "https://www.kijiji.ca",
        searchOptions,
        listingOptions,
      );

      // An empty result set means the scrape or the parse produced nothing.
      expect(listings.length).toBeGreaterThan(0);

      for (const { url, title, listingPrice } of listings) {
        // Explicit throws (rather than expect) so the failing URL is in the
        // message and the price fields are narrowed for the checks below.
        if (!listingPrice) {
          throw new Error(`Expected listing price for ${url}`);
        }
        if (typeof listingPrice.cents !== "number") {
          throw new Error(`Expected listing cents for ${url}`);
        }
        if (!listingPrice.currency) {
          throw new Error(`Expected listing currency for ${url}`);
        }

        expect(url).toStartWith("https://www.kijiji.ca/");
        expect(title.length).toBeGreaterThan(0);
        expect(listingPrice.cents).toBeGreaterThanOrEqual(0);
        expect(listingPrice.currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,29 @@
import { afterEach, describe, expect, mock, test } from "bun:test";
// Verifies that the shared logger stays silent while NODE_ENV is "test".
describe("logger", () => {
  // Snapshot the globals this suite mutates so afterEach can put them back.
  const originalNodeEnv = process.env.NODE_ENV;
  const originalConsoleLog = console.log;
  const originalConsoleWarn = console.warn;

  afterEach(() => {
    // Assigning `undefined` to a process.env key stores the string
    // "undefined" in Node-compatible runtimes, so an originally-unset
    // variable has to be removed rather than reassigned.
    if (originalNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = originalNodeEnv;
    }
    console.log = originalConsoleLog;
    console.warn = originalConsoleWarn;
  });

  test("suppresses log and warn output during tests", async () => {
    process.env.NODE_ENV = "test";

    // Replace the console sinks with spies before the logger is loaded/used.
    const logMock = mock(() => {});
    const warnMock = mock(() => {});
    console.log = logMock;
    console.warn = warnMock;

    const { logger } = await import("../src/utils/logger");
    logger.log("hidden log");
    logger.warn("hidden warn");

    expect(logMock).not.toHaveBeenCalled();
    expect(warnMock).not.toHaveBeenCalled();
  });
});

View File

@@ -1,11 +1,6 @@
// Test setup for Bun test runner
// This file is loaded before any tests run due to bunfig.toml preload
// Mock fetch globally for tests
global.fetch =
global.fetch ||
(() => {
throw new Error("fetch is not available in test environment");
});
// Add any global test utilities here
global.fetch = Object.assign(
() => {
throw new Error("Tests must mock fetch explicitly");
},
{ preconnect: fetch.preconnect },
) as typeof fetch;

View File

@@ -31,8 +31,13 @@ describe("classifyUnstableListings", () => {
const buckets = classifyUnstableListings(listings);
expect(buckets.results.map((listing) => listing.id)).toEqual(["stable-1", "stable-2"]);
expect(buckets.unstableResults.map((listing) => listing.id)).toEqual(["unstable"]);
expect(buckets.results.map((listing) => listing.id)).toEqual([
"stable-1",
"stable-2",
]);
expect(buckets.unstableResults.map((listing) => listing.id)).toEqual([
"unstable",
]);
});
test("uses the midpoint median for even-sized priced inputs", () => {
@@ -45,8 +50,14 @@ describe("classifyUnstableListings", () => {
const buckets = classifyUnstableListings(listings);
expect(buckets.results.map((listing) => listing.id)).toEqual(["mid-low", "mid-high", "high"]);
expect(buckets.unstableResults.map((listing) => listing.id)).toEqual(["low"]);
expect(buckets.results.map((listing) => listing.id)).toEqual([
"mid-low",
"mid-high",
"high",
]);
expect(buckets.unstableResults.map((listing) => listing.id)).toEqual([
"low",
]);
});
test("keeps non-positive prices in results and excludes them from the median input", () => {
@@ -66,7 +77,9 @@ describe("classifyUnstableListings", () => {
"stable-1",
"stable-2",
]);
expect(buckets.unstableResults.map((listing) => listing.id)).toEqual(["unstable"]);
expect(buckets.unstableResults.map((listing) => listing.id)).toEqual([
"unstable",
]);
});
test("returns all listings in results when fewer than two valid prices are present", () => {
@@ -78,7 +91,11 @@ describe("classifyUnstableListings", () => {
const buckets = classifyUnstableListings(listings);
expect(buckets.results.map((listing) => listing.id)).toEqual(["zero", "negative", "only-valid"]);
expect(buckets.results.map((listing) => listing.id)).toEqual([
"zero",
"negative",
"only-valid",
]);
expect(buckets.unstableResults).toEqual([]);
});
});

View File

@@ -1,13 +1,9 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"lib": ["dom"],
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"paths": {
"@/*": ["./src/*"]
},
"strict": true,
"noEmit": true
}
}
},
"include": ["./src", "./test", "../../types/**/*.d.ts"]
}

View File

@@ -21,5 +21,6 @@
## Verify
- `bun test packages/mcp-server/test`
- `bun run --cwd packages/mcp-server build`
- `bun run ci`

View File

@@ -2,18 +2,22 @@
"name": "@marketplace-scrapers/mcp-server",
"version": "1.0.0",
"type": "module",
"module": "./src/index.ts",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp"
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp",
"typecheck": "bun tsgo"
},
"dependencies": {
"@marketplace-scrapers/core": "workspace:*"
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
},
"devDependencies": {
"@types/bun": "latest"
"@types/bun": "catalog:"
},
"peerDependencies": {
"typescript": "^5"

View File

@@ -1,3 +1,4 @@
import { logger } from "./logger";
import { handleMcpRequest } from "./protocol/handler";
import { serverCard } from "./protocol/metadata";
@@ -33,4 +34,4 @@ const server = Bun.serve({
},
});
console.log(`MCP Server running on ${server.hostname}:${server.port}`);
logger.log(`MCP Server running on ${server.hostname}:${server.port}`);

View File

@@ -0,0 +1,10 @@
/**
 * Reports whether the process is running with NODE_ENV set to "test".
 * Read on every call, so changes to the environment take effect immediately.
 */
function isTest(): boolean {
  return process.env.NODE_ENV === "test";
}

/**
 * Thin console wrapper that drops all output while NODE_ENV is "test" and
 * otherwise forwards its arguments unchanged to the matching console method.
 */
export const logger = {
  /** Forwards to console.log unless running under NODE_ENV "test". */
  log(...args: Parameters<typeof console.log>): void {
    if (isTest()) {
      return;
    }
    console.log(...args);
  },

  /** Forwards to console.error unless running under NODE_ENV "test". */
  error(...args: Parameters<typeof console.error>): void {
    if (isTest()) {
      return;
    }
    console.error(...args);
  },
};

View File

@@ -1,7 +1,33 @@
import { logger } from "../logger";
import { tools } from "./tools";
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000;
/**
 * Sends a GET request to one marketplace endpoint of the backing API and
 * returns the decoded JSON body.
 *
 * The wait for the response is capped at API_TIMEOUT milliseconds. When the
 * cap is hit the in-flight request is aborted; in every case the timer is
 * cleared as soon as the race settles so it cannot linger after the call.
 *
 * @param marketplace - Path segment appended to API_BASE_URL (callers in this
 *   file pass "facebook" and "ebay").
 * @param params - Query parameters, serialized verbatim into the URL.
 * @returns The parsed JSON response body.
 * @throws Error with "Request timed out after …ms" when no response arrives in
 *   time, or "API returned <status>: <body>" for a non-OK response.
 */
async function callMarketplaceApi(
  marketplace: string,
  params: URLSearchParams,
): Promise<unknown> {
  const url = `${API_BASE_URL}/${marketplace}?${params.toString()}`;
  logger.log(`[MCP] Calling ${marketplace} API`);

  const controller = new AbortController();
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<never>((_, reject) => {
    timeoutId = setTimeout(() => {
      // Reject first so the race surfaces the timeout message rather than
      // the AbortError produced by cancelling the request.
      reject(new Error(`Request timed out after ${API_TIMEOUT}ms`));
      controller.abort();
    }, API_TIMEOUT);
  });

  let response: Response;
  try {
    response = await Promise.race([
      fetch(url, { signal: controller.signal }),
      timedOut,
    ]);
  } finally {
    // Without this the timer stays pending for up to API_TIMEOUT after
    // every successful (or failed) request.
    clearTimeout(timeoutId);
  }

  if (!response.ok) {
    const errorText = await response.text();
    logger.error(
      `[MCP] ${marketplace} API error ${response.status}: ${errorText}`,
    );
    throw new Error(`API returned ${response.status}: ${errorText}`);
  }
  return response.json();
}
/**
* Handle MCP JSON-RPC 2.0 protocol requests
@@ -115,11 +141,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("priceMin", args.priceMin.toString());
if (args.priceMax)
params.append("priceMax", args.priceMax.toString());
if (args.cookies) params.append("cookies", args.cookies);
if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString());
console.log(
logger.log(
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
);
const response = await Promise.race([
@@ -135,13 +160,20 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (!response.ok) {
const errorText = await response.text();
console.error(
logger.error(
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
let errorMessage = `API returned ${response.status}: ${errorText}`;
try {
const errorJson = JSON.parse(errorText) as { message?: string };
if (errorJson.message) errorMessage = errorJson.message;
} catch {
// not JSON — use raw text
}
throw new Error(errorMessage);
}
result = await response.json();
console.log(
logger.log(
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else if (name === "search_facebook") {
@@ -160,31 +192,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString());
console.log(
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] Facebook API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
);
result = await callMarketplaceApi("facebook", params);
} else if (name === "search_ebay") {
const query = args.query;
if (!query) {
@@ -214,31 +222,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString());
console.log(
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] eBay API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
);
result = await callMarketplaceApi("ebay", params);
} else {
return Response.json({
jsonrpc: "2.0",

View File

@@ -11,7 +11,11 @@ export const tools = [
properties: {
query: {
type: "string",
description: "Search query for Kijiji listings",
description:
"Search query for Kijiji listings. " +
"Kijiji requires ALL words to appear in the listing title — keep queries short and use terms sellers actually write. " +
"Avoid marketing/brand phrases sellers don't use (e.g. use 'macbook air m1' not 'macbook air m1 apple silicon'). " +
"If the search returns no results, try a shorter or more common query.",
},
location: {
type: "string",
@@ -46,16 +50,11 @@ export const tools = [
},
priceMin: {
type: "number",
description: "Minimum price in cents",
description: "Minimum price in dollars",
},
priceMax: {
type: "number",
description: "Maximum price in cents",
},
cookies: {
type: "string",
description:
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
description: "Maximum price in dollars",
},
unstableFilter: {
type: "boolean",
@@ -108,11 +107,11 @@ export const tools = [
},
minPrice: {
type: "number",
description: "Minimum price filter",
description: "Minimum price in dollars",
},
maxPrice: {
type: "number",
description: "Maximum price filter",
description: "Maximum price in dollars",
},
strictMode: {
type: "boolean",

View File

@@ -8,25 +8,20 @@ describe("MCP protocol cookie inputs", () => {
beforeEach(() => {
global.fetch = mock(() =>
Promise.resolve(new Response(JSON.stringify([]), { status: 200 })),
) as typeof fetch;
) as unknown as typeof fetch;
});
afterEach(() => {
global.fetch = originalFetch;
});
test("search tools should not expose Facebook or eBay cookie inputs", () => {
const searchFacebookTool = tools.find(
(tool) => tool.name === "search_facebook",
);
const searchEbayTool = tools.find((tool) => tool.name === "search_ebay");
expect(searchFacebookTool?.inputSchema.properties).not.toHaveProperty(
"cookiesSource",
);
expect(searchEbayTool?.inputSchema.properties).not.toHaveProperty(
"cookies",
);
test("search tools should not expose cookie inputs", () => {
const toolNames = ["search_kijiji", "search_facebook", "search_ebay"];
for (const toolName of toolNames) {
const tool = tools.find((candidate) => candidate.name === toolName);
expect(tool?.inputSchema.properties).not.toHaveProperty("cookies");
expect(tool?.inputSchema.properties).not.toHaveProperty("cookiesSource");
}
});
test("search_facebook should not forward cookies query parameters", async () => {
@@ -48,18 +43,43 @@ describe("MCP protocol cookie inputs", () => {
}),
);
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("/facebook?q=laptop");
expect(String(calledUrl)).not.toContain("cookies=");
});
test("search_kijiji should not forward cookies query parameters", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_kijiji",
arguments: {
query: "laptop",
cookies: "s=1",
},
},
}),
}),
);
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("/kijiji?q=laptop");
expect(String(calledUrl)).not.toContain("cookies=");
});
});
describe("MCP protocol unstableFilter", () => {
beforeEach(() => {
global.fetch = mock(() =>
Promise.resolve(new Response(JSON.stringify([]), { status: 200 })),
) as typeof fetch;
) as unknown as typeof fetch;
});
afterEach(() => {
@@ -72,7 +92,10 @@ describe("MCP protocol unstableFilter", () => {
const tool = tools.find((t) => t.name === toolName);
expect(tool).toBeDefined();
expect(tool?.inputSchema.properties).toHaveProperty("unstableFilter");
const prop = tool?.inputSchema.properties.unstableFilter as any;
const prop = tool?.inputSchema.properties.unstableFilter as {
type: string;
description: string;
};
expect(prop.type).toBe("boolean");
expect(prop.description).toContain("optional");
expect(prop.description).toContain("20%");
@@ -100,11 +123,51 @@ describe("MCP protocol unstableFilter", () => {
}),
);
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("unstableFilter=true");
});
test("search_kijiji should document price filters as dollars", () => {
const tool = tools.find((candidate) => candidate.name === "search_kijiji");
const priceMin = tool?.inputSchema.properties.priceMin as {
description: string;
};
const priceMax = tool?.inputSchema.properties.priceMax as {
description: string;
};
expect(priceMin.description).toContain("dollars");
expect(priceMax.description).toContain("dollars");
});
test("handler should forward Kijiji dollar price filters to API", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_kijiji",
arguments: {
query: "macbook",
priceMin: 999.99,
priceMax: 1000,
},
},
}),
}),
);
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("priceMin=999.99");
expect(String(calledUrl)).toContain("priceMax=1000");
});
test("handler should forward unstableFilter=true for search_facebook", async () => {
await handleMcpRequest(
new Request("http://localhost", {
@@ -124,11 +187,40 @@ describe("MCP protocol unstableFilter", () => {
}),
);
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("unstableFilter=true");
});
test("tools/call returns API JSON as text content", async () => {
global.fetch = mock(() =>
Promise.resolve(
new Response(JSON.stringify([{ title: "item" }]), { status: 200 }),
),
) as unknown as typeof fetch;
const response = await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_facebook",
arguments: { query: "laptop" },
},
}),
}),
);
const body = await response.json();
expect(body.result.content[0].type).toBe("text");
expect(JSON.parse(body.result.content[0].text)).toEqual([
{ title: "item" },
]);
});
test("handler should forward unstableFilter=true for search_ebay", async () => {
await handleMcpRequest(
new Request("http://localhost", {
@@ -148,8 +240,48 @@ describe("MCP protocol unstableFilter", () => {
}),
);
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("unstableFilter=true");
});
test("search_ebay should document price filters as dollars", () => {
const tool = tools.find((candidate) => candidate.name === "search_ebay");
const minPrice = tool?.inputSchema.properties.minPrice as {
description: string;
};
const maxPrice = tool?.inputSchema.properties.maxPrice as {
description: string;
};
expect(minPrice.description).toContain("dollars");
expect(maxPrice.description).toContain("dollars");
});
test("handler should forward eBay dollar price filters to API", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_ebay",
arguments: {
query: "macbook",
minPrice: 999.99,
maxPrice: 1000,
},
},
}),
}),
);
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("minPrice=999.99");
expect(String(calledUrl)).toContain("maxPrice=1000");
});
});

View File

@@ -1,13 +1,9 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"lib": ["dom"],
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"paths": {
"@/*": ["./src/*"]
},
"strict": true,
"noEmit": true
}
}
},
"include": ["./src", "./test", "../../types/**/*.d.ts"]
}

25
tsconfig.json Normal file
View File

@@ -0,0 +1,25 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["dom", "ESNext"],
"target": "ESNext",
"module": "preserve",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false,
"types": ["@types/bun"]
}
}

14
turbo.json Normal file
View File

@@ -0,0 +1,14 @@
{
"$schema": "https://turbo.build/schema.json",
"tasks": {
"typecheck": {},
"build": {
"dependsOn": ["^build"],
"outputs": ["../../dist/**"]
},
"test": {
"dependsOn": ["^build"],
"outputs": []
}
}
}

25
types/argon2-wasm-pro/index.d.ts vendored Normal file
View File

@@ -0,0 +1,25 @@
// Ambient type declarations for the "argon2-wasm-pro" package, covering the
// default export and its `hash` function as declared below.
// NOTE(review): field meanings are not visible from this file; the hedged
// notes below are inferred from names and should be confirmed against the
// package's own documentation.
declare module "argon2-wasm-pro" {
// Options object accepted by `hash`; every field is required.
interface Argon2Options {
// Input to hash, as text or raw bytes.
pass: string | Uint8Array;
salt: Uint8Array;
// presumably the iteration count — TODO confirm
time: number;
// presumably the memory cost — TODO confirm units
mem: number;
// presumably the desired hash length in bytes — TODO confirm
hashLen: number;
parallelism: number;
// presumably a numeric Argon2 variant selector — TODO confirm accepted values
type: number;
}
// Value the promise returned by `hash` resolves to.
interface Argon2Result {
hash: Uint8Array;
hashHex: string;
encoded: string;
}
function hash(options: Argon2Options): Promise<Argon2Result>;
// Shape of the module's default export: an object exposing `hash`.
const argon2: {
hash: typeof hash;
};
export default argon2;
}