Compare commits

...

91 Commits

Author SHA1 Message Date
ec545723bb feat(facebook): add challenge detection and session warming utilities
facebook-challenge.ts: session warmup, header construction, and challenge type detection. Spec document for the anti-bot challenge solver design.
2026-05-02 19:03:00 -04:00
0a246a29bf feat(facebook): add session warming and challenge detection
Facebook Marketplace no longer requires authentication cookies.
Session warming sends proper browser headers. Checkpoint and
login-wall challenges are detected and handled gracefully.
Added marketplace_product_details_page.target extraction path
for current item page structure.
2026-05-02 18:58:53 -04:00
7ab33d0b02 chore: format markdown
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-05-01 11:42:54 -04:00
d2c3c07e7d docs: price filtering schema adjustments
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 23:18:49 -04:00
0470a7bec7 docs(mcp): clarify price filters are dollars 2026-04-30 23:17:59 -04:00
89ad1c521f fix(api): parse price filters as dollars 2026-04-30 23:17:56 -04:00
5c732287c5 test: guard live listing prices 2026-04-30 22:46:48 -04:00
20fb46190a test: add live parser script 2026-04-30 22:46:07 -04:00
e791fc5478 test(facebook): add live parser suite 2026-04-30 22:44:28 -04:00
c1fa5168dc test(kijiji): add live parser suite 2026-04-30 22:43:52 -04:00
ec2a26cedf test(ebay): add live parser suite 2026-04-30 22:42:32 -04:00
5d99e984e0 docs: plan live parser tests 2026-04-30 22:41:41 -04:00
b657ea594a chore: update agents docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:29:01 -04:00
5651a194e9 chore: use biome check instead of biome ci
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:28:02 -04:00
31cc0660bc refactor(ebay): reuse fetchHtml after challenge
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:26:24 -04:00
fc7200777e style: format expected json output in protocol test 2026-04-30 22:25:47 -04:00
f68a5a8d9b feat(linter): enforce correctness on unused imports
Configures the linter to treat unused imports as an error under the
`correctness` rule category. This tightens up code quality standards by
ensuring all imported bindings are utilized.
If the import is unused, there is a high chance refactoring missed this
flow. Review in-depth root causes.
2026-04-30 22:24:06 -04:00
a6b24b318e fix(types): expose argon2 declaration globally 2026-04-30 22:16:48 -04:00
0873df7e82 chore: merge code-smell-cleanup 2026-04-30 21:08:34 -04:00
24e0a8266e Revert "test: preload core fetch guard"
This reverts commit 28b3267b7d.
2026-04-30 20:58:06 -04:00
db173aef1b Revert "chore: add sentinel file for bun test root"
This reverts commit d1cd028f34.
2026-04-30 20:58:06 -04:00
d1cd028f34 chore: add sentinel file for bun test root 2026-04-30 20:56:14 -04:00
28b3267b7d test: preload core fetch guard 2026-04-30 20:53:31 -04:00
c0dda57f64 test: require explicit fetch mocks 2026-04-30 20:51:13 -04:00
31866de787 refactor: clean kijiji scraper internals 2026-04-30 20:48:15 -04:00
9c4c347933 feat: ebay splashui challenge solver
argon2id pow → /challengesvc/answer → chlgref cookie
warm homepage for akamai cookies, detect 307 redirect,
solve + retry transparently in fetchEbayItems flow
2026-04-30 20:44:37 -04:00
53eafe6d4c chore: agent-browser skills path env
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 20:44:05 -04:00
84f17fbdfd chore: ebay parser fix
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 16:56:55 -04:00
3a722a2d11 chore: agent-browser vars
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 16:56:44 -04:00
f95b974c7e fix: harden shared http helper 2026-04-29 21:09:10 -04:00
f5339cadf1 style: format shared http refactor 2026-04-29 21:05:36 -04:00
5d86a4e54d fix: preserve ebay rate-limit fallback 2026-04-29 14:52:08 -04:00
82e7abc057 fix: keep shared http refactor in scope 2026-04-29 14:48:47 -04:00
6e50ebf901 refactor: share scraper http fetching 2026-04-29 13:14:20 -04:00
5ecb645ee3 docs: smell cleanup plan
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-29 13:09:38 -04:00
82e12283de docs: surface Kijiji AND-matching behavior in tool, API, and MCP responses
Kijiji zero-result queries (e.g. 'macbook air m1 apple silicon') are
confusing because the failure mode is non-obvious. Surface the root
cause everywhere the caller can see it:
- MCP tool description warns about AND-matching and gives a concrete
  before/after example
- API 404 body includes the actionable hint via emptySearchResponse(hint)
- Core scraper logs the built URL and tip on page-1 zero results
- MCP handler unwraps the API message field so the hint reaches the LLM
2026-04-29 13:06:31 -04:00
22eb65d4a2 refactor: share mcp api calls 2026-04-29 05:37:24 -04:00
abdd39d65c fix: complete ebay integer validation test coverage 2026-04-29 00:56:37 -04:00
3e4e35c9ae fix: tighten route integer parsing and test coverage 2026-04-29 00:32:23 -04:00
3ea6ee3938 fix: strictly parse route integers 2026-04-29 00:12:26 -04:00
d178f9c9cb fix: remove cookie query forwarding 2026-04-28 23:52:45 -04:00
9cbba9ba13 chore: ignore local worktrees 2026-04-28 23:08:04 -04:00
b6aaec0b65 chore: update ruler docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 22:29:12 -04:00
11dce39428 fix(core): parse Kijiji StandardListing records 2026-04-28 21:57:10 -04:00
2a5701aeb9 test: quiet and speed up test runs 2026-04-28 21:45:06 -04:00
c6c44a0914 fix(api): preserve unstable buckets 2026-04-28 21:34:47 -04:00
3fe5fdb63f fix(core): handle partial listing data 2026-04-28 21:34:45 -04:00
7966073bf8 fix(core): prefer explicit cookie source 2026-04-28 21:34:40 -04:00
df2635d92f chore: prepend typecheck command before biome ci
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 20:11:43 -04:00
ddadc7d5ae chore: add bun types to global tsconfig
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 20:07:05 -04:00
d77a006ded chore: ignore .turbo cache dirs 2026-04-28 19:56:39 -04:00
56b2198df1 chore: fix turbo build outputs path to match actual dist location 2026-04-28 19:56:29 -04:00
63716272c5 chore: slim per-package tsconfigs to extend root 2026-04-28 19:55:59 -04:00
1d21c66945 chore: use exports field and catalog refs in all packages 2026-04-28 19:55:37 -04:00
f2f78225f3 chore: add workspace catalog and turbo to root package.json 2026-04-28 19:54:46 -04:00
43d15fce5f chore: add shared root tsconfig.json 2026-04-28 19:53:58 -04:00
fef2f1968a chore: add bunfig.toml and turbo.json 2026-04-28 19:53:47 -04:00
01081f6b2e docs: add opencode monorepo config adoption implementation plan 2026-04-28 19:52:28 -04:00
d10d5305a3 docs: add opencode monorepo config adoption design spec 2026-04-28 19:50:51 -04:00
bf393eacae chore: setup typecheck scripts for each package
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 19:37:18 -04:00
79bb249603 chore: replace any cast by asserting tool schema property types
Tightens the type assertion for the `unstableFilter` schema property in tests to ensure correct structural checking of its `type` and `description` fields.
2026-04-28 19:24:39 -04:00
957e0f137b chore: biome lint and formatting
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 19:21:16 -04:00
49e90d45f8 docs: expose unstable mode in mcp tools 2026-04-28 19:03:42 -04:00
b6456047a6 feat: add maxItems support to ebay scraper 2026-04-27 10:56:23 -04:00
02b3f805b2 fix: use explicit conditional calls and validate negative params 2026-04-27 10:46:06 -04:00
a1af5d2630 fix: align ebay route with spec and validate params 2026-04-27 09:56:39 -04:00
77b9fc9934 fix: validate route params and reduce duplication 2026-04-27 09:45:47 -04:00
a802035ca4 fix: correct empty-result and maxItems handling in routes 2026-04-27 09:34:08 -04:00
974190de6b fix: preserve maxItems limit in unstable mode 2026-04-27 08:57:48 -04:00
3c38232cd5 feat: expose unstable mode in api routes 2026-04-27 02:49:35 -04:00
224e83ac4c fix: correct ebay title filtering and type contracts 2026-04-27 02:04:48 -04:00
b73faa35da fix: respect scraper pacing details 2026-04-27 00:13:42 -04:00
0f77155c8d fix: align marketplace price filter parsing 2026-04-23 11:14:57 -04:00
10c2856bf6 fix: tighten item price and pacing behavior 2026-04-23 10:59:33 -04:00
9c8643086a fix: refine scraper output behavior 2026-04-23 10:43:38 -04:00
244a88e63c fix: harden scraper price parsing 2026-04-23 10:31:08 -04:00
807849e257 fix: expose ebay unstable mode typing 2026-04-23 05:47:50 -04:00
eb37e8814e fix: preserve free results and request pacing 2026-04-23 05:40:42 -04:00
13c0fec305 fix: tighten scraper type contracts 2026-04-23 05:28:46 -04:00
08d59ab497 fix: tighten ebay result parsing 2026-04-23 05:13:40 -04:00
0a0723a560 fix: respect filtered result sets in unstable mode 2026-04-23 05:03:26 -04:00
881c2ddf8c fix: finalize scraper unstable mode integration 2026-04-23 00:20:21 -04:00
55faee7dd5 fix: cover scraper pricing edge cases 2026-04-22 23:54:07 -04:00
b5e14e686a fix: tighten scraper edge case handling 2026-04-22 23:46:52 -04:00
6f9d4db419 fix: tighten scraper parsing behavior 2026-04-22 23:41:08 -04:00
08edfa8097 fix: align scraper unstable mode behavior 2026-04-22 23:36:00 -04:00
c7fc8352ac fix: preserve default scraper result contracts 2026-04-22 23:30:17 -04:00
1ee41fb346 feat: add unstable mode to scraper results 2026-04-22 23:23:31 -04:00
8141de5b4b feat: add shared unstable listing classifier 2026-04-22 17:56:26 -04:00
f8975fa91d docs: add unstable listing mode plan 2026-04-22 17:53:45 -04:00
cb5e1e62d2 docs: add unstable listing mode design 2026-04-22 17:51:07 -04:00
74 changed files with 10078 additions and 1177 deletions

4
.envrc
View File

@@ -1,4 +1,8 @@
export DIRENV_WARN_TIMEOUT=20s export DIRENV_WARN_TIMEOUT=20s
export AGENT_BROWSER_EXECUTABLE_PATH=/run/current-system/sw/bin/google-chrome-unstable
export AGENT_BROWSER_ENGINE=chrome
export AGENT_BROWSER_HEADED=0
export AGENT_BROWSER_SKILLS_DIR=.claude/skills
export OPENCODE_CONFIG_CONTENT="{\"plugin\":[\"superpowers@git+https://github.com/obra/superpowers.git\"]}" export OPENCODE_CONFIG_CONTENT="{\"plugin\":[\"superpowers@git+https://github.com/obra/superpowers.git\"]}"
eval "$(devenv direnvrc)" eval "$(devenv direnvrc)"

2
.gitignore vendored
View File

@@ -33,6 +33,8 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
.eslintcache .eslintcache
.cache .cache
*.tsbuildinfo *.tsbuildinfo
.turbo
.worktrees/
# IntelliJ based IDEs # IntelliJ based IDEs
.idea .idea

View File

@@ -1,52 +1,9 @@
## Bun Guidelines ## Bun Guide
**CRITICAL**: Do not assume you know full Bun APIs. For **ANY** Bun API you use, confirm them by using `bun-docs` MCP tools. - Package manager/runtime/test runner is Bun `1.3.13`.
- Use `bun install`, `bun run <script>`, `bun test`, and `bun build`; do not add npm/yarn/pnpm scripts.
Default to using Bun instead of Node.js. - Prefer Bun-native runtime APIs already used in repo: `Bun.serve`, built-in `fetch`, Web APIs, and `bun:test`.
- Keep servers framework-free. Do not introduce Express/Koa/Fastify for the adapters.
- Use `bun <file>` instead of `node <file>` or `ts-node <file>` - Bun auto-loads `.env`; do not add `dotenv`.
- Use `bun test` instead of `jest` or `vitest` - For tests, import from `bun:test` and restore mocked globals/env in `afterEach` or `finally`.
- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild` - Root `bun test` is misleading because `bunfig.toml` sets a dummy root. Run package test paths explicitly.
- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install`
- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>`
- Use `bunx <package> <command>` instead of `npx <package> <command>`
- Bun automatically loads .env, so don't use dotenv.
### APIs
- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`.
- `bun:sqlite` for SQLite. Don't use `better-sqlite3`.
- `Bun.redis` for Redis. Don't use `ioredis`.
- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`.
- `WebSocket` is built-in. Don't use `ws`.
- Prefer `Bun.file` over `node:fs`'s readFile/writeFile
- Bun.$`ls` instead of execa.
### Testing
#### Quick Start
- Run tests: `bun test`
- Write tests in `tests/` folder
#### Test Structure
- Use `describe` blocks to group related tests
- Use `test` for individual test cases
- Use `beforeEach`/`afterEach` for setup/teardown
#### Assertions
- Import: `import { test, expect, describe, beforeEach, afterEach, mock } from "bun:test";`
- Common: `expect(value).toBe(expected)`, `expect(fn).rejects.toThrow()`
- Async: `await expect(asyncFn()).resolves.toBe(expected)`
#### Mocking
- Mock functions: `mock(fn)`
- Mock globals: `global.fetch = mock(...)`
- Restore mocks in `afterEach` or `finally`
#### Best Practices
- Mock external APIs (fetch, file I/O)
- Test error cases and edge conditions
- Use descriptive test names
- Clean up resources in `afterEach`
For more information, read the Bun API docs in `node_modules/bun-types/docs/**.mdx`.

View File

@@ -2,37 +2,47 @@
## Repo Shape ## Repo Shape
- Bun workspace monorepo. - Bun workspace monorepo with packages under `packages/*`.
- `packages/core`: scraper logic, parsing, shared cookie/http/format helpers, and the only checked-in tests. - `packages/core`: scraper behavior, parsing, result types, cookie handling, HTTP helpers.
- `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes. - `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes over core.
- `packages/mcp-server`: MCP JSON-RPC adapter that proxies to the API server. - `packages/mcp-server`: MCP/JSON-RPC adapter that proxies to the API server.
- `dist/`: build output. Do not edit generated files here. - `cookies/`: local cookie docs/examples only. Treat real cookie files as secrets.
- `cookies/`: local cookie examples and docs. Never commit real session cookies. - `dist/`, `node_modules/`, `.turbo/`, `.direnv/`, `.devenv/`: generated/vendor/cache. Do not edit.
## Commands ## Commands
- Install: `bun install` - Install: `bun install`
- Lint/format check: `bun run ci` - Lint/format/typecheck: `bun run ci`
- Build everything: `bun run build` - Build all packages: `bun run build`
- Run tests: `bun test` - Build bundled runtime output: `bun run build:all`
- Run tests: `bun test packages/core/test packages/api-server/test packages/mcp-server/test`
- API dev server: `bun run --cwd packages/api-server dev` - API dev server: `bun run --cwd packages/api-server dev`
- MCP dev server: `bun run --cwd packages/mcp-server dev` - MCP dev server: `bun run --cwd packages/mcp-server dev`
## Repo Conventions ## Boundaries
- Keep marketplace scraping behavior in `packages/core`. `api-server` and `mcp-server` stay thin adapters. - Marketplace behavior belongs in `packages/core`, not adapter packages.
- Preserve cookie precedence everywhere: request parameter > environment variable > cookie file. - HTTP route code should parse request input, call core, and map status/errors.
- Shared public surface for scraper code is `packages/core/src/index.ts`. Update exports deliberately. - MCP code should define tools, validate JSON-RPC flow, and map tool args to API URLs.
- Tests should stay deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints. - Keep API query params and MCP tool args in sync.
- Use Bun and Bun-native APIs in this repo. Do not introduce Node-specific tooling unless already required. - Shared public surface for scraper code is `packages/core/src/index.ts`; update exports deliberately.
- Biome and strict TypeScript are part of the contract. Fix code to satisfy them; do not relax config.
## Invariants
- Cookie precedence in core helpers: explicit/request cookie string before environment variable.
- Tests must be deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints.
- Use Bun and Bun-native APIs. Do not add Node-specific tooling unless already required.
- Biome and strict TypeScript are contract. Fix code; do not relax config.
## Verification ## Verification
- Core changes: `bun test && bun run ci` - Core changes: `bun test packages/core/test && bun run ci`
- Cross-package contract changes: `bun test && bun run ci && bun run build` - Adapter-only changes: relevant package build plus `bun run ci`
- Adapter-only changes: run the relevant package build plus `bun run ci` - Cross-package contract changes: `bun test packages/core/test packages/api-server/test packages/mcp-server/test && bun run ci && bun run build`
## Gotchas ## Gotchas
- The root `build` script emits separate bundles to `dist/api` and `dist/mcp`, then `scripts/start.sh` launches both. - `bunfig.toml` points test root at `./do-not-run-tests-from-root`; pass package test paths explicitly.
- Root `build` cleans `dist`, then Turbo emits bundles for API and MCP.
- `scripts/start.sh` launches `dist/api/index.js` and `dist/mcp/index.js`.
- Package `tsconfig.json` files override root `include`; shared ambient declarations under root `types/` must be included from each package that typechecks cross-package source.

View File

@@ -1,44 +1,56 @@
# Facebook Marketplace API Reverse Engineering # Facebook Marketplace API Reverse Engineering
## Overview ## Overview
This document tracks findings from reverse-engineering Facebook Marketplace APIs for listing details.
This document tracks findings from reverse-engineering Facebook Marketplace APIs for
listing details.
## Current Implementation Status ## Current Implementation Status
- Search functionality: Implemented in `src/facebook.ts` - Search functionality: Implemented in `src/facebook.ts`
- Individual listing details: Not yet implemented - Individual listing details: Not yet implemented
## Findings ## Findings
### Step 1: Initial Setup ### Step 1: Initial Setup
- Using Chrome DevTools to inspect Facebook Marketplace - Using Chrome DevTools to inspect Facebook Marketplace
- Need to authenticate with Facebook account to access marketplace data - Need to authenticate with Facebook account to access marketplace data
- Cookies required for full access - Cookies required for full access
- Current status: Successfully logged in and accessed marketplace data - Current status: Successfully logged in and accessed marketplace data
### Step 2: Individual Listing Details Analysis - COMPLETED ### Step 2: Individual Listing Details Analysis - COMPLETED
- **Data Location**: Embedded in HTML script tags within `require` array structure - **Data Location**: Embedded in HTML script tags within `require` array structure
- **Path**: `require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target` - **Path**:
`require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Authentication**: Required for full data access - **Authentication**: Required for full data access
- **Current Status**: Successfully reverse-engineered the API structure and data extraction method - **Current Status**: Successfully reverse-engineered the API structure and data
extraction method
### API Endpoints Discovered ### API Endpoints Discovered
#### Search Endpoint #### Search Endpoint
- URL: `https://www.facebook.com/marketplace/{location}/search` - URL: `https://www.facebook.com/marketplace/{location}/search`
- Parameters: `query`, `sortBy`, `exact` - Parameters: `query`, `sortBy`, `exact`
- Data embedded in HTML script tags with `require` structure - Data embedded in HTML script tags with `require` structure
- Authentication: Required (cookies) - Authentication: Required (cookies)
#### Listing Details Endpoint #### Listing Details Endpoint
- **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/` - **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/`
- **Data Source**: Server-side rendered HTML with embedded JSON data in script tags - **Data Source**: Server-side rendered HTML with embedded JSON data in script tags
- **Data Structure**: Relay/GraphQL style data structure under `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target` - **Data Structure**: Relay/GraphQL style data structure under
- **Extraction Method**: Parse JSON from script tags containing marketplace data, navigate to the target object `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Extraction Method**: Parse JSON from script tags containing marketplace data,
navigate to the target object
- **Authentication**: Required (cookies) - **Authentication**: Required (cookies)
### Listing Data Structure Discovered (Current - 2026) ### Listing Data Structure Discovered (Current - 2026)
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem` object with the following key properties: The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem`
object with the following key properties:
```typescript ```typescript
interface FacebookMarketplaceItem { interface FacebookMarketplaceItem {
@@ -151,6 +163,7 @@ interface FacebookMarketplaceItem {
``` ```
### Example Data Extracted (Current Structure) ### Example Data Extracted (Current Structure)
```json ```json
{ {
"__typename": "GroupCommerceProductItem", "__typename": "GroupCommerceProductItem",
@@ -228,36 +241,47 @@ interface FacebookMarketplaceItem {
## Data Extraction Method ## Data Extraction Method
### Current Method (2026) ### Current Method (2026)
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data by searching for key fields like `marketplace_listing_title`, `redacted_description`, and `formatted_price`. Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML
response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data
by searching for key fields like `marketplace_listing_title`, `redacted_description`,
and `formatted_price`.
2. **Parse JSON Structure**: The data is nested within a `require` array structure: 2. **Parse JSON Structure**: The data is nested within a `require` array structure:
``` ```
require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target
``` ```
3. **Navigate to Target Object**: The actual listing data is a `GroupCommerceProductItem` object containing comprehensive information about the listing, seller, and vehicle details. 3. **Navigate to Target Object**: The actual listing data is a
`GroupCommerceProductItem` object containing comprehensive information about the
listing, seller, and vehicle details.
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust extraction should search for the target object recursively within the parsed JSON. 4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust
extraction should search for the target object recursively within the parsed JSON.
### Authentication Requirements ### Authentication Requirements
- Valid Facebook session cookies are required - Valid Facebook session cookies are required
- User must be logged in to Facebook - User must be logged in to Facebook
- Marketplace access may be location-restricted - Marketplace access may be location-restricted
## Tools Used ## Tools Used
- Chrome DevTools Protocol - Chrome DevTools Protocol
- Network monitoring - Network monitoring
- HTML/script parsing - HTML/script parsing
- JSON structure analysis - JSON structure analysis
## Implementation Status ## Implementation Status
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details - ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Identified current data structure and extraction method (2026) - ✅ Identified current data structure and extraction method (2026)
- ✅ Documented comprehensive GroupCommerceProductItem interface - ✅ Documented comprehensive GroupCommerceProductItem interface
- ✅ Implemented `extractFacebookItemData()` function with script parsing logic - ✅ Implemented `extractFacebookItemData()` function with script parsing logic
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to ListingDetails - ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to
ListingDetails
- ✅ Implemented `fetchFacebookItem()` function with authentication and error handling - ✅ Implemented `fetchFacebookItem()` function with authentication and error handling
- ✅ Updated TypeScript interfaces to match current API structure - ✅ Updated TypeScript interfaces to match current API structure
- ✅ Added robust extraction with fallback methods for changing API paths - ✅ Added robust extraction with fallback methods for changing API paths
@@ -266,12 +290,15 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
### Core Functions Implemented ### Core Functions Implemented
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from HTML-embedded JSON in script tags 1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from
HTML-embedded JSON in script tags
- Searches for scripts containing marketplace listing data - Searches for scripts containing marketplace listing data
- Uses primary path: `require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target` - Uses primary path:
`require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
- Falls back to recursive search for GroupCommerceProductItem objects - Falls back to recursive search for GroupCommerceProductItem objects
2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to unified ListingDetails format 2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to
unified ListingDetails format
- Handles pricing (FREE listings, CAD currency) - Handles pricing (FREE listings, CAD currency)
- Extracts seller information, location, and status - Extracts seller information, location, and status
- Supports vehicle-specific metadata - Supports vehicle-specific metadata
@@ -284,25 +311,31 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
- Returns parsed ListingDetails or null on failure - Returns parsed ListingDetails or null on failure
### Authentication Requirements ### Authentication Requirements
- Facebook session cookies required in `./cookies/facebook.json` or provided as parameter
- Facebook session cookies required in `./cookies/facebook.json` or provided as
parameter
- Cookies must include valid authentication tokens for marketplace access - Cookies must include valid authentication tokens for marketplace access
- Handles cookie expiration and domain validation - Handles cookie expiration and domain validation
## Current Implementation Status - 2026 Verification ## Current Implementation Status - 2026 Verification
### Step 3: API Verification and Current Structure Analysis (January 2026) ### Step 3: API Verification and Current Structure Analysis (January 2026)
- **Verification Date**: January 22, 2026 - **Verification Date**: January 22, 2026
- **Status**: Successfully verified current Facebook Marketplace API structure - **Status**: Successfully verified current Facebook Marketplace API structure
- **Data Source**: Embedded JSON in HTML script tags (server-side rendered) - **Data Source**: Embedded JSON in HTML script tags (server-side rendered)
- **Extraction Path**: `require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target` - **Extraction Path**:
`require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
#### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon) #### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon)
- **Listing ID**: 1226468515995685 - **Listing ID**: 1226468515995685
- **Title**: "2006 Hyundai Tiburon" - **Title**: "2006 Hyundai Tiburon"
- **Price**: CA$3,000 (formatted_price.text) - **Price**: CA$3,000 (formatted_price.text)
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount": "3000.00"} - **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount":
"3000.00"}
- **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812) - **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812)
- **Description**: "As is" (redacted_description.text) - **Description**: "As is" (redacted_description.text)
- **Vehicle Details**: - **Vehicle Details**:
- Make: Hyundai - Make: Hyundai
- Model: Tiburon - Model: Tiburon
@@ -323,41 +356,54 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
- **Messaging**: Enabled - **Messaging**: Enabled
#### Current API Characteristics #### Current API Characteristics
- **Authentication**: Still requires valid Facebook session cookies - **Authentication**: Still requires valid Facebook session cookies
- **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON - **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON
- **Structure Stability**: Primary extraction path remains functional - **Structure Stability**: Primary extraction path remains functional
- **Additional Features**: Includes marketplace ratings, seller verification badges, cross-posting info - **Additional Features**: Includes marketplace ratings, seller verification badges,
cross-posting info
### API Changes Observed Since 2024 Documentation ### API Changes Observed Since 2024 Documentation
- **Minimal Changes**: Core data structure largely unchanged - **Minimal Changes**: Core data structure largely unchanged
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile information - **Enhanced Fields**: Added more detailed vehicle specifications and seller profile
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure information
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure
- **Security Features**: Additional integrity checks and reporting mechanisms - **Security Features**: Additional integrity checks and reporting mechanisms
### Multi-Category Testing Results (January 2026) ### Multi-Category Testing Results (January 2026)
Successfully tested extraction across different listing categories: Successfully tested extraction across different listing categories:
#### 1. Vehicle Listings (Automotive) #### 1. Vehicle Listings (Automotive)
- **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685) - **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685)
- **Status**: ✅ Fully functional - **Status**: ✅ Fully functional
- **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates - **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data, vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color, vehicle_fuel_type - **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data,
vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color,
vehicle_fuel_type
#### 2. Electronics Listings #### 2. Electronics Listings
- **Example**: Nintendo Switch (ID: 3903865769914262) - **Example**: Nintendo Switch (ID: 3903865769914262)
- **Status**: ✅ Fully functional - **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used - like new), seller (Yitao Hou) - **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used -
like new), seller (Yitao Hou)
- **Category**: Electronics (category_id: 479353692612078) - **Category**: Electronics (category_id: 479353692612078)
- **Notes**: Standard GroupCommerceProductItem structure applies - **Notes**: Standard GroupCommerceProductItem structure applies
#### 3. Home Goods/Furniture Listings #### 3. Home Goods/Furniture Listings
- **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709) - **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709)
- **Status**: ✅ Fully functional - **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used - like new), seller (Rohit Rehan) - **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used -
like new), seller (Rohit Rehan)
- **Category**: Home Goods (category_id: 1569171756675761) - **Category**: Home Goods (category_id: 1569171756675761)
- **Notes**: Includes detailed description and delivery options - **Notes**: Includes detailed description and delivery options
#### Testing Summary #### Testing Summary
- **Extraction Method**: Consistent across all categories - **Extraction Method**: Consistent across all categories
- **Data Structure**: GroupCommerceProductItem interface works for all listing types - **Data Structure**: GroupCommerceProductItem interface works for all listing types
- **Authentication**: Required for all categories - **Authentication**: Required for all categories
@@ -365,18 +411,22 @@ Successfully tested extraction across different listing categories:
- **Edge Cases**: All tested listings were active/in-person pickup - **Edge Cases**: All tested listings were active/in-person pickup
## Implementation Status - COMPLETED (January 2026) ## Implementation Status - COMPLETED (January 2026)
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details - ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Verified current API structure and extraction method (January 2026) - ✅ Verified current API structure and extraction method (January 2026)
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home goods) - ✅ Tested extraction across multiple listing categories (vehicles, electronics, home
- ✅ Implemented comprehensive error handling for sold/removed listings and authentication failures goods)
- ✅ Implemented comprehensive error handling for sold/removed listings and
authentication failures
- ✅ Enhanced rate limiting and retry logic (already robust) - ✅ Enhanced rate limiting and retry logic (already robust)
- ✅ Added monitoring and metrics for API stability detection - ✅ Added monitoring and metrics for API stability detection
- ✅ Updated all scraper functions to use verified extraction methods - ✅ Updated all scraper functions to use verified extraction methods
- ✅ Documented comprehensive GroupCommerceProductItem interface with real examples - ✅ Documented comprehensive GroupCommerceProductItem interface with real examples
## Next Steps (Future Maintenance) ## Next Steps (Future Maintenance)
1. Monitor extraction success rates for API change detection 1. Monitor extraction success rates for API change detection
2. Update extraction paths if Facebook changes their API structure 2. Update extraction paths if Facebook changes their API structure
3. Add support for additional marketplace features as they become available 3. Add support for additional marketplace features as they become available
4. Implement caching mechanisms for improved performance 4. Implement caching mechanisms for improved performance
5. Add support for marketplace messaging and negotiation features 5. Add support for marketplace messaging and negotiation features

145
KIJIJI.md
View File

@@ -1,9 +1,13 @@
# Kijiji API Findings # Kijiji API Findings
## Overview ## Overview
Kijiji is a Canadian classifieds marketplace that uses a modern web application built with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with client-side state management.
Kijiji is a Canadian classifieds marketplace that uses a modern web application built
with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with
client-side state management.
## Initial Page Load (Homepage) ## Initial Page Load (Homepage)
- **URL**: https://www.kijiji.ca/ - **URL**: https://www.kijiji.ca/
- **Architecture**: Server-side rendered React application with Next.js - **Architecture**: Server-side rendered React application with Next.js
- **Data Sources**: - **Data Sources**:
@@ -12,18 +16,27 @@ Kijiji is a Canadian classifieds marketplace that uses a modern web application
- No initial API calls for listings - data appears to be embedded in HTML - No initial API calls for listings - data appears to be embedded in HTML
## Search Results Page ## Search Results Page
- **URL Pattern**: `https://www.kijiji.ca/b-[location]/[keywords]/k0l0` - **URL Pattern**: `https://www.kijiji.ca/b-[location]/[keywords]/k0l0`
- **Example**: `https://www.kijiji.ca/b-canada/iphone/k0l0` - **Example**: `https://www.kijiji.ca/b-canada/iphone/k0l0`
- **Technology Stack**: Next.js with Apollo GraphQL client - **Technology Stack**: Next.js with Apollo GraphQL client
- **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized GraphQL cache - **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized
GraphQL cache
### GraphQL Data Structure ### GraphQL Data Structure
#### Data Location #### Data Location
Search results data is embedded in the Next.js page props under `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server and sent to the client. Each page (including pagination) has its own pre-rendered data.
Search results data is embedded in the Next.js page props under
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server
and sent to the client.
Each page (including pagination) has its own pre-rendered data.
#### Search Results Container #### Search Results Container
The search results are stored directly in the Apollo ROOT_QUERY with keys following the pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination parameters.
The search results are stored directly in the Apollo ROOT_QUERY with keys following the
pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination
parameters.
```json ```json
{ {
@@ -33,17 +46,20 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
``` ```
#### Pagination Handling #### Pagination Handling
- Each page is server-side rendered with its own embedded data - Each page is server-side rendered with its own embedded data
- No client-side GraphQL requests for pagination - No client-side GraphQL requests for pagination
- URL parameter `?page=N` controls which page data is embedded - URL parameter `?page=N` controls which page data is embedded
- Offset in searchString corresponds to `(page-1) * limit` - Offset in searchString corresponds to `(page-1) * limit`
#### Search Parameters in URL #### Search Parameters in URL
- `k0c{CATEGORY}l{LOCATION}` - Category and location IDs - `k0c{CATEGORY}l{LOCATION}` - Category and location IDs
- `?page=N` - Page number (1-based) - `?page=N` - Page number (1-based)
- Data contains `offset` and `limit` for API-style pagination - Data contains `offset` and `limit` for API-style pagination
#### Individual Listing Structure #### Individual Listing Structure
```json ```json
{ {
"id": "1732061412", "id": "1732061412",
@@ -90,6 +106,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
``` ```
### URL Parameters ### URL Parameters
- `sort=MATCH` - Sort by relevance - `sort=MATCH` - Sort by relevance
- `order=DESC` - Descending order - `order=DESC` - Descending order
- `type=OFFER` - Show offerings (not wanted ads) - `type=OFFER` - Show offerings (not wanted ads)
@@ -102,6 +119,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
- `eaTopAdPosition=1` - ? - `eaTopAdPosition=1` - ?
### Image API ### Image API
- **Endpoint**: `https://media.kijiji.ca/api/v1/` - **Endpoint**: `https://media.kijiji.ca/api/v1/`
- **Pattern**: `/ca-prod-fsbo-ads/images/{uuid}?rule=kijijica-{size}-jpg` - **Pattern**: `/ca-prod-fsbo-ads/images/{uuid}?rule=kijijica-{size}-jpg`
- **Sizes**: 200, 300, 400, 500 pixels - **Sizes**: 200, 300, 400, 500 pixels
@@ -109,10 +127,12 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
### Categories and Locations ### Categories and Locations
#### Category Structure #### Category Structure
Categories are hierarchical with parent-child relationships. The main categories under "Buy & Sell" include:
Categories are hierarchical with parent-child relationships.
The main categories under “Buy & Sell” include:
| ID | Name | Total Results (iPhone search) | | ID | Name | Total Results (iPhone search) |
|----|------|------------------------------| | --- | --- | --- |
| 10 | Buy & Sell | 19956 | | 10 | Buy & Sell | 19956 |
| 12 | Arts & Collectibles | 149 | | 12 | Arts & Collectibles | 149 |
| 767 | Audio | 481 | | 767 | Audio | 481 |
@@ -145,10 +165,11 @@ Categories are hierarchical with parent-child relationships. The main categories
| 26 | Other | 286 | | 26 | Other | 286 |
#### Location Structure #### Location Structure
Locations are also hierarchical, with provinces/states under the main "Canada" location:
Locations are also hierarchical, with provinces/states under the main “Canada” location:
| ID | Name | Total Results (iPhone search) | | ID | Name | Total Results (iPhone search) |
|----|------|------------------------------| | --- | --- | --- |
| 0 | Canada | - | | 0 | Canada | - |
| 9001 | Québec | 2516 | | 9001 | Québec | 2516 |
| 9002 | Nova Scotia | 875 | | 9002 | Nova Scotia | 875 |
@@ -163,16 +184,20 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
| 9011 | Prince Edward Island | 31 | | 9011 | Prince Edward Island | 31 |
#### URL Patterns #### URL Patterns
- Categories: `/b-{category-slug}/canada/{keywords}/k0c{CATEGORY_ID}l0` - Categories: `/b-{category-slug}/canada/{keywords}/k0c{CATEGORY_ID}l0`
- Locations: `/b-buy-sell/{location-slug}/iphone/k0c10l{LOCATION_ID}` - Locations: `/b-buy-sell/{location-slug}/iphone/k0c10l{LOCATION_ID}`
- Combined: `/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}` - Combined:
`/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}`
### Pagination ### Pagination
- Uses offset-based pagination - Uses offset-based pagination
- 40 results per page - 40 results per page
- Total count provided in pagination metadata - Total count provided in pagination metadata
## Authentication & User Management ## Authentication & User Management
- **Authentication System**: OAuth2-based using CIS (Customer Identity Service) - **Authentication System**: OAuth2-based using CIS (Customer Identity Service)
- **Identity Provider**: `id.kijiji.ca` - **Identity Provider**: `id.kijiji.ca`
- **OAuth2 Flow**: - **OAuth2 Flow**:
@@ -184,24 +209,30 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
- **User Features**: Saved searches, messaging, flagging require authentication - **User Features**: Saved searches, messaging, flagging require authentication
## Posting API ## Posting API
- **Posting Flow**: Requires authentication, redirects to login if not authenticated - **Posting Flow**: Requires authentication, redirects to login if not authenticated
- **Posting URL**: `https://www.kijiji.ca/p-post-ad.html` - **Posting URL**: `https://www.kijiji.ca/p-post-ad.html`
- **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated users - **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in anonymous browsing) users
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in
anonymous browsing)
## GraphQL API Endpoint ## GraphQL API Endpoint
- **URL**: `https://www.kijiji.ca/anvil/api` - **URL**: `https://www.kijiji.ca/anvil/api`
- **Method**: POST - **Method**: POST
- **Content-Type**: application/json - **Content-Type**: application/json
- **Headers**: - **Headers**:
- `apollo-require-preflight: true` - `apollo-require-preflight: true`
- Standard CORS headers - Standard CORS headers
- **Authentication**: No authentication required for basic queries (uses cookies for session tracking) - **Authentication**: No authentication required for basic queries (uses cookies for
session tracking)
- **Technology**: Apollo GraphQL server - **Technology**: Apollo GraphQL server
### Sample GraphQL Queries Discovered ### Sample GraphQL Queries Discovered
#### Get Search Categories #### Get Search Categories
```graphql ```graphql
query getSearchCategories($locale: String!) { query getSearchCategories($locale: String!) {
searchCategories { searchCategories {
@@ -218,6 +249,7 @@ Variables: `{"locale": "en-CA"}`
Response includes hierarchical category structure with IDs and localized names. Response includes hierarchical category structure with IDs and localized names.
#### Get Geocode from IP (fails for current IP) #### Get Geocode from IP (fails for current IP)
```graphql ```graphql
query GetGeocodeReverseFromIp { query GetGeocodeReverseFromIp {
geocodeReverseFromIp { geocodeReverseFromIp {
@@ -229,9 +261,11 @@ query GetGeocodeReverseFromIp {
} }
``` ```
This query fails for the current IP address, suggesting geolocation-based features may not work or require different IP ranges. This query fails for the current IP address, suggesting geolocation-based features may
not work or require different IP ranges.
#### Get Category Path #### Get Category Path
```graphql ```graphql
query GetCategoryPath($categoryId: Int!, $locale: String, $locationId: Int) { query GetCategoryPath($categoryId: Int!, $locale: String, $locationId: Int) {
category(id: $categoryId) { category(id: $categoryId) {
@@ -256,25 +290,33 @@ Variables: `{"categoryId": 10, "locationId": 0, "locale": "en-CA"}`
## Latest Findings (2026-01-21) ## Latest Findings (2026-01-21)
### Client-Side GraphQL Queries Observed ### Client-Side GraphQL Queries Observed
- **getSearchCategories**: Retrieves category hierarchy for search filters - **getSearchCategories**: Retrieves category hierarchy for search filters
- **GetGeocodeReverseFromIp**: Attempts to geolocate user (fails for current IP) - **GetGeocodeReverseFromIp**: Attempts to geolocate user (fails for current IP)
### GraphQL Schema Insights ### GraphQL Schema Insights
Testing direct GraphQL queries revealed:
- Field "searchResults" does not exist on Query type
- Suggested alternatives: "searchResultsPage" or "searchUrl"
- This suggests the search functionality may use different GraphQL operations than direct queries
The embedded Apollo state approach appears to be the primary method for accessing search data, with GraphQL used for auxiliary operations like categories and geolocation. Testing direct GraphQL queries revealed:
- Field `searchResults` does not exist on Query type
- Suggested alternatives: `searchResultsPage` or `searchUrl`
- This suggests the search functionality may use different GraphQL operations than
direct queries
The embedded Apollo state approach appears to be the primary method for accessing search
data, with GraphQL used for auxiliary operations like categories and geolocation.
### Server-Side Rendering Architecture ### Server-Side Rendering Architecture
Search results are fully server-side rendered with data embedded in HTML. Each page (including pagination) contains its own pre-rendered data. No client-side GraphQL requests are made for:
Search results are fully server-side rendered with data embedded in HTML. Each page
(including pagination) contains its own pre-rendered data.
No client-side GraphQL requests are made for:
- Initial search results - Initial search results
- Pagination navigation - Pagination navigation
- Search result data - Search result data
### Network Analysis Findings ### Network Analysis Findings
- GraphQL endpoint: `https://www.kijiji.ca/anvil/api` - GraphQL endpoint: `https://www.kijiji.ca/anvil/api`
- Method: POST - Method: POST
- Content-Type: application/json - Content-Type: application/json
@@ -282,7 +324,10 @@ Search results are fully server-side rendered with data embedded in HTML. Each p
- Cookies required for session tracking - Cookies required for session tracking
### Embedded Data Structure ### Embedded Data Structure
Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object. The data includes:
Search results data is embedded in the HTML within Next.js
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object.
The data includes:
- Individual ad listings with complete metadata - Individual ad listings with complete metadata
- Pagination information - Pagination information
@@ -290,20 +335,24 @@ Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.
- Category/location hierarchies - Category/location hierarchies
### Current Scraper Implementation ### Current Scraper Implementation
The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo state: The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo state:
- Uses `extractApolloState()` to parse `__NEXT_DATA__` from HTML - Uses `extractApolloState()` to parse `__NEXT_DATA__` from HTML
- Filters Apollo keys containing "Listing" to find ad data - Filters Apollo keys containing "Listing" to find ad data
- Extracts `url`, `title`, and other metadata from each listing - Extracts `url`, `title`, and other metadata from each listing
- Successfully scrapes listings without needing API authentication - Successfully scrapes listings without needing API authentication
### Authentication Status ### Authentication Status
- **Search functionality**: No authentication required - all search and listing data accessible anonymously
- **Search functionality**: No authentication required - all search and listing data
accessible anonymously
- **Posting functionality**: Requires authentication (redirects to login) - **Posting functionality**: Requires authentication (redirects to login)
- **User features**: Saved searches, messaging require authentication - **User features**: Saved searches, messaging require authentication
- **Rate limiting**: May apply but not observed in anonymous browsing - **Rate limiting**: May apply but not observed in anonymous browsing
### Pagination Implementation ### Pagination Implementation
- Each page is a separate server-rendered route - Each page is a separate server-rendered route
- URL pattern: `/b-{location}/{keywords}/page-{number}/k0{category}l{location_id}` - URL pattern: `/b-{location}/{keywords}/page-{number}/k0{category}l{location_id}`
- No client-side pagination API calls - No client-side pagination API calls
@@ -313,20 +362,24 @@ The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo
## URL Pattern Analysis ## URL Pattern Analysis
### Search URL Structure ### Search URL Structure
`https://www.kijiji.ca/b-{category_slug}/{location_slug}/{keywords}/k0c{category_id}l{location_id}` `https://www.kijiji.ca/b-{category_slug}/{location_slug}/{keywords}/k0c{category_id}l{location_id}`
#### Examples Observed: #### Examples Observed:
- All categories, Canada: `/b-canada/iphone/k0l0` (c0 = All Categories, l0 = Canada) - All categories, Canada: `/b-canada/iphone/k0l0` (c0 = All Categories, l0 = Canada)
- Cell phones category: `/b-cell-phones/canada/iphone/k0c132l0` (c132 = Cell Phones) - Cell phones category: `/b-cell-phones/canada/iphone/k0c132l0` (c132 = Cell Phones)
- With pagination: `/b-canada/iphone/page-2/k0l0` - With pagination: `/b-canada/iphone/page-2/k0l0`
#### URL Components: #### URL Components:
- `c{CATEGORY_ID}`: Category ID (0 = All Categories, 132 = Cell Phones, etc.) - `c{CATEGORY_ID}`: Category ID (0 = All Categories, 132 = Cell Phones, etc.)
- `l{LOCATION_ID}`: Location ID (0 = Canada, 1700272 = GTA, etc.) - `l{LOCATION_ID}`: Location ID (0 = Canada, 1700272 = GTA, etc.)
- `page-{N}`: Pagination (1-based, optional) - `page-{N}`: Pagination (1-based, optional)
- Keywords are slugified in URL path - Keywords are slugified in URL path
### Current Implementation Status ### Current Implementation Status
The existing scraper in `src/kijiji.ts` successfully implements the approach: The existing scraper in `src/kijiji.ts` successfully implements the approach:
- Parses embedded Apollo state from HTML responses - Parses embedded Apollo state from HTML responses
- Handles rate limiting and retries - Handles rate limiting and retries
@@ -336,14 +389,22 @@ The existing scraper in `src/kijiji.ts` successfully implements the approach:
## Listing Details Page ## Listing Details Page
### Overview ### Overview
Similar to search results, listing details pages use server-side rendering with embedded Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing data - all information is pre-rendered on the server.
Similar to search results, listing details pages use server-side rendering with embedded
Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing
data - all information is pre-rendered on the server.
### Data Architecture ### Data Architecture
- **Server-Side Rendering**: Each listing page is fully server-rendered with data embedded in HTML
- **Embedded Apollo State**: Listing data is stored in `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` - **Server-Side Rendering**: Each listing page is fully server-rendered with data
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings, user profiles) fetched via GraphQL API embedded in HTML
- **Embedded Apollo State**: Listing data is stored in
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings,
user profiles) fetched via GraphQL API
### Listing Data Structure ### Listing Data Structure
The main listing data follows the same pattern as search results: The main listing data follows the same pattern as search results:
```json ```json
@@ -385,40 +446,50 @@ The main listing data follows the same pattern as search results:
``` ```
### Client-Side GraphQL Queries ### Client-Side GraphQL Queries
When loading a listing details page, the following GraphQL queries are executed: When loading a listing details page, the following GraphQL queries are executed:
#### 1. getSearchCategories #### 1. getSearchCategories
- **Purpose**: Category hierarchy for navigation - **Purpose**: Category hierarchy for navigation
- **Variables**: `{"locale": "en-CA"}` - **Variables**: `{"locale": "en-CA"}`
- **Response**: Hierarchical category structure - **Response**: Hierarchical category structure
#### 2. getCampaignsForVip #### 2. getCampaignsForVip
- **Purpose**: Advertisement targeting data - **Purpose**: Advertisement targeting data
- **Variables**: `{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}` - **Variables**:
`{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}`
- **Response**: Campaign/ads data (usually null) - **Response**: Campaign/ads data (usually null)
#### 3. GetReviewSummary #### 3. GetReviewSummary
- **Purpose**: Seller review statistics - **Purpose**: Seller review statistics
- **Variables**: `{"userId": "1044934581"}` - **Variables**: `{"userId": "1044934581"}`
- **Response**: Review count and score (usually 0 for new sellers) - **Response**: Review count and score (usually 0 for new sellers)
#### 4. GetProfileMetrics #### 4. GetProfileMetrics
- **Purpose**: Seller profile information - **Purpose**: Seller profile information
- **Variables**: `{"profileId": "1044934581"}` - **Variables**: `{"profileId": "1044934581"}`
- **Response**: Member since date, account type - **Response**: Member since date, account type
#### 5. GetListingsSimilar #### 5. GetListingsSimilar
- **Purpose**: Similar listings for cross-selling - **Purpose**: Similar listings for cross-selling
- **Variables**: `{"listingId": "1705585530", "limit": 10, "isExternalId": false}` - **Variables**: `{"listingId": "1705585530", "limit": 10, "isExternalId": false}`
- **Response**: Array of similar listings with basic metadata - **Response**: Array of similar listings with basic metadata
#### 6. GetGeocodeReverseFromIp #### 6. GetGeocodeReverseFromIp
- **Purpose**: Geolocation-based features - **Purpose**: Geolocation-based features
- **Variables**: `{}` - **Variables**: `{}`
- **Response**: Fails with 404 for most IPs - **Response**: Fails with 404 for most IPs
### Implementation Status ### Implementation Status
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing details from embedded Apollo state:
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing
details from embedded Apollo state:
- ✅ Extracts title, description, price, location - ✅ Extracts title, description, price, location
- ✅ Handles contact-based pricing ("Please Contact") - ✅ Handles contact-based pricing ("Please Contact")
@@ -427,22 +498,30 @@ The existing `parseListing()` function in `src/kijiji.ts` successfully extracts
- ✅ Works without authentication or API keys - ✅ Works without authentication or API keys
### Key Findings ### Key Findings
1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL query for individual listing data
2. **Complete Data Available**: All listing information is embedded in the initial HTML response 1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data (reviews, similar listings) query for individual listing data
2. **Complete Data Available**: All listing information is embedded in the initial HTML
response
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data
(reviews, similar listings)
4. **Consistent Architecture**: Same Apollo state embedding pattern as search pages 4. **Consistent Architecture**: Same Apollo state embedding pattern as search pages
### Current Scraper Implementation ### Current Scraper Implementation
The scraper successfully extracts listing details by: The scraper successfully extracts listing details by:
1. Fetching the listing URL HTML 1. Fetching the listing URL HTML
2. Parsing embedded `__NEXT_DATA__` Apollo state 2. Parsing embedded `__NEXT_DATA__` Apollo state
3. Extracting the `Listing:{id}` object from Apollo cache 3. Extracting the `Listing:{id}` object from Apollo cache
4. Mapping fields to typed `ListingDetails` interface 4. Mapping fields to typed `ListingDetails` interface
This approach works reliably without requiring authentication or dealing with rate limiting on individual listing fetches. This approach works reliably without requiring authentication or dealing with rate
limiting on individual listing fetches.
## Next Steps ## Next Steps
- Explore posting/authentication APIs (requires user login) - Explore posting/authentication APIs (requires user login)
- Investigate if GraphQL API can be used for programmatic access with proper authentication - Investigate if GraphQL API can be used for programmatic access with proper
authentication
- Test rate limiting patterns and optimal scraping strategies - Test rate limiting patterns and optimal scraping strategies
- Document additional category and location ID mappings - Document additional category and location ID mappings

View File

@@ -1 +1,2 @@
# ca-marketplace-scraper # ca-marketplace-scraper

View File

@@ -15,7 +15,10 @@
"linter": { "linter": {
"enabled": true, "enabled": true,
"rules": { "rules": {
"recommended": true "recommended": true,
"correctness": {
"noUnusedImports": "error"
}
} }
}, },
"javascript": { "javascript": {

View File

@@ -4,8 +4,13 @@
"workspaces": { "workspaces": {
"": { "": {
"name": "marketplace-scrapers-monorepo", "name": "marketplace-scrapers-monorepo",
"dependencies": {
"@types/bun": "1.3.13",
},
"devDependencies": { "devDependencies": {
"@biomejs/biome": "2.3.11", "@biomejs/biome": "2.3.11",
"@tsconfig/bun": "catalog:",
"turbo": "2.5.4",
}, },
}, },
"packages/api-server": { "packages/api-server": {
@@ -13,9 +18,10 @@
"version": "1.0.0", "version": "1.0.0",
"dependencies": { "dependencies": {
"@marketplace-scrapers/core": "workspace:*", "@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:",
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "latest", "@types/bun": "catalog:",
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5", "typescript": "^5",
@@ -25,14 +31,16 @@
"name": "@marketplace-scrapers/core", "name": "@marketplace-scrapers/core",
"version": "1.0.0", "version": "1.0.0",
"dependencies": { "dependencies": {
"@typescript/native-preview": "catalog:",
"argon2-wasm-pro": "1.1.0",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"linkedom": "^0.18.12", "linkedom": "^0.18.12",
"unidecode": "^1.1.0", "unidecode": "^1.1.0",
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "latest", "@types/bun": "catalog:",
"@types/cli-progress": "^3.11.6", "@types/cli-progress": "catalog:",
"@types/unidecode": "^1.1.0", "@types/unidecode": "catalog:",
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5", "typescript": "^5",
@@ -43,15 +51,23 @@
"version": "1.0.0", "version": "1.0.0",
"dependencies": { "dependencies": {
"@marketplace-scrapers/core": "workspace:*", "@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:",
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "latest", "@types/bun": "catalog:",
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5", "typescript": "^5",
}, },
}, },
}, },
"catalog": {
"@tsconfig/bun": "1.0.9",
"@types/bun": "1.3.13",
"@types/cli-progress": "3.11.6",
"@types/unidecode": "1.1.0",
"@typescript/native-preview": "7.0.0-dev.20260428.1",
},
"packages": { "packages": {
"@biomejs/biome": ["@biomejs/biome@2.3.11", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.3.11", "@biomejs/cli-darwin-x64": "2.3.11", "@biomejs/cli-linux-arm64": "2.3.11", "@biomejs/cli-linux-arm64-musl": "2.3.11", "@biomejs/cli-linux-x64": "2.3.11", "@biomejs/cli-linux-x64-musl": "2.3.11", "@biomejs/cli-win32-arm64": "2.3.11", "@biomejs/cli-win32-x64": "2.3.11" }, "bin": { "biome": "bin/biome" } }, "sha512-/zt+6qazBWguPG6+eWmiELqO+9jRsMZ/DBU3lfuU2ngtIQYzymocHhKiZRyrbra4aCOoyTg/BmY+6WH5mv9xmQ=="], "@biomejs/biome": ["@biomejs/biome@2.3.11", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.3.11", "@biomejs/cli-darwin-x64": "2.3.11", "@biomejs/cli-linux-arm64": "2.3.11", "@biomejs/cli-linux-arm64-musl": "2.3.11", "@biomejs/cli-linux-x64": "2.3.11", "@biomejs/cli-linux-x64-musl": "2.3.11", "@biomejs/cli-win32-arm64": "2.3.11", "@biomejs/cli-win32-x64": "2.3.11" }, "bin": { "biome": "bin/biome" } }, "sha512-/zt+6qazBWguPG6+eWmiELqO+9jRsMZ/DBU3lfuU2ngtIQYzymocHhKiZRyrbra4aCOoyTg/BmY+6WH5mv9xmQ=="],
@@ -77,7 +93,9 @@
"@marketplace-scrapers/mcp-server": ["@marketplace-scrapers/mcp-server@workspace:packages/mcp-server"], "@marketplace-scrapers/mcp-server": ["@marketplace-scrapers/mcp-server@workspace:packages/mcp-server"],
"@types/bun": ["@types/bun@1.3.4", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="], "@tsconfig/bun": ["@tsconfig/bun@1.0.9", "", {}, "sha512-4M0/Ivfwcpz325z6CwSifOBZYji3DFOEpY6zEUt0+Xi2qRhzwvmqQN9XAHJh3OVvRJuAqVTLU2abdCplvp6mwQ=="],
"@types/bun": ["@types/bun@1.3.13", "", { "dependencies": { "bun-types": "1.3.13" } }, "sha512-9fqXWk5YIHGGnUau9TEi+qdlTYDAnOj+xLCmSTwXfAIqXr2x4tytJb43E9uCvt09zJURKXwAtkoH4nLQfzeTXw=="],
"@types/cli-progress": ["@types/cli-progress@3.11.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA=="], "@types/cli-progress": ["@types/cli-progress@3.11.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA=="],
@@ -85,11 +103,29 @@
"@types/unidecode": ["@types/unidecode@1.1.0", "", {}, "sha512-NTIsFsTe9WRek39/8DDj7KiQ0nU33DHMrKwNHcD1rKlUvn4N0Rc4Di8q/Xavs8bsDZmBa4MMtQA8+HNgwfxC/A=="], "@types/unidecode": ["@types/unidecode@1.1.0", "", {}, "sha512-NTIsFsTe9WRek39/8DDj7KiQ0nU33DHMrKwNHcD1rKlUvn4N0Rc4Di8q/Xavs8bsDZmBa4MMtQA8+HNgwfxC/A=="],
"@typescript/native-preview": ["@typescript/native-preview@7.0.0-dev.20260428.1", "", { "optionalDependencies": { "@typescript/native-preview-darwin-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-darwin-x64": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-arm": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-x64": "7.0.0-dev.20260428.1", "@typescript/native-preview-win32-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-win32-x64": "7.0.0-dev.20260428.1" }, "bin": { "tsgo": "bin/tsgo.js" } }, "sha512-JiM4PYWDGs57TT0mV2KArmaW7BnTkk3XRid79NdG17tfvDbRyg4hBCpKI7vARiQPtxjKrHlxyzxOGDpv5W5T7Q=="],
"@typescript/native-preview-darwin-arm64": ["@typescript/native-preview-darwin-arm64@7.0.0-dev.20260428.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Lll6WmXfgTEj1G3QBIoHlabQwUtJiyhlRgSLksa06QFL5BoA7V+Lu1waa9PtPNZbGsXLDMHodtk/bRQABKuPiw=="],
"@typescript/native-preview-darwin-x64": ["@typescript/native-preview-darwin-x64@7.0.0-dev.20260428.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-WbsBNSHlo+4sGrTxDWdmI7r8x48tCtSCuKdmK62FvVOq58UWAs6sL13Z4Rev4ohLcGHdXC5E/8AIdpLPqDYQpw=="],
"@typescript/native-preview-linux-arm": ["@typescript/native-preview-linux-arm@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "arm" }, "sha512-/d/NnZFvEJU67L5mHh+cO3gsfwNCvJ9HGtxGq1KGz1VwTabOIcwLdpTpfsAR39WXzzfh9GJHL28n6GSGZInPow=="],
"@typescript/native-preview-linux-arm64": ["@typescript/native-preview-linux-arm64@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-cgcBX/ZBMdepkamLT8g8jQdHe7DZS/s6zTZRof6mvcrnJHlMeUnKoC9UO8/c22IrUMV3n0XPh7R8FYjUP0ll+Q=="],
"@typescript/native-preview-linux-x64": ["@typescript/native-preview-linux-x64@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "x64" }, "sha512-4gJCE7wzenx1BH2Vtx2uKWUo8rFxnhGkxNEH1zxbYy/6ASwo+PnOPYmKHAzNE1C3yB5lzw71/vR5p5zyO57Y4A=="],
"@typescript/native-preview-win32-arm64": ["@typescript/native-preview-win32-arm64@7.0.0-dev.20260428.1", "", { "os": "win32", "cpu": "arm64" }, "sha512-yn6Rzbn62L4QTWrp0QgG8al6l/VG7PCPRdbE0vuGDSlKhInlC+Flo4QSc1qA8KHTbpHgl+nEsq9DymiitI4G4g=="],
"@typescript/native-preview-win32-x64": ["@typescript/native-preview-win32-x64@7.0.0-dev.20260428.1", "", { "os": "win32", "cpu": "x64" }, "sha512-T9z13mcMowXmwGjprA2FIR2EEdYZxgqH8+qk7dFZVBlo5vfk41AN/qJfAdN7IsAhEb640MJ8cMN/aiczweZKmA=="],
"ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], "ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
"argon2-wasm-pro": ["argon2-wasm-pro@1.1.0", "", {}, "sha512-ApZAKEgbWQILckY+IdjrETB0oTC8L9YHT3JVQhdun77tilExkXNyM/T/qbkvX+Uv68+IQmVwewQwg6yJnSwVxQ=="],
"boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="], "boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="],
"bun-types": ["bun-types@1.3.4", "", { "dependencies": { "@types/node": "*" } }, "sha512-5ua817+BZPZOlNaRgGBpZJOSAQ9RQ17pkwPD0yR7CfJg+r8DgIILByFifDTa+IPDDxzf5VNhtNlcKqFzDgJvlQ=="], "bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="],
"cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="], "cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="],
@@ -125,6 +161,20 @@
"strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], "strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
"turbo": ["turbo@2.5.4", "", { "optionalDependencies": { "turbo-darwin-64": "2.5.4", "turbo-darwin-arm64": "2.5.4", "turbo-linux-64": "2.5.4", "turbo-linux-arm64": "2.5.4", "turbo-windows-64": "2.5.4", "turbo-windows-arm64": "2.5.4" }, "bin": { "turbo": "bin/turbo" } }, "sha512-kc8ZibdRcuWUG1pbYSBFWqmIjynlD8Lp7IB6U3vIzvOv9VG+6Sp8bzyeBWE3Oi8XV5KsQrznyRTBPvrf99E4mA=="],
"turbo-darwin-64": ["turbo-darwin-64@2.5.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-ah6YnH2dErojhFooxEzmvsoZQTMImaruZhFPfMKPBq8sb+hALRdvBNLqfc8NWlZq576FkfRZ/MSi4SHvVFT9PQ=="],
"turbo-darwin-arm64": ["turbo-darwin-arm64@2.5.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-2+Nx6LAyuXw2MdXb7pxqle3MYignLvS7OwtsP9SgtSBaMlnNlxl9BovzqdYAgkUW3AsYiQMJ/wBRb7d+xemM5A=="],
"turbo-linux-64": ["turbo-linux-64@2.5.4", "", { "os": "linux", "cpu": "x64" }, "sha512-5May2kjWbc8w4XxswGAl74GZ5eM4Gr6IiroqdLhXeXyfvWEdm2mFYCSWOzz0/z5cAgqyGidF1jt1qzUR8hTmOA=="],
"turbo-linux-arm64": ["turbo-linux-arm64@2.5.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-/2yqFaS3TbfxV3P5yG2JUI79P7OUQKOUvAnx4MV9Bdz6jqHsHwc9WZPpO4QseQm+NvmgY6ICORnoVPODxGUiJg=="],
"turbo-windows-64": ["turbo-windows-64@2.5.4", "", { "os": "win32", "cpu": "x64" }, "sha512-EQUO4SmaCDhO6zYohxIjJpOKRN3wlfU7jMAj3CgcyTPvQR/UFLEKAYHqJOnJtymbQmiiM/ihX6c6W6Uq0yC7mA=="],
"turbo-windows-arm64": ["turbo-windows-arm64@2.5.4", "", { "os": "win32", "cpu": "arm64" }, "sha512-oQ8RrK1VS8lrxkLriotFq+PiF7iiGgkZtfLKF4DDKsmdbPo0O9R2mQxm7jHLuXraRCuIQDWMIw6dpcr7Iykf4A=="],
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
"uhyphen": ["uhyphen@0.2.0", "", {}, "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA=="], "uhyphen": ["uhyphen@0.2.0", "", {}, "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA=="],

5
bunfig.toml Normal file
View File

@@ -0,0 +1,5 @@
[install]
exact = true
[test]
root = "./do-not-run-tests-from-root"

View File

@@ -1,55 +1,18 @@
# Marketplace Cookies Setup # cookies
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings. ## Scope
## Cookie Configuration - This directory is for cookie setup docs and local examples only.
- Treat any real browser cookie export as a secret, even if already present locally.
Authenticated scrapers now read cookies only from environment variables: ## Runtime Sources
1. `FACEBOOK_COOKIE`
2. `EBAY_COOKIE`
--- - Authenticated scrapers read raw `Cookie` header strings from environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`.
- Some core entrypoints also accept explicit cookie strings from request/options; explicit input takes precedence over environment values.
## Facebook Marketplace ## Safety Rules
### Required Cookies - Never commit real cookie values, browser exports, or session files.
- `c_user`: Your Facebook user ID - Use placeholder values in docs: `c_user=123; xs=token; fr=request`.
- `xs`: Facebook session token - Do not paste cookie values into logs, tests, fixtures, or generated agent docs.
- `fr`: Facebook request token - If editing this directory, verify diffs do not contain real `c_user`, `xs`, `fr`, `datr`, `sb`, `s`, `ds2`, or `ebay` values.
- `datr`: Data attribution token
- `sb`: Session browser token
### Setup
```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
```
Use the raw `Cookie` header string copied from an authenticated browser session.
---
## eBay
eBay has aggressive bot detection that blocks requests without valid session cookies.
### Setup
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
Use the raw `Cookie` header string copied from an authenticated browser session.
---
## Important Notes
- Cookies must be from active browser sessions
- Cookies expire and need periodic refresh
- **NEVER** commit real cookies to version control
- Platforms may still block automated scraping despite valid cookies
## Security
Do not commit real cookie values or store them in tracked files.

View File

@@ -0,0 +1,511 @@
# opencode Monorepo Config Adoption Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Adopt opencode-style monorepo config: Turbo task orchestration, workspace dep
catalog, shared root tsconfig, bunfig.toml, and `exports` field in all packages.
**Architecture:** Pure config changes across 10 files — no source code touched.
Root config files are added/updated first, then per-package files updated to reference
them. Changes are independent within each task and safe to commit atomically.
**Tech Stack:** Bun workspaces, Turbo 2.x, @tsconfig/bun, TypeScript (tsgo /
@typescript/native-preview)
* * *
## File Map
| File | Action | Responsible for |
| --- | --- | --- |
| `package.json` | Modify | Workspace catalog, turbo devDep, @tsconfig/bun devDep, updated scripts |
| `turbo.json` | Create | Task graph: typecheck, build, test |
| `tsconfig.json` | Create | Shared TS compiler options for all packages |
| `bunfig.toml` | Create | Exact installs, root test guard |
| `packages/core/package.json` | Modify | exports field, catalog refs, script rename |
| `packages/api-server/package.json` | Modify | exports field, catalog refs, script rename |
| `packages/mcp-server/package.json` | Modify | exports field, catalog refs, script rename |
| `packages/core/tsconfig.json` | Modify | Slim — extends root, paths only |
| `packages/api-server/tsconfig.json` | Modify | Slim — extends root, paths only |
| `packages/mcp-server/tsconfig.json` | Modify | Slim — extends root, paths only |
* * *
### Task 1: Add `bunfig.toml` and `turbo.json`
Two new root config files with no dependencies on other tasks.
**Files:**
- Create: `bunfig.toml`
- Create: `turbo.json`
- [ ] **Step 1: Create `bunfig.toml`**
Write this file at repo root (`/path/to/ca-marketplace-scraper/bunfig.toml`):
```toml
[install]
exact = true
[test]
root = "./do-not-run-tests-from-root"
```
- [ ] **Step 2: Create `turbo.json`**
Write this file at repo root:
```json
{
"$schema": "https://turbo.build/schema.json",
"tasks": {
"typecheck": {},
"build": {
"dependsOn": ["^build"],
"outputs": ["dist/**"]
},
"test": {
"dependsOn": ["^build"],
"outputs": []
}
}
}
```
- [ ] **Step 3: Verify files exist**
Run:
```bash
ls bunfig.toml turbo.json
```
Expected: both files listed, no errors.
- [ ] **Step 4: Commit**
```bash
git add bunfig.toml turbo.json
git commit -m "chore: add bunfig.toml and turbo.json"
```
* * *
### Task 2: Create root `tsconfig.json`
Shared base tsconfig all packages will extend.
Extracts the common options currently duplicated in all 3 per-package tsconfigs.
**Files:**
- Create: `tsconfig.json`
- [ ] **Step 1: Create root `tsconfig.json`**
Write this file at repo root:
```json
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["dom", "ESNext"],
"target": "ESNext",
"module": "preserve",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
}
}
```
- [ ] **Step 2: Commit**
```bash
git add tsconfig.json
git commit -m "chore: add shared root tsconfig.json"
```
* * *
### Task 3: Update root `package.json`
Add workspace catalog, `turbo` + `@tsconfig/bun` devDependencies, and update scripts to
use `turbo run`.
**Files:**
- Modify: `package.json`
- [ ] **Step 1: Replace root `package.json`**
Write this complete file:
```json
{
"name": "marketplace-scrapers-monorepo",
"version": "1.0.0",
"private": true,
"type": "module",
"packageManager": "bun@1.3.13",
"scripts": {
"typecheck": "turbo run typecheck",
"build": "bun run clean && turbo run build",
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp",
"ci": "biome ci",
"clean": "rm -rf dist",
"start": "./scripts/start.sh"
},
"workspaces": {
"packages": [
"packages/*"
],
"catalog": {
"@tsconfig/bun": "1.0.9",
"@typescript/native-preview": "7.0.0-dev.20260428.1",
"@types/bun": "1.2.18",
"@types/cli-progress": "3.11.6",
"@types/unidecode": "1.1.0"
}
},
"devDependencies": {
"@biomejs/biome": "2.3.11",
"@tsconfig/bun": "catalog:",
"turbo": "2.5.4"
}
}
```
> **Note on catalog versions:** The catalog pins exact versions.
> The values above are taken from the current package installs.
> If `@types/bun` was `latest`, check `node_modules/@types/bun/package.json` for the
> actual installed version and use that.
> Same for `@typescript/native-preview`.
- [ ] **Step 2: Check actual installed versions**
Run:
```bash
cat node_modules/@types/bun/package.json | grep '"version"'
cat node_modules/@typescript/native-preview/package.json | grep '"version"'
cat node_modules/@types/cli-progress/package.json | grep '"version"'
cat node_modules/@types/unidecode/package.json | grep '"version"'
```
Update the catalog values in `package.json` to match the exact installed versions.
- [ ] **Step 3: Install turbo and @tsconfig/bun**
```bash
bun install
```
Expected: lock file updated, `turbo` and `@tsconfig/bun` appear in `node_modules`.
- [ ] **Step 4: Verify turbo works**
```bash
bunx turbo run typecheck --dry
```
Expected: output lists the `typecheck` task for each package (even if no `typecheck`
script exists yet — turbo will note them as skipped/missing).
- [ ] **Step 5: Commit**
```bash
git add package.json bun.lock
git commit -m "chore: add workspace catalog and turbo to root package.json"
```
* * *
### Task 4: Update per-package `package.json` files
Rename `type:check` → `typecheck`, replace `main`/`module` with `exports`, swap pinned
dep versions for `catalog:` references.
**Files:**
- Modify: `packages/core/package.json`
- Modify: `packages/api-server/package.json`
- Modify: `packages/mcp-server/package.json`
- [ ] **Step 1: Replace `packages/core/package.json`**
```json
{
"name": "@marketplace-scrapers/core",
"version": "1.0.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"typecheck": "bun tsgo"
},
"dependencies": {
"@typescript/native-preview": "catalog:",
"cli-progress": "^3.12.0",
"linkedom": "^0.18.12",
"unidecode": "^1.1.0"
},
"devDependencies": {
"@types/bun": "catalog:",
"@types/cli-progress": "catalog:",
"@types/unidecode": "catalog:"
},
"peerDependencies": {
"typescript": "^5"
}
}
```
- [ ] **Step 2: Replace `packages/api-server/package.json`**
```json
{
"name": "@marketplace-scrapers/api-server",
"version": "1.0.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api",
"typecheck": "bun tsgo"
},
"dependencies": {
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
},
"devDependencies": {
"@types/bun": "catalog:"
},
"peerDependencies": {
"typescript": "^5"
}
}
```
- [ ] **Step 3: Replace `packages/mcp-server/package.json`**
```json
{
"name": "@marketplace-scrapers/mcp-server",
"version": "1.0.0",
"type": "module",
"exports": {
".": "./src/index.ts"
},
"private": true,
"scripts": {
"start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp",
"typecheck": "bun tsgo"
},
"dependencies": {
"@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
},
"devDependencies": {
"@types/bun": "catalog:"
},
"peerDependencies": {
"typescript": "^5"
}
}
```
- [ ] **Step 4: Run `bun install` to sync lockfile**
```bash
bun install
```
Expected: no errors.
Catalog refs resolved.
`bun.lock` updated.
- [ ] **Step 5: Verify typecheck still works per-package**
```bash
cd packages/core && bun run typecheck
cd ../api-server && bun run typecheck
cd ../mcp-server && bun run typecheck
cd ../..
```
Expected: each exits 0 (or same errors as before — no new errors introduced).
- [ ] **Step 6: Commit**
```bash
git add packages/core/package.json packages/api-server/package.json packages/mcp-server/package.json bun.lock
git commit -m "chore: use exports field and catalog refs in all packages"
```
* * *
### Task 5: Slim per-package `tsconfig.json` files
Replace the duplicated full tsconfig in each package with a slim `extends`-based one
pointing to root.
**Files:**
- Modify: `packages/core/tsconfig.json`
- Modify: `packages/api-server/tsconfig.json`
- Modify: `packages/mcp-server/tsconfig.json`
- [ ] **Step 1: Replace `packages/core/tsconfig.json`**
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["./src", "./test"]
}
```
- [ ] **Step 2: Replace `packages/api-server/tsconfig.json`**
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["./src", "./test"]
}
```
- [ ] **Step 3: Replace `packages/mcp-server/tsconfig.json`**
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["./src", "./test"]
}
```
- [ ] **Step 4: Verify `@tsconfig/bun` is resolvable**
The root tsconfig extends `@tsconfig/bun/tsconfig.json`. Confirm the package is
installed:
```bash
ls node_modules/@tsconfig/bun/tsconfig.json
```
Expected: file exists.
- [ ] **Step 5: Run typecheck via Turbo**
```bash
bun run typecheck
```
Expected: Turbo runs `typecheck` for all 3 packages in parallel, all pass (or same
pre-existing errors — no new ones).
- [ ] **Step 6: Commit**
```bash
git add packages/core/tsconfig.json packages/api-server/tsconfig.json packages/mcp-server/tsconfig.json
git commit -m "chore: slim per-package tsconfigs to extend root"
```
* * *
### Task 6: Smoke test full build pipeline
Verify everything works end-to-end.
**Files:** none (verification only)
- [ ] **Step 1: Run turbo typecheck**
```bash
bun run typecheck
```
Expected: Turbo runs `typecheck` across all packages.
Exit 0.
- [ ] **Step 2: Run full build**
```bash
bun run build
```
Expected: `dist/` cleaned, Turbo runs `build` for api-server and mcp-server in parallel
(core defines no `build` script, so Turbo skips it), build artifacts appear in `dist/api/` and `dist/mcp/`.
- [ ] **Step 3: Verify dist artifacts**
```bash
ls dist/api/ dist/mcp/
```
Expected: compiled output files in both directories.
- [ ] **Step 4: Verify `bun install` is exact**
```bash
grep -c '\^' bun.lock | head -5
```
With `exact = true` in bunfig.toml, new installs won't add `^` ranges.
Existing `^` ranges in `bun.lock` from before are fine — they'll be resolved to exact on
next fresh install.
- [ ] **Step 5: Final commit if any loose files**
```bash
git status
```
If clean: done. If any files were modified by `bun install` (e.g. `bun.lock`):
```bash
git add bun.lock
git commit -m "chore: sync lockfile after monorepo config adoption"
```

View File

@@ -1,53 +1,64 @@
# Cookie Env-Only Implementation Plan # Cookie Env-Only Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. > **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove cookie files and request-provided cookie overrides so all authenticated marketplace scraping reads raw `Cookie` header strings only from environment variables. **Goal:** Remove cookie files and request-provided cookie overrides so all authenticated
marketplace scraping reads raw `Cookie` header strings only from environment variables.
**Architecture:** Collapse shared cookie loading to a single env-var reader in `packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to stop accepting request/file cookie inputs. Update the API and MCP adapters so they no longer advertise or forward cookie parameters, and rewrite docs/tests to match the env-only contract. **Architecture:** Collapse shared cookie loading to a single env-var reader in
`packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to
stop accepting request/file cookie inputs.
Update the API and MCP adapters so they no longer advertise or forward cookie
parameters, and rewrite docs/tests to match the env-only contract.
**Tech Stack:** Bun, TypeScript, Bun test, Biome, workspace package exports **Tech Stack:** Bun, TypeScript, Bun test, Biome, workspace package exports
--- * * *
## File Map ## File Map
- Modify: `packages/core/src/utils/cookies.ts` - Modify: `packages/core/src/utils/cookies.ts` Purpose: remove JSON/file/request-source
Purpose: remove JSON/file/request-source loading and keep env-only cookie parsing/formatting. loading and keep env-only cookie parsing/formatting.
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts` Purpose: drop `cookiesSource` /
Purpose: drop `cookiesSource` / `cookiePath` arguments and env-only error text. `cookiePath` arguments and env-only error text.
- Modify: `packages/core/src/scrapers/ebay.ts` - Modify: `packages/core/src/scrapers/ebay.ts` Purpose: remove `opts.cookies` request
Purpose: remove `opts.cookies` request override and use env-only cookie loading. override and use env-only cookie loading.
- Modify: `packages/core/src/index.ts` - Modify: `packages/core/src/index.ts` Purpose: keep exports aligned with tightened core
Purpose: keep exports aligned with tightened core signatures. signatures.
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts` Purpose: replace missing-file
Purpose: replace missing-file coverage with env-only auth tests. coverage with env-only auth tests.
- Create: `packages/core/test/ebay-core.test.ts` - Create: `packages/core/test/ebay-core.test.ts` Purpose: add dedicated eBay auth
Purpose: add dedicated eBay auth regression coverage instead of mixing it into Facebook tests. regression coverage instead of mixing it into Facebook tests.
- Modify: `packages/api-server/src/routes/facebook.ts` - Modify: `packages/api-server/src/routes/facebook.ts` Purpose: stop parsing/forwarding
Purpose: stop parsing/forwarding `cookies` query params. `cookies` query params.
- Modify: `packages/api-server/src/routes/ebay.ts` - Modify: `packages/api-server/src/routes/ebay.ts` Purpose: stop parsing/forwarding
Purpose: stop parsing/forwarding `cookies` query params. `cookies` query params.
- Create: `packages/api-server/test/routes.test.ts` - Create: `packages/api-server/test/routes.test.ts` Purpose: verify Facebook/eBay routes
Purpose: verify Facebook/eBay routes ignore cookie query params and still call core correctly. ignore cookie query params and still call core correctly.
- Modify: `packages/mcp-server/src/protocol/tools.ts` - Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: remove Facebook/eBay
Purpose: remove Facebook/eBay cookie tool inputs and descriptions. cookie tool inputs and descriptions.
- Modify: `packages/mcp-server/src/protocol/handler.ts` - Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: stop mapping removed
Purpose: stop mapping removed cookie tool inputs into API URLs. cookie tool inputs into API URLs.
- Create: `packages/mcp-server/test/protocol.test.ts` - Create: `packages/mcp-server/test/protocol.test.ts` Purpose: verify tool schemas and
Purpose: verify tool schemas and handler URL building no longer include Facebook/eBay cookie fields. handler URL building no longer include Facebook/eBay cookie fields.
- Modify: `cookies/AGENTS.md` - Modify: `cookies/AGENTS.md` Purpose: document env vars as the only supported cookie
Purpose: document env vars as the only supported cookie input. input.
### Task 1: Lock core cookie utilities to env-only loading ### Task 1: Lock core cookie utilities to env-only loading
**Files:** **Files:**
- Modify: `packages/core/src/utils/cookies.ts:19-227` - Modify: `packages/core/src/utils/cookies.ts:19-227`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts` with env-only expectations: Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts`
with env-only expectations:
```ts ```ts
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => { test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
@@ -85,12 +96,14 @@ test("should reject missing Facebook auth env var", async () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
Expected: FAIL because the current implementation still allows missing env values to fall through to file/request-based behavior and does not emit the new env-only error. current implementation still allows missing env values to fall through to
file/request-based behavior and does not emit the new env-only error.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only loader. The target shape is: Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only
loader. The target shape is:
```ts ```ts
export interface CookieConfig { export interface CookieConfig {
@@ -129,8 +142,8 @@ Delete the now-dead helpers and types that exist only for JSON/file/request load
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for the new
Expected: PASS for the new env-only tests. env-only tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -142,10 +155,15 @@ git commit -m "refactor: make cookie loading env-only"
### Task 2: Tighten Facebook core APIs to the new contract ### Task 2: Tighten Facebook core APIs to the new contract
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/facebook.ts:23-29` - Modify: `packages/core/src/scrapers/facebook.ts:23-29`
- Modify: `packages/core/src/scrapers/facebook.ts:214-228` - Modify: `packages/core/src/scrapers/facebook.ts:214-228`
- Modify: `packages/core/src/scrapers/facebook.ts:823-929` - Modify: `packages/core/src/scrapers/facebook.ts:823-929`
- Modify: `packages/core/src/index.ts:5-15` - Modify: `packages/core/src/index.ts:5-15`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
@@ -171,8 +189,9 @@ test("should fail Facebook item fetch when FACEBOOK_COOKIE is unset", async () =
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
Expected: FAIL because the current function signatures and error text still mention parameter/file-based auth paths. current function signatures and error text still mention parameter/file-based auth
paths.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -206,12 +225,14 @@ console.warn(
); );
``` ```
Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep `packages/core/src/index.ts` exporting the tightened functions without the old parameter contract. Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep
`packages/core/src/index.ts` exporting the tightened functions without the old parameter
contract.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS with the new
Expected: PASS with the new env-only Facebook API surface. env-only Facebook API surface.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -223,8 +244,11 @@ git commit -m "refactor: remove facebook cookie overrides"
### Task 3: Tighten eBay core APIs to env-only auth ### Task 3: Tighten eBay core APIs to env-only auth
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/ebay.ts:9-15` - Modify: `packages/core/src/scrapers/ebay.ts:9-15`
- Modify: `packages/core/src/scrapers/ebay.ts:337-389` - Modify: `packages/core/src/scrapers/ebay.ts:337-389`
- Create: `packages/core/test/ebay-core.test.ts` - Create: `packages/core/test/ebay-core.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
@@ -249,8 +273,8 @@ test("should warn and continue without eBay cookies when EBAY_COOKIE is unset",
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/ebay-core.test.ts` Run: `bun test packages/core/test/ebay-core.test.ts` Expected: FAIL because
Expected: FAIL because `loadEbayCookies` still accepts request overrides and mentions file/json sources. `loadEbayCookies` still accepts request overrides and mentions file/json sources.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -276,12 +300,13 @@ async function loadEbayCookies(): Promise<string | undefined> {
} }
``` ```
Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds it into `loadEbayCookies()`. Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds
it into `loadEbayCookies()`.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/ebay-core.test.ts` Run: `bun test packages/core/test/ebay-core.test.ts` Expected: PASS for the eBay
Expected: PASS for the eBay env-only regression coverage. env-only regression coverage.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -293,13 +318,17 @@ git commit -m "refactor: make ebay auth env-only"
### Task 4: Remove cookie query parameters from the API adapter ### Task 4: Remove cookie query parameters from the API adapter
**Files:** **Files:**
- Modify: `packages/api-server/src/routes/facebook.ts:3-33` - Modify: `packages/api-server/src/routes/facebook.ts:3-33`
- Modify: `packages/api-server/src/routes/ebay.ts:3-52` - Modify: `packages/api-server/src/routes/ebay.ts:3-52`
- Create: `packages/api-server/test/routes.test.ts` - Create: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core` so the route contract is explicit: Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core`
so the route contract is explicit:
```ts ```ts
import { afterEach, describe, expect, mock, test } from "bun:test"; import { afterEach, describe, expect, mock, test } from "bun:test";
@@ -347,8 +376,9 @@ test("ebayRoute ignores cookies query parameter", async () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/api-server/test/routes.test.ts` Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
Expected: FAIL because the current routes still parse `reqUrl.searchParams.get("cookies")` and forward it downstream. current routes still parse `reqUrl.searchParams.get("cookies")` and forward it
downstream.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -383,8 +413,8 @@ const items = await fetchEbayItems(SEARCH_QUERY, 1, {
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/api-server/test/routes.test.ts` Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS for route
Expected: PASS for route coverage and no remaining adapter references to `cookies` for Facebook/eBay. coverage and no remaining adapter references to `cookies` for Facebook/eBay.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -396,13 +426,17 @@ git commit -m "refactor: remove api cookie query overrides"
### Task 5: Remove cookie inputs from MCP tool schemas and request mapping ### Task 5: Remove cookie inputs from MCP tool schemas and request mapping
**Files:** **Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts:65-148` - Modify: `packages/mcp-server/src/protocol/tools.ts:65-148`
- Modify: `packages/mcp-server/src/protocol/handler.ts:154-211` - Modify: `packages/mcp-server/src/protocol/handler.ts:154-211`
- Create: `packages/mcp-server/test/protocol.test.ts` - Create: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building assertions: Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building
assertions:
```ts ```ts
import { expect, mock, test } from "bun:test"; import { expect, mock, test } from "bun:test";
@@ -445,8 +479,8 @@ expect(calledUrl).not.toContain("cookies=");
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
Expected: FAIL because the current MCP schema and handler still expose and forward those inputs. current MCP schema and handler still expose and forward those inputs.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -465,12 +499,13 @@ Delete the Facebook/eBay cookie tool properties and handler mapping:
// if (args.cookies) params.append("cookies", args.cookies); // if (args.cookies) params.append("cookies", args.cookies);
``` ```
Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by the approved spec. Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by
the approved spec.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS with MCP
Expected: PASS with MCP definitions and handler mapping in sync. definitions and handler mapping in sync.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -482,12 +517,16 @@ git commit -m "refactor: remove mcp cookie parameters"
### Task 6: Rewrite cookie documentation and run full verification ### Task 6: Rewrite cookie documentation and run full verification
**Files:** **Files:**
- Modify: `cookies/AGENTS.md:9-85` - Modify: `cookies/AGENTS.md:9-85`
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if implementation reveals a spec mismatch
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if
implementation reveals a spec mismatch
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Treat docs drift as a contract failure. Capture the required state before editing: Treat docs drift as a contract failure.
Capture the required state before editing:
```md ```md
- Cookie setup docs mention env vars only for Facebook and eBay - Cookie setup docs mention env vars only for Facebook and eBay
@@ -497,14 +536,14 @@ Treat docs drift as a contract failure. Capture the required state before editin
- [ ] **Step 2: Run verification to prove current docs are stale** - [ ] **Step 2: Run verification to prove current docs are stale**
Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md` Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md` Expected: matches
Expected: matches found found
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Rewrite the cookie setup doc so Facebook and eBay each show only env-var setup: Rewrite the cookie setup doc so Facebook and eBay each show only env-var setup:
```md ````md
## Cookie Configuration ## Cookie Configuration
All supported authenticated scrapers read cookies only from environment variables. All supported authenticated scrapers read cookies only from environment variables.
@@ -513,14 +552,14 @@ All supported authenticated scrapers read cookies only from environment variable
```bash ```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request' export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
``` ````
### eBay ### eBay
```bash ```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE' export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
``` ```
``` ````
Remove the file-based and request-parameter sections entirely. Remove the file-based and request-parameter sections entirely.
@@ -534,10 +573,14 @@ Expected: all commands pass
```bash ```bash
git add cookies/AGENTS.md docs/superpowers/specs/2026-04-21-cookie-env-only-design.md git add cookies/AGENTS.md docs/superpowers/specs/2026-04-21-cookie-env-only-design.md
git commit -m "docs: align cookie setup with env-only auth" git commit -m "docs: align cookie setup with env-only auth"
``` ````
## Self-Review ## Self-Review
- Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter, tests, and docs each have explicit tasks. - Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter,
- Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP protocol coverage. tests, and docs each have explicit tasks.
- Type consistency check: `ensureCookies(config)` is the single shared loader name used across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core changes. - Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP
protocol coverage.
- Type consistency check: `ensureCookies(config)` is the single shared loader name used
across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core
changes.

View File

@@ -1,34 +1,49 @@
# Facebook Comet Rewrite Implementation Plan # Facebook Comet Rewrite Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. > **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid Comet-bootstrap parser for both search and item routes. **Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid
Comet-bootstrap parser for both search and item routes.
**Architecture:** Keep authenticated direct HTTP fetches as the transport. Classify each Facebook response first, then parse route-specific Comet bootstrap/state candidates, and fall back to rendered-HTML extraction only when bootstrap decoding cannot produce the expected search or item shape. **Architecture:** Keep authenticated direct HTTP fetches as the transport.
Classify each Facebook response first, then parse route-specific Comet bootstrap/state
candidates, and fall back to rendered-HTML extraction only when bootstrap decoding
cannot produce the expected search or item shape.
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http helpers **Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http
helpers
--- * * *
## File Structure ## File Structure
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction, search parsing, item parsing, and HTML fallbacks. - Owns Facebook fetch flow, response classification, bootstrap candidate extraction,
search parsing, item parsing, and HTML fallbacks.
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing, and route-aware item/search extraction behavior. - Owns unit coverage for response classification, bootstrap parsing, fallback parsing,
and route-aware item/search extraction behavior.
- Modify: `packages/core/test/facebook-integration.test.ts` - Modify: `packages/core/test/facebook-integration.test.ts`
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping for search/item entrypoints. - Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping
for search/item entrypoints.
### Task 1: Add Route Classification Coverage ### Task 1: Add Route Classification Coverage
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
Add these tests near the Facebook parser tests in `packages/core/test/facebook-core.test.ts`: Add these tests near the Facebook parser tests in
`packages/core/test/facebook-core.test.ts`:
```ts ```ts
test("classifies Comet search responses", () => { test("classifies Comet search responses", () => {
@@ -89,12 +104,14 @@ test("classifies unavailable item responses", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Expected: FAIL because `classifyFacebookResponse` does not exist yet. Expected: FAIL because `classifyFacebookResponse` does not exist yet.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Add this type and function near the parsing section in `packages/core/src/scrapers/facebook.ts`: Add this type and function near the parsing section in
`packages/core/src/scrapers/facebook.ts`:
```ts ```ts
type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown"; type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown";
@@ -128,7 +145,8 @@ export function classifyFacebookResponse(htmlString: HTMLString, responseUrl: st
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Expected: PASS Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -141,8 +159,11 @@ git commit -m "refactor: add facebook response classification"
### Task 2: Add Bootstrap Candidate Extraction ### Task 2: Add Bootstrap Candidate Extraction
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -185,7 +206,8 @@ test("keeps candidate order stable for later scoring", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist. Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -218,7 +240,8 @@ export function extractFacebookBootstrapCandidates(htmlString: HTMLString): Reco
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Expected: PASS Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -231,10 +254,15 @@ git commit -m "refactor: add facebook bootstrap candidate extraction"
### Task 3: Replace Search Parsing With Candidate Scoring ### Task 3: Replace Search Parsing With Candidate Scoring
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/test/facebook-integration.test.ts` - Modify: `packages/core/test/facebook-integration.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- Test: `packages/core/test/facebook-integration.test.ts` - Test: `packages/core/test/facebook-integration.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -323,12 +351,15 @@ const mockSearchHtml = `
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"` Run:
Expected: FAIL because the current search extractor only understands legacy `marketplace_search` shapes. `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"`
Expected: FAIL because the current search extractor only understands legacy
`marketplace_search` shapes.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Replace the search extraction internals in `extractFacebookMarketplaceData()` with candidate scoring like this: Replace the search extraction internals in `extractFacebookMarketplaceData()` with
candidate scoring like this:
```ts ```ts
function findSearchEdges(candidate: unknown): FacebookEdge[] | null { function findSearchEdges(candidate: unknown): FacebookEdge[] | null {
@@ -383,7 +414,8 @@ export function extractFacebookMarketplaceData(htmlString: HTMLString): Facebook
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts` Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Expected: PASS for the rewritten search fixtures and existing unaffected tests. Expected: PASS for the rewritten search fixtures and existing unaffected tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -396,8 +428,11 @@ git commit -m "refactor: rewrite facebook search parser for comet bootstrap"
### Task 4: Replace Item Parsing With Candidate Scoring ### Task 4: Replace Item Parsing With Candidate Scoring
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -438,7 +473,8 @@ test("extracts item details from Comet permalink bootstrap candidates", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
Expected: FAIL because the current item extractor depends on legacy permalink markers. Expected: FAIL because the current item extractor depends on legacy permalink markers.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -491,8 +527,8 @@ export function extractFacebookItemData(htmlString: HTMLString): FacebookMarketp
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for
Expected: PASS for current-shape item tests and remaining parser tests. current-shape item tests and remaining parser tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -504,8 +540,11 @@ git commit -m "refactor: rewrite facebook item parser for comet bootstrap"
### Task 5: Add HTML Fallback Extraction ### Task 5: Add HTML Fallback Extraction
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -549,8 +588,10 @@ test("falls back to rendered item HTML when bootstrap payloads are undecodable",
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"` Run:
Expected: FAIL because the extractor currently returns `null` without a structured candidate. `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: FAIL because the extractor currently returns `null` without a structured
candidate.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -607,11 +648,13 @@ function extractItemFallback(htmlString: HTMLString): FacebookMarketplaceItem |
} }
``` ```
Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()` and `extractFacebookItemData()`. Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()`
and `extractFacebookItemData()`.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: PASS Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -624,8 +667,11 @@ git commit -m "refactor: add facebook html fallbacks"
### Task 6: Wire Route-Aware Failures Into Entry Points ### Task 6: Wire Route-Aware Failures Into Entry Points
**Files:** **Files:**
- Modify: `packages/core/test/facebook-integration.test.ts` - Modify: `packages/core/test/facebook-integration.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-integration.test.ts` - Test: `packages/core/test/facebook-integration.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -664,8 +710,10 @@ test("returns null for unavailable item responses", async () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"` Run:
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by route/auth state. `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by
route/auth state.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -690,12 +738,13 @@ if (itemResponseClass.kind === "unavailable") {
} }
``` ```
Use the actual response URL from `fetchHtml` plumbing if that helper is extended to return both HTML and final URL; otherwise start by threading final URL support through the fetch helper in the same task. Use the actual response URL from `fetchHtml` plumbing if that helper is extended to
return both HTML and final URL; otherwise start by threading final URL support through
the fetch helper in the same task.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-integration.test.ts` Run: `bun test packages/core/test/facebook-integration.test.ts` Expected: PASS
Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -707,19 +756,22 @@ git commit -m "refactor: handle facebook route-aware failure states"
### Task 7: Run Full Verification And Live Probe ### Task 7: Run Full Verification And Live Probe
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required - Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required
- Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required - Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required
- Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required - Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required
- [ ] **Step 1: Run focused Facebook tests** - [ ] **Step 1: Run focused Facebook tests**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts` Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Expected: PASS Expected: PASS
- [ ] **Step 2: Run broader core tests** - [ ] **Step 2: Run broader core tests**
Run: `bun test packages/core/test` Run: `bun test packages/core/test` Expected: PASS
Expected: PASS
- [ ] **Step 3: Run live authenticated Facebook probe** - [ ] **Step 3: Run live authenticated Facebook probe**
@@ -742,11 +794,14 @@ if (results[0]?.url) {
Expected: Expected:
- search returns at least one result - search returns at least one result
- item fetch returns non-null for the first live result when the route is not stale/unavailable
- item fetch returns non-null for the first live result when the route is not
stale/unavailable
- [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green** - [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green**
If cleanup is needed, keep it limited to naming, dead-code removal caused by the rewrite, or small parser corrections directly exposed by the verification commands. If cleanup is needed, keep it limited to naming, dead-code removal caused by the
rewrite, or small parser corrections directly exposed by the verification commands.
- [ ] **Step 5: Re-run verification** - [ ] **Step 5: Re-run verification**
@@ -767,6 +822,11 @@ git commit -m "refactor: complete facebook comet scraper rewrite"
## Self-Review ## Self-Review
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and live verification. - Spec coverage: the plan covers classification, route-aware search parsing, route-aware
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps remain. item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and
- Type consistency: all planned functions and types use the same names across tasks: `classifyFacebookResponse`, `extractFacebookBootstrapCandidates`, `extractFacebookMarketplaceData`, and `extractFacebookItemData`. live verification.
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps
remain.
- Type consistency: all planned functions and types use the same names across tasks:
`classifyFacebookResponse`, `extractFacebookBootstrapCandidates`,
`extractFacebookMarketplaceData`, and `extractFacebookItemData`.

View File

@@ -0,0 +1,718 @@
# Unstable Listing Mode Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
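**Goal:** Add an optional shared mode across Facebook, eBay, and Kijiji that moves
listings priced below 80% of the median listing price into `unstableResults`, while
preserving current default response shapes.
The median is taken over listings with a positive price only; when fewer than two such
listings exist, nothing is moved.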
**Architecture:** Introduce a shared generic classifier in `packages/core` that splits
any listing array into `results` and `unstableResults` using the same median-based rule.
Then thread one opt-in flag through the scraper entrypoints, API routes, and MCP tool
definitions so all surfaces expose the same behavior without changing existing defaults.
**Tech Stack:** Bun, TypeScript, Bun test, workspace packages, JSON-RPC MCP server
* * *
## File Map
- Create: `packages/core/src/utils/unstable.ts` Purpose: shared generic median/cutoff
classifier for listing arrays.
- Modify: `packages/core/src/types/common.ts` Purpose: add shared mode types used by
scrapers and adapters.
- Modify: `packages/core/src/index.ts` Purpose: export the new shared classifier/types.
- Modify: `packages/core/src/scrapers/facebook.ts` Purpose: add the optional mode flag
and return bucketed results when enabled.
- Modify: `packages/core/src/scrapers/ebay.ts` Purpose: add the optional mode flag and
return bucketed results when enabled.
- Modify: `packages/core/src/scrapers/kijiji.ts` Purpose: add the optional mode flag and
return bucketed results when enabled.
- Create: `packages/core/test/unstable-listing-mode.test.ts` Purpose: lock the shared
classifier behavior with direct unit tests.
- Modify: `packages/core/test/facebook-core.test.ts` Purpose: prove Facebook preserves
default arrays and returns buckets when enabled.
- Modify: `packages/core/test/ebay-core.test.ts` Purpose: prove eBay preserves default
arrays and returns buckets when enabled.
- Modify: `packages/core/test/kijiji-core.test.ts` Purpose: prove Kijiji preserves
default arrays and returns buckets when enabled.
- Modify: `packages/api-server/src/routes/facebook.ts` Purpose: expose a shared opt-in
query parameter and preserve default response shape.
- Modify: `packages/api-server/src/routes/ebay.ts` Purpose: expose the same query
parameter and preserve default response shape.
- Modify: `packages/api-server/src/routes/kijiji.ts` Purpose: expose the same query
parameter and preserve default response shape.
- Modify: `packages/api-server/test/routes.test.ts` Purpose: verify route forwarding and
route response-shape switching.
- Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: document the optional
unstable mode in all search tools.
- Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: forward the optional
mode to API routes for all search tools.
- Modify: `packages/mcp-server/test/protocol.test.ts` Purpose: verify MCP tool metadata
and forwarded URLs include the new option.
### Task 1: Add the shared unstable-listing classifier
**Files:**
- Create: `packages/core/src/utils/unstable.ts`
- Modify: `packages/core/src/types/common.ts`
- Modify: `packages/core/src/index.ts`
- Test: `packages/core/test/unstable-listing-mode.test.ts`
- [ ] **Step 1: Write the failing test**
Create `packages/core/test/unstable-listing-mode.test.ts` with focused shared-behavior
coverage:
```ts
import { describe, expect, test } from "bun:test";
import {
classifyUnstableListings,
type ListingDetails,
} from "../src/index";
/**
 * Builds a minimal listing fixture for the classifier tests.
 *
 * @param title - Used as the listing title and as the last path segment of its URL.
 * @param cents - Price in cents. When omitted (or otherwise falsy) the listing is
 *   formatted as "$0.00"; an omitted value is stored as 0 cents.
 * @returns A listing fixture with a CAD price, type "item" and status "ACTIVE".
 */
function makeListing(title: string, cents?: number): ListingDetails {
  // Falsy prices (undefined, 0) all share the zero-dollar label.
  let amountFormatted = "$0.00";
  if (cents) {
    const dollars = (cents / 100).toFixed(2);
    amountFormatted = `$${dollars}`;
  }

  return {
    url: `https://example.com/${title}`,
    title,
    listingPrice: {
      amountFormatted,
      cents: cents ?? 0,
      currency: "CAD",
    },
    listingType: "item",
    listingStatus: "ACTIVE",
  };
}
// Table-driven coverage for the shared classifier: each scenario lists the input
// listings plus the titles expected in each bucket, in input order.
describe("classifyUnstableListings", () => {
  const scenarios: {
    name: string;
    listings: ReturnType<typeof makeListing>[];
    kept: string[];
    flagged: string[];
  }[] = [
    {
      name: "moves listings below 80% of the median into unstableResults",
      listings: [
        makeListing("cheap", 1000),
        makeListing("mid", 2000),
        makeListing("high", 3000),
      ],
      kept: ["mid", "high"],
      flagged: ["cheap"],
    },
    {
      // Median of 1000..4000 is 2500, so the cutoff is 2000 and "b" sits exactly on it.
      name: "uses the midpoint median for even-sized priced inputs",
      listings: [
        makeListing("a", 1000),
        makeListing("b", 2000),
        makeListing("c", 3000),
        makeListing("d", 4000),
      ],
      kept: ["b", "c", "d"],
      flagged: ["a"],
    },
    {
      name: "keeps non-positive prices in results while excluding them from median input",
      listings: [
        makeListing("free", 0),
        makeListing("cheap", 1000),
        makeListing("mid", 2000),
        makeListing("high", 3000),
      ],
      kept: ["free", "mid", "high"],
      flagged: ["cheap"],
    },
    {
      name: "returns all listings as results when fewer than two valid prices exist",
      listings: [makeListing("only", 2500)],
      kept: ["only"],
      flagged: [],
    },
  ];

  for (const scenario of scenarios) {
    test(scenario.name, () => {
      const { results, unstableResults } = classifyUnstableListings(scenario.listings);

      expect(results.map((item) => item.title)).toEqual(scenario.kept);
      expect(unstableResults.map((item) => item.title)).toEqual(scenario.flagged);
    });
  }
});
```
- [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Expected: FAIL because
`classifyUnstableListings` and the shared mode types do not exist yet.
- [ ] **Step 3: Write minimal implementation**
Add shared types in `packages/core/src/types/common.ts`:
```ts
export interface UnstableListingBuckets<T> {
results: T[];
unstableResults: T[];
}
export interface UnstableListingModeOptions {
hideUnstableResults?: boolean;
}
```
Create `packages/core/src/utils/unstable.ts` with the shared classifier:
```ts
import type { ListingDetails, UnstableListingBuckets } from "../types/common";
/**
 * Computes the median of `values` without mutating the input.
 *
 * @param values - Candidate numbers (prices in cents when called by the classifier).
 * @returns The middle value for odd-length input, the midpoint of the two
 *          central values for even-length input, or `null` when fewer than
 *          two values are supplied.
 */
function getMedian(values: number[]): number | null {
  if (values.length < 2) return null;

  const sorted = [...values].sort((a, b) => a - b);
  const middle = Math.floor(sorted.length / 2);

  // Indexed reads are `number | undefined` under `noUncheckedIndexedAccess`,
  // so narrow explicitly rather than relying on an assertion.
  const upper = sorted[middle];
  if (upper === undefined) return null;
  if (sorted.length % 2 === 1) return upper;

  const lower = sorted[middle - 1];
  return lower === undefined ? null : (lower + upper) / 2;
}

/**
 * Type guard for a usable price: a finite number strictly greater than zero.
 * `Number.isFinite` alone does not narrow `number | undefined`, hence the
 * explicit `typeof` check.
 */
function isPositivePrice(cents: unknown): cents is number {
  return typeof cents === "number" && Number.isFinite(cents) && cents > 0;
}

/**
 * Splits listings into stable `results` and suspiciously cheap `unstableResults`.
 *
 * A listing is unstable when its price is positive and strictly below 80% of
 * the median of all positive prices. Listings with a missing, non-finite, or
 * non-positive price are ignored for the median and always stay in `results`.
 * Relative order is preserved within each bucket.
 *
 * @param listings - Listings to classify; the array is not modified.
 * @returns Both buckets. When fewer than two positive prices exist, `results`
 *          is the input array itself and `unstableResults` is empty.
 */
export function classifyUnstableListings<T extends ListingDetails>(
  listings: T[],
): UnstableListingBuckets<T> {
  const pricedValues = listings
    .map((listing) => listing.listingPrice?.cents)
    .filter(isPositivePrice);

  const median = getMedian(pricedValues);
  if (median === null) {
    return { results: listings, unstableResults: [] };
  }

  const threshold = median * 0.8;
  const results: T[] = [];
  const unstableResults: T[] = [];

  for (const listing of listings) {
    const cents = listing.listingPrice?.cents;
    if (isPositivePrice(cents) && cents < threshold) {
      unstableResults.push(listing);
    } else {
      results.push(listing);
    }
  }

  return { results, unstableResults };
}
```
Export the new symbols from `packages/core/src/index.ts`:
```ts
export * from "./types/common";
export { classifyUnstableListings } from "./utils/unstable";
```
- [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Expected: PASS with 4
passing tests.
- [ ] **Step 5: Commit**
```bash
git add packages/core/src/utils/unstable.ts packages/core/src/types/common.ts packages/core/src/index.ts packages/core/test/unstable-listing-mode.test.ts
git commit -m "feat: add shared unstable listing classifier"
```
### Task 2: Thread the optional mode through all core scrapers
**Files:**
- Modify: `packages/core/src/scrapers/facebook.ts`
- Modify: `packages/core/src/scrapers/ebay.ts`
- Modify: `packages/core/src/scrapers/kijiji.ts`
- Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/test/ebay-core.test.ts`
- Modify: `packages/core/test/kijiji-core.test.ts`
- [ ] **Step 1: Write the failing tests**
Add one focused opt-in test per scraper.
Use the new shared classifier through the public scraper entrypoints instead of testing
internal helpers.
In `packages/core/test/facebook-core.test.ts`, add:
```ts
test("fetchFacebookItems returns stable and unstable buckets when unstable mode is enabled", async () => {
process.env.FACEBOOK_COOKIE = "c_user=123; xs=abc";
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(facebookSearchHtmlFixture),
headers: { get: () => null },
}),
);
const result = await fetchFacebookItems("bike", 1, "toronto", 25, {
hideUnstableResults: true,
});
expect(result).toHaveProperty("results");
expect(result).toHaveProperty("unstableResults");
});
```
In `packages/core/test/ebay-core.test.ts`, add:
```ts
test("fetchEbayItems returns stable and unstable buckets when unstable mode is enabled", async () => {
const result = await fetchEbayItems("bike", 1, {
keywords: ["bike"],
exclusions: [],
strictMode: false,
buyItNowOnly: true,
canadaOnly: true,
}, {
hideUnstableResults: true,
});
expect(result).toHaveProperty("results");
expect(result).toHaveProperty("unstableResults");
});
```
In `packages/core/test/kijiji-core.test.ts`, add:
```ts
test("fetchKijijiItems returns stable and unstable buckets when unstable mode is enabled", async () => {
const result = await fetchKijijiItems(
"bike",
1,
"https://www.kijiji.ca",
{ maxPages: 1 },
{},
{ hideUnstableResults: true },
);
expect(result).toHaveProperty("results");
expect(result).toHaveProperty("unstableResults");
});
```
Also add one default-mode assertion in one existing scraper test file, for example in
`packages/core/test/facebook-core.test.ts`:
```ts
test("fetchFacebookItems keeps returning an array by default", async () => {
process.env.FACEBOOK_COOKIE = "c_user=123; xs=abc";
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(facebookSearchHtmlFixture),
headers: { get: () => null },
}),
);
const result = await fetchFacebookItems("bike");
expect(Array.isArray(result)).toBe(true);
});
```
- [ ] **Step 2: Run tests to verify they fail**
Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: FAIL because the scraper signatures do not yet accept the new option and still
always return arrays.
- [ ] **Step 3: Write minimal implementation**
Add a small shared helper type import to each scraper:
```ts
import {
classifyUnstableListings,
type UnstableListingBuckets,
type UnstableListingModeOptions,
} from "../index";
```
In `packages/core/src/scrapers/facebook.ts`, extend the default export signature and
branch at the end:
```ts
export default async function fetchFacebookItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1,
LOCATION = "toronto",
MAX_ITEMS = 25,
unstableOptions: UnstableListingModeOptions = {},
): Promise<FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails>> {
// existing fetch/parsing logic
const limitedItems = pricedItems.slice(0, MAX_ITEMS);
if (!unstableOptions.hideUnstableResults) {
return limitedItems;
}
const classified = classifyUnstableListings(pricedItems);
return {
results: classified.results.slice(0, MAX_ITEMS),
unstableResults: classified.unstableResults,
};
}
```
In `packages/core/src/scrapers/ebay.ts`, extend the entrypoint the same way:
```ts
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1,
options: EbaySearchOptions = {},
unstableOptions: UnstableListingModeOptions = {},
): Promise<EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails>> {
// existing fetch/parsing logic
const limitedResults = maxItems ? listings.slice(0, maxItems) : listings;
if (!unstableOptions.hideUnstableResults) {
return limitedResults;
}
const classified = classifyUnstableListings(listings);
return {
results: maxItems ? classified.results.slice(0, maxItems) : classified.results,
unstableResults: classified.unstableResults,
};
}
```
In `packages/core/src/scrapers/kijiji.ts`, add the same final argument after
`listingOptions`:
```ts
export default async function fetchKijijiItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1,
BASE_URL = "https://www.kijiji.ca",
searchOptions: SearchOptions = {},
listingOptions: ListingFetchOptions = {},
unstableOptions: UnstableListingModeOptions = {},
): Promise<DetailedListing[] | UnstableListingBuckets<DetailedListing>> {
// existing fetch/parsing logic
if (!unstableOptions.hideUnstableResults) {
return allListings;
}
return classifyUnstableListings(allListings);
}
```
Keep the default branch untouched in all three files so existing callers still receive
arrays.
- [ ] **Step 4: Run tests to verify they pass**
Run:
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: PASS, including the new opt-in bucket assertions and the default-array
regression assertion.
- [ ] **Step 5: Commit**
```bash
git add packages/core/src/scrapers/facebook.ts packages/core/src/scrapers/ebay.ts packages/core/src/scrapers/kijiji.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts
git commit -m "feat: add unstable mode to scraper results"
```
### Task 3: Expose unstable mode in API routes
**Files:**
- Modify: `packages/api-server/src/routes/facebook.ts`
- Modify: `packages/api-server/src/routes/ebay.ts`
- Modify: `packages/api-server/src/routes/kijiji.ts`
- Modify: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Write the failing tests**
Extend `packages/api-server/test/routes.test.ts` with route-forwarding coverage for the
new query parameter:
```ts
test("facebookRoute forwards unstableFilter=true to core", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
await facebookRoute(
new Request(
"http://localhost/api/facebook?q=laptop&location=toronto&maxItems=3&unstableFilter=true",
),
);
expect(fetchFacebookItems).toHaveBeenCalledWith(
"laptop",
1,
"toronto",
3,
{ hideUnstableResults: true },
);
});
test("ebayRoute forwards unstableFilter=true to core", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&unstableFilter=true"),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
{
minPrice: undefined,
maxPrice: undefined,
strictMode: false,
exclusions: [],
keywords: ["laptop"],
buyItNowOnly: true,
canadaOnly: true,
},
{ hideUnstableResults: true },
);
});
test("kijijiRoute forwards unstableFilter=true to core", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&unstableFilter=true"),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.any(Object),
{},
{ hideUnstableResults: true },
);
});
```
- [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
routes do not yet parse or forward `unstableFilter`.
- [ ] **Step 3: Write minimal implementation**
In each route, parse the shared boolean once:
```ts
const hideUnstableResults = reqUrl.searchParams.get("unstableFilter") === "true";
```
Update the core calls to forward the shared option.
In `packages/api-server/src/routes/facebook.ts`:
```ts
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems, {
hideUnstableResults,
});
```
In `packages/api-server/src/routes/ebay.ts`:
```ts
const items = await fetchEbayItems(
SEARCH_QUERY,
1,
{
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
},
{ hideUnstableResults },
);
```
In `packages/api-server/src/routes/kijiji.ts`:
```ts
const items = await fetchKijijiItems(
SEARCH_QUERY,
4,
"https://www.kijiji.ca",
searchOptions,
{},
{ hideUnstableResults },
);
```
Do not add any response wrapper logic in the routes; simply return whatever the core
scraper returns so the default array path remains unchanged.
- [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS, including
existing cookie-parameter regression tests and the new unstable-mode forwarding
assertions.
- [ ] **Step 5: Commit**
```bash
git add packages/api-server/src/routes/facebook.ts packages/api-server/src/routes/ebay.ts packages/api-server/src/routes/kijiji.ts packages/api-server/test/routes.test.ts
git commit -m "feat: expose unstable mode in api routes"
```
### Task 4: Document and forward unstable mode in MCP tools
**Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts`
- Modify: `packages/mcp-server/src/protocol/handler.ts`
- Modify: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Write the failing tests**
Extend `packages/mcp-server/test/protocol.test.ts` with metadata and forwarding
coverage:
```ts
test("search tools document unstable listing mode", () => {
for (const toolName of ["search_kijiji", "search_facebook", "search_ebay"]) {
const tool = tools.find((entry) => entry.name === toolName);
expect(tool?.inputSchema.properties).toHaveProperty("unstableFilter");
expect(tool?.inputSchema.properties.unstableFilter.description).toContain(
"20% below the median",
);
expect(tool?.inputSchema.properties.unstableFilter.description).toContain(
"unstableResults",
);
}
});
test("search_facebook forwards unstableFilter to the API", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_facebook",
arguments: {
query: "laptop",
unstableFilter: true,
},
},
}),
}),
);
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock.calls[0]?.[0];
expect(String(calledUrl)).toContain("unstableFilter=true");
});
```
Mirror the forwarding assertion for `search_kijiji` and `search_ebay` in the same file.
- [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
tools do not yet describe `unstableFilter` and the handler does not append it to API
URLs.
- [ ] **Step 3: Write minimal implementation**
In `packages/mcp-server/src/protocol/tools.ts`, add the same optional property to all
three tools:
```ts
unstableFilter: {
type: "boolean",
description:
"Optional: move listings priced more than 20% below the median into unstableResults instead of the main results. When enabled, the response shape changes from a plain list to an object with results and unstableResults.",
default: false,
},
```
In `packages/mcp-server/src/protocol/handler.ts`, append the shared flag in each search
branch:
```ts
if (args.unstableFilter !== undefined) {
params.append("unstableFilter", args.unstableFilter.toString());
}
```
Add that snippet to the `search_kijiji`, `search_facebook`, and `search_ebay` branches.
- [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS, including the
new tool-schema assertions and URL-forwarding assertions.
- [ ] **Step 5: Commit**
```bash
git add packages/mcp-server/src/protocol/tools.ts packages/mcp-server/src/protocol/handler.ts packages/mcp-server/test/protocol.test.ts
git commit -m "feat: expose unstable mode in mcp tools"
```
### Task 5: Verify the full cross-package feature end to end
**Files:**
- No code changes expected.
- [ ] **Step 1: Run the focused package tests**
Run:
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts packages/api-server/test/routes.test.ts packages/mcp-server/test/protocol.test.ts`
Expected: PASS with zero failing tests.
- [ ] **Step 2: Run the broader workspace verification**
Run: `bun run ci` Expected: PASS with clean workspace validation.
- [ ] **Step 3: Commit verification-only follow-ups if needed**
If verification forced any tiny fixes, commit them immediately after the fix with a
focused message, for example:
```bash
git add <exact files changed>
git commit -m "fix: align unstable mode verification"
```
If no files changed during verification, skip this commit step.
## Self-Review
- Spec coverage: shared classifier, all three scrapers, API exposure, MCP documentation,
and tests are each mapped to a task.
- Placeholder scan: no `TODO`, `TBD`, or “write tests later” placeholders remain.
- Type consistency: the plan uses one shared flag name, `unstableFilter`, and one shared
core option, `hideUnstableResults`, across all tasks.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,110 @@
# Marketplace Dollar Price Inputs Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to
> implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make public marketplace price inputs use dollars while preserving core scraper
cent-based filtering.
**Architecture:** API server owns HTTP query parsing and converts dollar amounts to
cents before calling core.
MCP server keeps forwarding numeric dollar values as query params.
Core scraper internals remain unchanged because parsed listing prices already use cents.
This applies to eBay `minPrice`/`maxPrice` and Kijiji `priceMin`/`priceMax`; Facebook
exposes no price filter inputs.
**Tech Stack:** Bun, TypeScript, `bun:test`, MCP JSON-RPC adapter, framework-free Bun
HTTP routes.
* * *
### Task 1: API Dollar Parsing
**Files:**
- Modify: `packages/api-server/src/routes/helpers.ts`
- Modify: `packages/api-server/src/routes/ebay.ts`
- Modify: `packages/api-server/src/routes/kijiji.ts`
- Test: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Add failing API route tests**
Add tests proving eBay `minPrice=999.99` / `maxPrice=1000` and Kijiji `priceMin=999.99`
/ `priceMax=1000` are forwarded to core as `99999` and `100000` cents.
Add validation tests for empty, whitespace, negative, hex, mixed text, and malformed
decimal price values.
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: new forwarding tests fail because route currently rejects decimals and
forwards integer dollars unchanged.
- [ ] **Step 2: Implement dollar parser helper**
Add `parseDollarPriceParam(searchParams, name)` in
`packages/api-server/src/routes/helpers.ts`. Accept `0`, `1000`, `999.99`, and `0.99`.
Reject values that do not match `^\d+(?:\.\d{1,2})?$`. Convert to cents with
`Math.round(Number(rawValue) * 100)`.
- [ ] **Step 3: Use dollar parser in eBay and Kijiji routes**
Replace `parseNonNegativeIntegerParam` calls for eBay `minPrice`/`maxPrice` and Kijiji
`priceMin`/`priceMax` with `parseDollarPriceParam`. Keep pagination/count params on
integer parsing.
- [ ] **Step 4: Verify API tests**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: all API route tests pass.
### Task 2: MCP Schema Contract
**Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts`
- Test: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Add MCP schema/forwarding tests**
Add tests that `search_ebay` describes `minPrice` and `maxPrice` as dollar filters and
forwards numeric dollar values unchanged in API query params.
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: description test fails until schema text changes; forwarding behavior should
already pass or reveal mapping gaps.
- [ ] **Step 2: Update tool descriptions**
Change eBay `minPrice` and Kijiji `priceMin` descriptions to `Minimum price in dollars`.
Change eBay `maxPrice` and Kijiji `priceMax` descriptions to `Maximum price in dollars`.
- [ ] **Step 3: Verify MCP tests**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: all MCP protocol tests pass.
### Task 3: Cross-Package Verification
**Files:**
- No additional edits expected.
- [ ] **Step 1: Run relevant package tests**
Run: `bun test packages/api-server/test packages/mcp-server/test`
Expected: all tests pass.
- [ ] **Step 2: Run CI**
Run: `bun run ci`
Expected: typecheck and Biome pass without changing lint config.

View File

@@ -0,0 +1,187 @@
# Live Parser Tests Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add explicit live endpoint test suites for each core marketplace scraper,
excluded from default tests and runnable through one script.
**Architecture:** Live tests live under `packages/core/test/live/` and import public
scraper entry points directly.
Normal package tests remain offline because the new files are outside current explicit
test commands and run only through `bun run test:live`.
**Tech Stack:** Bun `1.3.13`, `bun:test`, TypeScript, existing core scraper APIs.
* * *
## File Structure
- Create `packages/core/test/live/ebay.live.test.ts`: live eBay search smoke test
against `fetchEbayItems`.
- Create `packages/core/test/live/kijiji.live.test.ts`: live Kijiji search smoke test
against `fetchKijijiItems`.
- Create `packages/core/test/live/facebook.live.test.ts`: strict live Facebook search
smoke test against `fetchFacebookItems` and `FACEBOOK_COOKIE`.
- Modify `package.json`: add root script `test:live` running all files under
`packages/core/test/live`.
### Task 1: Add eBay Live Suite
**Files:**
- Create: `packages/core/test/live/ebay.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchEbayItems from "../../src/scrapers/ebay";
describe("eBay live parser", () => {
test("scrapes live search results into listing details", async () => {
const results = await fetchEbayItems("iphone", 1, { maxItems: 3 });
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run eBay live test**
Run: `bun test packages/core/test/live/ebay.live.test.ts` Expected: PASS when eBay
returns parseable search results; FAIL on endpoint/rate-limit/parser breakage.
### Task 2: Add Kijiji Live Suite
**Files:**
- Create: `packages/core/test/live/kijiji.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchKijijiItems from "../../src/scrapers/kijiji";
describe("Kijiji live parser", () => {
test("scrapes live search results into detailed listings", async () => {
const results = await fetchKijijiItems(
"iphone",
1,
"https://www.kijiji.ca",
{ maxPages: 1 },
{ includeImages: false, sellerDataDepth: "basic" },
);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://www.kijiji.ca/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run Kijiji live test**
Run: `bun test packages/core/test/live/kijiji.live.test.ts` Expected: PASS when Kijiji
returns parseable search and detail pages; FAIL on endpoint/parser breakage.
### Task 3: Add Facebook Live Suite
**Files:**
- Create: `packages/core/test/live/facebook.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchFacebookItems from "../../src/scrapers/facebook";
describe("Facebook live parser", () => {
test("requires FACEBOOK_COOKIE for strict live testing", () => {
expect(process.env.FACEBOOK_COOKIE?.trim().length ?? 0).toBeGreaterThan(0);
});
test("scrapes live marketplace search results into listing details", async () => {
const results = await fetchFacebookItems("iphone", 1, "toronto", 3);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://www.facebook.com/marketplace/item/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run Facebook live test**
Run: `bun test packages/core/test/live/facebook.live.test.ts` Expected: PASS with valid
`FACEBOOK_COOKIE`; FAIL when `FACEBOOK_COOKIE` is missing, expired, or parser output is
empty.
### Task 4: Add Root Live Test Script
**Files:**
- Modify: `package.json`
- [ ] **Step 1: Add script**
Change root `scripts` to include:
```json
{
"test:live": "bun test packages/core/test/live"
}
```
- [ ] **Step 2: Run all live tests through script**
Run: `bun run test:live` Expected: runs eBay, Kijiji, and Facebook live suites.
Facebook fails if `FACEBOOK_COOKIE` is unset.
### Task 5: Verify Default Suite Exclusion
**Files:**
- No code files modified.
- [ ] **Step 1: Run existing core tests**
Run: `bun test packages/core/test` Expected: existing mocked tests run.
If Bun discovers `packages/core/test/live`, change normal verification command to
explicit glob `bun test packages/core/test/*.test.ts` and document that in final notes.
- [ ] **Step 2: Run static checks**
Run: `bun run ci` Expected: typecheck and Biome pass.
Fix code issues without changing lint or TypeScript rules.
## Commit Note
Do not commit during execution unless user explicitly requests a commit.
This repo session policy overrides generic plan commit steps.
## Self-Review
- Spec coverage: eBay, Kijiji, Facebook live suites; explicit script; strict Facebook
auth; excluded from default flow.
- Placeholder scan: no `TBD`, `TODO`, or underspecified implementation steps.
- Type consistency: tests use current exported scraper signatures and shared listing
fields from `ListingDetails`.

View File

@@ -0,0 +1,140 @@
# Design: Adopt opencode Monorepo Config
**Date:** 2025-07-14\
**Status:** Approved\
**Approach:** Full adoption (A)
## Context
Current repo (`marketplace-scrapers-monorepo`) has basic bun workspaces with 3 packages
(`core`, `api-server`, `mcp-server`). Reference: `anomalyco/opencode` monorepo patterns.
**Gaps vs opencode:**
- No Turbo (task orchestration, caching, dep graph)
- No workspace catalog (shared dep versions duplicated across packages)
- No root tsconfig (identical tsconfigs duplicated in all 3 packages)
- No `bunfig.toml` (no exact installs, no root test guard)
- `main`/`module` fields instead of `exports` field
## Changes
### 1. Root `package.json`
- Add `workspaces.catalog` block with shared deps:
- `@typescript/native-preview`, `@types/bun`, `@types/unidecode`,
`@types/cli-progress`
- Add `turbo` to `devDependencies`
- Add `@tsconfig/bun` to `devDependencies` + catalog
- Update root scripts: `typecheck` and `build` delegate to `turbo run`
- Keep `build:api`, `build:mcp`, `build:all`, `start` as-is (deployment-specific)
- Rename `type:check` → `typecheck` in all packages (Turbo convention)
### 2. `turbo.json` (new file)
Tasks:
```json
{
"tasks": {
"typecheck": {},
"build": { "dependsOn": ["^build"], "outputs": ["dist/**"] },
"test": { "dependsOn": ["^build"], "outputs": [] }
}
}
```
`core` builds before `api-server`/`mcp-server` due to `^build` dep.
### 3. Root `tsconfig.json` (new file)
```json
{
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["dom", "ESNext"],
"target": "ESNext",
"module": "preserve",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
}
}
```
### 4. Per-package `tsconfig.json` (slim)
All 3 packages slim to:
```json
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"paths": { "@/*": ["./src/*"] }
},
"include": ["./src", "./test"]
}
```
### 5. `bunfig.toml` (new file)
```toml
[install]
exact = true
[test]
root = "./do-not-run-tests-from-root"
```
Exact installs = reproducible.
Root test guard prevents accidental root-level test runs.
### 6. Package `exports` field
Replace `main`/`module` with `exports` in all 3 packages:
```json
"exports": { ".": "./src/index.ts" }
```
Remove `main` and `module` fields.
Bun resolves `.ts` directly.
### 7. Catalog references in per-package `package.json`
Replace pinned versions with `"catalog:"` for shared deps:
- `@typescript/native-preview: "catalog:"`
- `@types/bun: "catalog:"`
- `@types/unidecode: "catalog:"` (core only)
- `@types/cli-progress: "catalog:"` (core only)
## Files Changed
| File | Action |
| --- | --- |
| `package.json` | Update (catalog, turbo dep, scripts) |
| `turbo.json` | Create |
| `tsconfig.json` | Create |
| `bunfig.toml` | Create |
| `packages/core/package.json` | Update (exports, catalog refs, script rename) |
| `packages/api-server/package.json` | Update (exports, catalog refs, script rename) |
| `packages/mcp-server/package.json` | Update (exports, catalog refs, script rename) |
| `packages/core/tsconfig.json` | Update (slim, extends root) |
| `packages/api-server/tsconfig.json` | Update (slim, extends root) |
| `packages/mcp-server/tsconfig.json` | Update (slim, extends root) |
## Non-Goals
- No Husky/git hooks (not needed yet)
- No SST/cloud infra (not applicable)
- No prettier (keep biome as formatter)
- No patches mechanism
- No `postinstall` scripts

View File

@@ -3,7 +3,9 @@
## Summary ## Summary
Remove all file-based and request-provided cookie inputs across the repo. Remove all file-based and request-provided cookie inputs across the repo.
The only supported authentication input becomes a raw `Cookie` header string supplied through scraper-specific environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`. The only supported authentication input becomes a raw `Cookie` header string supplied
through scraper-specific environment variables such as `FACEBOOK_COOKIE` and
`EBAY_COOKIE`.
## Goals ## Goals
@@ -17,7 +19,8 @@ The only supported authentication input becomes a raw `Cookie` header string sup
- Changing scraper behavior unrelated to authentication input. - Changing scraper behavior unrelated to authentication input.
- Adding new cookie formats or migration helpers. - Adding new cookie formats or migration helpers.
- Preserving backward compatibility for cookie files, JSON cookie arrays, or request overrides. - Preserving backward compatibility for cookie files, JSON cookie arrays, or request
overrides.
## Current State ## Current State
@@ -27,27 +30,33 @@ The current shared cookie utilities support three sources in priority order:
2. Environment variable 2. Environment variable
3. Cookie file 3. Cookie file
`packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and auto-detection between JSON and header-string formats. `packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and
Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading logic. auto-detection between JSON and header-string formats.
Docs in `cookies/AGENTS.md` still describe file-based setup and request-level overrides. Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading
logic. Docs in `cookies/AGENTS.md` still describe file-based setup and request-level
overrides.
## Chosen Approach ## Chosen Approach
Use the hard-reset approach. Use the hard-reset approach.
Delete the shared multi-source cookie-loading model and reduce the cookie surface to env-header parsing only. Delete the shared multi-source cookie-loading model and reduce the cookie surface to
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions that imply unsupported inputs still exist. env-header parsing only.
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions
that imply unsupported inputs still exist.
## Design ## Design
### Shared Cookie Utilities ### Shared Cookie Utilities
`packages/core/src/utils/cookies.ts` will keep only the pieces needed for env-header-based auth: `packages/core/src/utils/cookies.ts` will keep only the pieces needed for
env-header-based auth:
- `Cookie` type - `Cookie` type
- A reduced cookie config shape containing only `name`, `domain`, and `envVar` - A reduced cookie config shape containing only `name`, `domain`, and `envVar`
- `parseCookieString()` for raw `Cookie` header strings - `parseCookieString()` for raw `Cookie` header strings
- `formatCookiesForHeader()` for domain filtering and request formatting - `formatCookiesForHeader()` for domain filtering and request formatting
- An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a targeted error when missing or invalid - An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a
targeted error when missing or invalid
The following shared utilities will be removed: The following shared utilities will be removed:
@@ -68,15 +77,18 @@ For Facebook this means:
For eBay this means: For eBay this means:
- Remove any remaining fallback/file-oriented behavior from shared calls and error strings - Remove any remaining fallback/file-oriented behavior from shared calls and error
strings
- Keep the existing env-var auth path, but make it the only path - Keep the existing env-var auth path, but make it the only path
### Public API Surface ### Public API Surface
Exports from `packages/core/src/index.ts` should reflect the new contract. Exports from `packages/core/src/index.ts` should reflect the new contract.
If exported functions currently advertise cookie-source or cookie-path arguments, their signatures will be tightened so callers cannot pass unsupported inputs. If exported functions currently advertise cookie-source or cookie-path arguments, their
signatures will be tightened so callers cannot pass unsupported inputs.
Downstream adapter packages should continue calling core through the simplified signatures without adding their own cookie-loading behavior. Downstream adapter packages should continue calling core through the simplified
signatures without adding their own cookie-loading behavior.
### Error Handling ### Error Handling
@@ -93,8 +105,8 @@ Errors should be blunt and specific:
### Testing Strategy ### Testing Strategy
Follow TDD. Follow TDD. Start by changing or adding core tests so the old file/request behavior is
Start by changing or adding core tests so the old file/request behavior is no longer accepted. no longer accepted.
Coverage targets: Coverage targets:
@@ -102,7 +114,8 @@ Coverage targets:
2. Missing env vars fail with the new env-only error. 2. Missing env vars fail with the new env-only error.
3. Invalid env strings fail without falling back to files or request data. 3. Invalid env strings fail without falling back to files or request data.
4. Facebook APIs no longer expose or honor cookie-path/request-cookie behavior. 4. Facebook APIs no longer expose or honor cookie-path/request-cookie behavior.
5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to the env-only contract. 5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to
the env-only contract.
Verification target after implementation: Verification target after implementation:
@@ -121,11 +134,15 @@ Update cookie-related docs to match the new contract:
## Risks ## Risks
- External callers using request cookie overrides will break at compile time or runtime, depending on how they consume the package. - External callers using request cookie overrides will break at compile time or runtime,
- Recent work added support for custom Facebook cookie paths, so removing that path intentionally reverses a newly introduced behavior. depending on how they consume the package.
- Tests that currently model missing-file behavior must be rewritten rather than preserved. - Recent work added support for custom Facebook cookie paths, so removing that path
intentionally reverses a newly introduced behavior.
- Tests that currently model missing-file behavior must be rewritten rather than
preserved.
## Rollout Notes ## Rollout Notes
This is an intentional contract break. This is an intentional contract break.
The code, tests, and docs should all land together so there is no mixed messaging about supported cookie sources. The code, tests, and docs should all land together so there is no mixed messaging about
supported cookie sources.

View File

@@ -2,35 +2,46 @@
## Summary ## Summary
Replace the legacy Facebook Marketplace scraper with a route-aware implementation built around current Comet bootstrap markers and route-specific extraction. Replace the legacy Facebook Marketplace scraper with a route-aware implementation built
The new scraper will keep authenticated direct HTTP fetches as the primary transport, but it will stop treating legacy `require`, `__bbox`, and `marketplace_product_details_page` structures as the main parsing contract. around current Comet bootstrap markers and route-specific extraction.
The new scraper will keep authenticated direct HTTP fetches as the primary transport,
but it will stop treating legacy `require`, `__bbox`, and
`marketplace_product_details_page` structures as the main parsing contract.
## Goals ## Goals
- Replace both Facebook search and item-detail extraction with a current-shape parser. - Replace both Facebook search and item-detail extraction with a current-shape parser.
- Keep authenticated direct HTTP requests as the primary fetch strategy. - Keep authenticated direct HTTP requests as the primary fetch strategy.
- Parse route-specific Comet bootstrap/state payloads before falling back to rendered-HTML extraction. - Parse route-specific Comet bootstrap/state payloads before falling back to
rendered-HTML extraction.
- Detect auth-gated, unavailable, and unknown responses explicitly. - Detect auth-gated, unavailable, and unknown responses explicitly.
- Update tests so they model current route markers and failure modes instead of legacy page objects. - Update tests so they model current route markers and failure modes instead of legacy
page objects.
## Non-Goals ## Non-Goals
- Reworking non-Facebook scrapers. - Reworking non-Facebook scrapers.
- Converting the scraper to browser-only automation. - Converting the scraper to browser-only automation.
- Preserving old parser behavior for `marketplace_product_details_page` or `__bbox`-driven item extraction. - Preserving old parser behavior for `marketplace_product_details_page` or
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively before implementation. `__bbox`-driven item extraction.
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively
before implementation.
## Current State ## Current State
The current implementation in `packages/core/src/scrapers/facebook.ts` still uses authenticated HTTP requests, which remains correct. The current implementation in `packages/core/src/scrapers/facebook.ts` still uses
The search path parses embedded script JSON and looks for `marketplace_search.feed_units.edges`. authenticated HTTP requests, which remains correct.
The item-detail path is centered on legacy extraction paths such as: The search path parses embedded script JSON and looks for
`marketplace_search.feed_units.edges`. The item-detail path is centered on legacy
extraction paths such as:
- `parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target` - `parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- nested `__bbox.require[...]` variations - nested `__bbox.require[...]` variations
- recursive search through `parsed.require` - recursive search through `parsed.require`
Live evidence gathered earlier in this session and by the isolated research subagent shows that current Facebook Marketplace pages are Comet route-driven and expose markers such as: Live evidence gathered earlier in this session and by the isolated research subagent
shows that current Facebook Marketplace pages are Comet route-driven and expose markers
such as:
- `XCometMarketplaceSearchController` - `XCometMarketplaceSearchController`
- `XCometMarketplacePermalinkController` - `XCometMarketplacePermalinkController`
@@ -41,7 +52,9 @@ Live evidence gathered earlier in this session and by the isolated research suba
- `data-sjs` - `data-sjs`
- `data-btmanifest` - `data-btmanifest`
The same live investigation also showed that authenticated item pages no longer expose the old `marketplace_product_details_page` marker reliably, while live search still returns usable results. The same live investigation also showed that authenticated item pages no longer expose
the old `marketplace_product_details_page` marker reliably, while live search still
returns usable results.
## Chosen Approach ## Chosen Approach
@@ -52,9 +65,11 @@ The scraper will:
1. Fetch authenticated HTML directly. 1. Fetch authenticated HTML directly.
2. Classify the response using current route and auth markers. 2. Classify the response using current route and auth markers.
3. Parse inline bootstrap/state payloads using route-specific probes. 3. Parse inline bootstrap/state payloads using route-specific probes.
4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the payload cannot be decoded into the expected search or item shape. 4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the
payload cannot be decoded into the expected search or item shape.
This keeps the cheaper direct-HTTP transport while shifting the parser contract from legacy page-object names to current Comet route structure. This keeps the cheaper direct-HTTP transport while shifting the parser contract from
legacy page-object names to current Comet route structure.
## Design ## Design
@@ -88,7 +103,8 @@ Primary behavior:
- fetch the Marketplace search HTML with auth cookies - fetch the Marketplace search HTML with auth cookies
- confirm the response class is `search` - confirm the response class is `search`
- extract inline bootstrap/state blobs from script tags and page attributes - extract inline bootstrap/state blobs from script tags and page attributes
- probe for route-specific search payloads associated with `XCometMarketplaceSearchController` - probe for route-specific search payloads associated with
`XCometMarketplaceSearchController`
- map decoded search results into summary listing records - map decoded search results into summary listing records
Search summary fields should remain aligned with the current public output shape: Search summary fields should remain aligned with the current public output shape:
@@ -102,7 +118,8 @@ Search summary fields should remain aligned with the current public output shape
Fallback behavior: Fallback behavior:
- if search route markers are present but structured payload decoding fails, extract listing summaries from rendered HTML anchors and text patterns - if search route markers are present but structured payload decoding fails, extract
listing summaries from rendered HTML anchors and text patterns
- use item links matching `/marketplace/item/<id>` as the anchor for fallback extraction - use item links matching `/marketplace/item/<id>` as the anchor for fallback extraction
- treat fallback results as summary-only data, not rich detail data - treat fallback results as summary-only data, not rich detail data
@@ -132,9 +149,12 @@ Priority item fields:
Fallback behavior: Fallback behavior:
- if permalink route markers are present but no stable payload object is decodable, extract data from rendered HTML text structure - if permalink route markers are present but no stable payload object is decodable,
- prioritize title, price, condition, description, location text, and seller module content extract data from rendered HTML text structure
- return partial item data when core user-facing fields are present rather than failing solely because deeper commerce metadata is missing - prioritize title, price, condition, description, location text, and seller module
content
- return partial item data when core user-facing fields are present rather than failing
solely because deeper commerce metadata is missing
### Bootstrap Parsing Strategy ### Bootstrap Parsing Strategy
@@ -151,11 +171,14 @@ Candidate discovery inputs:
- `ServerJS` / `Bootloader` inline blobs - `ServerJS` / `Bootloader` inline blobs
- route controller names - route controller names
Candidate scoring for search should favor objects that contain repeated result-card semantics, item IDs, listing links, titles, prices, or location summaries. Candidate scoring for search should favor objects that contain repeated result-card
Candidate scoring for item pages should favor objects that contain singular listing semantics, title, price, condition, description, location, seller, or permalink context. semantics, item IDs, listing links, titles, prices, or location summaries.
Candidate scoring for item pages should favor objects that contain singular listing
semantics, title, price, condition, description, location, seller, or permalink context.
The parser should not depend on one hard-coded object name surviving forever. The parser should not depend on one hard-coded object name surviving forever.
Instead, it should look for route-specific semantic clusters and choose the strongest candidate. Instead, it should look for route-specific semantic clusters and choose the strongest
candidate.
### Legacy Removal ### Legacy Removal
@@ -166,7 +189,9 @@ Specifically:
- delete legacy-first `require` / `__bbox` navigation tables - delete legacy-first `require` / `__bbox` navigation tables
- delete tests whose only purpose is to preserve those legacy paths - delete tests whose only purpose is to preserve those legacy paths
If a minimal legacy compatibility branch remains, it must be a last-resort fallback behind the new route-aware parser and should not shape test fixtures or design decisions. If a minimal legacy compatibility branch remains, it must be a last-resort fallback
behind the new route-aware parser and should not shape test fixtures or design
decisions.
### Error Handling ### Error Handling
@@ -178,7 +203,8 @@ Facebook responses should now fail with explicit route-aware outcomes:
4. Search or item route detected, but no decodable data found. 4. Search or item route detected, but no decodable data found.
5. Unknown response shape. 5. Unknown response shape.
Error messages should name the actual class of failure instead of implying that every parse miss is caused by expired cookies. Error messages should name the actual class of failure instead of implying that every
parse miss is caused by expired cookies.
### Testing Strategy ### Testing Strategy
@@ -190,11 +216,15 @@ Coverage targets:
1. Search responses classify correctly from current Comet controller markers. 1. Search responses classify correctly from current Comet controller markers.
2. Item responses classify correctly from current Comet controller markers. 2. Item responses classify correctly from current Comet controller markers.
3. Login-gated and unavailable responses are detected before parsing. 3. Login-gated and unavailable responses are detected before parsing.
4. Search bootstrap parsing produces summary listing results from current-shape fixtures. 4. Search bootstrap parsing produces summary listing results from current-shape
fixtures.
5. Item bootstrap parsing produces rich listing details from current-shape fixtures. 5. Item bootstrap parsing produces rich listing details from current-shape fixtures.
6. Search fallback extraction works when route markers exist but structured payload decoding fails. 6. Search fallback extraction works when route markers exist but structured payload
7. Item fallback extraction works when route markers exist but structured payload decoding fails. decoding fails.
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the contract. 7. Item fallback extraction works when route markers exist but structured payload
decoding fails.
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the
contract.
Verification target after implementation: Verification target after implementation:
@@ -204,23 +234,30 @@ Verification target after implementation:
## Public API Surface ## Public API Surface
Keep the current public function names unless the rewrite proves that a signature change is required: Keep the current public function names unless the rewrite proves that a signature change
is required:
- `fetchFacebookItems(...)` - `fetchFacebookItems(...)`
- `fetchFacebookItem(...)` - `fetchFacebookItem(...)`
- `extractFacebookMarketplaceData(...)` - `extractFacebookMarketplaceData(...)`
- `extractFacebookItemData(...)` - `extractFacebookItemData(...)`
The internals should change substantially, but callers should not need a new integration surface for this rewrite. The internals should change substantially, but callers should not need a new integration
surface for this rewrite.
## Risks ## Risks
- Facebook may change bootstrap payload naming again, so route/controller markers are more stable than exact nested object paths but still not guaranteed. - Facebook may change bootstrap payload naming again, so route/controller markers are
- Search and item pages may each contain multiple partial payloads, making candidate ranking important. more stable than exact nested object paths but still not guaranteed.
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs clear precedence rules. - Search and item pages may each contain multiple partial payloads, making candidate
- Live fixtures can drift from production quickly, so tests must model route semantics rather than exact one-off payloads where possible. ranking important.
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs
clear precedence rules.
- Live fixtures can drift from production quickly, so tests must model route semantics
rather than exact one-off payloads where possible.
## Rollout Notes ## Rollout Notes
The code, fixtures, and tests should change together. The code, fixtures, and tests should change together.
There should be no mixed state where the implementation is Comet-aware but the tests still encode `marketplace_product_details_page` as the primary contract. There should be no mixed state where the implementation is Comet-aware but the tests
still encode `marketplace_product_details_page` as the primary contract.

View File

@@ -0,0 +1,173 @@
# Unstable Listing Mode Design
## Summary
Add an optional shared result mode across Facebook, eBay, and Kijiji that moves
suspiciously cheap listings out of the main results into a separate `unstableResults`
bucket. Listings are considered unstable when their price is more than 20% below the
median price of the scraper's priced search results.
## Goals
- Support the same optional unstable-listing mode across all scrapers.
- Keep current default scraper and route behavior unchanged unless the mode is enabled.
- Hide unstable listings from the main results while still returning them separately.
- Implement the rule once in shared core code instead of duplicating
marketplace-specific logic.
- Document the option in MCP tool descriptions so callers can discover it.
## Non-Goals
- Adding marketplace-specific thresholds or heuristics.
- Re-ranking results beyond splitting stable and unstable buckets.
- Classifying free, missing-price, or invalid-price listings as unstable.
- Changing unrelated scraper parsing behavior.
## Current State
`packages/core` currently returns plain arrays from scraper search functions.
`packages/api-server` forwards those scraper results directly from marketplace routes.
`packages/mcp-server` documents search tools per marketplace, but does not expose or
describe any result-stability mode.
There is no shared result-classification utility today.
Price filtering exists in some scrapers, but not a cross-marketplace median-based split.
## Chosen Approach
Use a shared core utility plus per-route and per-tool opt-in.
The shared utility will accept parsed listings, compute the median from valid positive
prices, and split the data into `results` and `unstableResults`. Each scraper will opt
into that utility when the caller enables unstable-listing mode.
API routes and MCP tools will expose the same optional mode so the feature is
consistently available everywhere scraper search is surfaced.
This keeps the heuristic centralized, minimizes duplicated logic, and preserves existing
consumers by leaving the default path unchanged.
## Design
### Shared Core Classification
Add a shared utility in `packages/core` for listing stability classification.
Responsibilities:
- accept parsed listing arrays with `listingPrice.cents`
- ignore listings whose price is missing, non-numeric, or non-positive when computing
the median
- compute the median price from valid priced listings
- classify listings as unstable when `listingPrice.cents < median * 0.8`
- return an object with:
- `results`: listings that remain in the main bucket
- `unstableResults`: listings moved out of the main bucket
Listings excluded from median computation because their price is missing or non-positive
remain in `results` unchanged.
### Scraper Integration
Facebook, eBay, and Kijiji search entrypoints will gain the same optional mode flag.
Default behavior:
- return the current plain array result shape
Opt-in behavior:
- run the shared classification utility after parsing search results
- classify before final result limiting so unstable items do not consume main-result
slots
- return an object shaped like:
```ts
{
results: ListingDetails[];
unstableResults: ListingDetails[];
}
```
Each scraper will use its existing concrete listing subtype for these arrays.
### API Surface
Marketplace API routes will expose an optional query parameter for unstable-listing
mode.
Requirements:
- keep existing route responses unchanged when the parameter is absent or false
- when enabled, return the object payload with `results` and `unstableResults`
- use the same semantics across Facebook, eBay, and Kijiji routes
The exact parameter name should be consistent across routes and intentionally describe
the behavior, for example `unstableFilter=true`.
### MCP Surface
Marketplace MCP tools will expose the same optional mode as an input field.
Tool descriptions should explicitly document:
- that the option is optional
- that it moves listings priced more than 20% below the median into `unstableResults`
- that enabling it changes the response shape from a plain list to an object with
`results` and `unstableResults`
- that the behavior is available for Facebook, eBay, and Kijiji search tools
The wording should be aligned across all three tools so the feature reads as one shared
capability.
### Error Handling
The unstable-listing mode should be best-effort and non-failing.
- If there are no valid positive prices, return all listings in `results` and an empty
`unstableResults` array.
- If there is only one valid priced listing, do not classify it as unstable.
- Parsing failures remain governed by existing scraper behavior; the classification
layer should not introduce new scraper-specific errors.
### Testing Strategy
Follow TDD. Start with shared utility tests, then wire the option through scraper and
route tests.
Coverage targets:
1. Median calculation for odd-sized valid price sets.
2. Median calculation for even-sized valid price sets.
3. Strict cutoff behavior where only listings with `price < median * 0.8` move to
`unstableResults`.
4. Missing, invalid, zero, or negative prices are excluded from median computation and
remain in `results`.
5. Default scraper behavior still returns plain arrays when the option is disabled.
6. Enabled scraper behavior returns `{ results, unstableResults }` for Facebook, eBay,
and Kijiji.
7. API routes preserve existing response shapes by default and switch to the object
payload only when enabled.
8. MCP tool metadata documents the new optional mode for all three marketplace search
tools.
Verification target after implementation:
- `bun test packages/core/test`
- `bun test packages/api-server/test`
- `bun test packages/mcp-server/test` if MCP metadata tests exist or are added
- `bun run ci`
## Risks
- The optional mode introduces a union return shape for scraper callers, which can
ripple into downstream TypeScript signatures.
- Applying classification before final limiting changes which items appear in the main
bucket compared with a naive post-limit split.
- Kijiji and eBay may have different mixes of priced and unpriced results, so excluding
non-positive prices from the median must remain explicit and tested.
## Rollout Notes
Land the shared classifier, scraper wiring, route wiring, tests, and MCP description
updates together. That avoids a partial rollout where the feature exists in one surface
but is undocumented or inconsistent elsewhere.

View File

@@ -0,0 +1,44 @@
# Live Parser Tests Design
## Summary
Add explicit live endpoint tests for each core scraper parser path.
These tests are excluded from normal deterministic test commands and run only through a
dedicated package script.
## Scope
- Add one live suite per parser: eBay, Kijiji, Facebook.
- Place suites under `packages/core/test/live/` so normal
`bun test packages/core/test/*.test.ts` patterns do not include them accidentally.
- Add a root `test:live` script that runs all live suites together.
- Keep existing mocked tests unchanged.
## Behavior
- Each suite calls the public scraper entry point for that marketplace with a narrow
query and low max item count.
- Assertions verify scrape output shape and parser viability, not exact listing
identity.
- eBay and Kijiji require live network access and fail on endpoint/parser breakage.
- Facebook is strict: missing or expired `FACEBOOK_COOKIE` fails the live suite instead
of skipping.
## Test Data
- Use stable broad Canadian queries such as `iphone` or `laptop` to reduce empty-result
risk.
- Use low limits to avoid unnecessary load and rate-limit pressure.
- Avoid exact prices, titles, listing IDs, or ordering assumptions.
## Failure Meaning
- Empty result arrays fail because live parser logic did not produce usable listings.
- Missing required fields fail because adapter contracts depend on those fields.
- Authentication failures fail for Facebook because its live suite is strict and never skips.
## Verification
- Normal suite remains offline: `bun test packages/core/test`.
- Live suite runs by explicit script: `bun run test:live`.
- Full static checks remain via `bun run ci`.

View File

@@ -0,0 +1,173 @@
# Facebook Marketplace Anti-Bot Challenge Solver Design
## Summary
Add a challenge-detection and challenge-solving layer to the Facebook Marketplace
scraper so it can handle anti-bot gates (checkpoint pages, token rotation, cookie
requirements) programmatically.
Build the solver in pure Bun — no browser automation in production.
Use `agent-browser` only for one-time debug reconnaissance.
## Goals
- Identify which anti-bot challenge(s) Facebook Marketplace triggers against
programmatic HTTP requests.
- Implement detection + solving for each discovered challenge type.
- Wire the solver into `fetchFacebookItems` and `fetchFacebookItem` so challenges are
handled transparently.
- Follow the same pattern as the existing `ebay-challenge.ts` (detect → solve → retry
with clearance).
- Zero browser automation at runtime.
Pure `fetch` + `Bun` APIs + npm packages only.
## Non-Goals
- Solving login/auth-wall challenges (those require fresh cookies — not solvable
programmatically).
- Full account login automation (cookies must be provided by the user).
- Browser-based scraping or Puppeteer/Playwright integration.
- Solving challenges for non-Marketplace Facebook endpoints.
## Current State
The Facebook scraper (`packages/core/src/scrapers/facebook.ts`) fetches Marketplace
search and item pages via authenticated `fetch` with cookies from the `FACEBOOK_COOKIE`
env var. It:
- Sends a browser-like header set (`sec-ch-ua`, `user-agent`, etc.)
- Parses SSR HTML for embedded JSON in script tags
- Has no challenge detection — if Facebook returns a challenge page, the scraper
silently fails (no listings parsed, classifies as “unknown”)
- Depends entirely on cookie freshness
The eBay scraper already follows the challenge-solver pattern in this codebase:
`ebay.ts` uses `warmEbaySession()`, `isChallengeRedirect()`, `isChallengeHtml()`, and
`solveEbayChallenge()` from `ebay-challenge.ts`.
## Chosen Approach
**Reconnaissance-first development:**
1. Use `agent-browser` (debug only) to capture a real Facebook Marketplace browsing
session via HAR.
2. Probe programmatic `fetch` to see what Facebook returns without a browser.
3. Diff the two to identify the gap (missing headers?
missing cookies? missing JS execution?).
4. Build a modular solver in `packages/core/src/utils/facebook-challenge.ts` that
detects each challenge type and applies the appropriate fix.
5. Wire it into `facebook.ts` following the eBay pattern.
## Design
### File Plan
| File | Purpose |
| --- | --- |
| `packages/core/src/utils/facebook-challenge.ts` | Challenge detection, solving, and cookie/session utilities |
| `packages/core/src/scrapers/facebook.ts` | Modified: warmup, challenge detection before parsing, retry loop |
| `packages/core/test/facebook-challenge.test.ts` | Unit tests with mock challenge HTML fixtures |
### Flow
```
fetchFacebookItems(searchUrl)
├── warmFacebookSession() → GET facebook.com/ (collect datr + Akamai cookies)
├── fetchHtml(searchUrl) → receives response
├── detectFacebookChallenge(response)
│ ├── checkpoint/challenge HTML → solveCheckpointChallenge()
│ ├── redirect to /login → fail (cookies expired)
│ ├── missing required cookies → regenerate session
│ ├── 429 rate limit → backoff + retry (existing http.ts handles this)
│ └── no challenge → proceed to parsing
├── if solveCheckpointChallenge succeeds → retry fetchHtml with clearance cookie
└── parse results
```
### Challenge Types (to be confirmed by reconnaissance)
| Type | Expected Signal | Solving Strategy |
| --- | --- | --- |
| Login wall | Redirect to `/login` or HTML `"You must log in"` | Fail — user must provide fresh cookies |
| Checkpoint page | HTML contains `checkpoint` or `challenge` path | Parse hidden form fields, compute proof-of-work if present, submit answer endpoint |
| `datr` cookie missing | No `datr` in cookie jar → request fails | Fetch homepage first to obtain `datr` (session warmup) |
| DTSG token needed | Form submissions fail with CSRF error | Extract `fb_dtsg` from page HTML, include in request body |
| GraphQL header check | Request blocked without internal headers | Extract `x-fb-friendly-name` from browser HAR, replicate |
| Akamai/bot-manager | Redirect loops or blank pages without Akamai cookies | Homepage warmup to collect `bm_sv`, `bm_mi`, etc. |
### Key Modules
**`facebook-challenge.ts`:**
```
// Session warmup — fetch homepage to prime cookies
warmFacebookSession(): Promise<Record<string, string>>
// Challenge detection
detectFacebookChallenge(html, status, url, headers): ChallengeType | null
// Checkpoint solver
solveCheckpointChallenge(html, cookies): Promise<ChallengeResult>
// DTSG token extraction
extractDtsg(html): string | null
// Cookie jar management (shared with ebay.ts pattern)
mergeCookies(...): Record<string, string>
```
**`ChallengeResult` type:**
```ts
interface ChallengeResult {
solved: boolean;
cookies?: Record<string, string>; // clearance cookies to replay
token?: string; // challenge response token
error?: string; // why it failed
}
```
### Error Handling
- Solver failure → return `ChallengeResult { solved: false, error: "..." }`, scraper
logs warning and returns empty results (never throws).
- Unrecognized challenge → log the response URL and HTML snippet for future analysis.
- Rate limits → handled by existing `http.ts` exponential backoff (no change needed).
- Solver timeout → 30s cap on any challenge computation, fall back to `solved: false`.
### Testing
| Test | What It Verifies |
| --- | --- |
| `detectFacebookChallenge` with sample checkpoint HTML | Correctly identifies checkpoint challenge |
| `detectFacebookChallenge` with normal search HTML | Returns null (no false positives) |
| `detectFacebookChallenge` with login redirect | Identifies auth-gated |
| `solveCheckpointChallenge` with known PoW params | Produces correct answer |
| `warmFacebookSession` with mocked fetch | Collects expected cookies |
| `extractDtsg` with sample page HTML | Extracts the DTSG token |
| Integration: fetch → challenge → solve → retry → results | End-to-end mock flow |
| Solver throws → scraper returns empty, no crash | Graceful fallback |
| Solver unknown challenge → logs warning, returns empty | No unhandled challenge crashes |
Test data will use anonymized HTML fixtures (no real user data).
## Reconnaissance Steps (debug-only, one-time)
1. **Probe programmatically:** `fetch` Marketplace search with/without cookies, record
status code and HTML.
2. **Browser session:** `agent-browser` → log into Facebook → navigate Marketplace →
record HAR.
3. **Diff analysis:** Compare browser request headers vs.
our programmatic headers.
4. **Cookie inventory:** List all cookies from browser session, identify which are
essential.
5. **Challenge trigger:** Identify what change in request signature triggers a
challenge.
6. **Replay test:** Replay the browser's exact request via `fetch` to confirm
headers/cookies are the differentiator.
All reconnaissance artifacts saved under `docs/facebook-challenge/`.
## Decisions Deferred to Post-Reconnaissance
- Exact challenge types and solving strategies (depends on what Facebook actually uses).
- Whether a PoW solver, CAPTCHA solver, or token-extraction approach is needed.
- npm package dependencies (only add what the reconnaissance proves necessary).

View File

@@ -1,21 +1,39 @@
{ {
"$schema": "https://json.schemastore.org/package.json",
"name": "marketplace-scrapers-monorepo", "name": "marketplace-scrapers-monorepo",
"version": "1.0.0", "version": "1.0.0",
"private": true,
"type": "module",
"packageManager": "bun@1.3.13",
"scripts": { "scripts": {
"ci": "biome ci", "typecheck": "turbo run typecheck",
"clean": "rm -rf dist", "build": "bun run clean && turbo run build",
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify", "build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify", "build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp", "build:all": "bun run build:api && bun run build:mcp",
"build": "bun run clean && bun run build:all", "ci": "bun run typecheck && biome check --write",
"test:live": "bun test --cwd packages/core test/live",
"clean": "rm -rf dist",
"start": "./scripts/start.sh" "start": "./scripts/start.sh"
}, },
"private": true, "workspaces": {
"type": "module", "packages": [
"workspaces": [ "packages/*"
"packages/*" ],
], "catalog": {
"@tsconfig/bun": "1.0.9",
"@typescript/native-preview": "7.0.0-dev.20260428.1",
"@types/bun": "1.3.13",
"@types/cli-progress": "3.11.6",
"@types/unidecode": "1.1.0"
}
},
"devDependencies": { "devDependencies": {
"@biomejs/biome": "2.3.11" "@biomejs/biome": "2.3.11",
"@tsconfig/bun": "catalog:",
"turbo": "2.5.4"
},
"dependencies": {
"@types/bun": "1.3.13"
} }
} }

View File

@@ -19,5 +19,6 @@
## Verify ## Verify
- `bun test packages/api-server/test`
- `bun run --cwd packages/api-server build` - `bun run --cwd packages/api-server build`
- `bun run ci` - `bun run ci`

View File

@@ -2,18 +2,22 @@
"name": "@marketplace-scrapers/api-server", "name": "@marketplace-scrapers/api-server",
"version": "1.0.0", "version": "1.0.0",
"type": "module", "type": "module",
"module": "./src/index.ts", "exports": {
".": "./src/index.ts"
},
"private": true, "private": true,
"scripts": { "scripts": {
"start": "bun ./src/index.ts", "start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts", "dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api" "build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api",
"typecheck": "bun tsgo"
}, },
"dependencies": { "dependencies": {
"@marketplace-scrapers/core": "workspace:*" "@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "latest" "@types/bun": "catalog:"
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5" "typescript": "^5"

View File

@@ -1,3 +1,4 @@
import { logger } from "./logger";
import { ebayRoute } from "./routes/ebay"; import { ebayRoute } from "./routes/ebay";
import { facebookRoute } from "./routes/facebook"; import { facebookRoute } from "./routes/facebook";
import { kijijiRoute } from "./routes/kijiji"; import { kijijiRoute } from "./routes/kijiji";
@@ -27,4 +28,4 @@ const server = Bun.serve({
}, },
}); });
console.log(`API Server running on ${server.hostname}:${server.port}`); logger.log(`API Server running on ${server.hostname}:${server.port}`);

View File

@@ -0,0 +1,10 @@
/** Reports whether the process is currently running with NODE_ENV set to "test". */
function isTest(): boolean {
  return process.env.NODE_ENV === "test";
}

/**
 * Thin wrapper around `console` that stays silent while NODE_ENV is "test".
 * The environment is re-read on every call, so toggling NODE_ENV at runtime
 * takes effect immediately.
 */
export const logger = {
  /** Forwards to `console.log` unless running under test. */
  log: (...args: Parameters<typeof console.log>) => {
    if (isTest()) return;
    console.log(...args);
  },
  /** Forwards to `console.error` unless running under test. */
  error: (...args: Parameters<typeof console.error>) => {
    if (isTest()) return;
    console.error(...args);
  },
};

View File

@@ -1,62 +1,84 @@
import { fetchEbayItems } from "@marketplace-scrapers/core"; import { fetchEbayItems } from "@marketplace-scrapers/core";
import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseDollarPriceParam,
parseNonNegativeIntegerParam,
} from "./helpers";
/** /**
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly} * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
* Search eBay for listings (default: Buy It Now only, Canada only) * Search eBay for listings (default: Buy It Now only, Canada only)
*/ */
export async function ebayRoute(req: Request): Promise<Response> { export async function ebayRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url);
const SEARCH_QUERY = getRequiredSearchQuery(req);
if (SEARCH_QUERY instanceof Response) {
return SEARCH_QUERY;
}
const minPrice = parseDollarPriceParam(reqUrl.searchParams, "minPrice");
if (minPrice instanceof Response) {
return minPrice;
}
const maxPrice = parseDollarPriceParam(reqUrl.searchParams, "maxPrice");
if (maxPrice instanceof Response) {
return maxPrice;
}
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
const maxItems = parseNonNegativeIntegerParam(
reqUrl.searchParams,
"maxItems",
);
if (maxItems instanceof Response) {
return maxItems;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
const opts = {
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
maxItems,
};
try { try {
const reqUrl = new URL(req.url); if (hideUnstableResults) {
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts, {
hideUnstableResults: true,
});
if (items.results.length === 0 && items.unstableResults.length === 0) {
return emptySearchResponse();
}
return Response.json(items, { status: 200 });
}
const SEARCH_QUERY = const items = await fetchEbayItems(SEARCH_QUERY, 1, opts);
req.headers.get("query") || reqUrl.searchParams.get("q") || null; const isEmpty = !items || items.length === 0;
if (!SEARCH_QUERY)
return Response.json(
{
message:
"Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const minPriceParam = reqUrl.searchParams.get("minPrice"); if (isEmpty) {
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined; return emptySearchResponse();
const maxPriceParam = reqUrl.searchParams.get("maxPrice"); }
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined; return Response.json(items, { status: 200 });
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
});
const results = maxItems ? items.slice(0, maxItems) : items;
if (!results || results.length === 0)
return Response.json(
{ message: "Search didn't return any results!" },
{ status: 404 },
);
return Response.json(results, { status: 200 });
} catch (error) { } catch (error) {
console.error("eBay scraping error:", error); logger.error("eBay scraping error:", error);
const errorMessage = const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred"; error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 }); return Response.json({ message: errorMessage }, { status: 400 });

View File

@@ -1,4 +1,10 @@
import { fetchFacebookItems } from "@marketplace-scrapers/core"; import { fetchFacebookItems } from "@marketplace-scrapers/core";
import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseNonNegativeIntegerParam,
} from "./helpers";
/** /**
* GET /api/facebook?q={query}&location={location} * GET /api/facebook?q={query}&location={location}
@@ -7,30 +13,47 @@ import { fetchFacebookItems } from "@marketplace-scrapers/core";
export async function facebookRoute(req: Request): Promise<Response> { export async function facebookRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY = getRequiredSearchQuery(req);
req.headers.get("query") || reqUrl.searchParams.get("q") || null; if (SEARCH_QUERY instanceof Response) {
if (!SEARCH_QUERY) return SEARCH_QUERY;
return Response.json( }
{
message: "Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const LOCATION = reqUrl.searchParams.get("location") || "toronto"; const LOCATION = reqUrl.searchParams.get("location") || "toronto";
const maxItemsParam = reqUrl.searchParams.get("maxItems"); const maxItems = parseNonNegativeIntegerParam(
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25; reqUrl.searchParams,
"maxItems",
25,
);
if (maxItems instanceof Response) {
return maxItems;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
try { try {
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems); if (hideUnstableResults) {
if (!items || items.length === 0) const items = await fetchFacebookItems(
return Response.json( SEARCH_QUERY,
{ message: "Search didn't return any results!" }, 1,
{ status: 404 }, LOCATION,
maxItems,
{
hideUnstableResults: true,
},
); );
if (items.results.length === 0 && items.unstableResults.length === 0) {
return emptySearchResponse();
}
return Response.json(items, { status: 200 });
}
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems);
if (!items || items.length === 0) {
return emptySearchResponse();
}
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
console.error("Facebook scraping error:", error); logger.error("Facebook scraping error:", error);
const errorMessage = const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred"; error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 }); return Response.json({ message: errorMessage }, { status: 400 });

View File

@@ -0,0 +1,64 @@
/**
 * Resolves the mandatory search query for a request.
 *
 * The `query` request header takes precedence; the `q` URL search parameter
 * is the fallback. Empty strings count as missing.
 *
 * @returns The query string, or a 400 JSON `Response` when neither source
 *   provides a non-empty value.
 */
export function getRequiredSearchQuery(req: Request): string | Response {
  const fromHeader = req.headers.get("query");
  if (fromHeader) {
    return fromHeader;
  }

  const fromSearchParam = new URL(req.url).searchParams.get("q");
  if (fromSearchParam) {
    return fromSearchParam;
  }

  return Response.json(
    {
      message: "Request didn't have 'query' header or 'q' search parameter!",
    },
    { status: 400 },
  );
}
/**
 * Parses an optional URL search parameter as a non-negative integer.
 *
 * @param searchParams - Search parameters to read from.
 * @param name - Parameter name; also echoed in the error message.
 * @param defaultValue - Value returned when the parameter is absent.
 * @returns The parsed integer, `defaultValue` when the parameter is absent,
 *   or a 400 JSON `Response` when the value is not a plain run of decimal
 *   digits or does not fit in a safe integer.
 */
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue: number,
): number | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue?: number,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return defaultValue;
  }

  // Digits only: rejects signs, decimals, exponents, whitespace and "".
  const parsed = /^\d+$/.test(rawValue) ? Number(rawValue) : Number.NaN;

  // A very long digit string passes the regex but converts to an imprecise
  // number or Infinity; treat that as invalid rather than passing it on as a
  // page/item limit.
  if (!Number.isSafeInteger(parsed)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }

  return parsed;
}
/**
 * Parses an optional price parameter given in dollars and converts it to
 * integer cents.
 *
 * Accepted format: one or more digits, optionally followed by a dot and one
 * or two decimal digits (e.g. `5`, `19.9`, `19.99`).
 *
 * @param searchParams - Search parameters to read from.
 * @param name - Parameter name; also echoed in the error message.
 * @returns The price in cents, `undefined` when the parameter is absent, or a
 *   400 JSON `Response` when the value is malformed or too large to represent
 *   as a safe integer number of cents.
 */
export function parseDollarPriceParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return undefined;
  }

  // Math.round absorbs binary floating-point error (19.99 * 100 is
  // 1998.9999999999998).
  const cents = /^\d+(?:\.\d{1,2})?$/.test(rawValue)
    ? Math.round(Number(rawValue) * 100)
    : Number.NaN;

  // An oversized digit string would otherwise become an imprecise value or
  // Infinity cents.
  if (!Number.isSafeInteger(cents)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }

  return cents;
}
/**
 * Builds the 404 JSON response used when a search yields nothing.
 *
 * @param hint - Optional extra guidance appended after the base message,
 *   separated by a single space. An empty string is treated as no hint.
 */
export function emptySearchResponse(hint?: string): Response {
  let message = "Search didn't return any results!";
  if (hint) {
    message = `${message} ${hint}`;
  }
  return Response.json({ message }, { status: 404 });
}

View File

@@ -1,4 +1,11 @@
import { fetchKijijiItems } from "@marketplace-scrapers/core"; import { fetchKijijiItems } from "@marketplace-scrapers/core";
import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseDollarPriceParam,
parseNonNegativeIntegerParam,
} from "./helpers";
/** /**
* GET /api/kijiji?q={query} * GET /api/kijiji?q={query}
@@ -7,44 +14,68 @@ import { fetchKijijiItems } from "@marketplace-scrapers/core";
export async function kijijiRoute(req: Request): Promise<Response> { export async function kijijiRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY = getRequiredSearchQuery(req);
req.headers.get("query") || reqUrl.searchParams.get("q") || null; if (SEARCH_QUERY instanceof Response) {
if (!SEARCH_QUERY) return SEARCH_QUERY;
return Response.json( }
{
message: "Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const maxPagesParam = reqUrl.searchParams.get("maxPages"); const maxPages = parseNonNegativeIntegerParam(
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5; reqUrl.searchParams,
const priceMinParam = reqUrl.searchParams.get("priceMin"); "maxPages",
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined; 5,
const priceMaxParam = reqUrl.searchParams.get("priceMax"); );
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined; if (maxPages instanceof Response) {
return maxPages;
}
const priceMin = parseDollarPriceParam(reqUrl.searchParams, "priceMin");
if (priceMin instanceof Response) {
return priceMin;
}
const priceMax = parseDollarPriceParam(reqUrl.searchParams, "priceMax");
if (priceMax instanceof Response) {
return priceMax;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
const searchOptions = { const searchOptions = {
location: reqUrl.searchParams.get("location") || undefined, location: reqUrl.searchParams.get("location") || undefined,
category: reqUrl.searchParams.get("category") || undefined, category: reqUrl.searchParams.get("category") || undefined,
keywords: reqUrl.searchParams.get("keywords") || undefined, keywords: reqUrl.searchParams.get("keywords") || undefined,
sortBy: reqUrl.searchParams.get("sortBy") as sortBy:
| "relevancy" (reqUrl.searchParams.get("sortBy") as
| "date" | "relevancy"
| "price" | "date"
| "distance" | "price"
| undefined, | "distance"
sortOrder: reqUrl.searchParams.get("sortOrder") as | undefined) || undefined,
| "desc" sortOrder:
| "asc" (reqUrl.searchParams.get("sortOrder") as "desc" | "asc" | undefined) ||
| undefined, undefined,
maxPages, maxPages,
priceMin, priceMin,
priceMax, priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
}; };
try { try {
if (hideUnstableResults) {
const items = await fetchKijijiItems(
SEARCH_QUERY,
4, // 4 requests per second for faster scraping
"https://www.kijiji.ca",
searchOptions,
{},
{ hideUnstableResults: true },
);
if (items.results.length === 0 && items.unstableResults.length === 0) {
return emptySearchResponse(
`Kijiji matches ALL words in the query against listing titles. ` +
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
);
}
return Response.json(items, { status: 200 });
}
const items = await fetchKijijiItems( const items = await fetchKijijiItems(
SEARCH_QUERY, SEARCH_QUERY,
4, // 4 requests per second for faster scraping 4, // 4 requests per second for faster scraping
@@ -52,14 +83,15 @@ export async function kijijiRoute(req: Request): Promise<Response> {
searchOptions, searchOptions,
{}, {},
); );
if (!items) if (!items || items.length === 0) {
return Response.json( return emptySearchResponse(
{ message: "Search didn't return any results!" }, `Kijiji matches ALL words in the query against listing titles. ` +
{ status: 404 }, `Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
); );
}
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
console.error("Kijiji scraping error:", error); logger.error("Kijiji scraping error:", error);
const errorMessage = const errorMessage =
error instanceof Error ? error.message : "Unknown error occurred"; error instanceof Error ? error.message : "Unknown error occurred";
return Response.json({ message: errorMessage }, { status: 400 }); return Response.json({ message: errorMessage }, { status: 400 });

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,9 @@
{ {
"extends": "../../tsconfig.json",
"compilerOptions": { "compilerOptions": {
"lib": ["dom"],
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"paths": { "paths": {
"@/*": ["./src/*"] "@/*": ["./src/*"]
}, }
"strict": true, },
"noEmit": true "include": ["./src", "./test", "../../types/**/*.d.ts"]
}
} }

View File

@@ -18,6 +18,7 @@
- Isolate marketplace-specific hacks/selectors inside the owning scraper file unless they are genuinely shared. - Isolate marketplace-specific hacks/selectors inside the owning scraper file unless they are genuinely shared.
- If a new helper is scraper-local, keep it local. Do not promote it into `utils` early. - If a new helper is scraper-local, keep it local. Do not promote it into `utils` early.
- If you change shared types or exports, check downstream imports in both adapter packages. - If you change shared types or exports, check downstream imports in both adapter packages.
- eBay SplashUI challenge handling needs raw `fetch` for manual redirects and `getSetCookie()`; use `fetchHtml` only once the flow only needs final HTML.
## Tests ## Tests

View File

@@ -2,18 +2,24 @@
"name": "@marketplace-scrapers/core", "name": "@marketplace-scrapers/core",
"version": "1.0.0", "version": "1.0.0",
"type": "module", "type": "module",
"main": "./src/index.ts", "exports": {
"module": "./src/index.ts", ".": "./src/index.ts"
},
"private": true, "private": true,
"scripts": {
"typecheck": "bun tsgo"
},
"dependencies": { "dependencies": {
"@typescript/native-preview": "catalog:",
"argon2-wasm-pro": "1.1.0",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"linkedom": "^0.18.12", "linkedom": "^0.18.12",
"unidecode": "^1.1.0" "unidecode": "^1.1.0"
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "latest", "@types/bun": "catalog:",
"@types/unidecode": "^1.1.0", "@types/cli-progress": "catalog:",
"@types/cli-progress": "^3.11.6" "@types/unidecode": "catalog:"
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5" "typescript": "^5"

View File

@@ -39,5 +39,7 @@ export * from "./types/common";
// Export shared utilities // Export shared utilities
export * from "./utils/cookies"; export * from "./utils/cookies";
export * from "./utils/delay"; export * from "./utils/delay";
export * from "./utils/ebay-challenge";
export * from "./utils/format"; export * from "./utils/format";
export * from "./utils/http"; export * from "./utils/http";
export * from "./utils/unstable";

View File

@@ -1,10 +1,19 @@
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { import {
type CookieConfig, type CookieConfig,
ensureCookies, ensureCookies,
formatCookiesForHeader, formatCookiesForHeader,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay"; import { delay } from "../utils/delay";
import { solveEbayChallenge } from "../utils/ebay-challenge";
import { fetchHtml, HttpError, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
// eBay cookie configuration // eBay cookie configuration
const EBAY_COOKIE_CONFIG: CookieConfig = { const EBAY_COOKIE_CONFIG: CookieConfig = {
@@ -32,6 +41,243 @@ export interface EbayListingDetails {
address?: string | null; address?: string | null;
} }
// Matches text that starts (after optional whitespace) with a price prefix:
// "CA", "C" or "US" followed by optional whitespace and "$", or a bare "$", "£" or "¥".
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C|US)\s*\$|\s*[$£¥])/u;
// Matches absolute http(s) URLs on ebay.ca or ebay.com (optional "www.") whose path starts with /itm/.
const EBAY_ITEM_URL_RE = /^https?:\/\/(?:www\.)?ebay\.(?:ca|com)\/itm\//u;
/**
 * Decodes the small set of HTML entities handled here (`&amp;`, `&quot;`,
 * `&#39;`, `&lt;`, `&gt;`) and trims surrounding whitespace.
 *
 * Decoding is done in a single pass so each entity is decoded exactly once:
 * `&amp;lt;` becomes the literal text `&lt;`, not `<`. Replacing `&amp;`
 * first and then the other entities would double-decode such input.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The decoded, trimmed text. Unrecognised entities are left as-is.
 */
function decodeHtmlEntities(value: string): string {
  const decodedByEntity: Record<string, string> = {
    "&amp;": "&",
    "&quot;": '"',
    "&#39;": "'",
    "&lt;": "<",
    "&gt;": ">",
  };

  return value
    .replace(
      /&(?:amp|quot|#39|lt|gt);/g,
      (entity) => decodedByEntity[entity] ?? entity,
    )
    .trim();
}
/**
 * Reduces an HTML fragment to plain text: every tag is replaced by a space,
 * whitespace runs are collapsed to a single space, and the result is passed
 * through `decodeHtmlEntities`.
 */
function stripHtml(value: string): string {
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const collapsed = withoutTags.replace(/\s+/g, " ");
  return decodeHtmlEntities(collapsed);
}
/**
 * Reads one attribute value out of a raw HTML tag string.
 *
 * The attribute name is matched case-insensitively and must be preceded by
 * whitespace. Double-quoted, single-quoted and unquoted values are supported.
 *
 * @param tag - Raw tag text, e.g. `<a href="/itm/1">`.
 * @param attrName - Attribute name; interpolated into a regular expression
 *   unescaped, so it should not contain regex metacharacters.
 * @returns The attribute value (possibly empty), or `null` when absent.
 */
function getHtmlAttr(tag: string, attrName: string): string | null {
  const attrPattern = new RegExp(
    `\\s${attrName}=(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`,
    "iu",
  );
  const found = attrPattern.exec(tag);
  if (found === null) {
    return null;
  }

  const [, doubleQuoted, singleQuoted, unquoted] = found;
  return doubleQuoted ?? singleQuoted ?? unquoted ?? null;
}
/**
 * Turns a possibly relative, possibly entity-encoded link into an absolute
 * eBay item URL.
 *
 * Relative links are resolved against `https://www.ebay.ca`.
 *
 * @returns The absolute href, or `null` when the link cannot be parsed or
 *   does not match `EBAY_ITEM_URL_RE`.
 */
function normalizeEbayUrl(url: string): string | null {
  const decodedUrl = decodeHtmlEntities(url);

  let absoluteHref: string;
  try {
    absoluteHref = new URL(decodedUrl, "https://www.ebay.ca").href;
  } catch {
    return null;
  }

  if (!EBAY_ITEM_URL_RE.test(absoluteHref)) {
    return null;
  }
  return absoluteHref;
}
/**
 * Assembles an `EbayListingDetails` record from raw url/title/price text.
 *
 * The title and price are stripped of markup first. The listing is rejected
 * (`null`) when the URL is not a recognised eBay item link, the title is
 * empty or the "Shop on eBay" placeholder, or the price cannot be parsed.
 * Accepted listings are always reported as an active "OFFER" with no address.
 */
function toEbayListing(
  url: string,
  title: string,
  priceText: string,
): EbayListingDetails | null {
  const itemUrl = normalizeEbayUrl(url);
  const plainTitle = stripHtml(title);
  const plainPrice = stripHtml(priceText);
  const parsedPrice = parseEbayPrice(plainPrice);

  const isPlaceholderTitle = plainTitle === "Shop on eBay";
  if (!itemUrl || !plainTitle || isPlaceholderTitle || !parsedPrice) {
    return null;
  }

  return {
    url: itemUrl,
    title: plainTitle,
    listingPrice: {
      amountFormatted: plainPrice,
      cents: parsedPrice.cents,
      currency: parsedPrice.currency,
    },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    address: null,
  };
}
/**
 * Returns the first non-blank string found under any of `keys`, trimmed.
 *
 * Keys are tried in the order given. Values that are not strings, or that
 * are empty after trimming, are skipped.
 *
 * @returns The trimmed string, or `null` when no key holds a usable string.
 */
function readObjectString(
  value: Record<string, unknown>,
  keys: string[],
): string | null {
  for (const key of keys) {
    const raw = value[key];
    if (typeof raw !== "string") continue;

    const trimmed = raw.trim();
    if (trimmed) return trimmed;
  }
  return null;
}
/**
 * Extracts display price text from a payload object.
 *
 * Lookup order:
 * 1. A plain string under `price`, `currentPrice` or `displayPrice`.
 * 2. For each of `price`, `currentPrice`, `displayPrice`, `priceInfo` holding
 *    a non-array object: a string under `amount`, `formatted` or `text`;
 *    otherwise its `value` (non-blank string or number), prefixed with
 *    `currency`/`currencyCode` and a space when one is present.
 *
 * @returns The price text, or `null` when nothing usable is found.
 */
function readPayloadPrice(value: Record<string, unknown>): string | null {
  const flatPrice = readObjectString(value, [
    "price",
    "currentPrice",
    "displayPrice",
  ]);
  if (flatPrice) return flatPrice;

  const nestedKeys = ["price", "currentPrice", "displayPrice", "priceInfo"];
  for (const nestedKey of nestedKeys) {
    const nested = value[nestedKey];
    const isPlainObject =
      Boolean(nested) && typeof nested === "object" && !Array.isArray(nested);
    if (!isPlainObject) continue;

    const priceObject = nested as Record<string, unknown>;

    const label = readObjectString(priceObject, [
      "amount",
      "formatted",
      "text",
    ]);
    if (label) return label;

    const rawAmount = priceObject.value;
    const currency = readObjectString(priceObject, [
      "currency",
      "currencyCode",
    ]);

    // String amounts are returned as-is (untrimmed) once known to be non-blank.
    let amountText: string | null = null;
    if (typeof rawAmount === "string") {
      amountText = rawAmount.trim() ? rawAmount : null;
    } else if (typeof rawAmount === "number") {
      amountText = String(rawAmount);
    }

    if (amountText !== null) {
      return currency ? `${currency} ${amountText}` : amountText;
    }
  }

  return null;
}
/**
 * Recursively walks a parsed payload and appends every listing-shaped object
 * it finds to `results`.
 *
 * An object counts as a listing when it has a url, a title and a price that
 * `toEbayListing` accepts. Such an object is not descended into further. Any
 * other object or array has all of its children visited.
 *
 * @param value - Any parsed JSON value; non-objects are ignored.
 * @param results - Output array, mutated in place.
 */
function collectPayloadListings(
  value: unknown,
  results: EbayListingDetails[],
): void {
  if (typeof value !== "object" || value === null) return;

  if (Array.isArray(value)) {
    value.forEach((entry) => collectPayloadListings(entry, results));
    return;
  }

  const node = value as Record<string, unknown>;
  const url = readObjectString(node, [
    "itemWebUrl",
    "itemUrl",
    "url",
    "webUrl",
  ]);
  const title = readObjectString(node, ["title", "itemTitle", "name"]);
  const priceText = readPayloadPrice(node);

  const listing =
    url && title && priceText ? toEbayListing(url, title, priceText) : null;
  if (listing) {
    results.push(listing);
    return;
  }

  // Not a listing itself (or rejected): keep searching its children.
  Object.values(node).forEach((child) =>
    collectPayloadListings(child, results),
  );
}
/**
 * Extracts listings from `data-inlinepayload` attributes embedded in the page.
 *
 * Each attribute value is entity-decoded, URI-decoded and parsed as JSON, and
 * the resulting tree is searched with `collectPayloadListings`. Payloads that
 * fail to decode or parse are skipped.
 */
function parseEmbeddedEbayListings(
  htmlString: HTMLString,
): EbayListingDetails[] {
  const collected: EbayListingDetails[] = [];
  const payloadAttrs = htmlString.matchAll(
    /data-inlinepayload=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/giu,
  );

  for (const [, doubleQuoted, singleQuoted, unquoted] of payloadAttrs) {
    const encodedPayload = doubleQuoted ?? singleQuoted ?? unquoted;
    if (!encodedPayload) continue;

    try {
      const json = decodeURIComponent(decodeHtmlEntities(encodedPayload));
      collectPayloadListings(JSON.parse(json), collected);
    } catch {
      // eBay inline payloads vary by module; non-JSON payloads are ignored.
    }
  }

  return collected;
}
/**
 * Regex-based extraction of listings from `s-card` markup in raw HTML.
 *
 * Each card yields at most one listing, built by `toEbayListing` from the
 * card's first `/itm/` link, first `s-card__title` element and first
 * `s-card__price` element. Cards missing any of the three are skipped.
 */
function parseSCardHtmlListings(htmlString: HTMLString): EbayListingDetails[] {
const results: EbayListingDetails[] = [];
// A card spans from a <div> whose class contains the word-bounded token `s-card`
// up to (not including) the next such <div>, </body> or </html>.
// The lookahead must succeed, so a final card followed by none of these is not matched.
const cardMatches = htmlString.matchAll(
/<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)[\s\S]*?(?=<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)|<\/body>|<\/html>)/giu,
);
for (const cardMatch of cardMatches) {
const cardHtml = cardMatch[0];
// First <a> opening tag in the card whose href contains "/itm/".
const linkTag = cardHtml.match(
/<a\b[^>]*\bhref=(?:"[^"]*\/itm\/[^"]*"|'[^']*\/itm\/[^']*'|[^\s>]*\/itm\/[^\s>]*)[^>]*>/iu,
)?.[0];
// Captures everything between the title element's opening tag and the first closing tag after it.
const titleMatch = cardHtml.match(
/<[^>]*\bclass=(?:"[^"]*\bs-card__title\b[^"]*"|'[^']*\bs-card__title\b[^']*'|[^\s>]*\bs-card__title\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu,
);
// Same capture strategy for the price element.
const priceMatch = cardHtml.match(
/<[^>]*\bclass=(?:"[^"]*\bs-card__price\b[^"]*"|'[^']*\bs-card__price\b[^']*'|[^\s>]*\bs-card__price\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu,
);
if (!linkTag || !titleMatch?.[1] || !priceMatch?.[1]) continue;
const href = getHtmlAttr(linkTag, "href");
if (!href) continue;
// toEbayListing strips markup and validates the url, title and price.
const listing = toEbayListing(href, titleMatch[1], priceMatch[1]);
if (listing) results.push(listing);
}
return results;
}
/**
 * Removes duplicate listings, keeping the first occurrence of each item.
 *
 * Two listings are duplicates when `canonicalizeEbayItemUrl` maps their URLs
 * to the same string. Input order is preserved and the input array is not
 * modified.
 */
function dedupeEbayListings(
  listings: EbayListingDetails[],
): EbayListingDetails[] {
  const seenKeys = new Set<string>();

  return listings.filter((listing) => {
    const key = canonicalizeEbayItemUrl(listing.url);
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
}
/**
 * Produces a stable key for an eBay item URL by dropping the query string,
 * fragment and anything after the numeric item id.
 *
 * Relative URLs are resolved against `https://www.ebay.ca`. When the path has
 * no `/itm/[slug/]<digits>` segment the full pathname is kept. Input that
 * cannot be parsed as a URL is returned unchanged.
 */
function canonicalizeEbayItemUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url, "https://www.ebay.ca");
  } catch {
    return url;
  }

  const itemPath =
    /\/itm\/(?:[^/?#]+\/)?\d+/.exec(parsed.pathname)?.[0] ?? parsed.pathname;
  return `${parsed.origin}${itemPath}`;
}
// ----------------------------- Utilities ----------------------------- // ----------------------------- Utilities -----------------------------
/** /**
@@ -56,7 +302,7 @@ function parseEbayPrice(
const cents = Math.round(dollars * 100); const cents = Math.round(dollars * 100);
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc. // Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = "USD"; // Default let currency = "CAD"; // Default for ebay.ca
if ( if (
cleaned.toUpperCase().includes("CAD") || cleaned.toUpperCase().includes("CAD") ||
@@ -64,24 +310,23 @@ function parseEbayPrice(
cleaned.includes("C $") cleaned.includes("C $")
) { ) {
currency = "CAD"; currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) { } else if (
cleaned.toUpperCase().includes("USD") ||
cleaned.toUpperCase().includes("US $") ||
cleaned.toUpperCase().includes("US$")
) {
currency = "USD"; currency = "USD";
} else if (cleaned.includes("£")) {
currency = "GBP";
} else if (cleaned.includes("€")) {
currency = "EUR";
} else if (cleaned.includes("¥")) {
currency = "JPY";
} }
return { cents, currency }; return { cents, currency };
} }
class HttpError extends Error {
constructor(
message: string,
public readonly status: number,
public readonly url: string,
) {
super(message);
this.name = "HttpError";
}
}
// ----------------------------- Parsing ----------------------------- // ----------------------------- Parsing -----------------------------
/** /**
@@ -93,8 +338,14 @@ function parseEbayListings(
exclusions: string[], exclusions: string[],
strictMode: boolean, strictMode: boolean,
): EbayListingDetails[] { ): EbayListingDetails[] {
const embeddedListings = parseEmbeddedEbayListings(htmlString);
if (embeddedListings.length > 0) {
return dedupeEbayListings(embeddedListings);
}
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const results: EbayListingDetails[] = []; const results: EbayListingDetails[] = [];
const seenUrls = new Set<string>();
// Find all listing links by looking for eBay item URLs (/itm/) // Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]'); const linkElements = document.querySelectorAll('a[href*="itm/"]');
@@ -109,9 +360,12 @@ function parseEbayListings(
if (!href.startsWith("http")) { if (!href.startsWith("http")) {
href = href.startsWith("//") href = href.startsWith("//")
? `https:${href}` ? `https:${href}`
: `https://www.ebay.com${href}`; : `https://www.ebay.ca${href}`;
} }
const canonicalUrl = canonicalizeEbayItemUrl(href);
if (seenUrls.has(canonicalUrl)) continue;
// Find the container - go up several levels to find the item container // Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures (often 5-10 levels deep) // Modern eBay uses complex nested structures (often 5-10 levels deep)
let container: Element | null = linkElement; let container: Element | null = linkElement;
@@ -173,16 +427,18 @@ function parseEbayListings(
"opens in a new window or tab", "opens in a new window or tab",
]; ];
let shortened = false;
for (const uiString of uiStrings) { for (const uiString of uiStrings) {
const uiIndex = title.indexOf(uiString); const uiIndex = title.indexOf(uiString);
if (uiIndex !== -1) { if (uiIndex !== -1) {
title = title.substring(0, uiIndex).trim(); title = title.substring(0, uiIndex).trim();
shortened = true;
break; // Only remove one UI string per title break; // Only remove one UI string per title
} }
} }
// If the title became empty or too short after cleaning, skip this item // If the title was shortened by UI cleaning and became too short, skip this item
if (title.length < 10) { if (shortened && title.length < 10) {
continue; continue;
} }
} }
@@ -215,7 +471,6 @@ function parseEbayListings(
!text.includes("core") && !text.includes("core") &&
!text.includes("ram") && !text.includes("ram") &&
!text.includes("ssd") && !text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements !text.includes('"') // Avoid measurements
) { ) {
priceElement = el; priceElement = el;
@@ -239,15 +494,10 @@ function parseEbayListings(
); );
// Filter to only elements that actually contain prices (not labels) // Filter to only elements that actually contain prices (not labels)
const actualPrices: HTMLElement[] = []; const actualPrices: Element[] = [];
for (const el of allPriceElements) { for (const el of allPriceElements) {
const text = el.textContent?.trim(); const text = el.textContent?.trim();
if ( if (text && EBAY_PRICE_TEXT_RE.test(text) && text.length < 50) {
text &&
/^\s*[$£¥]/u.test(text) &&
text.length < 50 &&
!/\d{4}/.test(text)
) {
actualPrices.push(el); actualPrices.push(el);
} }
} }
@@ -271,11 +521,10 @@ function parseEbayListings(
if (nonStrikethroughPrices.length > 0) { if (nonStrikethroughPrices.length > 0) {
// Use the first non-strikethrough price (sale price) // Use the first non-strikethrough price (sale price)
priceElement = nonStrikethroughPrices[0]; priceElement = nonStrikethroughPrices[0] ?? null;
} else { } else {
// Fallback: use the last price (likely the most current) // Fallback: use the last price (likely the most current)
const lastPrice = actualPrices[actualPrices.length - 1]; priceElement = actualPrices[actualPrices.length - 1] ?? null;
priceElement = lastPrice;
} }
} }
} }
@@ -323,33 +572,173 @@ function parseEbayListings(
}; };
results.push(listing); results.push(listing);
seenUrls.add(canonicalUrl);
} catch (err) { } catch (err) {
console.warn(`Error parsing eBay listing: ${err}`); logger.warn(`Error parsing eBay listing: ${err}`);
} }
} }
return results; if (results.length > 0) {
return results;
}
return dedupeEbayListings(
parseSCardHtmlListings(htmlString).filter((listing) => {
if (
exclusions.some((exclusion) =>
listing.title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
return false;
}
return (
!strictMode ||
keywords.some((keyword) =>
listing.title.toLowerCase().includes(keyword.toLowerCase()),
)
);
}),
);
} }
// ----------------------------- Cookie Loading ----------------------------- // ----------------------------- Session & Challenge -----------------------------
/** /**
* Load eBay cookies from EBAY_COOKIE * Load eBay cookies from EBAY_COOKIE env var
*/ */
async function loadEbayCookies(): Promise<string | undefined> { async function loadEbayCookies(): Promise<string | undefined> {
try { try {
const cookies = await ensureCookies(EBAY_COOKIE_CONFIG); const cookies = await ensureCookies(EBAY_COOKIE_CONFIG);
return formatCookiesForHeader(cookies, "www.ebay.ca"); return formatCookiesForHeader(cookies, "www.ebay.ca");
} catch { } catch {
console.warn( logger.warn(
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.", "No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
); );
return undefined; return undefined;
} }
} }
const EBAY_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
/**
 * Visit the eBay homepage to collect Akamai fingerprinting cookies.
 * These are required to pass the edge layer before any search request.
 *
 * Redirects are not followed, so any non-2xx answer (including a 3xx)
 * is treated as "no session".
 *
 * @returns A `name=value; name=value` Cookie header string, or `undefined`
 *   when the request fails, the response is not OK, or no cookie was set.
 */
async function warmEbaySession(): Promise<string | undefined> {
  try {
    const requestHeaders = {
      "User-Agent": EBAY_UA,
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
      "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
    };
    const res = await fetch("https://www.ebay.ca", {
      headers: requestHeaders,
      redirect: "manual",
    });
    if (!res.ok) {
      return undefined;
    }

    // Keep only the leading name=value of each Set-Cookie header;
    // attributes such as Path or Expires are discarded.
    const collected: Record<string, string> = {};
    for (const rawHeader of res.headers.getSetCookie?.() ?? []) {
      const parsed = rawHeader.match(/^([^=]+)=([^;]+)/);
      const name = parsed?.[1];
      const value = parsed?.[2];
      if (name && value) {
        collected[name] = value;
      }
    }

    const pairs = Object.keys(collected).map(
      (name) => `${name}=${collected[name] ?? ""}`,
    );
    return pairs.length === 0 ? undefined : pairs.join("; ");
  } catch {
    // Warm-up is best-effort: any failure means "no extra cookies".
    return undefined;
  }
}
/**
 * Merge several Cookie header strings into one.
 *
 * Each input is split on `;` and every `name=value` pair is stored under its
 * trimmed name, so a cookie appearing in a later string overrides the same
 * cookie from an earlier one. Segments without `=` (or starting with `=`)
 * are ignored, and only the first `=` separates the name from the value.
 *
 * @param base - Cookie string applied first.
 * @param additions - Further cookie strings; empty or `undefined` entries are skipped.
 * @returns The merged pairs joined with `"; "`.
 */
function mergeCookies(
  base: string,
  ...additions: (string | undefined)[]
): string {
  const merged: Record<string, string> = {};
  const sources = [base, ...additions.filter(Boolean)] as string[];

  sources.forEach((cookieString) => {
    cookieString.split(";").forEach((segment) => {
      const separatorAt = segment.indexOf("=");
      if (separatorAt <= 0) {
        return;
      }
      const name = segment.substring(0, separatorAt).trim();
      merged[name] = segment.substring(separatorAt + 1).trim();
    });
  });

  const pairs: string[] = [];
  for (const [name, value] of Object.entries(merged)) {
    pairs.push(`${name}=${value}`);
  }
  return pairs.join("; ");
}
/**
 * Copy the cookies set by a response into an existing cookie jar.
 *
 * Only the leading `name=value` of each Set-Cookie header is kept; headers
 * whose value is empty do not match the pattern and are skipped. Existing
 * entries with the same name are overwritten.
 *
 * @param res - Response whose Set-Cookie headers are read.
 * @param jar - Cookie name to value map, mutated in place.
 */
function collectResponseCookies(res: Response, jar: Record<string, string>) {
  // getSetCookie may be missing on older runtimes; treat that as "no cookies".
  const setCookieHeaders = res.headers.getSetCookie?.() ?? [];
  for (const rawHeader of setCookieHeaders) {
    const parsed = rawHeader.match(/^([^=]+)=([^;]+)/);
    if (!parsed) {
      continue;
    }
    const [, name, value] = parsed;
    if (name && value) {
      jar[name] = value;
    }
  }
}
/**
 * Serialize a cookie jar into a Cookie header value.
 *
 * @param jar - Cookie name to value map.
 * @returns `name=value` pairs joined with `"; "`, or an empty string for an empty jar.
 */
function cookiesToString(jar: Record<string, string>): string {
  const pairs: string[] = [];
  for (const name of Object.keys(jar)) {
    pairs.push(`${name}=${jar[name]}`);
  }
  return pairs.join("; ");
}
// HTTP status checked when detecting a redirect to the challenge page.
const CHALLENGE_REDIRECT = 307;
// Substring of the Location header that identifies the challenge page.
const CHALLENGE_MARKER = "splashui/challenge";

/**
 * Tell whether a response is a redirect to the challenge page.
 *
 * @param res - Response to inspect (expected to be fetched without following redirects).
 * @returns `true` only when the status is 307 and the Location header
 *   contains the challenge marker.
 */
function isChallengeRedirect(res: Response): boolean {
  if (res.status !== CHALLENGE_REDIRECT) {
    return false;
  }
  const location = res.headers.get("location") ?? "";
  return location.includes(CHALLENGE_MARKER);
}
/**
 * Heuristically detect a challenge page from its HTML.
 *
 * @param html - Response body to inspect.
 * @returns `true` when the body is shorter than 50000 characters and
 *   contains either the `_crefId` or the `_cdetail` marker.
 */
function isChallengeHtml(html: string): boolean {
  if (html.length >= 50000) {
    return false;
  }
  return ["_crefId", "_cdetail"].some((marker) => html.includes(marker));
}
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined,
opts:
| {
minPrice?: number;
maxPrice?: number;
strictMode?: boolean;
exclusions?: string[];
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
maxItems?: number;
}
| undefined,
unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<EbayListingDetails>>;
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
opts?: {
minPrice?: number;
maxPrice?: number;
strictMode?: boolean;
exclusions?: string[];
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
maxItems?: number;
},
unstableMode?: UnstableListingModeOptions,
): Promise<EbayListingDetails[]>;
export default async function fetchEbayItems( export default async function fetchEbayItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
@@ -361,8 +750,12 @@ export default async function fetchEbayItems(
keywords?: string[]; keywords?: string[];
buyItNowOnly?: boolean; buyItNowOnly?: boolean;
canadaOnly?: boolean; canadaOnly?: boolean;
maxItems?: number;
} = {}, } = {},
unstableMode: UnstableListingModeOptions = {},
) { ) {
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
const { const {
minPrice = 0, minPrice = 0,
maxPrice = Number.MAX_SAFE_INTEGER, maxPrice = Number.MAX_SAFE_INTEGER,
@@ -371,9 +764,26 @@ export default async function fetchEbayItems(
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
buyItNowOnly = true, buyItNowOnly = true,
canadaOnly = true, canadaOnly = true,
maxItems,
} = opts; } = opts;
const cookies = await loadEbayCookies(); const finalizeResults = (
listings: EbayListingDetails[],
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
const limitedListings =
maxItems !== undefined ? listings.slice(0, maxItems) : listings;
if (!unstableMode.hideUnstableResults) {
return limitedListings;
}
return classifyUnstableListings(limitedListings);
};
// Collect cookies from env var + warm-up session
const envCookies = await loadEbayCookies();
const warmCookies = await warmEbaySession();
const baseCookies = mergeCookies(envCookies ?? "", warmCookies);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference // Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({ const urlParams = new URLSearchParams({
@@ -392,38 +802,109 @@ export default async function fetchEbayItems(
const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`; const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`;
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
console.log(`Fetching eBay search: ${searchUrl}`); logger.log(`Fetching eBay search: ${searchUrl}`);
try { try {
// Use custom headers modeled after real browser requests to bypass bot detection const searchHeaders: Record<string, string> = {
const headers: Record<string, string> = { "User-Agent": EBAY_UA,
"User-Agent": Accept:
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
Referer: "https://www.ebay.ca/", Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
Priority: "u=0, i",
}; };
// Add cookies if available (helps bypass bot detection) if (baseCookies) {
if (cookies) { searchHeaders.Cookie = baseCookies;
headers.Cookie = cookies;
} }
const res = await fetch(searchUrl, { // Step 1: Make search request (follow redirects for challenge flow)
let res = await fetch(searchUrl, {
method: "GET", method: "GET",
headers, headers: searchHeaders,
redirect: "manual",
}); });
const cookieJar: Record<string, string> = {};
// Collect cookies from homepage warm-up
if (baseCookies) {
for (const pair of baseCookies.split(";")) {
const eq = pair.indexOf("=");
if (eq > 0) {
cookieJar[pair.substring(0, eq).trim()] = pair
.substring(eq + 1)
.trim();
}
}
}
// Step 2: Follow challenge redirect if present
if (isChallengeRedirect(res)) {
const chalUrl = res.headers.get("location") ?? "";
collectResponseCookies(res, cookieJar);
logger.log("Challenge detected, fetching challenge page...");
res = await fetch(chalUrl, {
headers: { ...searchHeaders, Cookie: cookiesToString(cookieJar) },
redirect: "manual",
});
collectResponseCookies(res, cookieJar);
}
// Step 3: If response is challenge HTML, solve and submit
const responseHtml = await res.text();
if (isChallengeHtml(responseHtml)) {
logger.log("Solving challenge...");
const result = await solveEbayChallenge(
responseHtml,
cookiesToString(cookieJar),
);
if (result) {
// Merge answer cookies into jar
if (baseCookies) {
searchHeaders.Cookie = mergeCookies(baseCookies, result.cookies);
} else {
searchHeaders.Cookie = result.cookies;
}
logger.log("Challenge solved, retrying search...");
// Delay briefly before retry
await delay(DELAY_MS);
const retryHtml = await fetchHtml(searchUrl, DELAY_MS, {
headers: searchHeaders,
});
const listings = parseEbayListings(
retryHtml,
keywords,
exclusions,
strictMode,
);
const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents;
return (
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
);
});
logger.log(
`Parsed ${filteredListings.length} eBay listings (after challenge).`,
);
return finalizeResults(filteredListings);
}
logger.warn("Challenge solve failed, returning empty results.");
return finalizeResults([]);
}
// Step 4: Normal flow — no challenge
if (!res.ok) { if (!res.ok) {
throw new HttpError( throw new HttpError(
`Request failed with status ${res.status}`, `Request failed with status ${res.status}`,
@@ -432,33 +913,32 @@ export default async function fetchEbayItems(
); );
} }
const searchHtml = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS); await delay(DELAY_MS);
console.log(`\nParsing eBay listings...`); logger.log(`\nParsing eBay listings...`);
const listings = parseEbayListings( const listings = parseEbayListings(
searchHtml, responseHtml,
keywords, keywords,
exclusions, exclusions,
strictMode, strictMode,
); );
// Filter by price range (additional safety check)
const filteredListings = listings.filter((listing) => { const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents; const cents = listing.listingPrice?.cents;
return cents && cents >= minPrice && cents <= maxPrice; return (
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
);
}); });
console.log(`Parsed ${filteredListings.length} eBay listings.`); logger.log(`Parsed ${filteredListings.length} eBay listings.`);
return filteredListings; return finalizeResults(filteredListings);
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError || err instanceof RateLimitError) {
console.error( logger.warn(
`Failed to fetch eBay search (${err.status}): ${err.message}`, `Failed to fetch eBay search (${err instanceof HttpError ? err.statusCode : 429}): ${err.message}`,
); );
return []; return finalizeResults([]);
} }
throw err; throw err;
} }

View File

@@ -1,23 +1,35 @@
import cliProgress from "cli-progress"; import cliProgress from "cli-progress";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common"; import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { import {
type Cookie, type Cookie,
type CookieConfig, type CookieConfig,
ensureCookies, ensureCookies,
formatCookiesForHeader, formatCookiesForHeader,
loadCookiesOptional,
parseCookieString, parseCookieString,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay"; import {
buildFacebookHeaders,
detectFacebookChallenge,
warmFacebookSession,
} from "../utils/facebook-challenge";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http"; import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
/** /**
* Facebook Marketplace Scraper * Facebook Marketplace Scraper
* *
* Note: Facebook Marketplace requires authentication cookies for full access. * Facebook Marketplace returns search results without authentication when
* This implementation will return limited or no results without proper authentication. * proper browser headers are sent. Prices and seller details are hidden on
* This is by design to respect Facebook's authentication requirements. * search results but are available on individual item pages even without
* auth cookies. For full-price search results, provide FACEBOOK_COOKIE.
*/ */
// Facebook cookie configuration // Facebook cookie configuration
@@ -81,7 +93,7 @@ interface FacebookMarketplaceItem {
__typename: "GroupCommerceProductItem"; __typename: "GroupCommerceProductItem";
// Listing content // Listing content
marketplace_listing_title: string; marketplace_listing_title?: string;
redacted_description?: { redacted_description?: {
text: string; text: string;
}; };
@@ -94,7 +106,7 @@ interface FacebookMarketplaceItem {
listing_price?: { listing_price?: {
amount: string; amount: string;
currency: string; currency: string;
amount_with_offset: string; amount_with_offset?: string;
}; };
// Location // Location
@@ -122,9 +134,9 @@ interface FacebookMarketplaceItem {
// Seller information // Seller information
marketplace_listing_seller?: { marketplace_listing_seller?: {
__typename: "User"; __typename?: "User";
id: string; id?: string;
name: string; name?: string;
profile_picture?: { profile_picture?: {
uri: string; uri: string;
}; };
@@ -213,17 +225,6 @@ export async function ensureFacebookCookies(): Promise<Cookie[]> {
return ensureCookies(FACEBOOK_COOKIE_CONFIG); return ensureCookies(FACEBOOK_COOKIE_CONFIG);
} }
class HttpError extends Error {
constructor(
message: string,
public readonly status: number,
public readonly url: string,
) {
super(message);
this.name = "HttpError";
}
}
// ----------------------------- Extraction Metrics ----------------------------- // ----------------------------- Extraction Metrics -----------------------------
/** /**
@@ -255,113 +256,28 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
successRate < 0.8 && successRate < 0.8 &&
!extractionStats.lastApiChangeDetected !extractionStats.lastApiChangeDetected
) { ) {
console.warn( logger.warn(
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.", "Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
); );
extractionStats.lastApiChangeDetected = new Date(); extractionStats.lastApiChangeDetected = new Date();
} }
if (!success && itemId) { if (!success && itemId) {
console.warn(`Facebook API extraction failed for item ${itemId}`); logger.warn(`Facebook API extraction failed for item ${itemId}`);
} }
} }
// ----------------------------- HTTP Client ----------------------------- // ----------------------------- HTTP Client -----------------------------
/** function createFacebookHeaders(cookies: string): Record<string, string> {
Fetch HTML with a basic retry strategy and simple rate-limit delay between calls. const jar: Record<string, string> = {};
- Retries on 429 and 5xx if (cookies) {
- Respects X-RateLimit-Reset when present (seconds) for (const pair of cookies.split(";")) {
- Supports custom cookies for Facebook authentication const [name, ...rest] = pair.trim().split("=");
*/ if (name && rest.length > 0) jar[name.trim()] = rest.join("=").trim();
async function fetchHtml(
url: string,
DELAY_MS: number,
opts?: {
maxRetries?: number;
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
cookies?: string;
},
): Promise<{ html: HTMLString; responseUrl: string }> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const headers: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"accept-encoding": "gzip, deflate, br",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
};
// Add cookies if provided
if (opts?.cookies) {
headers.cookie = opts.cookies;
}
const res = await fetch(url, {
method: "GET",
headers,
});
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
await delay(waitMs);
continue;
}
// For Facebook, 400 often means authentication required
// Don't retry 4xx client errors except 429
if (res.status >= 400 && res.status < 500 && res.status !== 429) {
throw new HttpError(
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
res.status,
url,
);
}
// Retry on 5xx
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await delay((attempt + 1) * retryBaseMs);
continue;
}
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
}
const html = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
return { html, responseUrl: res.url || url };
} catch (err) {
if (attempt >= maxRetries) throw err;
await delay((attempt + 1) * retryBaseMs);
} }
} }
return buildFacebookHeaders(jar);
throw new Error("Exhausted retries without response");
} }
// ----------------------------- Parsing ----------------------------- // ----------------------------- Parsing -----------------------------
@@ -371,13 +287,29 @@ export type FacebookResponseKind =
| "item" | "item"
| "auth_gated" | "auth_gated"
| "unavailable" | "unavailable"
| "checkpoint"
| "unknown"; | "unknown";
export function classifyFacebookResponse( export function classifyFacebookResponse(
htmlString: HTMLString, htmlString: HTMLString,
responseUrl: string, responseUrl: string,
status = 200,
) { ) {
const challengeType = detectFacebookChallenge(
status,
htmlString,
responseUrl,
);
if (challengeType === "checkpoint") {
return {
kind: "checkpoint" as const,
authGated: false,
unavailable: false,
};
}
const authGated = const authGated =
challengeType === "login_wall" ||
responseUrl.includes("/login/") || responseUrl.includes("/login/") ||
htmlString.includes("You must log in") || htmlString.includes("You must log in") ||
htmlString.includes("log in to continue"); htmlString.includes("log in to continue");
@@ -391,7 +323,11 @@ export function classifyFacebookResponse(
htmlString.includes("This listing is no longer available") || htmlString.includes("This listing is no longer available") ||
htmlString.includes("listing has been removed"); htmlString.includes("listing has been removed");
if (unavailable) { if (unavailable) {
return { kind: "unavailable" as const, authGated: false, unavailable: true }; return {
kind: "unavailable" as const,
authGated: false,
unavailable: true,
};
} }
if (responseUrl.includes("/marketplace/item/")) { if (responseUrl.includes("/marketplace/item/")) {
@@ -438,7 +374,8 @@ function isFacebookSearchEdgeArray(value: unknown): value is FacebookEdge[] {
Array.isArray(value) && Array.isArray(value) &&
value.length > 0 && value.length > 0 &&
value.every( value.every(
(edge) => isRecord(edge) && isRecord(edge.node) && isRecord(edge.node.listing), (edge) =>
isRecord(edge) && isRecord(edge.node) && isRecord(edge.node.listing),
) )
); );
} }
@@ -535,8 +472,7 @@ function scoreMarketplaceItemPath(path: string[]): number {
if ( if (
path.some( path.some(
(segment) => (segment) => segment.includes("recommend") || segment.includes("related"),
segment.includes("recommend") || segment.includes("related"),
) )
) { ) {
score -= 10; score -= 10;
@@ -550,7 +486,9 @@ function collectMarketplaceItemCandidates(
path: string[] = [], path: string[] = [],
): FacebookMarketplaceItemMatch[] { ): FacebookMarketplaceItemMatch[] {
if (Array.isArray(candidate)) { if (Array.isArray(candidate)) {
return candidate.flatMap((item) => collectMarketplaceItemCandidates(item, path)); return candidate.flatMap((item) =>
collectMarketplaceItemCandidates(item, path),
);
} }
if (!isRecord(candidate)) { if (!isRecord(candidate)) {
@@ -611,7 +549,9 @@ function extractRenderedText(node: ParentNode, selector: string): string[] {
.filter((text): text is string => Boolean(text)); .filter((text): text is string => Boolean(text));
} }
function extractMarketplaceItemIdFromElement(element: Element | null): string | null { function extractMarketplaceItemIdFromElement(
element: Element | null,
): string | null {
const href = element?.getAttribute("href") || ""; const href = element?.getAttribute("href") || "";
return href.match(FACEBOOK_ITEM_HREF_RE)?.[1] ?? null; return href.match(FACEBOOK_ITEM_HREF_RE)?.[1] ?? null;
} }
@@ -649,7 +589,9 @@ function extractFacebookPermalinkItemId(document: Document): string | null {
return extractMarketplaceItemIdFromElement(itemLinks.at(-1) ?? null); return extractMarketplaceItemIdFromElement(itemLinks.at(-1) ?? null);
} }
function extractFacebookDescriptionText(document: Document): string | undefined { function extractFacebookDescriptionText(
document: Document,
): string | undefined {
const labels = Array.from(document.querySelectorAll("div, span, h2, h3, p")); const labels = Array.from(document.querySelectorAll("div, span, h2, h3, p"));
for (const label of labels) { for (const label of labels) {
@@ -742,7 +684,10 @@ function extractFacebookItemHtmlFallback(
const priceText = texts.find((text) => FACEBOOK_PRICE_TEXT_RE.test(text)); const priceText = texts.find((text) => FACEBOOK_PRICE_TEXT_RE.test(text));
const parsedPrice = priceText ? parseFacebookRenderedPrice(priceText) : null; const parsedPrice = priceText ? parseFacebookRenderedPrice(priceText) : null;
const location = texts.find( const location = texts.find(
(text) => text !== title && text !== priceText && FACEBOOK_LOCATION_TEXT_RE.test(text), (text) =>
text !== title &&
text !== priceText &&
FACEBOOK_LOCATION_TEXT_RE.test(text),
); );
const description = extractFacebookDescriptionText(document); const description = extractFacebookDescriptionText(document);
@@ -790,18 +735,18 @@ export function extractFacebookMarketplaceData(
if (htmlString.includes("XCometMarketplaceSearchController")) { if (htmlString.includes("XCometMarketplaceSearchController")) {
const htmlFallback = extractFacebookMarketplaceHtmlFallback(htmlString); const htmlFallback = extractFacebookMarketplaceHtmlFallback(htmlString);
if (htmlFallback?.length) { if (htmlFallback?.length) {
console.log( logger.log(
`Successfully parsed ${htmlFallback.length} Facebook marketplace listings from rendered HTML fallback`, `Successfully parsed ${htmlFallback.length} Facebook marketplace listings from rendered HTML fallback`,
); );
return htmlFallback; return htmlFallback;
} }
} }
console.warn("No marketplace data found in HTML response"); logger.warn("No marketplace data found in HTML response");
return null; return null;
} }
console.log( logger.log(
`Successfully parsed ${bestEdges.length} Facebook marketplace listings`, `Successfully parsed ${bestEdges.length} Facebook marketplace listings`,
); );
return bestEdges.map((edge) => ({ node: edge.node })); return bestEdges.map((edge) => ({ node: edge.node }));
@@ -824,7 +769,8 @@ export function extractFacebookItemData(
if ( if (
!bestMatch || !bestMatch ||
match.score > bestMatch.score || match.score > bestMatch.score ||
(match.score === bestMatch.score && match.path.length < bestMatch.path.length) (match.score === bestMatch.score &&
match.path.length < bestMatch.path.length)
) { ) {
bestMatch = match; bestMatch = match;
} }
@@ -835,6 +781,22 @@ export function extractFacebookItemData(
return bestMatch.item; return bestMatch.item;
} }
// Try marketplace_product_details_page.target path (current item page structure)
for (const candidate of candidates) {
const detailsPage = findKeyInObject(
candidate,
"marketplace_product_details_page",
) as Record<string, unknown> | undefined;
const target = detailsPage?.target as Record<string, unknown> | undefined;
if (
target &&
typeof target.id === "string" &&
typeof target.marketplace_listing_title === "string"
) {
return target as unknown as FacebookMarketplaceItem;
}
}
if (htmlString.includes("XCometMarketplacePermalinkController")) { if (htmlString.includes("XCometMarketplacePermalinkController")) {
return extractFacebookItemHtmlFallback(htmlString); return extractFacebookItemHtmlFallback(htmlString);
} }
@@ -842,6 +804,25 @@ export function extractFacebookItemData(
return null; return null;
} }
/**
 * Depth-first search for the first occurrence of a key in a nested value.
 *
 * Arrays are only traversed (their own indices are never matched against the
 * key). For a non-array object the key is checked with `in` before its
 * values are searched, so a match on the object itself wins over any nested
 * match, even if the stored value is `undefined`.
 *
 * @param obj - Arbitrary value to search, typically parsed JSON.
 * @param targetKey - Property name to look for.
 * @returns The value stored under the first matching key, or `undefined`
 *   when no match is found.
 */
function findKeyInObject(obj: unknown, targetKey: string): unknown {
  if (obj === null || typeof obj !== "object") {
    return undefined;
  }

  let children: unknown[];
  if (Array.isArray(obj)) {
    children = obj;
  } else {
    const record = obj as Record<string, unknown>;
    if (targetKey in record) {
      return record[targetKey];
    }
    children = Object.values(record);
  }

  for (const child of children) {
    const hit = findKeyInObject(child, targetKey);
    if (hit !== undefined) {
      return hit;
    }
  }
  return undefined;
}
/** /**
Parse Facebook marketplace search results into ListingDetails[] Parse Facebook marketplace search results into ListingDetails[]
*/ */
@@ -873,35 +854,25 @@ export function parseFacebookAds(
: priceObj.amount; : priceObj.amount;
cents = Math.round(dollars * 100); cents = Math.round(dollars * 100);
} else if (priceObj.amount_with_offset_in_currency != null) { } else if (priceObj.amount_with_offset_in_currency != null) {
// Fallback: try to extract cents from amount_with_offset_in_currency if (!priceObj.formatted_amount) continue;
// This appears to use some exchange rate/multiplier format
const encodedAmount = Number(priceObj.amount_with_offset_in_currency); const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (!Number.isNaN(encodedAmount) && encodedAmount > 0) { if (!match) continue;
// Estimate roughly - this field doesn't contain real cents
// Use formatted_amount to get the actual dollar amount const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (priceObj.formatted_amount) { if (Number.isNaN(dollars)) continue;
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) { cents = Math.round(dollars * 100);
const dollars = Number.parseFloat(match[0].replace(",", "")); } else if (
if (!Number.isNaN(dollars)) { typeof priceObj.formatted_amount === "string" &&
cents = Math.round(dollars * 100); priceObj.formatted_amount.toUpperCase() === "FREE"
} else { ) {
cents = encodedAmount; // fallback cents = 0;
}
} else {
cents = encodedAmount; // fallback
}
} else {
cents = encodedAmount; // fallback
}
} else {
continue; // Invalid price
}
} else { } else {
continue; // No price available continue; // No price available
} }
if (!Number.isFinite(cents) || cents <= 0) continue; if (!Number.isFinite(cents) || cents < 0) continue;
// Extract address from location data if available // Extract address from location data if available
const cityName = const cityName =
@@ -960,7 +931,9 @@ export function parseFacebookAds(
}; };
results.push(listingDetails); results.push(listingDetails);
} catch {} } catch (error) {
logger.warn("Failed to parse Facebook ad:", error);
}
} }
return results; return results;
@@ -980,13 +953,13 @@ export function parseFacebookItem(
const url = `https://www.facebook.com/marketplace/item/${item.id}`; const url = `https://www.facebook.com/marketplace/item/${item.id}`;
// Extract price information // Extract price information
let cents = 0; let cents: number | undefined;
let currency = "CAD"; // Default let currency = "CAD"; // Default
let amountFormatted = item.formatted_price?.text || "FREE"; let amountFormatted = item.formatted_price?.text;
if (item.listing_price) { if (item.listing_price) {
currency = item.listing_price.currency || "CAD"; currency = item.listing_price.currency || "CAD";
if (item.listing_price.amount && item.listing_price.amount !== "0.00") { if (item.listing_price.amount != null) {
const amount = Number.parseFloat(item.listing_price.amount); const amount = Number.parseFloat(item.listing_price.amount);
if (!Number.isNaN(amount)) { if (!Number.isNaN(amount)) {
cents = Math.round(amount * 100); cents = Math.round(amount * 100);
@@ -1033,6 +1006,13 @@ export function parseFacebookItem(
listingType = "vehicle"; listingType = "vehicle";
} }
if (cents == null || !amountFormatted) {
if (!listingStatus || listingStatus === "ACTIVE") return null;
cents = 0;
amountFormatted = item.formatted_price?.text || "PRICE_UNAVAILABLE";
}
const listingDetails: FacebookListingDetails = { const listingDetails: FacebookListingDetails = {
url, url,
title, title,
@@ -1053,31 +1033,66 @@ export function parseFacebookItem(
return listingDetails; return listingDetails;
} catch (error) { } catch (error) {
console.warn(`Failed to parse Facebook item ${item.id}:`, error); logger.warn(`Failed to parse Facebook item ${item.id}:`, error);
return null; return null;
} }
} }
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchFacebookItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined,
LOCATION: string | undefined,
MAX_ITEMS: number | undefined,
unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
export default async function fetchFacebookItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
LOCATION?: string,
MAX_ITEMS?: number,
unstableMode?: UnstableListingModeOptions,
): Promise<FacebookListingDetails[]>;
export default async function fetchFacebookItems( export default async function fetchFacebookItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
LOCATION = "toronto", LOCATION = "toronto",
MAX_ITEMS = 25, MAX_ITEMS = 25,
unstableMode: UnstableListingModeOptions = {},
) { ) {
const cookies = await ensureFacebookCookies(); const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
const finalizeResults = (
listings: FacebookListingDetails[],
):
| FacebookListingDetails[]
| UnstableListingBuckets<FacebookListingDetails> => {
if (!unstableMode.hideUnstableResults) {
return listings.slice(0, MAX_ITEMS);
}
const classified = classifyUnstableListings(listings);
return {
results: classified.results.slice(0, MAX_ITEMS),
unstableResults: classified.unstableResults,
};
};
const warmupCookies = await warmFacebookSession();
const warmupHeader = Object.entries(warmupCookies)
.map(([k, v]) => `${k}=${v}`)
.join("; ");
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
// Format cookies for HTTP header
const domain = "www.facebook.com"; const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain); const userCookiesHeader = formatCookiesForHeader(userCookies, domain);
if (!cookiesHeader) { const cookiesHeader = [warmupHeader, userCookiesHeader]
throw new Error( .filter(Boolean)
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", .join("; ");
);
}
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
// Encode search query for URL // Encode search query for URL
const encodedQuery = encodeURIComponent(SEARCH_QUERY); const encodedQuery = encodeURIComponent(SEARCH_QUERY);
@@ -1085,86 +1100,110 @@ export default async function fetchFacebookItems(
// Facebook marketplace URL structure // Facebook marketplace URL structure
const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;
console.log(`Fetching Facebook marketplace: ${searchUrl}`); logger.log(`Fetching Facebook marketplace: ${searchUrl}`);
console.log(`Using ${cookies.length} cookies for authentication`); if (userCookies.length > 0) {
logger.log(`Using ${userCookies.length} cookies for authentication`);
}
let searchHtml: string; let searchHtml: string;
let searchResponseUrl = searchUrl; let searchResponseUrl = searchUrl;
try { try {
const response = await fetchHtml(searchUrl, DELAY_MS, { const response = await fetchHtml(searchUrl, DELAY_MS, {
maxRetries: 3, maxRetries: 3,
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( logger.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
cookies: cookiesHeader,
}); });
searchHtml = response.html; searchHtml = response.html;
searchResponseUrl = response.responseUrl; searchResponseUrl = response.responseUrl;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.warn( logger.warn(
`\nFacebook marketplace access failed (${err.status}): ${err.message}`, `\nFacebook marketplace access failed (${err.statusCode}): ${err.message}`,
); );
if (err.status === 400 || err.status === 401 || err.status === 403) { if (
console.warn( err.statusCode === 400 ||
err.statusCode === 401 ||
err.statusCode === 403
) {
logger.warn(
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.", "This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
); );
} }
return []; return finalizeResults([]);
}
if (err instanceof RateLimitError) {
logger.warn(`\nFacebook marketplace access rate limited: ${err.message}`);
return finalizeResults([]);
} }
throw err; throw err;
} }
const classification = classifyFacebookResponse(searchHtml, searchResponseUrl); const classification = classifyFacebookResponse(
searchHtml,
searchResponseUrl,
);
if (classification.authGated) { if (classification.authGated) {
console.warn("Facebook marketplace search redirected to login. Cookies may be expired."); logger.warn(
return []; "Facebook marketplace search redirected to login. Cookies may be expired.",
);
return finalizeResults([]);
}
if (classification.kind === "checkpoint") {
logger.warn(
"Facebook marketplace returned a checkpoint challenge. This may require manual verification.",
);
return finalizeResults([]);
} }
if (classification.unavailable) { if (classification.unavailable) {
console.warn("Facebook marketplace search returned an unavailable route."); logger.warn("Facebook marketplace search returned an unavailable route.");
return []; return finalizeResults([]);
} }
if (classification.kind !== "search") { if (classification.kind !== "search") {
console.warn( logger.warn(
`Facebook marketplace search returned unexpected route kind: ${classification.kind}.`, `Facebook marketplace search returned unexpected route kind: ${classification.kind}.`,
); );
return []; return finalizeResults([]);
} }
const ads = extractFacebookMarketplaceData(searchHtml); const ads = extractFacebookMarketplaceData(searchHtml);
if (!ads || ads.length === 0) { if (!ads || ads.length === 0) {
console.warn("No ads parsed from Facebook marketplace page."); logger.warn("No ads parsed from Facebook marketplace page.");
return []; return finalizeResults([]);
} }
console.log(`\nFound ${ads.length} raw ads. Processing...`); logger.log(`\nFound ${ads.length} raw ads. Processing...`);
const progressBar = new cliProgress.SingleBar( const isTTY = process.stdout?.isTTY ?? false;
{}, const progressBar = isTTY
cliProgress.Presets.shades_classic, ? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
); : null;
const totalProgress = ads.length; const totalProgress = ads.length;
const currentProgress = 0; progressBar?.start(totalProgress, 0);
progressBar.start(totalProgress, currentProgress);
const items = parseFacebookAds(ads); const items = parseFacebookAds(ads);
// Filter to only priced items (already done in parseFacebookAds) // Filter to only priced items (already done in parseFacebookAds)
const pricedItems = items.filter( const pricedItems = items.filter(
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0, (item) =>
typeof item.listingPrice?.cents === "number" &&
item.listingPrice.cents >= 0,
); );
progressBar.update(totalProgress); progressBar?.update(totalProgress);
progressBar.stop(); progressBar?.stop();
console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`); logger.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
return pricedItems.slice(0, MAX_ITEMS); // Limit results return finalizeResults(pricedItems);
} }
/** /**
@@ -1173,127 +1212,158 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem( export async function fetchFacebookItem(
itemId: string, itemId: string,
): Promise<FacebookListingDetails | null> { ): Promise<FacebookListingDetails | null> {
const cookies = await ensureFacebookCookies(); const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
const cookiesHeader = formatCookiesForHeader(userCookies, "www.facebook.com");
// Format cookies for HTTP header
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
);
}
const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`;
console.log(`Fetching Facebook marketplace item: ${itemUrl}`); logger.log(`Fetching Facebook marketplace item: ${itemUrl}`);
let itemHtml: string; let itemHtml: string;
let itemResponseUrl = itemUrl; let itemResponseUrl = itemUrl;
try { try {
const response = await fetchHtml(itemUrl, 1000, { const response = await fetchHtml(itemUrl, 1000, {
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( logger.log(
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
}, },
cookies: cookiesHeader,
}); });
itemHtml = response.html; itemHtml = response.html;
itemResponseUrl = response.responseUrl; itemResponseUrl = response.responseUrl;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.warn( logger.warn(
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`, `\nFacebook marketplace item access failed (${err.statusCode}): ${err.message}`,
); );
// Enhanced error handling based on status codes // Enhanced error handling based on status codes
switch (err.status) { switch (err.statusCode) {
case 400: case 400:
case 401: case 401:
case 403: case 403:
console.warn( logger.warn(
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.", "Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
); );
break; break;
case 404: case 404:
console.warn( logger.warn(
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.", "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
); );
break; break;
case 429: case 429:
console.warn( logger.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily.", "Rate limited: Too many requests. Facebook is blocking access temporarily.",
); );
break; break;
case 500: case 500:
case 502: case 502:
case 503: case 503:
console.warn( logger.warn(
"Facebook server error: Marketplace may be temporarily unavailable.", "Facebook server error: Marketplace may be temporarily unavailable.",
); );
break; break;
default: default:
console.warn(`Unexpected error status: ${err.status}`); logger.warn(`Unexpected error status: ${err.statusCode}`);
} }
return null; return null;
} }
if (err instanceof RateLimitError) {
logger.warn(
`\nFacebook marketplace item rate limited for item ${itemId}: ${err.message}`,
);
logger.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
);
return null;
}
throw err; throw err;
} }
const classification = classifyFacebookResponse(itemHtml, itemResponseUrl); const classification = classifyFacebookResponse(itemHtml, itemResponseUrl);
if (classification.kind === "checkpoint") {
logExtractionMetrics(false, itemId);
logger.warn(
`Checkpoint challenge detected for item ${itemId}. Facebook may be limiting access.`,
);
return null;
}
if (classification.authGated) { if (classification.authGated) {
logExtractionMetrics(false, itemId); logExtractionMetrics(false, itemId);
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`); logger.warn(
`Authentication failed for item ${itemId}. Cookies may be expired.`,
);
return null; return null;
} }
if (classification.unavailable || itemHtml.includes("This item has been sold")) { if (itemResponseUrl.includes("unavailable_product=1")) {
logExtractionMetrics(false, itemId); logExtractionMetrics(false, itemId);
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`); logger.warn(
return null; `Item ${itemId} appears to be sold or removed from marketplace.`,
}
if (classification.kind !== "item") {
logExtractionMetrics(false, itemId);
console.warn(
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
); );
return null; return null;
} }
const itemData = extractFacebookItemData(itemHtml); const itemData = extractFacebookItemData(itemHtml);
if (classification.unavailable && !itemData) {
logExtractionMetrics(false, itemId);
logger.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
if (classification.kind !== "item" && !itemData) {
logExtractionMetrics(false, itemId);
logger.warn(
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
);
return null;
}
if (!itemData) { if (!itemData) {
logExtractionMetrics(false, itemId); logExtractionMetrics(false, itemId);
console.warn( if (itemHtml.includes("This item has been sold")) {
logger.warn(
`Item ${itemId} appears to be sold or removed from marketplace.`,
);
return null;
}
logger.warn(
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`, `No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
); );
console.warn(" - The listing was removed or sold"); logger.warn(" - The listing was removed or sold");
console.warn(" - Authentication issues"); logger.warn(" - Authentication issues");
console.warn(" - Facebook changed their API structure"); logger.warn(" - Facebook changed their API structure");
console.warn(" - Network or parsing issues"); logger.warn(" - Network or parsing issues");
return null; return null;
} }
logExtractionMetrics(true, itemId); logExtractionMetrics(true, itemId);
console.log(`Successfully extracted data for item ${itemId}`); logger.log(`Successfully extracted data for item ${itemId}`);
const parsedItem = parseFacebookItem(itemData); const parsedItem = parseFacebookItem(itemData);
if (!parsedItem) { if (!parsedItem) {
console.warn(`Failed to parse item ${itemId}: Invalid data structure`); logger.warn(`Failed to parse item ${itemId}: Invalid data structure`);
return null; return null;
} }
// Check for sold/removed status in the parsed data with proper precedence // Check for sold/removed status in the parsed data with proper precedence
if (itemData.is_sold) { if (itemData.is_sold) {
console.warn(`Item ${itemId} is marked as sold in the marketplace.`); logger.warn(`Item ${itemId} is marked as sold in the marketplace.`);
// Still return the data but mark it as sold // Still return the data but mark it as sold
parsedItem.listingStatus = "SOLD"; parsedItem.listingStatus = "SOLD";
} else if (!itemData.is_live) { } else if (!itemData.is_live) {
console.warn(`Item ${itemId} is not live/active in the marketplace.`); logger.warn(`Item ${itemId} is not live/active in the marketplace.`);
parsedItem.listingStatus = itemData.is_hidden parsedItem.listingStatus = itemData.is_hidden
? "HIDDEN" ? "HIDDEN"
: itemData.is_pending : itemData.is_pending

View File

@@ -1,12 +1,17 @@
import cliProgress from "cli-progress"; import cliProgress from "cli-progress";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import unidecode from "unidecode"; import unidecode from "unidecode";
import type { HTMLString } from "../types/common"; import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { import {
type CookieConfig, type CookieConfig,
formatCookiesForHeader, formatCookiesForHeader,
loadCookiesOptional, loadCookiesOptional,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import { import {
fetchHtml, fetchHtml,
@@ -17,13 +22,14 @@ import {
RateLimitError, RateLimitError,
ValidationError, ValidationError,
} from "../utils/http"; } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
// Kijiji cookie configuration // Kijiji cookie configuration
const KIJIJI_COOKIE_CONFIG: CookieConfig = { const KIJIJI_COOKIE_CONFIG: CookieConfig = {
name: "Kijiji", name: "Kijiji",
domain: ".kijiji.ca", domain: ".kijiji.ca",
envVar: "KIJIJI_COOKIE", envVar: "KIJIJI_COOKIE",
filePath: "./cookies/kijiji.json",
}; };
// ----------------------------- Types ----------------------------- // ----------------------------- Types -----------------------------
@@ -41,6 +47,17 @@ interface ApolloSearchItem {
[k: string]: unknown; [k: string]: unknown;
} }
/**
 * A single attribute entry attached to a listing record: an optional
 * canonical name paired with an optional list of canonical values.
 */
type ListingAttribute = {
canonicalName?: string;
canonicalValues?: string[];
};
/**
 * Accepted shapes of a listing's `attributes` field: either a bare array of
 * attribute entries, or an object carrying the entries under an optional
 * `all` key.
 */
type ListingAttributes =
| ListingAttribute[]
| {
all?: ListingAttribute[];
};
interface ApolloListingRoot { interface ApolloListingRoot {
url?: string; url?: string;
title?: string; title?: string;
@@ -63,7 +80,7 @@ interface ApolloListingRoot {
adSource?: string; adSource?: string;
flags?: { topAd?: boolean; priceDrop?: boolean }; flags?: { topAd?: boolean; priceDrop?: boolean };
posterInfo?: { posterId?: string; rating?: number }; posterInfo?: { posterId?: string; rating?: number };
attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>; attributes?: ListingAttributes;
[k: string]: unknown; [k: string]: unknown;
} }
@@ -197,18 +214,43 @@ const SORT_MAPPINGS: Record<string, string> = {
distance: "DISTANCE", distance: "DISTANCE",
}; };
// Reverse lookup from numeric location ID to a URL slug. Each
// LOCATION_MAPPINGS name has its whitespace runs replaced by "-"; when several
// names share one ID, the entry visited last wins.
const LOCATION_SLUGS = Object.entries(LOCATION_MAPPINGS).reduce<
  Record<number, string>
>((slugsById, [name, id]) => {
  slugsById[id] = name.replace(/\s+/g, "-");
  return slugsById;
}, {});
// Reverse lookup from numeric category ID to a URL slug. Each
// CATEGORY_MAPPINGS name has its whitespace runs replaced by "-"; when several
// names share one ID, the entry visited last wins.
const CATEGORY_SLUGS = Object.entries(CATEGORY_MAPPINGS).reduce<
  Record<number, string>
>((slugsById, [name, id]) => {
  slugsById[id] = name.replace(/\s+/g, "-");
  return slugsById;
}, {});
// ----------------------------- Utilities ----------------------------- // ----------------------------- Utilities -----------------------------
const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]); const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]);
/**
 * Canonicalizes a location or category name so lookups are insensitive to
 * case, surrounding whitespace, and the choice of separator.
 *
 * Surrounding whitespace is trimmed first; otherwise it would be rewritten to
 * a leading/trailing "-" and an otherwise valid name (e.g. " toronto ") would
 * never match a mapping key. The value is then lower-cased and every run of
 * whitespace and/or hyphens is collapsed into a single "-".
 *
 * @param value - Raw name supplied by the caller or taken from a mapping key.
 * @returns The normalized lookup key.
 */
function normalizeLookupKey(value: string): string {
  return value
    .trim()
    .toLowerCase()
    .replace(/[\s-]+/g, "-");
}
/**
 * Converts a price expressed in cents into the whole-dollar number used for
 * the `priceMin` / `priceMax` search URL parameters, rounding down.
 *
 * @param cents - Price in cents.
 * @returns The price in dollars, floored to an integer.
 */
function centsToKijijiPriceParam(cents: number): number {
  const dollars = cents / 100;
  return Math.floor(dollars);
}
/** /**
* Resolve location ID from name or return numeric ID * Resolve location ID from name or return numeric ID
*/ */
export function resolveLocationId(location?: number | string): number { export function resolveLocationId(location?: number | string): number {
if (typeof location === "number") return location; if (typeof location === "number") return location;
if (typeof location === "string") { if (typeof location === "string") {
const normalized = location.toLowerCase().replace(/\s+/g, "-"); const normalized = normalizeLookupKey(location);
return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) const mapping = Object.entries(LOCATION_MAPPINGS).find(
([key]) => normalizeLookupKey(key) === normalized,
);
return mapping?.[1] ?? 0; // Default to Canada (0)
} }
return 0; // Default to Canada return 0; // Default to Canada
} }
@@ -219,12 +261,38 @@ export function resolveLocationId(location?: number | string): number {
export function resolveCategoryId(category?: number | string): number { export function resolveCategoryId(category?: number | string): number {
if (typeof category === "number") return category; if (typeof category === "number") return category;
if (typeof category === "string") { if (typeof category === "string") {
const normalized = category.toLowerCase().replace(/\s+/g, "-"); const normalized = normalizeLookupKey(category);
return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories const mapping = Object.entries(CATEGORY_MAPPINGS).find(
([key]) => normalizeLookupKey(key) === normalized,
);
return mapping?.[1] ?? 0; // Default to all categories
} }
return 0; // Default to all categories return 0; // Default to all categories
} }
/**
 * Checks a listing's price (in cents) against the optional `priceMin` /
 * `priceMax` bounds of the search options. Both bounds are inclusive.
 *
 * A listing whose price has no numeric `cents` value never matches, even when
 * no bound is set.
 *
 * @param listing - Listing whose `listingPrice.cents` is inspected.
 * @param searchOptions - Options carrying the optional numeric bounds.
 * @returns true when the price is numeric and violates neither bound.
 */
function matchesPriceFilters(
  listing: DetailedListing,
  searchOptions: SearchOptions,
): boolean {
  const price = listing.listingPrice?.cents;
  if (typeof price !== "number") return false;

  const { priceMin, priceMax } = searchOptions;
  const belowMinimum = typeof priceMin === "number" && price < priceMin;
  const aboveMaximum = typeof priceMax === "number" && price > priceMax;

  return !(belowMinimum || aboveMaximum);
}
/** /**
* Build search URL with enhanced parameters * Build search URL with enhanced parameters
*/ */
@@ -236,23 +304,53 @@ export function buildSearchUrl(
const locationId = resolveLocationId(options.location); const locationId = resolveLocationId(options.location);
const categoryId = resolveCategoryId(options.category); const categoryId = resolveCategoryId(options.category);
const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell"; const categorySlug = CATEGORY_SLUGS[categoryId] ?? "buy-sell";
const locationSlug = locationId === 0 ? "canada" : "canada"; const locationSlug = LOCATION_SLUGS[locationId] ?? "canada";
let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`;
const sortParam = options.sortBy const sortValue =
? `&sort=${SORT_MAPPINGS[options.sortBy]}` options.sortBy && options.sortBy !== "relevancy"
: ""; ? SORT_MAPPINGS[options.sortBy]
: "relevancyDesc";
const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC"; const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC";
const priceMinParam =
typeof options.priceMin === "number"
? `&priceMin=${centsToKijijiPriceParam(options.priceMin)}`
: "";
const priceMaxParam =
typeof options.priceMax === "number"
? `&priceMax=${centsToKijijiPriceParam(options.priceMax)}`
: "";
const pageParam = const pageParam =
options.page && options.page > 1 ? `&page=${options.page}` : ""; options.page && options.page > 1 ? `&page=${options.page}` : "";
url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; url += `?sort=${sortValue}&view=list&order=${sortOrder}${priceMinParam}${priceMaxParam}${pageParam}`;
return url; return url;
} }
/**
 * Returns the first key of the Apollo state that is a listing record key and
 * whose value is a record accepted by `predicate`.
 *
 * @param apolloState - Apollo state object to scan, in key order.
 * @param predicate - Extra test applied to each candidate record.
 * @returns The matching key, or undefined when no entry qualifies.
 */
function findApolloListingKey(
  apolloState: ApolloRecord,
  predicate: (value: Record<string, unknown>) => boolean,
): string | undefined {
  for (const candidateKey of Object.keys(apolloState)) {
    if (!isListingRecordKey(candidateKey)) continue;

    const entry = apolloState[candidateKey];
    if (isRecord(entry) && predicate(entry)) {
      return candidateKey;
    }
  }
  return undefined;
}
/**
 * Tells whether an Apollo state key names a listing record, i.e. whether it
 * starts with "Listing:" or "StandardListing:".
 *
 * @param key - Apollo state key to test.
 * @returns true when the key carries one of the listing prefixes.
 */
function isListingRecordKey(key: string): boolean {
  const listingPrefixes = ["Listing:", "StandardListing:"];
  return listingPrefixes.some((prefix) => key.startsWith(prefix));
}
/**
 * Normalizes either accepted `attributes` shape into a plain array: an array
 * is returned as-is, an object yields its `all` list, and anything missing
 * yields an empty array.
 *
 * @param attributes - Attribute array, `{ all }` wrapper, or undefined.
 * @returns The attribute entries, possibly empty.
 */
function getListingAttributes(attributes: ListingAttributes | undefined) {
  return Array.isArray(attributes) ? attributes : (attributes?.all ?? []);
}
/** /**
* Slugifies a string for Kijiji search URLs * Slugifies a string for Kijiji search URLs
*/ */
@@ -391,18 +489,16 @@ async function fetchSellerDetails(
accountType?: string; accountType?: string;
}> { }> {
try { try {
const [reviewData, profileData] = await Promise.all([ const reviewData = await fetchGraphQLData(
fetchGraphQLData( GRAPHQL_QUERIES.getReviewSummary,
GRAPHQL_QUERIES.getReviewSummary, { userId: posterId },
{ userId: posterId }, BASE_URL,
BASE_URL, );
), const profileData = await fetchGraphQLData(
fetchGraphQLData( GRAPHQL_QUERIES.getProfileMetrics,
GRAPHQL_QUERIES.getProfileMetrics, { profileId: posterId },
{ profileId: posterId }, BASE_URL,
BASE_URL, );
),
]);
const reviewResponse = reviewData as GraphQLReviewResponse; const reviewResponse = reviewData as GraphQLReviewResponse;
const profileResponse = profileData as GraphQLProfileResponse; const profileResponse = profileData as GraphQLProfileResponse;
@@ -415,7 +511,7 @@ async function fetchSellerDetails(
}; };
} catch (err) { } catch (err) {
// Silently fail for GraphQL errors - not critical for basic functionality // Silently fail for GraphQL errors - not critical for basic functionality
console.warn( logger.warn(
`Failed to fetch seller details for ${posterId}:`, `Failed to fetch seller details for ${posterId}:`,
err instanceof Error ? err.message : String(err), err instanceof Error ? err.message : String(err),
); );
@@ -457,8 +553,7 @@ export function parseSearch(
const results: SearchListing[] = []; const results: SearchListing[] = [];
for (const [key, value] of Object.entries(apolloState)) { for (const [key, value] of Object.entries(apolloState)) {
// Heuristic: Kijiji listing keys usually contain "Listing" if (!isListingRecordKey(key)) continue;
if (!key.includes("Listing")) continue;
if (!isRecord(value)) continue; if (!isRecord(value)) continue;
const item = value as ApolloSearchItem; const item = value as ApolloSearchItem;
@@ -474,78 +569,6 @@ export function parseSearch(
return results; return results;
} }
/**
 * Parse a listing page into a typed object (backward compatible).
 *
 * Reads the Apollo state embedded in the page, takes the first state key that
 * contains "Listing", and maps that record onto the listing details shape.
 *
 * @param htmlString - Raw HTML of a listing page.
 * @param BASE_URL - Origin prepended to listing URLs that do not start with
 *   "http".
 * @returns The parsed listing, or null when the Apollo state, the listing
 *   record, its URL, or its title is missing.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  // Find the listing root key
  const listingKey = Object.keys(apolloState).find((k) =>
    k.includes("Listing"),
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
  } = root as ApolloListingRoot;

  // Coerce once and drop NaN/Infinity up front, so a malformed amount is never
  // handed to the currency formatter and can never yield a price object that
  // has a formatted string but no numeric cents.
  const toFiniteNumber = (raw: unknown): number | undefined => {
    if (raw == null) return undefined;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const cents = toFiniteNumber(price?.amount);
  const amountFormatted =
    cents !== undefined ? formatCentsToCurrency(cents, "en-CA") : undefined;
  const numberOfViews = toFiniteNumber(metrics?.views);

  // Relative URLs are resolved against BASE_URL; a non-string URL is treated
  // as missing.
  const listingUrl =
    typeof url === "string"
      ? url.startsWith("http")
        ? url
        : `${BASE_URL}${url}`
      : "";

  if (!listingUrl || !title) return null;

  return {
    url: listingUrl,
    title,
    description,
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents,
          currency: price?.currency,
        }
      : undefined,
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews,
    address: location?.address ?? null,
  };
}
/** /**
* Parse a listing page into a detailed object with all available fields * Parse a listing page into a detailed object with all available fields
*/ */
@@ -557,9 +580,12 @@ export async function parseDetailedListing(
const apolloState = extractApolloState(htmlString); const apolloState = extractApolloState(htmlString);
if (!apolloState) return null; if (!apolloState) return null;
// Find the listing root key const listingKey = findApolloListingKey(
const listingKey = Object.keys(apolloState).find((k) => apolloState,
k.includes("Listing"), (value) =>
typeof value.url === "string" &&
typeof value.title === "string" &&
isRecord(value.price),
); );
if (!listingKey) return null; if (!listingKey) return null;
@@ -612,11 +638,9 @@ export async function parseDetailedListing(
// Extract attributes as key-value pairs // Extract attributes as key-value pairs
const attributeMap: Record<string, string[]> = {}; const attributeMap: Record<string, string[]> = {};
if (Array.isArray(attributes)) { for (const attr of getListingAttributes(attributes)) {
for (const attr of attributes) { if (attr.canonicalName && Array.isArray(attr.canonicalValues)) {
if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) { attributeMap[attr.canonicalName] = attr.canonicalValues;
attributeMap[attr.canonicalName] = attr.canonicalValues;
}
} }
} }
@@ -647,7 +671,7 @@ export async function parseDetailedListing(
}; };
} catch { } catch {
// Silently fail - GraphQL data is optional // Silently fail - GraphQL data is optional
console.warn( logger.warn(
`Failed to fetch additional seller data for ${posterInfo.posterId}`, `Failed to fetch additional seller data for ${posterInfo.posterId}`,
); );
} }
@@ -696,14 +720,43 @@ export async function parseDetailedListing(
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchKijijiItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined,
BASE_URL: string | undefined,
searchOptions: SearchOptions | undefined,
listingOptions: ListingFetchOptions | undefined,
unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<DetailedListing>>;
export default async function fetchKijijiItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
BASE_URL?: string,
searchOptions?: SearchOptions,
listingOptions?: ListingFetchOptions,
unstableMode?: UnstableListingModeOptions,
): Promise<DetailedListing[]>;
export default async function fetchKijijiItems( export default async function fetchKijijiItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
BASE_URL = "https://www.kijiji.ca", BASE_URL = "https://www.kijiji.ca",
searchOptions: SearchOptions = {}, searchOptions: SearchOptions = {},
listingOptions: ListingFetchOptions = {}, listingOptions: ListingFetchOptions = {},
unstableMode: UnstableListingModeOptions = {},
) { ) {
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
const finalizeResults = (
listings: DetailedListing[],
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
if (!unstableMode.hideUnstableResults) {
return listings;
}
return classifyUnstableListings(listings);
};
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
// Load Kijiji cookies (optional - helps bypass bot detection) // Load Kijiji cookies (optional - helps bypass bot detection)
const cookies = await loadCookiesOptional( const cookies = await loadCookiesOptional(
@@ -716,15 +769,21 @@ export default async function fetchKijijiItems(
: undefined; : undefined;
// Set defaults for configuration // Set defaults for configuration
const finalSearchOptions: Required<SearchOptions> = { const finalSearchOptions: Omit<
Required<SearchOptions>,
"priceMin" | "priceMax"
> & {
priceMin?: number;
priceMax?: number;
} = {
location: searchOptions.location ?? 1700272, // Default to GTA location: searchOptions.location ?? 1700272, // Default to GTA
category: searchOptions.category ?? 0, // Default to all categories category: searchOptions.category ?? 0, // Default to all categories
keywords: searchOptions.keywords ?? SEARCH_QUERY, keywords: searchOptions.keywords ?? SEARCH_QUERY,
sortBy: searchOptions.sortBy ?? "relevancy", sortBy: searchOptions.sortBy ?? "relevancy",
sortOrder: searchOptions.sortOrder ?? "desc", sortOrder: searchOptions.sortOrder ?? "desc",
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin as number, priceMin: searchOptions.priceMin,
priceMax: searchOptions.priceMax as number, priceMax: searchOptions.priceMax,
cookies: searchOptions.cookies ?? "", cookies: searchOptions.cookies ?? "",
}; };
@@ -749,11 +808,11 @@ export default async function fetchKijijiItems(
BASE_URL, BASE_URL,
); );
console.log(`Fetching search page ${page}: ${searchUrl}`); logger.log(`Fetching search page ${page}: ${searchUrl}`);
const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( logger.log(
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`, `\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
@@ -763,9 +822,17 @@ export default async function fetchKijijiItems(
const searchResults = parseSearch(searchHtml, BASE_URL); const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) { if (searchResults.length === 0) {
console.log( if (page === 1) {
`No more results found on page ${page}. Stopping pagination.`, logger.log(
); `No results found on page 1. The search URL was: ${searchUrl}\n` +
`Tip: Kijiji matches ALL words in the query against listing titles. ` +
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
);
} else {
logger.log(
`No more results found on page ${page}. Stopping pagination.`,
);
}
break; break;
} }
@@ -778,7 +845,7 @@ export default async function fetchKijijiItems(
seenUrls.add(link); seenUrls.add(link);
} }
console.log( logger.log(
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`, `\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
); );
@@ -792,18 +859,22 @@ export default async function fetchKijijiItems(
progressBar?.start(totalProgress, currentProgress); progressBar?.start(totalProgress, currentProgress);
// Process in batches for controlled concurrency // Process in batches for controlled concurrency
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond));
const results: (DetailedListing | null)[] = []; const results: (DetailedListing | null)[] = [];
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) { for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS); const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
const batchPromises = batch.map(async (link) => { const batchPromises = batch.map(async (link, batchIndex) => {
try { try {
if (batchIndex > 0) {
await delay(DELAY_MS * batchIndex);
}
const html = await fetchHtml(link, 0, { const html = await fetchHtml(link, 0, {
// No per-request delay, batch handles rate limit // Staggered starts keep request pacing within REQUESTS_PER_SECOND.
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
console.log( logger.log(
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`, `\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
); );
} }
@@ -818,11 +889,11 @@ export default async function fetchKijijiItems(
return parsed; return parsed;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.error( logger.warn(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`, `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
); );
} else { } else {
console.error( logger.warn(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`, `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
); );
} }
@@ -831,7 +902,7 @@ export default async function fetchKijijiItems(
currentProgress++; currentProgress++;
progressBar?.update(currentProgress); progressBar?.update(currentProgress);
if (!progressBar) { if (!progressBar) {
console.log(`Progress: ${currentProgress}/${totalProgress}`); logger.log(`Progress: ${currentProgress}/${totalProgress}`);
} }
} }
}); });
@@ -839,11 +910,8 @@ export default async function fetchKijijiItems(
const batchResults = await Promise.all(batchPromises); const batchResults = await Promise.all(batchPromises);
results.push(...batchResults); results.push(...batchResults);
// Wait between batches to respect rate limit
if (i + CONCURRENT_REQUESTS < newListingLinks.length) { if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) => await delay(DELAY_MS);
setTimeout(resolve, DELAY_MS * batch.length),
);
} }
} }
@@ -859,8 +927,12 @@ export default async function fetchKijijiItems(
} }
} }
console.log(`\nParsed ${allListings.length} detailed listings.`); const filteredListings = allListings.filter((listing) =>
return allListings; matchesPriceFilters(listing, finalSearchOptions),
);
logger.log(`\nParsed ${filteredListings.length} detailed listings.`);
return finalizeResults(filteredListings);
} }
// Re-export error classes for convenience // Re-export error classes for convenience

View File

@@ -18,3 +18,12 @@ export interface ListingDetails {
address?: string | null; address?: string | null;
creationDate?: string; creationDate?: string;
} }
/**
 * A set of listings split into two groups.
 * `classifyUnstableListings` fills `unstableResults` with listings whose
 * positive price is below 80% of the median price, and `results` with
 * everything else.
 */
export interface UnstableListingBuckets<T> {
/** Listings that were not flagged as unstable. */
results: T[];
/** Listings flagged as unstable (priced well below the median). */
unstableResults: T[];
}
/**
 * Options controlling the unstable-listing mode of a scraper.
 */
export interface UnstableListingModeOptions {
/**
 * NOTE(review): the eBay scraper's type-level test pairs
 * `hideUnstableResults: true` with an `UnstableListingBuckets` return value;
 * confirm the exact runtime semantics against the scrapers.
 */
hideUnstableResults?: boolean;
}

View File

@@ -2,9 +2,12 @@
* Shared cookie handling utilities for marketplace scrapers * Shared cookie handling utilities for marketplace scrapers
*/ */
import { logger } from "./logger";
export interface Cookie { export interface Cookie {
name: string; name: string;
value: string; value: string;
rawValue?: string;
domain: string; domain: string;
path: string; path: string;
secure?: boolean; secure?: boolean;
@@ -41,9 +44,9 @@ export function parseCookieString(
.split(";") .split(";")
.map((pair) => pair.trim()) .map((pair) => pair.trim())
.filter((pair) => pair.includes("=")) .filter((pair) => pair.includes("="))
.map((pair) => { .map((pair): Cookie | null => {
const [name, ...valueParts] = pair.split("="); const [name, ...valueParts] = pair.split("=");
const trimmedName = name.trim(); const trimmedName = name?.trim();
const trimmedValue = valueParts.join("=").trim(); const trimmedValue = valueParts.join("=").trim();
if (!trimmedName || !trimmedValue) { if (!trimmedName || !trimmedValue) {
@@ -53,6 +56,7 @@ export function parseCookieString(
return { return {
name: trimmedName, name: trimmedName,
value: decodeURIComponent(trimmedValue), value: decodeURIComponent(trimmedValue),
rawValue: trimmedValue,
domain, domain,
path: "/", path: "/",
secure: true, secure: true,
@@ -93,19 +97,30 @@ export function formatCookiesForHeader(
}); });
return validCookies return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`) .map((cookie) => `${cookie.name}=${cookie.rawValue ?? cookie.value}`)
.join("; "); .join("; ");
} }
/** /**
* Load cookies from the configured environment variable * Load cookies from the configured environment variable or explicit cookie string
*/ */
export async function ensureCookies(config: CookieConfig): Promise<Cookie[]> { export async function ensureCookies(
config: CookieConfig,
cookiesSource?: string,
): Promise<Cookie[]> {
// Explicit cookie string takes priority
if (cookiesSource) {
const cookies = parseCookieString(cookiesSource, config.domain);
if (cookies.length > 0) {
return cookies;
}
}
const envValue = process.env[config.envVar]; const envValue = process.env[config.envVar];
const cookies = parseCookieString(envValue ?? "", config.domain); const cookies = parseCookieString(envValue ?? "", config.domain);
if (cookies.length > 0) { if (cookies.length > 0) {
console.log( logger.log(
`Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`, `Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
); );
return cookies; return cookies;

View File

@@ -4,5 +4,7 @@
* @returns A promise that resolves after the specified delay * @returns A promise that resolves after the specified delay
*/ */
export function delay(ms: number): Promise<void> { export function delay(ms: number): Promise<void> {
if (process.env.NODE_ENV === "test") return Promise.resolve();
return new Promise((resolve) => setTimeout(resolve, ms)); return new Promise((resolve) => setTimeout(resolve, ms));
} }

View File

@@ -0,0 +1,239 @@
import argon2 from "argon2-wasm-pro";
// ------------------ Types ------------------
/**
 * Proof-of-work parameters read from the `details` object inside the
 * challenge page's `_cdetail` hidden field.
 */
interface ChallengeDetails {
/** Passed to argon2 as the `time` option. */
p2: number;
/** Length in bytes of each nonce that is hashed. */
p6: number;
/** Passed to argon2 as the `hashLen` option. */
p7: number;
/** Base64-encoded salt passed to argon2. */
p9: string;
/** Base64-encoded target; a nonce is an answer when its hash compares <= this value. */
p11: string;
/** Passed to argon2 as the `parallelism` option. */
p12: number;
/** Passed to argon2 as the `mem` option. */
p13: number;
/** Number of answers (nonces) that must be found. */
p15: number;
}
/**
 * Everything `parseChallengePage` extracts from a challenge page.
 */
interface ChallengeParams {
/** Value of the `_crefId` hidden field; sent back as `referenceId`. */
crefId: string;
/** Parsed proof-of-work parameters from the `_cdetail` hidden field. */
cdetail: ChallengeDetails;
/** Value of the `_iid` hidden field. */
iid: string;
/** Base URL the answer is POSTed to; falls back to "https://www.ebay.ca". */
chlghost: string;
/** Value of the `_appName` hidden field; falls back to "orch". */
appName: string;
/** Value of the `_p` hidden field, echoed back unchanged. */
p: string;
/** `action` of the `destForm` form; sent back as `ru`. */
destUrl: string;
}
/**
 * Outcome of a successfully answered challenge.
 */
interface ChallengeResult {
/** "name=value; name=value" string built from the answer response's Set-Cookie headers. */
cookies: string;
}
// ------------------ Helpers ------------------
/**
 * Compares the first `len` positions of `a` and `b` byte by byte.
 * Positions past the end of either input are treated as 0.
 *
 * @returns 0 when no difference is found, otherwise the difference of the
 *   low 8 bits of the first pair of values that are not equal.
 */
function memcmp(a: Uint8Array, b: number[], len: number): number {
  let index = 0;
  while (index < len) {
    const left = a[index] ?? 0;
    const right = b[index] ?? 0;
    if (left !== right) {
      return (left & 0xff) - (right & 0xff);
    }
    index += 1;
  }
  return 0;
}
/**
 * Writes `val` as four big-endian bytes into `arr`, starting at `offset`.
 * Each byte is truncated by the typed array on assignment.
 */
function intToBytes(val: number, arr: Uint8Array, offset: number) {
  const shifts = [24, 16, 8, 0];
  shifts.forEach((shift, position) => {
    arr[offset + position] = val >>> shift;
  });
}
/**
 * Converts a string into an array holding the UTF-16 code unit of each
 * position (one entry per `str.length` index).
 */
function string2Bin(str: string): number[] {
  return Array.from({ length: str.length }, (_unused, position) =>
    str.charCodeAt(position),
  );
}
/**
 * Encodes a byte buffer as a base64 string.
 *
 * The bytes are converted to a binary string in fixed-size chunks. Spreading
 * the whole buffer into a single `String.fromCharCode` call can exceed the
 * engine's argument limit and throw a RangeError on large inputs.
 *
 * @param buf - Bytes to encode.
 * @returns The base64 representation of `buf` ("" for an empty buffer).
 */
function bufferToBase64(buf: Uint8Array): string {
  const CHUNK_SIZE = 0x8000;
  let binary = "";
  for (let start = 0; start < buf.length; start += CHUNK_SIZE) {
    binary += String.fromCharCode(...buf.subarray(start, start + CHUNK_SIZE));
  }
  return btoa(binary);
}
/**
 * Builds a name -> value map from raw Set-Cookie header strings.
 *
 * The name is everything before the first "=", and the value is everything
 * after it up to the first ";". Headers with an empty name or empty value are
 * skipped; a later header with the same name overwrites an earlier one.
 */
function parseCookiesFromSetCookie(cookies: string[]): Record<string, string> {
  const jar: Record<string, string> = {};
  cookies.forEach((rawHeader) => {
    const separator = rawHeader.indexOf("=");
    if (separator <= 0) return;

    const afterName = rawHeader.slice(separator + 1);
    const attributesStart = afterName.indexOf(";");
    const cookieValue =
      attributesStart === -1 ? afterName : afterName.slice(0, attributesStart);
    if (cookieValue === "") return;

    jar[rawHeader.slice(0, separator)] = cookieValue;
  });
  return jar;
}
// ------------------ Default headers ------------------
/** User-Agent string (Chrome 131 on Linux) sent with the challenge answer request. */
const BROWSER_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
/**
 * Browser-like default headers for eBay page requests.
 * NOTE(review): nothing in the visible part of this file references this
 * constant (hence the leading underscore) - confirm before removing.
 */
const _EBAY_HEADERS: Record<string, string> = {
"User-Agent": BROWSER_UA,
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
};
// ------------------ Parser ------------------
/**
 * Extracts the challenge parameters from an eBay challenge page.
 *
 * Hidden fields are located by `id=<name> value=<value>` and may be
 * single-quoted, double-quoted or unquoted. The `_cdetail` field must hold
 * JSON of the form `{ "details": { p2, p6, p7, p9, p11, p12, p13, p15 } }`.
 *
 * @param html - Raw HTML of the challenge page.
 * @returns The parsed parameters, or `null` when `_crefId` / `_cdetail` are
 *   missing, `_cdetail` is not valid JSON, or any proof-of-work field is
 *   missing or of the wrong type (numeric fields must be finite numbers or
 *   numeric strings; `p9` and `p11` must be strings).
 */
export function parseChallengePage(html: string): ChallengeParams | null {
  // Reads the value of a hidden input by id, trying quoted forms before the bare form.
  const getHidden = (id: string): string => {
    const re = new RegExp(
      `id=${id}\\s+value='([^']*)'` +
        `|id=${id}\\s+value="([^"]*)"` +
        `|id=${id}\\s+value=([^\\s>]+)`,
      "i",
    );
    const m = html.match(re);
    if (!m) return "";
    return m[1] ?? m[2] ?? m[3] ?? "";
  };

  // Accepts finite numbers and non-blank numeric strings only, so that
  // missing fields do not silently turn into NaN or 0 for the solver.
  const toFiniteNumber = (value: unknown): number | null => {
    if (typeof value === "string") {
      if (value.trim() === "") return null;
    } else if (typeof value !== "number") {
      return null;
    }
    const converted = Number(value);
    return Number.isFinite(converted) ? converted : null;
  };

  const crefId = getHidden("_crefId");
  const cdetailRaw = getHidden("_cdetail");
  const iid = getHidden("_iid");
  const chlghost = getHidden("_chlghost");
  const appName = getHidden("_appName");
  const p = getHidden("_p");

  const formActionMatch = html.match(
    /<form\s+id=destForm\s+[^>]*action=([^\s>]+)/i,
  );
  // The capture keeps surrounding quotes when the attribute is quoted; drop
  // one matching pair so the URL is usable either way.
  const destUrl = (formActionMatch?.[1]?.trim() ?? "").replace(
    /^(["'])(.*)\1$/,
    "$2",
  );

  if (!crefId || !cdetailRaw) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(cdetailRaw);
  } catch {
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const details = (parsed as { details?: unknown }).details;
  if (typeof details !== "object" || details === null) return null;
  const d = details as Record<string, unknown>;

  const p2 = toFiniteNumber(d.p2);
  const p6 = toFiniteNumber(d.p6);
  const p7 = toFiniteNumber(d.p7);
  const p12 = toFiniteNumber(d.p12);
  const p13 = toFiniteNumber(d.p13);
  const p15 = toFiniteNumber(d.p15);
  const p9 = d.p9;
  const p11 = d.p11;

  if (
    p2 === null ||
    p6 === null ||
    p7 === null ||
    p12 === null ||
    p13 === null ||
    p15 === null ||
    typeof p9 !== "string" ||
    typeof p11 !== "string"
  ) {
    return null;
  }

  const cdetail: ChallengeDetails = { p2, p6, p7, p9, p11, p12, p13, p15 };

  return {
    crefId,
    cdetail,
    iid,
    chlghost: chlghost || "https://www.ebay.ca",
    appName: appName || "orch",
    p,
    destUrl,
  };
}
// ------------------ Solver ------------------
/**
 * Searches for nonces whose argon2 hash compares less than or equal to the
 * challenge target, until `cdetail.p15` answers have been collected.
 *
 * Each attempt hashes the current nonce. On a miss, the counter stored in the
 * nonce's last four bytes is incremented; on a hit, the nonce is recorded
 * (base64) and a fresh random nonce with a zeroed counter is started.
 *
 * @param cdetail - Proof-of-work parameters from the challenge page.
 * @returns The base64-encoded nonces that satisfied the target.
 */
async function solveArgon2Challenge(
  cdetail: ChallengeDetails,
): Promise<string[]> {
  const target = string2Bin(atob(cdetail.p11));
  const salt = Uint8Array.from(atob(cdetail.p9), (ch) => ch.charCodeAt(0));
  const { p6: nonceLen, p15: answerCount } = cdetail;

  // Random nonce whose trailing four bytes (the counter slot) start at zero.
  const freshNonce = (): Uint8Array => {
    const bytes = new Uint8Array(nonceLen);
    crypto.getRandomValues(bytes);
    intToBytes(0, bytes, bytes.length - 4);
    return bytes;
  };

  const answers: string[] = [];
  let nonce = freshNonce();
  let counter = 0;

  while (answers.length < answerCount) {
    const { hash } = await argon2.hash({
      pass: nonce,
      salt,
      time: cdetail.p2,
      mem: cdetail.p13,
      hashLen: cdetail.p7,
      parallelism: cdetail.p12,
      type: 2,
    });

    const meetsTarget = memcmp(hash as Uint8Array, target, target.length) <= 0;
    if (!meetsTarget) {
      counter += 1;
      intToBytes(counter, nonce, nonce.length - 4);
      continue;
    }

    answers.push(bufferToBase64(nonce));
    nonce = freshNonce();
    counter = 0;
  }

  return answers;
}
// ------------------ Public API ------------------
/**
 * Solves the challenge embedded in an eBay challenge page and submits the
 * answer.
 *
 * @param html - Raw HTML of the challenge page.
 * @param cookieHeader - Optional Cookie header value to send with the answer.
 * @returns The cookies set by the answer response as a single
 *   "name=value; ..." string, or `null` when the page cannot be parsed, the
 *   answer request is not OK, or the response sets no cookies.
 */
export async function solveEbayChallenge(
  html: string,
  cookieHeader?: string,
): Promise<ChallengeResult | null> {
  const challenge = parseChallengePage(html);
  if (!challenge) return null;

  const solvedNonces = await solveArgon2Challenge(challenge.cdetail);
  const encodedAnswers = encodeURIComponent(solvedNonces.join(","));

  const payload = {
    iid: challenge.iid,
    appName: challenge.appName,
    referenceId: challenge.crefId,
    pvt: Date.now().toString(),
    crt: Date.now().toString(),
    encodedAnswers,
    p: challenge.p,
    ru: challenge.destUrl,
  };

  const requestHeaders: Record<string, string> = {
    "content-type": "application/json",
    accept: "application/json, text/plain, */*",
    "user-agent": BROWSER_UA,
    ...(cookieHeader ? { cookie: cookieHeader } : {}),
  };

  const answerUrl = `${challenge.chlghost}/splashui/challengesvc/answer`;
  const response = await fetch(answerUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (!response.ok) return null;

  // The cookies issued by the answer endpoint are the result of the solve.
  const issued = parseCookiesFromSetCookie(
    response.headers.getSetCookie?.() ?? [],
  );
  const pairs = Object.entries(issued).map(
    ([cookieName, cookieValue]) => `${cookieName}=${cookieValue}`,
  );
  return pairs.length === 0 ? null : { cookies: pairs.join("; ") };
}

View File

@@ -0,0 +1,128 @@
// Facebook Marketplace session & challenge utilities
// ------------------ Types ------------------
/** Classification returned by `detectFacebookChallenge` for a response. */
export type ChallengeType =
// The response URL contains "/login/" or the page asks the visitor to log in.
| "login_wall"
// The response URL contains "/checkpoint/" or the page mentions both "checkpoint" and "challenge".
| "checkpoint"
// The response status was 400.
| "bad_headers"
// The response status was 429.
| "rate_limited"
// None of the above matched.
| "none";
// ------------------ Constants ------------------
/**
 * Header set describing a top-level navigation from Chrome 131 on Linux.
 * Sent as-is by `warmFacebookSession` and used as the base of
 * `buildFacebookHeaders`.
 */
const FACEBOOK_BROWSER_HEADERS: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"sec-ch-ua":
'"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Linux"',
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
};
// ------------------ Cookie Management ------------------
/**
 * Builds a name -> value map from raw Set-Cookie header strings.
 *
 * Only the part before the first ";" is considered. It is split on its first
 * "=", and both sides are trimmed. Entries with an empty name or value are
 * dropped; a later header with the same name overwrites an earlier one.
 */
function parseSetCookies(setCookieHeaders: string[]): Record<string, string> {
  const jar: Record<string, string> = {};
  setCookieHeaders.forEach((rawHeader) => {
    const [leading = ""] = rawHeader.split(";");
    const pair = leading.trim();
    const separator = pair.indexOf("=");
    if (separator < 0) return;

    const key = pair.slice(0, separator).trim();
    const val = pair.slice(separator + 1).trim();
    if (key !== "" && val !== "") {
      jar[key] = val;
    }
  });
  return jar;
}
/**
 * Serializes a cookie map into a Cookie header value of the form
 * "name=value; name=value" ("" for an empty map).
 */
function cookiesToHeader(cookies: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [cookieName, cookieValue] of Object.entries(cookies)) {
    pairs.push(`${cookieName}=${cookieValue}`);
  }
  return pairs.join("; ");
}
// ------------------ Session Warmup ------------------
/**
 * Requests the Facebook homepage with browser-like headers and collects the
 * cookies the response sets.
 *
 * Redirects are not followed and the request is aborted after 10 seconds.
 * This is best-effort: any failure yields an empty cookie map instead of an
 * error.
 *
 * @returns The cookies set by the response, keyed by name.
 */
export async function warmFacebookSession(): Promise<Record<string, string>> {
  try {
    const homepageResponse = await fetch("https://www.facebook.com/", {
      method: "GET",
      headers: FACEBOOK_BROWSER_HEADERS,
      redirect: "manual",
      signal: AbortSignal.timeout(10000),
    });
    const issuedCookies = homepageResponse.headers.getSetCookie?.() ?? [];
    const sessionCookies = parseSetCookies(issuedCookies);
    return sessionCookies;
  } catch {
    // Warm-up is optional; callers continue with no cookies.
    return {};
  }
}
// ------------------ Challenge Detection ------------------
/**
 * Classifies a Facebook response as one of the known challenge types.
 *
 * Checks run in this order and the first match wins: status 400, status 429,
 * login wall (URL or page text), checkpoint (URL or page text).
 *
 * @param status - HTTP status code of the response.
 * @param html - Response body.
 * @param responseUrl - Final URL of the response.
 * @returns The detected challenge type, or "none".
 */
export function detectFacebookChallenge(
  status: number,
  html: string,
  responseUrl: string,
): ChallengeType {
  switch (status) {
    case 400:
      return "bad_headers";
    case 429:
      return "rate_limited";
    default:
      break;
  }

  const loginPhrases = ["You must log in", "log in to continue"];
  const isLoginWall =
    responseUrl.includes("/login/") ||
    loginPhrases.some((phrase) => html.includes(phrase));
  if (isLoginWall) {
    return "login_wall";
  }

  // The page text only counts as a checkpoint when both words are present.
  const checkpointWords = ["checkpoint", "challenge"];
  const isCheckpoint =
    responseUrl.includes("/checkpoint/") ||
    checkpointWords.every((word) => html.includes(word));

  return isCheckpoint ? "checkpoint" : "none";
}
// ------------------ Header Construction ------------------
/**
 * Builds the request headers for a Facebook request.
 *
 * Starts from the browser header set, adds a `cookie` header when the jar is
 * non-empty, then applies `extraHeaders` on top (these win on conflicts).
 *
 * @param cookieJar - Cookies to send, keyed by name.
 * @param extraHeaders - Optional headers applied last.
 * @returns A new header object; the shared defaults are not modified.
 */
export function buildFacebookHeaders(
  cookieJar: Record<string, string>,
  extraHeaders?: Record<string, string>,
): Record<string, string> {
  const cookieString = cookiesToHeader(cookieJar);
  const headers: Record<string, string> = cookieString
    ? { ...FACEBOOK_BROWSER_HEADERS, cookie: cookieString }
    : { ...FACEBOOK_BROWSER_HEADERS };

  return extraHeaders ? Object.assign(headers, extraHeaders) : headers;
}

View File

@@ -1,56 +1,56 @@
import type { HTMLString } from "../types/common";
import { delay } from "./delay";
/** Custom error class for HTTP-related failures */ /** Custom error class for HTTP-related failures */
export class HttpError extends Error { export class HttpError extends Error {
override name = "HttpError";
constructor( constructor(
message: string, message: string,
public readonly statusCode: number, public readonly statusCode: number,
public readonly url?: string, public readonly url?: string,
) { ) {
super(message); super(message);
this.name = "HttpError";
} }
} }
/** Error class for network failures (timeouts, connection issues) */ /** Error class for network failures (timeouts, connection issues) */
export class NetworkError extends Error { export class NetworkError extends Error {
override name = "NetworkError";
constructor( constructor(
message: string, message: string,
public readonly url: string, public readonly url: string,
public readonly cause?: Error, public override readonly cause?: Error,
) { ) {
super(message); super(message);
this.name = "NetworkError";
} }
} }
/** Error class for parsing failures */ /** Error class for parsing failures */
export class ParseError extends Error { export class ParseError extends Error {
override name = "ParseError";
constructor( constructor(
message: string, message: string,
public readonly data?: unknown, public readonly data?: unknown,
) { ) {
super(message); super(message);
this.name = "ParseError";
} }
} }
/** Error class for rate limiting */ /** Error class for rate limiting */
export class RateLimitError extends Error { export class RateLimitError extends Error {
override name = "RateLimitError";
constructor( constructor(
message: string, message: string,
public readonly url: string, public readonly url: string,
public readonly resetTime?: number, public readonly resetTime?: number,
) { ) {
super(message); super(message);
this.name = "RateLimitError";
} }
} }
/** Error class for validation failures */ /** Error class for validation failures */
export class ValidationError extends Error { export class ValidationError extends Error {
constructor(message: string) { override name = "ValidationError";
super(message);
this.name = "ValidationError";
}
} }
/** Type guard to check if a value is a record (object) */ /** Type guard to check if a value is a record (object) */
@@ -61,10 +61,57 @@ export function isRecord(value: unknown): value is Record<string, unknown> {
/** /**
* Calculate exponential backoff delay with jitter * Calculate exponential backoff delay with jitter
*/ */
function calculateBackoffDelay(attempt: number, baseMs: number): number { function calculateBackoffDelay(
attempt: number,
baseMs: number,
jitter: () => number = Math.random,
): number {
const exponentialDelay = baseMs * 2 ** attempt; const exponentialDelay = baseMs * 2 ** attempt;
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter const jitterDelay = jitter() * 0.1 * exponentialDelay; // 10% jitter
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds return Math.min(exponentialDelay + jitterDelay, 30000); // Cap at 30 seconds
}
/** Upper bound, in milliseconds, on the wait computed by `calculateRateLimitWaitMs`. */
const MAX_RATE_LIMIT_WAIT_MS = 30_000;
/**
 * Reset-header values up to this number (one day in seconds) are treated as
 * a number of seconds to wait; larger values are treated as an epoch
 * timestamp in seconds.
 */
const MAX_DELTA_RESET_SECONDS = 86_400;
/**
 * Merges two header maps into a new one with lower-cased names.
 * Custom headers are applied after the defaults, so they win whenever both
 * maps contain the same name in any letter case.
 *
 * @param defaultHeaders - Base headers.
 * @param customHeaders - Optional overrides.
 * @returns A new map; neither input is modified.
 */
function mergeHeaders(
  defaultHeaders: Record<string, string>,
  customHeaders?: Record<string, string>,
): Record<string, string> {
  const combined: Record<string, string> = {};
  const layers = [defaultHeaders, customHeaders ?? {}];
  layers.forEach((layer) => {
    Object.entries(layer).forEach(([headerName, headerValue]) => {
      combined[headerName.toLowerCase()] = headerValue;
    });
  });
  return combined;
}
/**
 * Converts a rate-limit reset header into a wait time in milliseconds.
 *
 * Values up to `MAX_DELTA_RESET_SECONDS` are read as "seconds from now";
 * larger values are read as an epoch timestamp in seconds. The result is
 * clamped to the range 0..`MAX_RATE_LIMIT_WAIT_MS`.
 *
 * @param resetHeader - Raw header value, or `null` when absent.
 * @param fallbackWaitMs - Returned unchanged when the header is missing,
 *   empty or not a finite number.
 * @returns Milliseconds to wait before retrying.
 */
function calculateRateLimitWaitMs(
  resetHeader: string | null,
  fallbackWaitMs: number,
): number {
  const resetValue = resetHeader ? Number(resetHeader) : Number.NaN;
  if (!Number.isFinite(resetValue)) {
    return fallbackWaitMs;
  }

  let waitMs = resetValue * 1000;
  if (resetValue > MAX_DELTA_RESET_SECONDS) {
    // Epoch timestamp: wait only for the time remaining until it.
    waitMs -= Date.now();
  }

  const nonNegativeWait = Math.max(0, waitMs);
  return Math.min(nonNegativeWait, MAX_RATE_LIMIT_WAIT_MS);
}
/** Result type when includeResponseUrl is true */
export interface FetchHtmlResult {
html: HTMLString;
responseUrl: string;
} }
/** Options for fetchHtml */ /** Options for fetchHtml */
@@ -74,6 +121,8 @@ export interface FetchHtmlOptions {
timeoutMs?: number; timeoutMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void; onRateInfo?: (remaining: string | null, reset: string | null) => void;
headers?: Record<string, string>; headers?: Record<string, string>;
includeResponseUrl?: boolean;
jitter?: () => number;
} }
/** /**
@@ -81,14 +130,24 @@ export interface FetchHtmlOptions {
* @param url - The URL to fetch * @param url - The URL to fetch
* @param delayMs - Delay in milliseconds between requests (rate limiting) * @param delayMs - Delay in milliseconds between requests (rate limiting)
* @param opts - Optional fetch options * @param opts - Optional fetch options
* @returns The HTML content as a string * @returns The HTML content as a string, or an object with html and responseUrl
* @throws HttpError, NetworkError, or RateLimitError on failure * @throws HttpError, NetworkError, or RateLimitError on failure
*/ */
export async function fetchHtml(
url: string,
delayMs: number,
opts: FetchHtmlOptions & { includeResponseUrl: true },
): Promise<FetchHtmlResult>;
export async function fetchHtml( export async function fetchHtml(
url: string, url: string,
delayMs: number, delayMs: number,
opts?: FetchHtmlOptions, opts?: FetchHtmlOptions,
): Promise<string> { ): Promise<HTMLString>;
export async function fetchHtml(
url: string,
delayMs: number,
opts?: FetchHtmlOptions,
): Promise<HTMLString | FetchHtmlResult> {
const maxRetries = opts?.maxRetries ?? 3; const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 1000; const retryBaseMs = opts?.retryBaseMs ?? 1000;
const timeoutMs = opts?.timeoutMs ?? 30000; const timeoutMs = opts?.timeoutMs ?? 30000;
@@ -119,13 +178,17 @@ export async function fetchHtml(
const controller = new AbortController(); const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs); const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
const res = await fetch(url, { const res = await (async () => {
method: "GET", try {
headers: { ...defaultHeaders, ...opts?.headers }, return await fetch(url, {
signal: controller.signal, method: "GET",
}); headers: mergeHeaders(defaultHeaders, opts?.headers),
signal: controller.signal,
clearTimeout(timeoutId); });
} finally {
clearTimeout(timeoutId);
}
})();
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset"); const rateLimitReset = res.headers.get("X-RateLimit-Reset");
@@ -137,12 +200,17 @@ export async function fetchHtml(
const resetSeconds = rateLimitReset const resetSeconds = rateLimitReset
? Number(rateLimitReset) ? Number(rateLimitReset)
: Number.NaN; : Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = calculateRateLimitWaitMs(
? Math.max(0, resetSeconds * 1000) rateLimitReset,
: calculateBackoffDelay(attempt, retryBaseMs); calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
);
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => setTimeout(resolve, waitMs)); await delay(waitMs);
continue; continue;
} }
throw new RateLimitError( throw new RateLimitError(
@@ -154,8 +222,12 @@ export async function fetchHtml(
// Retry on server errors // Retry on server errors
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await new Promise((resolve) => await delay(
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
); );
continue; continue;
} }
@@ -170,8 +242,10 @@ export async function fetchHtml(
const html = await res.text(); const html = await res.text();
// Respect per-request delay to maintain rate limiting // Respect per-request delay to maintain rate limiting
await new Promise((resolve) => setTimeout(resolve, delayMs)); await delay(delayMs);
return html; return opts?.includeResponseUrl
? { html, responseUrl: res.url || url }
: html;
} catch (err) { } catch (err) {
// Re-throw known errors // Re-throw known errors
if ( if (
@@ -184,8 +258,12 @@ export async function fetchHtml(
if (err instanceof Error && err.name === "AbortError") { if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await delay(
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
); );
continue; continue;
} }
@@ -194,8 +272,12 @@ export async function fetchHtml(
// Network or other errors // Network or other errors
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await delay(
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
); );
continue; continue;
} }

View File

@@ -0,0 +1,10 @@
/** True when NODE_ENV is "test"; evaluated on every call, not cached. */
function isTestRun(): boolean {
  return process.env.NODE_ENV === "test";
}

/**
 * Thin wrapper around `console` that stays silent while NODE_ENV is "test".
 */
export const logger = {
  /** Forwards to `console.log` unless running under test. */
  log(...args: Parameters<typeof console.log>): void {
    if (isTestRun()) return;
    console.log(...args);
  },
  /** Forwards to `console.warn` unless running under test. */
  warn(...args: Parameters<typeof console.warn>): void {
    if (isTestRun()) return;
    console.warn(...args);
  },
};

View File

@@ -0,0 +1,58 @@
import type { UnstableListingBuckets } from "../types/common";
/**
 * Minimal shape a listing needs for price-based classification.
 * A listing whose price is missing, null, non-finite or not positive is
 * never flagged as unstable.
 */
interface HasListingPrice {
/** Listing price; `cents` is the amount that gets compared against the median. */
listingPrice?: { cents?: number } | null;
}
/**
 * Returns the median of `values`: the middle element for an odd count, the
 * mean of the two middle elements for an even count, and 0 for an empty
 * array. The input is not sorted here, so callers must pass it in order.
 */
function getMedian(values: number[]): number {
  const count = values.length;
  const upperIndex = Math.floor(count / 2);
  const upperValue = values[upperIndex] ?? 0;
  if (count % 2 !== 0) {
    return upperValue;
  }
  const lowerValue = values[upperIndex - 1] ?? 0;
  return (lowerValue + upperValue) / 2;
}
/**
 * Splits listings into regular and "unstable" buckets by price.
 *
 * A listing is unstable when its price in cents is a finite positive number
 * below 80% of the median of all finite positive prices. With fewer than two
 * such prices nothing is flagged and every listing is returned in `results`.
 * The input order is preserved within each bucket.
 *
 * @param listings - Listings to classify; the array is not modified.
 * @returns The two buckets.
 */
export function classifyUnstableListings<T extends HasListingPrice>(
  listings: T[],
): UnstableListingBuckets<T> {
  const isUsablePrice = (cents: number | undefined): cents is number =>
    typeof cents === "number" && Number.isFinite(cents) && cents > 0;

  const sortedPrices = listings
    .flatMap((listing) => {
      const cents = listing.listingPrice?.cents;
      return isUsablePrice(cents) ? [cents] : [];
    })
    .sort((a, b) => a - b);

  if (sortedPrices.length < 2) {
    return { results: Array.from(listings), unstableResults: [] };
  }

  const cutoff = getMedian(sortedPrices) * 0.8;
  const stable: T[] = [];
  const unstable: T[] = [];

  for (const listing of listings) {
    const cents = listing.listingPrice?.cents;
    const bucket = isUsablePrice(cents) && cents < cutoff ? unstable : stable;
    bucket.push(listing);
  }

  return { results: stable, unstableResults: unstable };
}

View File

@@ -0,0 +1,24 @@
import { afterEach, describe, expect, mock, test } from "bun:test";
import { delay } from "../src/utils/delay";
// Verifies that delay() resolves without scheduling a timer when NODE_ENV is "test".
describe("delay", () => {
// Captured once so that every test can put the globals back.
const originalNodeEnv = process.env.NODE_ENV;
const originalSetTimeout = globalThis.setTimeout;
afterEach(() => {
// Undo the NODE_ENV override and the setTimeout patch applied by the test.
process.env.NODE_ENV = originalNodeEnv;
globalThis.setTimeout = originalSetTimeout;
});
test("does not schedule throttle timers during tests", async () => {
process.env.NODE_ENV = "test";
// Any attempt to schedule a timer fails loudly instead of silently waiting.
const setTimeoutMock = mock(() => {
throw new Error("setTimeout should not be called during tests");
});
globalThis.setTimeout = setTimeoutMock as unknown as typeof setTimeout;
await delay(1000);
expect(setTimeoutMock).not.toHaveBeenCalled();
});
});

View File

@@ -1,17 +1,42 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import type { EbayListingDetails } from "../src/scrapers/ebay";
import fetchEbayItems from "../src/scrapers/ebay"; import fetchEbayItems from "../src/scrapers/ebay";
import type { UnstableListingBuckets } from "../src/types/common";
type Assert<T extends true> = T;
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
const getUnstableEbayItems = async () =>
fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
type _EbayDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>;
type _EbayUnstableReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getUnstableEbayItems>>,
UnstableListingBuckets<EbayListingDetails>
>
>;
const originalFetch = global.fetch; const originalFetch = global.fetch;
const originalWarn = console.warn; const originalWarn = console.warn;
describe("eBay Scraper Cookie Handling", () => { describe("eBay Scraper Cookie Handling", () => {
beforeEach(() => { beforeEach(() => {
delete process.env.EBAY_COOKIE;
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => Promise.resolve("<html><body></body></html>"), text: () => Promise.resolve("<html><body></body></html>"),
}), }),
) as typeof fetch; ) as unknown as typeof fetch;
}); });
afterEach(() => { afterEach(() => {
@@ -21,21 +46,724 @@ describe("eBay Scraper Cookie Handling", () => {
}); });
test("should ignore request cookie overrides and rely on EBAY_COOKIE", async () => { test("should ignore request cookie overrides and rely on EBAY_COOKIE", async () => {
const warnMock = mock(() => {}); await fetchEbayItems("laptop", 1000);
console.warn = warnMock;
await fetchEbayItems("laptop", 1000, { // First call is homepage warm-up, second is search
cookies: "s=from-request", expect(global.fetch).toHaveBeenCalledTimes(2);
});
expect(global.fetch).toHaveBeenCalledTimes(1); // The search request is the second call
const secondFetchCall = (global.fetch as unknown as ReturnType<typeof mock>)
.mock.calls[1];
if (!secondFetchCall) {
throw new Error("Expected search fetch to be called");
}
const [, init] = (global.fetch as ReturnType<typeof mock>).mock.calls[0]; const [searchUrl, init] = secondFetchCall;
const headers = (init as RequestInit).headers as Record<string, string>; const headers = (init as RequestInit).headers as Record<string, string>;
expect(searchUrl).toBe(
"https://www.ebay.ca/sch/i.html?_nkw=laptop&_sacat=0&_from=R40&LH_BIN=1&LH_PrefLoc=1",
);
expect(headers.Cookie).toBeUndefined(); expect(headers.Cookie).toBeUndefined();
expect(warnMock).toHaveBeenCalledWith( });
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
test("keeps relative item links on the ebay.ca host", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([
expect.objectContaining({ url: "https://www.ebay.ca/itm/123" }),
]);
});
test("returns empty results when eBay rate-limits the request", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: false,
status: 429,
headers: { get: () => "0" },
text: () => Promise.resolve(""),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([]);
});
test("deduplicates repeated item links from the same card", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="/itm/123"><span>Open</span></a>
<a href="/itm/123"><span>Image</span></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toHaveLength(1);
expect(results[0]).toEqual(
expect.objectContaining({ url: "https://www.ebay.ca/itm/123" }),
);
});
// Two cards reference item 123 with different query strings (one relative,
// one absolute). Only one listing is expected, carrying the first card's URL.
test("deduplicates tracking variants of the same item URL", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123?_trkparms=foo"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/123?hash=item123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  expect(listings).toHaveLength(1);
  const survivingUrl = "https://www.ebay.ca/itm/123?_trkparms=foo";
  expect(listings[0]).toEqual(expect.objectContaining({ url: survivingUrl }));
});
// Same idea as the plain /itm/<id> case, but with "/itm/<slug>/<id>" paths:
// the two variants of item 1234567890 collapse into one listing, while the
// card for item 9999999999 (same slug, different id) stays separate.
test("deduplicates tracking variants of SEO-style item URLs", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/title-slug/1234567890?_trkparms=foo"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/title-slug/1234567890?hash=item123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/title-slug/9999999999?hash=item999"></a>
<h3>Another Laptop Bundle</h3>
<span class="s-item__price">CA $110.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  expect(listings).toHaveLength(2);
  const [firstListing, secondListing] = [listings[0], listings[1]];
  expect(firstListing).toEqual(
    expect.objectContaining({
      url: "https://www.ebay.ca/itm/title-slug/1234567890?_trkparms=foo",
    }),
  );
  expect(secondListing).toEqual(
    expect.objectContaining({
      url: "https://www.ebay.ca/itm/title-slug/9999999999?hash=item999",
    }),
  );
});
// A price written as "$100.00" with no country prefix is expected to be
// labelled CAD.
test("treats bare dollar prices as CAD on ebay.ca", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">$100.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const cadPrice = expect.objectContaining({ currency: "CAD" });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: cadPrice }),
  ]);
});
// Feeds the parser the newer "s-card" layout, where several attributes
// (class, href, role) are unquoted, and checks that title, URL and price are
// still extracted.
test("parses current eBay s-card markup with unquoted item links", async () => {
  global.fetch = mock(() =>
    Promise.resolve({
      ok: true,
      // Same header stub as the sibling eBay mocks, so a header lookup in the
      // fetch path hits `get()` instead of throwing on an undefined `headers`.
      headers: { get: () => null },
      text: () =>
        Promise.resolve(`
<html><body>
<div class="s-card s-card--horizontal">
<div class=su-card-container__header>
<a class=s-card__link href=https://ebay.com/itm/1234567890?itmmeta=abc>
<div role=heading aria-level=3 class=s-card__title>
<span class="su-styled-text primary default">Apple MacBook Air M1 2020 8GB 256GB</span>
</div>
</a>
</div>
<div class=su-card-container__attributes>
<span class="su-styled-text primary bold large-1 s-card__price">CA $599.00</span>
</div>
</div>
</body></html>
`),
    }),
  ) as unknown as typeof fetch;

  const results = await fetchEbayItems("macbook", 1000);

  expect(results).toEqual([
    expect.objectContaining({
      title: "Apple MacBook Air M1 2020 8GB 256GB",
      url: "https://ebay.com/itm/1234567890?itmmeta=abc",
      listingPrice: expect.objectContaining({ cents: 59_900 }),
    }),
  ]);
});
// The page contains no listing cards, only a <script data-inlinepayload>
// attribute holding URI-encoded JSON. The listing described by that payload
// is expected to be returned with its price fields populated.
test("parses embedded eBay payload listings before HTML fallback", async () => {
  // The fixture URI-encodes the JSON before placing it in the attribute.
  const payload = encodeURIComponent(
    JSON.stringify({
      searchResults: [
        {
          title: "Apple MacBook Air M1 API Result",
          itemWebUrl: "https://www.ebay.ca/itm/9876543210?hash=item987",
          price: { value: "550.00", currency: "CAD" },
        },
      ],
    }),
  );
  global.fetch = mock(() =>
    Promise.resolve({
      ok: true,
      // Same header stub as the sibling eBay mocks, so a header lookup in the
      // fetch path hits `get()` instead of throwing on an undefined `headers`.
      headers: { get: () => null },
      text: () =>
        Promise.resolve(`
<html><body>
<script data-inlinepayload="${payload}"></script>
</body></html>
`),
    }),
  ) as unknown as typeof fetch;

  const results = await fetchEbayItems("macbook", 1000);

  expect(results).toEqual([
    expect.objectContaining({
      title: "Apple MacBook Air M1 API Result",
      url: "https://www.ebay.ca/itm/9876543210?hash=item987",
      listingPrice: expect.objectContaining({
        amountFormatted: "CAD 550.00",
        cents: 55_000,
        currency: "CAD",
      }),
    }),
  ]);
});
// "US $123.45" (with a space after the prefix) should yield currency USD and
// 12345 cents.
test("treats US dollar prices as USD", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">US $123.45</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const usdPrice = expect.objectContaining({ currency: "USD", cents: 12345 });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: usdPrice }),
  ]);
});
// Variant of the USD case where the prefix is glued to the symbol
// ("US$123.45"); the expected currency and cents are unchanged.
test("treats US dollar prices without space as USD", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">US$123.45</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const usdPrice = expect.objectContaining({ currency: "USD", cents: 12345 });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: usdPrice }),
  ]);
});
// A "£"-prefixed price should be reported with currency GBP.
test("maps pound prices to GBP", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">£123.45</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const gbpPrice = expect.objectContaining({ currency: "GBP", cents: 12345 });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: gbpPrice }),
  ]);
});
// Two cards, one priced in "€" and one in "¥", should come back in document
// order as EUR (12345 cents) and JPY (12300 cents for "¥123").
test("maps euro and yen prices to the matching currency labels", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Euro Bundle</h3>
<span class="s-item__price">€123.45</span>
</li>
<li class="s-item">
<a href="/itm/456"></a>
<h3>Yen Bundle</h3>
<span class="s-item__price">¥123</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const searchOptions = { keywords: ["bundle"] };
  const listings = await fetchEbayItems("bundle", 1000, searchOptions);

  const euroPrice = expect.objectContaining({ currency: "EUR", cents: 12345 });
  const yenPrice = expect.objectContaining({ currency: "JPY", cents: 12300 });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: euroPrice }),
    expect.objectContaining({ listingPrice: yenPrice }),
  ]);
});
// The price element holds a struck-through original (<s>CA $150.00</s>)
// followed by the current price; the current CA $100.00 is the one expected.
test("prefers the discounted Canadian-formatted price", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">
<s>CA $150.00</s>
<span>CA $100.00</span>
</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const discountedPrice = expect.objectContaining({
    amountFormatted: "CA $100.00",
    cents: 10000,
  });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: discountedPrice }),
  ]);
});
// Same struck-through/current layout as the basic discount case, but with
// amounts written without a thousands separator ("1500.00" / "1000.00").
test("prefers discounted Canadian prices that contain four consecutive digits", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">
<s>CA $1500.00</s>
<span>CA $1000.00</span>
</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const discountedPrice = expect.objectContaining({
    amountFormatted: "CA $1000.00",
    cents: 100000,
  });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: discountedPrice }),
  ]);
});
// USD flavour of the discount case: the struck-through US $150.00 must be
// ignored in favour of US $100.00, and the currency must be USD.
test("prefers discounted US dollar prices over original prices", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">
<s>US $150.00</s>
<span>US $100.00</span>
</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000);

  const discountedPrice = expect.objectContaining({
    amountFormatted: "US $100.00",
    cents: 10000,
    currency: "USD",
  });
  expect(listings).toEqual([
    expect.objectContaining({ listingPrice: discountedPrice }),
  ]);
});
// A naturally short title ("Free Bike") with a CA $0.00 price should be
// returned as-is, with 0 cents in CAD.
test("keeps short titles that were not shortened by UI cleaning", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Free Bike</h3>
<span class="s-item__price">CA $0.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("bike", 1000);

  const freeCadPrice = expect.objectContaining({ cents: 0, currency: "CAD" });
  expect(listings).toEqual([
    expect.objectContaining({ title: "Free Bike", listingPrice: freeCadPrice }),
  ]);
});
// The price sits in a plain <div> with no price-specific class; it should
// still be picked up, including a four-digit amount (CA $2500.00).
test("accepts higher fallback prices without price classes", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Studio Microphone Bundle</h3>
<div>CA $2500.00</div>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const searchOptions = { keywords: ["microphone"] };
  const listings = await fetchEbayItems("microphone", 1000, searchOptions);

  const fallbackPrice = expect.objectContaining({
    amountFormatted: "CA $2500.00",
    cents: 250000,
  });
  expect(listings).toEqual([
    expect.objectContaining({
      title: "Studio Microphone Bundle",
      listingPrice: fallbackPrice,
    }),
  ]);
});
// With minPrice and maxPrice both set to 0, a $0.00 listing must survive
// the price filter.
test("retains free items when the requested price range includes zero", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Free Laptop Bundle</h3>
<span class="s-item__price">$0.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const zeroOnlyRange = { minPrice: 0, maxPrice: 0 };
  const listings = await fetchEbayItems("laptop", 1000, zeroOnlyRange);

  expect(listings).toEqual([
    expect.objectContaining({
      title: "Free Laptop Bundle",
      listingPrice: expect.objectContaining({ cents: 0 }),
    }),
  ]);
});
// Passing { hideUnstableResults: true } as the fourth argument should switch
// the return value from a flat array to { results, unstableResults }. Of the
// three cards, the CA $70.00 one is expected in the unstable bucket.
test("returns results and unstableResults when unstable mode is enabled", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="https://www.ebay.ca/itm/1"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/2"></a>
<h3>Another Laptop Bundle</h3>
<span class="s-item__price">CA $110.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/3"></a>
<h3>Cheap Laptop Bundle</h3>
<span class="s-item__price">CA $70.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const buckets = await fetchEbayItems(
    "laptop",
    1000,
    {},
    { hideUnstableResults: true },
  );

  const titled = (title: string) => expect.objectContaining({ title });
  expect(buckets).toEqual({
    results: [titled("Stable Laptop Bundle"), titled("Another Laptop Bundle")],
    unstableResults: [titled("Cheap Laptop Bundle")],
  });
});
// Three cards are served but maxItems is 2; the first two cards, in document
// order, are the ones expected back.
test("respects maxItems in default mode", async () => {
  const searchPage = `
<html><body>
<li class="s-item">
<a href="https://www.ebay.ca/itm/1"></a>
<h3>First Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/2"></a>
<h3>Second Bundle</h3>
<span class="s-item__price">CA $110.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/3"></a>
<h3>Third Bundle</h3>
<span class="s-item__price">CA $70.00</span>
</li>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    headers: { get: () => null },
    text: async () => searchPage,
  })) as unknown as typeof fetch;

  const listings = await fetchEbayItems("laptop", 1000, { maxItems: 2 });

  expect(listings).toHaveLength(2);
  const [firstListing, secondListing] = [listings[0], listings[1]];
  expect(firstListing).toEqual(
    expect.objectContaining({ title: "First Bundle" }),
  );
  expect(secondListing).toEqual(
    expect.objectContaining({ title: "Second Bundle" }),
  );
});
test("respects maxItems in unstable mode", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
headers: { get: () => null },
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="https://www.ebay.ca/itm/1"></a>
<h3>First Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/2"></a>
<h3>Second Bundle</h3>
<span class="s-item__price">CA $110.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/3"></a>
<h3>Third Bundle</h3>
<span class="s-item__price">CA $70.00</span>
</li>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems(
"laptop",
1000,
{ maxItems: 2 },
{ hideUnstableResults: true },
);
expect(results.results).toHaveLength(2);
expect(results.unstableResults).toHaveLength(0);
expect(results.results[0]).toEqual(
expect.objectContaining({ title: "First Bundle" }),
);
expect(results.results[1]).toEqual(
expect.objectContaining({ title: "Second Bundle" }),
); );
}); });
}); });

View File

@@ -1,18 +1,50 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import cliProgress from "cli-progress";
import { import {
classifyFacebookResponse, classifyFacebookResponse,
ensureFacebookCookies, ensureFacebookCookies,
extractFacebookBootstrapCandidates, extractFacebookBootstrapCandidates,
extractFacebookItemData, extractFacebookItemData,
extractFacebookMarketplaceData, extractFacebookMarketplaceData,
type FacebookListingDetails,
fetchFacebookItem, fetchFacebookItem,
default as fetchFacebookItems,
parseFacebookAds, parseFacebookAds,
parseFacebookCookieString, parseFacebookCookieString,
parseFacebookItem, parseFacebookItem,
} from "../src/scrapers/facebook"; } from "../src/scrapers/facebook";
import type { UnstableListingBuckets } from "../src/types/common";
import { formatCookiesForHeader } from "../src/utils/cookies"; import { formatCookiesForHeader } from "../src/utils/cookies";
import { formatCentsToCurrency } from "../src/utils/format"; import { formatCentsToCurrency } from "../src/utils/format";
// Snapshot of the TTY flag taken when this module loads; the suite's
// afterEach writes it back so tests that override process.stdout.isTTY do not
// leak into later tests.
const originalStdoutIsTTY = process.stdout.isTTY;
// Compile-time assertion helper: instantiating it with any type argument other
// than `true` violates the `extends true` constraint and fails type-checking.
type Assert<T extends true> = T;
// Type-level equality check. Compares the two generic conditional signatures
// in both directions and resolves to `true` only when both comparisons hold;
// any mismatch resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Never invoked at runtime in the visible code: these wrappers exist so the
// type aliases below can inspect what fetchFacebookItems returns for each
// call shape.
// Query-only call; the return type is left to inference on purpose.
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
// Call that passes { hideUnstableResults: true } as the fifth argument. The
// explicit annotation makes the compiler check the call's result against
// UnstableListingBuckets<FacebookListingDetails>.
const getUnstableFacebookItems = async (): Promise<
UnstableListingBuckets<FacebookListingDetails>
> =>
fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
// Compile-time check: the default call must resolve to exactly
// FacebookListingDetails[].
type _FacebookDefaultReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getDefaultFacebookItems>>,
FacebookListingDetails[]
>
>;
// Compile-time check: the hideUnstableResults call must resolve to exactly
// UnstableListingBuckets<FacebookListingDetails>.
type _FacebookUnstableReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getUnstableFacebookItems>>,
UnstableListingBuckets<FacebookListingDetails>
>
>;
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;
@@ -20,11 +52,12 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
beforeEach(() => { beforeEach(() => {
global.fetch = mock(() => { global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests"); throw new Error("fetch should be mocked in individual tests");
}); }) as unknown as typeof fetch;
}); });
afterEach(() => { afterEach(() => {
global.fetch = originalFetch; global.fetch = originalFetch;
process.stdout.isTTY = originalStdoutIsTTY;
}); });
describe("Cookie Parsing", () => { describe("Cookie Parsing", () => {
@@ -37,6 +70,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[0]).toEqual({ expect(result[0]).toEqual({
name: "c_user", name: "c_user",
value: "123456789", value: "123456789",
rawValue: "123456789",
domain: ".facebook.com", domain: ".facebook.com",
path: "/", path: "/",
secure: true, secure: true,
@@ -47,6 +81,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[1]).toEqual({ expect(result[1]).toEqual({
name: "xs", name: "xs",
value: "abcdef123456", value: "abcdef123456",
rawValue: "abcdef123456",
domain: ".facebook.com", domain: ".facebook.com",
path: "/", path: "/",
secure: true, secure: true,
@@ -60,8 +95,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const cookieString = "c_user=123%2B456; xs=abc%3Ddef"; const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);
expect(result[0].value).toBe("123+456"); expect(result[0]?.value).toBe("123+456");
expect(result[1].value).toBe("abc=def"); expect(result[1]?.value).toBe("abc=def");
});
test("should preserve raw encoded values when formatting cookie headers", () => {
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = formatCookiesForHeader(
parseFacebookCookieString(cookieString),
"www.facebook.com",
);
expect(result).toBe(cookieString);
}); });
test("should filter out malformed cookies", () => { test("should filter out malformed cookies", () => {
@@ -82,10 +127,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(2); expect(result).toHaveLength(2);
expect(result[0].name).toBe("c_user"); expect(result[0]?.name).toBe("c_user");
expect(result[0].value).toBe("123"); expect(result[0]?.value).toBe("123");
expect(result[1].name).toBe("xs"); expect(result[1]?.name).toBe("xs");
expect(result[1].value).toBe("abc"); expect(result[1]?.value).toBe("abc");
}); });
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => { test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
@@ -144,10 +189,6 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}); });
test("should handle authentication errors", async () => { test("should handle authentication errors", async () => {
const originalWarn = console.warn;
const warnMock = mock(() => {});
console.warn = warnMock;
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: false, ok: false,
@@ -157,17 +198,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
try { const result = await fetchFacebookItem("123");
const result = await fetchFacebookItem("123"); expect(result).toBeNull();
expect(result).toBeNull(); expect(global.fetch).toHaveBeenCalledTimes(1);
expect(warnMock).toHaveBeenCalledWith(
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
);
} finally {
console.warn = originalWarn;
}
}); });
test("should handle item not found", async () => { test("should handle item not found", async () => {
@@ -180,7 +215,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const result = await fetchFacebookItem("nonexistent"); const result = await fetchFacebookItem("nonexistent");
expect(result).toBeNull(); expect(result).toBeNull();
@@ -240,13 +275,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null, get: () => null,
}, },
}); });
}); }) as unknown as typeof fetch;
const _result = await fetchFacebookItem("123"); const _result = await fetchFacebookItem("123");
expect(attempts).toBe(2); expect(attempts).toBe(2);
// Should eventually succeed after retry // Should eventually succeed after retry
}); });
// Every attempt is answered with HTTP 429 and an "X-RateLimit-Reset" header
// of "0". The fetch is expected to give up with null after exactly four
// calls to fetch.
test("should handle exhausted rate limiting retries as a 429", async () => {
  let callCount = 0;
  const lookupHeader = (header: string) =>
    header === "X-RateLimit-Reset" ? "0" : null;

  global.fetch = mock(async () => {
    callCount += 1;
    return {
      ok: false,
      status: 429,
      headers: { get: lookupHeader },
      text: async () => "Rate limited",
    };
  }) as unknown as typeof fetch;

  const item = await fetchFacebookItem("429-loop");

  expect(item).toBeNull();
  expect(callCount).toBe(4);
});
test("should handle sold items", async () => { test("should handle sold items", async () => {
const mockData = { const mockData = {
require: [ require: [
@@ -288,12 +347,107 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const result = await fetchFacebookItem("456"); const result = await fetchFacebookItem("456");
expect(result?.listingStatus).toBe("SOLD"); expect(result?.listingStatus).toBe("SOLD");
}); });
// The page shows a "This item has been sold" banner but also embeds a full
// listing payload (is_sold: true). The payload should still be parsed and
// the status reported as SOLD.
test("should still parse sold items when structured data exists", async () => {
  const listingJson = JSON.stringify({
    payload: {
      listing: {
        id: "457",
        __typename: "GroupCommerceProductItem",
        marketplace_listing_title: "Structured Sold Item",
        formatted_price: { text: "CA$90" },
        listing_price: {
          amount: "90.00",
          currency: "CAD",
          amount_with_offset: "90.00",
        },
        is_sold: true,
        is_live: false,
      },
    },
  });
  const itemPage = `
<html><body>
<div>This item has been sold</div>
<script>"XCometMarketplacePermalinkController"</script>
<script>
${listingJson}
</script>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    text: async () => itemPage,
    url: "https://www.facebook.com/marketplace/item/457/",
    headers: { get: () => null },
  })) as unknown as typeof fetch;

  const item = await fetchFacebookItem("457");

  const soldListing = expect.objectContaining({
    title: "Structured Sold Item",
    listingStatus: "SOLD",
  });
  expect(item).toEqual(soldListing);
});
// The page carries a "no longer available" banner, yet the embedded payload
// describes a live listing (is_live: true). The payload is expected to win,
// giving an ACTIVE listing.
test("should parse structured data even when an unavailable banner is present", async () => {
  const listingJson = JSON.stringify({
    payload: {
      listing: {
        id: "458",
        __typename: "GroupCommerceProductItem",
        marketplace_listing_title: "Recovered Item",
        formatted_price: { text: "CA$120" },
        listing_price: {
          amount: "120.00",
          currency: "CAD",
          amount_with_offset: "120.00",
        },
        is_live: true,
      },
    },
  });
  const itemPage = `
<html><body>
<div>This listing is no longer available.</div>
<script>"XCometMarketplacePermalinkController"</script>
<script>
${listingJson}
</script>
</body></html>
`;
  global.fetch = mock(async () => ({
    ok: true,
    text: async () => itemPage,
    url: "https://www.facebook.com/marketplace/item/458/",
    headers: { get: () => null },
  })) as unknown as typeof fetch;

  const item = await fetchFacebookItem("458");

  const activeListing = expect.objectContaining({
    title: "Recovered Item",
    listingStatus: "ACTIVE",
  });
  expect(item).toEqual(activeListing);
});
test("should handle successful item extraction", async () => { test("should handle successful item extraction", async () => {
const mockData = { const mockData = {
require: [ require: [
@@ -340,7 +494,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const result = await fetchFacebookItem("789"); const result = await fetchFacebookItem("789");
expect(result).not.toBeNull(); expect(result).not.toBeNull();
@@ -359,7 +513,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const result = await fetchFacebookItem("error"); const result = await fetchFacebookItem("error");
expect(result).toBeNull(); expect(result).toBeNull();
@@ -367,6 +521,349 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}); });
}); });
// End-to-end tests for the default-exported search entry point. Each test
// serves a synthetic Marketplace search page through a mocked global.fetch.
describe("fetchFacebookItems", () => {
  const SEARCH_URL =
    "https://www.facebook.com/marketplace/toronto/search?query=chair";

  // Value of FACEBOOK_COOKIE before each test, so it can be put back afterwards.
  let savedCookie: string | undefined;

  // Builds one search-result edge. Key order matches the serialized fixtures,
  // so JSON.stringify produces the same text as writing the object inline.
  const chairEdge = (
    id: string,
    title: string,
    amount: string,
    formattedAmount: string,
  ) => ({
    node: {
      listing: {
        id,
        marketplace_listing_title: title,
        listing_price: {
          amount,
          formatted_amount: formattedAmount,
          currency: "CAD",
        },
        is_live: true,
      },
    },
  });

  // Wraps the edges in the search-page HTML used by every test in this suite.
  const searchPageFor = (edges: ReturnType<typeof chairEdge>[]) =>
    `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
      { payload: { resultGroups: [{ edges }] } },
    )}</script></body></html>`;

  // Points global.fetch at a mock that always answers with the given page.
  const serveSearchPage = (html: string) => {
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(html),
        url: SEARCH_URL,
        headers: { get: () => null },
      }),
    ) as unknown as typeof fetch;
  };

  beforeEach(() => {
    savedCookie = process.env.FACEBOOK_COOKIE;
    process.env.FACEBOOK_COOKIE = "c_user=12345; xs=abc123";
  });

  afterEach(() => {
    if (savedCookie !== undefined) {
      process.env.FACEBOOK_COOKIE = savedCookie;
      return;
    }
    delete process.env.FACEBOOK_COOKIE;
  });

  test("returns an array by default", async () => {
    serveSearchPage(
      searchPageFor([chairEdge("1", "Stable Chair Listing", "120.00", "CA$120")]),
    );

    const listings = await fetchFacebookItems("chair", 1, "toronto", 25);

    expect(Array.isArray(listings)).toBe(true);
    expect(listings).toHaveLength(1);
  });

  test("preserves free listings through the public fetch entrypoint", async () => {
    serveSearchPage(
      searchPageFor([chairEdge("free-1", "Free Chair", "0.00", "FREE")]),
    );

    const listings = await fetchFacebookItems("chair", 1, "toronto", 25);

    const freePrice = expect.objectContaining({
      cents: 0,
      amountFormatted: "FREE",
    });
    expect(listings).toEqual([
      expect.objectContaining({ title: "Free Chair", listingPrice: freePrice }),
    ]);
  });

  test("does not start a progress bar when stdout is not a TTY", async () => {
    const searchPage = searchPageFor([
      chairEdge("1", "Chair Listing", "120.00", "CA$120"),
    ]);
    process.stdout.isTTY = false;

    const startSpy = mock(() => {});
    const updateSpy = mock(() => {});
    const stopSpy = mock(() => {});
    const barPrototype = cliProgress.SingleBar.prototype;
    const realStart = barPrototype.start;
    const realUpdate = barPrototype.update;
    const realStop = barPrototype.stop;

    try {
      // Swap the progress-bar methods for spies so any use is observable.
      cliProgress.SingleBar.prototype.start = startSpy;
      cliProgress.SingleBar.prototype.update = updateSpy;
      cliProgress.SingleBar.prototype.stop = stopSpy;
      serveSearchPage(searchPage);

      const listings = await fetchFacebookItems("chair", 1, "toronto", 25);

      expect(listings).toHaveLength(1);
      expect(startSpy).not.toHaveBeenCalled();
      expect(updateSpy).not.toHaveBeenCalled();
      expect(stopSpy).not.toHaveBeenCalled();
    } finally {
      // Always reinstate the real methods, even if an expectation failed.
      cliProgress.SingleBar.prototype.start = realStart;
      cliProgress.SingleBar.prototype.update = realUpdate;
      cliProgress.SingleBar.prototype.stop = realStop;
    }
  });

  test("returns results and unstableResults when unstable mode is enabled", async () => {
    serveSearchPage(
      searchPageFor([
        chairEdge("1", "Stable Chair Listing", "100.00", "CA$100"),
        chairEdge("2", "Another Stable Chair", "110.00", "CA$110"),
        chairEdge("3", "Suspiciously Cheap Chair", "70.00", "CA$70"),
      ]),
    );

    const buckets = await fetchFacebookItems("chair", 1, "toronto", 25, {
      hideUnstableResults: true,
    });

    const titled = (title: string) => expect.objectContaining({ title });
    expect(buckets).toEqual({
      results: [
        titled("Stable Chair Listing"),
        titled("Another Stable Chair"),
      ],
      unstableResults: [titled("Suspiciously Cheap Chair")],
    });
  });

  // The fourth positional argument is 2 here (the test title refers to it as
  // the MAX_ITEMS limit), yet the third, cheaper listing is still expected to
  // show up in unstableResults.
  test("unstable mode classifies before the final MAX_ITEMS limit", async () => {
    serveSearchPage(
      searchPageFor([
        chairEdge("1", "Boundary Stable Chair", "100.00", "CA$100"),
        chairEdge("2", "Second Boundary Stable Chair", "110.00", "CA$110"),
        chairEdge("3", "Past Boundary Cheap Chair", "70.00", "CA$70"),
      ]),
    );

    const buckets = await fetchFacebookItems("chair", 1, "toronto", 2, {
      hideUnstableResults: true,
    });

    const titled = (title: string) => expect.objectContaining({ title });
    expect(buckets).toEqual({
      results: [
        titled("Boundary Stable Chair"),
        titled("Second Boundary Stable Chair"),
      ],
      unstableResults: [titled("Past Boundary Cheap Chair")],
    });
  });
});
describe("Data Extraction", () => { describe("Data Extraction", () => {
describe("extractFacebookItemData", () => { describe("extractFacebookItemData", () => {
test("extracts item details from Comet permalink bootstrap candidates", () => { test("extracts item details from Comet permalink bootstrap candidates", () => {
@@ -388,7 +885,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}, },
redacted_description: { text: "Solid wood chair" }, redacted_description: { text: "Solid wood chair" },
location_text: { text: "Toronto, ON" }, location_text: { text: "Toronto, ON" },
marketplace_listing_seller: { id: "seller-1", name: "Alex" }, marketplace_listing_seller: {
id: "seller-1",
name: "Alex",
},
condition: "USED", condition: "USED",
is_live: true, is_live: true,
}, },
@@ -633,7 +1133,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = extractFacebookMarketplaceData(html); const result = extractFacebookMarketplaceData(html);
expect(result).not.toBeNull(); expect(result).not.toBeNull();
expect(result).toHaveLength(2); expect(result).toHaveLength(2);
expect(result?.[0].node.listing.marketplace_listing_title).toBe( expect(result?.[0]?.node.listing.marketplace_listing_title).toBe(
"Item 1", "Item 1",
); );
}); });
@@ -654,11 +1154,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = extractFacebookMarketplaceData(html); const result = extractFacebookMarketplaceData(html);
expect(result).not.toBeNull(); expect(result).not.toBeNull();
expect(result).toHaveLength(1); expect(result).toHaveLength(1);
expect(result?.[0].node.listing.id).toBe("987654321"); expect(result?.[0]?.node.listing.id).toBe("987654321");
expect(result?.[0].node.listing.marketplace_listing_title).toBe( expect(result?.[0]?.node.listing.marketplace_listing_title).toBe(
"Vintage Bike", "Vintage Bike",
); );
expect(result?.[0].node.listing.listing_price).toEqual({ expect(result?.[0]?.node.listing.listing_price).toEqual({
amount: "120.00", amount: "120.00",
formatted_amount: "CA$120", formatted_amount: "CA$120",
currency: "CAD", currency: "CAD",
@@ -886,7 +1386,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const ads = extractFacebookMarketplaceData(html); const ads = extractFacebookMarketplaceData(html);
expect(ads).toHaveLength(1); expect(ads).toHaveLength(1);
expect(ads?.[0].node.listing.marketplace_listing_title).toBe("Bike"); expect(ads?.[0]?.node.listing.marketplace_listing_title).toBe("Bike");
}); });
test("prefers the strongest marketplace edge set when multiple edges arrays exist", () => { test("prefers the strongest marketplace edge set when multiple edges arrays exist", () => {
@@ -944,7 +1444,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const ads = extractFacebookMarketplaceData(html); const ads = extractFacebookMarketplaceData(html);
expect(ads).toHaveLength(1); expect(ads).toHaveLength(1);
expect(ads?.[0].node.listing.id).toBe("right-1"); expect(ads?.[0]?.node.listing.id).toBe("right-1");
}); });
test("rejects mixed edge arrays that contain non-listing entries", () => { test("rejects mixed edge arrays that contain non-listing entries", () => {
@@ -1051,10 +1551,21 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}; };
const result = parseFacebookItem(item); const result = parseFacebookItem(item);
expect(result).not.toBeNull(); expect(result).toBeNull();
expect(result?.title).toBe("Minimal Item"); });
expect(result?.description).toBeUndefined();
expect(result?.seller).toBeUndefined(); test("returns null when item price data is present but unparseable", () => {
const item = {
id: "456b",
__typename: "GroupCommerceProductItem" as const,
marketplace_listing_title: "Broken Price Item",
formatted_price: { text: "price unavailable" },
listing_price: { amount: "not-a-number", currency: "CAD" },
};
const result = parseFacebookItem(item);
expect(result).toBeNull();
}); });
test("should identify vehicle listings", () => { test("should identify vehicle listings", () => {
@@ -1158,11 +1669,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
expect(results).toHaveLength(2); expect(results).toHaveLength(2);
expect(results[0].title).toBe("Ad 1"); expect(results[0]?.title).toBe("Ad 1");
expect(results[0].listingPrice?.cents).toBe(5000); expect(results[0]?.listingPrice?.cents).toBe(5000);
expect(results[0].address).toBe("Toronto"); expect(results[0]?.address).toBe("Toronto");
expect(results[1].title).toBe("Ad 2"); expect(results[1]?.title).toBe("Ad 2");
expect(results[1].address).toBe("Ottawa"); expect(results[1]?.address).toBe("Ottawa");
}); });
test("should filter out ads without price", () => { test("should filter out ads without price", () => {
@@ -1194,7 +1705,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("With Price"); expect(results[0]?.title).toBe("With Price");
}); });
test("should handle malformed ads gracefully", () => { test("should handle malformed ads gracefully", () => {
@@ -1217,12 +1728,125 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
node: { node: {
// Missing listing // Missing listing
}, },
} as { node: { listing?: unknown } }, } as unknown as { node: { listing?: unknown } },
];
const results = parseFacebookAds(
ads as unknown as Parameters<typeof parseFacebookAds>[0],
);
expect(results).toHaveLength(1);
expect(results[0]?.title).toBe("Valid Ad");
});
test("parses formatted fallback prices with multiple commas", () => {
const ads = [
{
node: {
listing: {
id: "big-price",
marketplace_listing_title: "Luxury Home",
listing_price: {
amount_with_offset_in_currency: "123456789",
formatted_amount: "$1,234,567.89",
currency: "CAD",
},
is_live: true,
},
},
},
]; ];
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Valid Ad"); expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ cents: 123456789 }),
}),
]);
});
test("does not trust amount_with_offset_in_currency without a parseable formatted price", () => {
const ads = [
{
node: {
listing: {
id: "bad-offset",
marketplace_listing_title: "Broken Price Listing",
listing_price: {
amount_with_offset_in_currency: "123456789",
formatted_amount: "price unavailable",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
expect(results).toEqual([]);
});
test("keeps valid free search listings", () => {
const ads = [
{
node: {
listing: {
id: "free-item",
marketplace_listing_title: "Free Chair",
listing_price: {
amount: "0.00",
formatted_amount: "FREE",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
expect(results).toEqual([
expect.objectContaining({
title: "Free Chair",
listingPrice: expect.objectContaining({
cents: 0,
amountFormatted: "FREE",
}),
}),
]);
});
test("keeps free search listings when amount is missing but formatted_amount is FREE", () => {
const ads = [
{
node: {
listing: {
id: "free-no-amount",
marketplace_listing_title: "Free Sofa",
listing_price: {
formatted_amount: "FREE",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
expect(results).toEqual([
expect.objectContaining({
title: "Free Sofa",
listingPrice: expect.objectContaining({
cents: 0,
amountFormatted: "FREE",
}),
}),
]);
}); });
}); });
}); });

View File

@@ -1,5 +1,7 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook"; import fetchFacebookItems, {
fetchFacebookItem,
} from "../src/scrapers/facebook";
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;
@@ -13,7 +15,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
process.env.FACEBOOK_COOKIE = facebookCookie; process.env.FACEBOOK_COOKIE = facebookCookie;
global.fetch = mock(() => { global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests"); throw new Error("fetch should be mocked in individual tests");
}); }) as unknown as typeof fetch;
}); });
afterEach(() => { afterEach(() => {
@@ -27,35 +29,37 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
describe("Main Search Function", () => { describe("Main Search Function", () => {
test("should successfully fetch search results", async () => { test("should successfully fetch search results", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({ const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
payload: { {
resultGroups: [ payload: {
{ resultGroups: [
edges: [ {
{ edges: [
node: { {
listing: { node: {
id: "1", listing: {
marketplace_listing_title: "iPhone 13", id: "1",
listing_price: { marketplace_listing_title: "iPhone 13",
amount: "500.00", listing_price: {
formatted_amount: "CA$500", amount: "500.00",
currency: "CAD", formatted_amount: "CA$500",
}, currency: "CAD",
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
}, },
location: {
reverse_geocode: {
city_page: { display_name: "Toronto" },
},
},
is_live: true,
}, },
is_live: true,
}, },
}, },
}, ],
], },
}, ],
], },
}, },
})}</script></body></html>`; )}</script></body></html>`;
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
@@ -65,11 +69,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("iPhone", 1, "toronto", 25); const results = await fetchFacebookItems("iPhone", 1, "toronto", 25);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("iPhone 13"); expect(results[0]?.title).toBe("iPhone 13");
}); });
test("should filter out items without price", async () => { test("should filter out items without price", async () => {
@@ -131,11 +135,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25); const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("With Price"); expect(results[0]?.title).toBe("With Price");
}); });
test("should respect MAX_ITEMS parameter", async () => { test("should respect MAX_ITEMS parameter", async () => {
@@ -186,7 +190,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 5); const results = await fetchFacebookItems("test", 1, "toronto", 5);
expect(results).toHaveLength(5); expect(results).toHaveLength(5);
@@ -227,7 +231,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems( const results = await fetchFacebookItems(
"nonexistent query", "nonexistent query",
@@ -248,7 +252,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25); const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]); expect(results).toEqual([]);
@@ -277,7 +281,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("lamp", 1, "toronto", 25); const results = await fetchFacebookItems("lamp", 1, "toronto", 25);
expect(results).toEqual([]); expect(results).toEqual([]);
@@ -318,14 +322,16 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("lamp", 1, "toronto", 25); const results = await fetchFacebookItems("lamp", 1, "toronto", 25);
expect(results).toEqual([]); expect(results).toEqual([]);
}); });
test("should handle network errors", async () => { test("should handle network errors", async () => {
global.fetch = mock(() => Promise.reject(new Error("Network error"))); global.fetch = mock(() =>
Promise.reject(new Error("Network error")),
) as unknown as typeof fetch;
await expect( await expect(
fetchFacebookItems("test", 1, "toronto", 25), fetchFacebookItems("test", 1, "toronto", 25),
@@ -396,7 +402,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}); });
}); }) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25); const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(attempts).toBe(2); expect(attempts).toBe(2);
@@ -469,13 +475,13 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("cars", 1, "toronto", 25); const results = await fetchFacebookItems("cars", 1, "toronto", 25);
expect(results).toHaveLength(2); expect(results).toHaveLength(2);
// Both should be classified as "item" type in search results (vehicle detection is for item details) // Both should be classified as "item" type in search results (vehicle detection is for item details)
expect(results[0].title).toBe("2006 Honda Civic"); expect(results[0]?.title).toBe("2006 Honda Civic");
expect(results[1].title).toBe("iPhone 13"); expect(results[1]?.title).toBe("iPhone 13");
}); });
}); });
@@ -538,7 +544,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems( const results = await fetchFacebookItems(
"nintendo switch", "nintendo switch",
@@ -547,8 +553,8 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
25, 25,
); );
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("Nintendo Switch"); expect(results[0]?.title).toBe("Nintendo Switch");
expect(results[0].categoryId).toBe("479353692612078"); expect(results[0]?.categoryId).toBe("479353692612078");
}); });
test("should handle home goods/furniture listings", async () => { test("should handle home goods/furniture listings", async () => {
@@ -609,12 +615,12 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("table", 1, "toronto", 25); const results = await fetchFacebookItems("table", 1, "toronto", 25);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("Dining Table"); expect(results[0]?.title).toBe("Dining Table");
expect(results[0].categoryId).toBe("1569171756675761"); expect(results[0]?.categoryId).toBe("1569171756675761");
}); });
}); });
@@ -631,7 +637,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25); const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]); expect(results).toEqual([]);
@@ -647,7 +653,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25); const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]); expect(results).toEqual([]);
@@ -663,7 +669,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const results = await fetchFacebookItems("test", 1, "toronto", 25); const results = await fetchFacebookItems("test", 1, "toronto", 25);
expect(results).toEqual([]); expect(results).toEqual([]);
@@ -704,7 +710,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
get: () => null, get: () => null,
}, },
}), }),
); ) as unknown as typeof fetch;
const result = await fetchFacebookItem("123"); const result = await fetchFacebookItem("123");
expect(result).toBeNull(); expect(result).toBeNull();

View File

@@ -0,0 +1,124 @@
import { afterEach, describe, expect, mock, test } from "bun:test";
import { fetchHtml } from "../src/utils/http";
/**
 * Tests for `fetchHtml`: throttle behaviour under NODE_ENV=test, the optional
 * `responseUrl` result field, the wait scheduled after a 429 response, and
 * header-name normalisation for a caller-supplied `Accept` header.
 *
 * Every test replaces process-wide globals (`fetch`, timers, NODE_ENV); the
 * `afterEach` hook puts all of them back.
 */
describe("fetchHtml", () => {
  const originalFetch = global.fetch;
  const originalNodeEnv = process.env.NODE_ENV;
  const originalSetTimeout = globalThis.setTimeout;
  const originalClearTimeout = globalThis.clearTimeout;

  /**
   * Replaces `setTimeout`/`clearTimeout` with stubs and returns the live list
   * of delays passed to `setTimeout`.
   *
   * Handlers run synchronously, except for timers whose delay equals
   * `heldDelayMs`; those are recorded but never fired. Each test passes the
   * same value as `timeoutMs`, so the held timer is presumably the request
   * timeout (TODO confirm against `fetchHtml`).
   */
  const installTimerSpy = (heldDelayMs: number): number[] => {
    const scheduledDelays: number[] = [];

    globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
      scheduledDelays.push(Number(timeout));
      if (timeout !== heldDelayMs && typeof handler === "function") {
        handler();
      }
      return 0 as unknown as ReturnType<typeof setTimeout>;
    }) as unknown as typeof setTimeout;
    globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;

    return scheduledDelays;
  };

  afterEach(() => {
    global.fetch = originalFetch;
    // Assigning `undefined` to a process.env key stores the string
    // "undefined", so an originally-unset NODE_ENV must be deleted instead.
    if (originalNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = originalNodeEnv;
    }
    globalThis.setTimeout = originalSetTimeout;
    globalThis.clearTimeout = originalClearTimeout;
  });

  test("does not schedule throttle timers during tests", async () => {
    process.env.NODE_ENV = "test";
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        headers: { get: () => null },
        text: () => Promise.resolve("<html></html>"),
      }),
    ) as unknown as typeof fetch;
    const scheduledDelays = installTimerSpy(30_000);

    await fetchHtml("https://example.com", 1000, { timeoutMs: 30_000 });

    // 1000 is the second argument given to fetchHtml above; no timer with
    // that delay may be scheduled.
    expect(scheduledDelays).not.toContain(1000);
  });

  test("fetchHtml returns responseUrl when includeResponseUrl is true", async () => {
    process.env.NODE_ENV = "test";
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        status: 200,
        url: "https://example.test/final",
        headers: { get: () => null },
        text: () => Promise.resolve("<html></html>"),
      }),
    ) as unknown as typeof fetch;

    const result = await fetchHtml("https://example.test", 0, {
      includeResponseUrl: true,
    });

    expect(result.html).toBe("<html></html>");
    expect(result.responseUrl).toBe("https://example.test/final");
  });

  test("rate limit epoch reset uses bounded wait", async () => {
    process.env.NODE_ENV = "production";
    // An epoch-seconds reset value 315,360,000 s (3650 days) in the future.
    const farFutureEpochSeconds = Math.floor(Date.now() / 1000) + 315_360_000;
    let calls = 0;

    // First call answers 429 with the far-future reset header; later calls
    // answer 200.
    global.fetch = mock(() => {
      calls += 1;
      return Promise.resolve({
        ok: calls > 1,
        status: calls > 1 ? 200 : 429,
        url: "https://example.test",
        headers: {
          get: (name: string) =>
            name === "X-RateLimit-Reset" ? String(farFutureEpochSeconds) : null,
        },
        text: () => Promise.resolve("<html></html>"),
      });
    }) as unknown as typeof fetch;
    const scheduledDelays = installTimerSpy(1_234_567);

    await fetchHtml("https://example.test", 0, {
      maxRetries: 1,
      timeoutMs: 1_234_567,
    });

    expect(scheduledDelays).toContain(30_000);
    // NOTE(review): this only rules out the raw epoch value in milliseconds;
    // an unbounded "reset minus now" delta would not match it — confirm the
    // intended coverage against the implementation.
    expect(scheduledDelays).not.toContain(farFutureEpochSeconds * 1000);
  });

  test("custom Accept header overrides default accept without duplicate casing", async () => {
    process.env.NODE_ENV = "test";
    const customAccept = "text/plain";
    let requestHeaders: HeadersInit | undefined;

    // Capture the headers fetchHtml hands to fetch.
    global.fetch = mock((_url: string | URL | Request, init?: RequestInit) => {
      requestHeaders = init?.headers;
      return Promise.resolve({
        ok: true,
        status: 200,
        url: "https://example.test",
        headers: { get: () => null },
        text: () => Promise.resolve("<html></html>"),
      });
    }) as unknown as typeof fetch;

    await fetchHtml("https://example.test", 0, {
      headers: { Accept: customAccept },
    });

    expect(requestHeaders).toBeDefined();
    expect((requestHeaders as Record<string, string>).accept).toBe(
      customAccept,
    );
    expect((requestHeaders as Record<string, string>).Accept).toBeUndefined();
  });
});

View File

@@ -1,13 +1,60 @@
import { describe, expect, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import { import {
buildSearchUrl, buildSearchUrl,
type DetailedListing,
default as fetchKijijiItems,
NetworkError, NetworkError,
ParseError, ParseError,
parseDetailedListing,
parseSearch,
RateLimitError, RateLimitError,
resolveCategoryId, resolveCategoryId,
resolveLocationId, resolveLocationId,
ValidationError, ValidationError,
} from "../src/scrapers/kijiji"; } from "../src/scrapers/kijiji";
import type { UnstableListingBuckets } from "../src/types/common";
// Compile-time assertion helper: only the literal type `true` satisfies the
// constraint, so `Assert<false>` is a type error.
type Assert<T extends true> = T;
// Resolves to `true` only when the generic-function probes built from T and U
// extend each other in both directions; any mismatch resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Query-only call; referenced below through `typeof` so its inferred return
// type can be checked.
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
// Same call shape with `hideUnstableResults: true` as the sixth argument; the
// explicit annotation requires the call to resolve to listing buckets.
const getUnstableKijijiItems = async (): Promise<
UnstableListingBuckets<DetailedListing>
> =>
fetchKijijiItems(
"phone",
1000,
"https://www.kijiji.ca",
{},
{},
{ hideUnstableResults: true },
);
// Fails to compile unless the query-only call resolves to exactly
// `DetailedListing[]`.
type _KijijiDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
>;
// Fails to compile unless the `hideUnstableResults` call resolves to exactly
// `UnstableListingBuckets<DetailedListing>`.
type _KijijiUnstableReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getUnstableKijijiItems>>,
UnstableListingBuckets<DetailedListing>
>
>;
// Keep a handle on the real fetch so it can be reinstated after every test.
const originalFetch = global.fetch;

// Before each test, install a fetch that fails loudly, so any test that
// forgets to supply its own mock throws instead of using the real fetch.
beforeEach(function installFetchGuard() {
  const unmockedFetch = mock((): never => {
    throw new Error("fetch should be mocked in individual tests");
  });
  global.fetch = unmockedFetch as unknown as typeof fetch;
});

// Undo whichever fetch replacement the test left behind.
afterEach(function restoreFetch() {
  global.fetch = originalFetch;
});
describe("Location and Category Resolution", () => { describe("Location and Category Resolution", () => {
describe("resolveLocationId", () => { describe("resolveLocationId", () => {
@@ -21,6 +68,7 @@ describe("Location and Category Resolution", () => {
expect(resolveLocationId("ontario")).toBe(9004); expect(resolveLocationId("ontario")).toBe(9004);
expect(resolveLocationId("toronto")).toBe(1700273); expect(resolveLocationId("toronto")).toBe(1700273);
expect(resolveLocationId("gta")).toBe(1700272); expect(resolveLocationId("gta")).toBe(1700272);
expect(resolveLocationId("Nova Scotia")).toBe(9002);
}); });
test("should handle case insensitive matching", () => { test("should handle case insensitive matching", () => {
@@ -77,7 +125,7 @@ describe("URL Construction", () => {
sortOrder: "desc", sortOrder: "desc",
}); });
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272"); expect(url).toContain("b-phones/gta/iphone/k0c132l1700272");
expect(url).toContain("sort=relevancyDesc"); expect(url).toContain("sort=relevancyDesc");
expect(url).toContain("order=DESC"); expect(url).toContain("order=DESC");
}); });
@@ -97,6 +145,7 @@ describe("URL Construction", () => {
sortBy: "date", sortBy: "date",
sortOrder: "asc", sortOrder: "asc",
}); });
expect(dateUrl.match(/sort=/g)?.length).toBe(1);
expect(dateUrl).toContain("sort=DATE"); expect(dateUrl).toContain("sort=DATE");
expect(dateUrl).toContain("order=ASC"); expect(dateUrl).toContain("order=ASC");
@@ -108,12 +157,23 @@ describe("URL Construction", () => {
expect(priceUrl).toContain("order=DESC"); expect(priceUrl).toContain("order=DESC");
}); });
test("includes price filters in the generated search URL", () => {
const url = buildSearchUrl("iphone", {
priceMin: 8000,
priceMax: 10000,
});
expect(url).toContain("priceMin=80");
expect(url).toContain("priceMax=100");
});
test("should handle string location/category inputs", () => { test("should handle string location/category inputs", () => {
const url = buildSearchUrl("iphone", { const url = buildSearchUrl("iphone", {
location: "toronto", location: "toronto",
category: "phones", category: "phones",
}); });
expect(url).toContain("/b-phones/toronto/");
expect(url).toContain("k0c132l1700273"); // phones + toronto expect(url).toContain("k0c132l1700273"); // phones + toronto
}); });
}); });
@@ -155,3 +215,823 @@ describe("Error Classes", () => {
expect(error.name).toBe("ValidationError"); expect(error.name).toBe("ValidationError");
}); });
}); });
describe("fetchKijijiItems", () => {
// Serves one search page linking three listings with amounts 7000, 9000 and
// 12000, then checks that only the 9000 listing survives
// priceMin 8000 / priceMax 10000.
test("filters fetched listings by priceMin and priceMax", async () => {
  // Resolve the request target for every input form `fetch` accepts.
  // `Request#toString()` yields "[object Request]", so read `.url` instead.
  const requestUrl = (input: string | URL | Request): string => {
    if (typeof input === "string") {
      return input;
    }
    return input instanceof URL ? input.href : input.url;
  };

  // Embed an Apollo state object in a `__NEXT_DATA__` script fixture page.
  const nextDataPage = (apolloState: Record<string, unknown>): string =>
    [
      "",
      "<html>",
      '<script id="__NEXT_DATA__" type="application/json">',
      JSON.stringify({
        props: { pageProps: { __APOLLO_STATE__: apolloState } },
      }),
      "</script>",
      "</html>",
      "",
    ].join("\n");

  // Minimal successful response shape handed back by the fetch mock.
  const htmlResponse = (url: string, html: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: { get: () => null },
    url,
  });

  // Detail-page fixture for a single fixed-price, active offer.
  const detailHtml = (title: string, amount: number, path: string): string =>
    nextDataPage({
      "Listing:detail": {
        url: path,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  const detailListings = [
    { path: "/v-low/k0l0", title: "Low Listing", amount: 7000 },
    { path: "/v-mid/k0l0", title: "Mid Listing", amount: 9000 },
    { path: "/v-high/k0l0", title: "High Listing", amount: 12000 },
  ];

  const searchHtml = nextDataPage({
    "Listing:1": { url: "/v-low/k0l0", title: "Low Listing" },
    "Listing:2": { url: "/v-mid/k0l0", title: "Mid Listing" },
    "Listing:3": { url: "/v-high/k0l0", title: "High Listing" },
  });

  global.fetch = mock((input: string | URL | Request) => {
    const url = requestUrl(input);

    if (url.includes("/k0c0l1700272")) {
      return Promise.resolve(htmlResponse(url, searchHtml));
    }

    const detail = detailListings.find(({ path }) => url.endsWith(path));
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${url}`);
    }
    return Promise.resolve(
      htmlResponse(url, detailHtml(detail.title, detail.amount, detail.path)),
    );
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1, priceMin: 8000, priceMax: 10000 },
  );

  expect(results).toEqual([
    expect.objectContaining({ title: "Mid Listing" }),
  ]);
});
// Serves three detail pages behind a 5 ms simulated latency and checks that,
// with 1 as the second argument to fetchKijijiItems, no two detail requests
// are ever in flight at the same time.
test("respects REQUESTS_PER_SECOND without concurrent detail fetch bursts", async () => {
  // Resolve the request target for every input form `fetch` accepts.
  // `Request#toString()` yields "[object Request]", so read `.url` instead.
  const requestUrl = (input: string | URL | Request): string => {
    if (typeof input === "string") {
      return input;
    }
    return input instanceof URL ? input.href : input.url;
  };

  // Embed an Apollo state object in a `__NEXT_DATA__` script fixture page.
  const nextDataPage = (apolloState: Record<string, unknown>): string =>
    [
      "",
      "<html>",
      '<script id="__NEXT_DATA__" type="application/json">',
      JSON.stringify({
        props: { pageProps: { __APOLLO_STATE__: apolloState } },
      }),
      "</script>",
      "</html>",
      "",
    ].join("\n");

  // Minimal successful response shape handed back by the fetch mock.
  const htmlResponse = (url: string, html: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: { get: () => null },
    url,
  });

  // Detail-page fixture; every listing in this test has the same price.
  const detailHtml = (title: string, path: string): string =>
    nextDataPage({
      "Listing:detail": {
        url: path,
        title,
        price: { amount: 10000, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  const detailListings = [
    { path: "/v-one/k0l0", title: "One" },
    { path: "/v-two/k0l0", title: "Two" },
    { path: "/v-three/k0l0", title: "Three" },
  ];

  const searchHtml = nextDataPage({
    "Listing:1": { url: "/v-one/k0l0", title: "One" },
    "Listing:2": { url: "/v-two/k0l0", title: "Two" },
    "Listing:3": { url: "/v-three/k0l0", title: "Three" },
  });

  let inFlightDetailRequests = 0;
  let peakDetailRequests = 0;

  global.fetch = mock(async (input: string | URL | Request) => {
    const url = requestUrl(input);

    if (url.includes("/k0c0l1700272")) {
      return htmlResponse(url, searchHtml);
    }

    // Count this request as active for the whole simulated latency window so
    // overlapping detail fetches show up in the peak.
    inFlightDetailRequests++;
    peakDetailRequests = Math.max(peakDetailRequests, inFlightDetailRequests);
    await new Promise((resolve) => setTimeout(resolve, 5));
    inFlightDetailRequests--;

    const detail = detailListings.find(({ path }) => url.endsWith(path));
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${url}`);
    }
    return htmlResponse(url, detailHtml(detail.title, detail.path));
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1,
    "https://www.kijiji.ca",
    { maxPages: 1 },
  );

  expect(results).toHaveLength(3);
  expect(peakDetailRequests).toBe(1);
});
// Serves two detail pages behind a 300 ms simulated latency and checks that,
// with 4 as the second argument to fetchKijijiItems, detail requests overlap
// (peak > 1) without exceeding 4 in flight.
test("allows bounded concurrency to scale with REQUESTS_PER_SECOND", async () => {
  // Resolve the request target for every input form `fetch` accepts.
  // `Request#toString()` yields "[object Request]", so read `.url` instead.
  const requestUrl = (input: string | URL | Request): string => {
    if (typeof input === "string") {
      return input;
    }
    return input instanceof URL ? input.href : input.url;
  };

  // Embed an Apollo state object in a `__NEXT_DATA__` script fixture page.
  const nextDataPage = (apolloState: Record<string, unknown>): string =>
    [
      "",
      "<html>",
      '<script id="__NEXT_DATA__" type="application/json">',
      JSON.stringify({
        props: { pageProps: { __APOLLO_STATE__: apolloState } },
      }),
      "</script>",
      "</html>",
      "",
    ].join("\n");

  // Minimal successful response shape handed back by the fetch mock.
  const htmlResponse = (url: string, html: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: { get: () => null },
    url,
  });

  // Detail-page fixture; every listing in this test has the same price.
  const detailHtml = (title: string, path: string): string =>
    nextDataPage({
      "Listing:detail": {
        url: path,
        title,
        price: { amount: 10000, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  const detailListings = [
    { path: "/v-one/k0l0", title: "One" },
    { path: "/v-two/k0l0", title: "Two" },
  ];

  const searchHtml = nextDataPage({
    "Listing:1": { url: "/v-one/k0l0", title: "One" },
    "Listing:2": { url: "/v-two/k0l0", title: "Two" },
  });

  let inFlightDetailRequests = 0;
  let peakDetailRequests = 0;

  global.fetch = mock(async (input: string | URL | Request) => {
    const url = requestUrl(input);

    if (url.includes("/k0c0l1700272")) {
      return htmlResponse(url, searchHtml);
    }

    // Hold each detail request open long enough for a second one to start.
    inFlightDetailRequests++;
    peakDetailRequests = Math.max(peakDetailRequests, inFlightDetailRequests);
    await new Promise((resolve) => setTimeout(resolve, 300));
    inFlightDetailRequests--;

    const detail = detailListings.find(({ path }) => url.endsWith(path));
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${url}`);
    }
    return htmlResponse(url, detailHtml(detail.title, detail.path));
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    4,
    "https://www.kijiji.ca",
    { maxPages: 1 },
  );

  expect(results).toHaveLength(2);
  expect(peakDetailRequests).toBeGreaterThan(1);
  expect(peakDetailRequests).toBeLessThanOrEqual(4);
});
// With `hideUnstableResults: true` and priceMin 8000, the 7000 listing must
// appear in neither bucket: `results` holds the two in-range listings and
// `unstableResults` is empty. The search page is only served when the
// request URL carries `priceMin=80`.
test("classifies the filtered Kijiji result set in unstable mode", async () => {
  // Resolve the request target for every input form `fetch` accepts.
  // `Request#toString()` yields "[object Request]", so read `.url` instead.
  const requestUrl = (input: string | URL | Request): string => {
    if (typeof input === "string") {
      return input;
    }
    return input instanceof URL ? input.href : input.url;
  };

  // Embed an Apollo state object in a `__NEXT_DATA__` script fixture page.
  const nextDataPage = (apolloState: Record<string, unknown>): string =>
    [
      "",
      "<html>",
      '<script id="__NEXT_DATA__" type="application/json">',
      JSON.stringify({
        props: { pageProps: { __APOLLO_STATE__: apolloState } },
      }),
      "</script>",
      "</html>",
      "",
    ].join("\n");

  // Minimal successful response shape handed back by the fetch mock.
  const htmlResponse = (url: string, html: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: { get: () => null },
    url,
  });

  // Detail-page fixture for a single fixed-price, active offer.
  const detailHtml = (title: string, amount: number, path: string): string =>
    nextDataPage({
      "Listing:detail": {
        url: path,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  const detailListings = [
    { path: "/v-stable-one/k0l0", title: "Stable Listing One", amount: 10000 },
    { path: "/v-stable-two/k0l0", title: "Stable Listing Two", amount: 11000 },
    { path: "/v-unstable/k0l0", title: "Unstable Listing", amount: 7000 },
  ];

  const searchHtml = nextDataPage({
    "Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
    "Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
    "Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
  });

  global.fetch = mock((input: string | URL | Request) => {
    const url = requestUrl(input);

    if (url.includes("/k0c0l1700272") && url.includes("priceMin=80")) {
      return Promise.resolve(htmlResponse(url, searchHtml));
    }

    const detail = detailListings.find(({ path }) => url.endsWith(path));
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${url}`);
    }
    return Promise.resolve(
      htmlResponse(url, detailHtml(detail.title, detail.amount, detail.path)),
    );
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1, priceMin: 8000 },
    {},
    { hideUnstableResults: true },
  );

  expect(results).toEqual({
    results: [
      expect.objectContaining({ title: "Stable Listing One" }),
      expect.objectContaining({ title: "Stable Listing Two" }),
    ],
    unstableResults: [],
  });
});
// With `hideUnstableResults: true`, priceMin 8000 and priceMax 15000, the
// 20000 and 7000 listings must appear in neither bucket. The search page is
// only served when the request URL carries both `priceMin=80` and
// `priceMax=150`.
test("keeps out-of-range Kijiji listings out of both buckets and median input", async () => {
  // Resolve the request target for every input form `fetch` accepts.
  // `Request#toString()` yields "[object Request]", so read `.url` instead.
  const requestUrl = (input: string | URL | Request): string => {
    if (typeof input === "string") {
      return input;
    }
    return input instanceof URL ? input.href : input.url;
  };

  // Embed an Apollo state object in a `__NEXT_DATA__` script fixture page.
  const nextDataPage = (apolloState: Record<string, unknown>): string =>
    [
      "",
      "<html>",
      '<script id="__NEXT_DATA__" type="application/json">',
      JSON.stringify({
        props: { pageProps: { __APOLLO_STATE__: apolloState } },
      }),
      "</script>",
      "</html>",
      "",
    ].join("\n");

  // Minimal successful response shape handed back by the fetch mock.
  const htmlResponse = (url: string, html: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: { get: () => null },
    url,
  });

  // Detail-page fixture for a single fixed-price, active offer.
  const detailHtml = (title: string, amount: number, path: string): string =>
    nextDataPage({
      "Listing:detail": {
        url: path,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  const detailListings = [
    { path: "/v-stable-one/k0l0", title: "Stable Listing One", amount: 10000 },
    { path: "/v-stable-two/k0l0", title: "Stable Listing Two", amount: 11000 },
    {
      path: "/v-out-of-range-high/k0l0",
      title: "Out Of Range High",
      amount: 20000,
    },
    {
      path: "/v-out-of-range-low/k0l0",
      title: "Out Of Range Low",
      amount: 7000,
    },
  ];

  const searchHtml = nextDataPage({
    "Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
    "Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
    "Listing:3": {
      url: "/v-out-of-range-high/k0l0",
      title: "Out Of Range High",
    },
    "Listing:4": {
      url: "/v-out-of-range-low/k0l0",
      title: "Out Of Range Low",
    },
  });

  global.fetch = mock((input: string | URL | Request) => {
    const url = requestUrl(input);

    const isFilteredSearch =
      url.includes("/k0c0l1700272") &&
      url.includes("priceMin=80") &&
      url.includes("priceMax=150");
    if (isFilteredSearch) {
      return Promise.resolve(htmlResponse(url, searchHtml));
    }

    const detail = detailListings.find(({ path }) => url.endsWith(path));
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${url}`);
    }
    return Promise.resolve(
      htmlResponse(url, detailHtml(detail.title, detail.amount, detail.path)),
    );
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1, priceMin: 8000, priceMax: 15000 },
    {},
    { hideUnstableResults: true },
  );

  expect(results).toEqual({
    results: [
      expect.objectContaining({ title: "Stable Listing One" }),
      expect.objectContaining({ title: "Stable Listing Two" }),
    ],
    unstableResults: [],
  });
});
test("parseDetailedListing ignores non-root listing-like entities", async () => {
  // A card entity is listed ahead of the real detail record; only the latter
  // carries price/type/status.
  const apolloState = {
    "SearchListingCard:1": {
      url: "/v-card/k0l0",
      title: "Card Listing",
    },
    "Listing:detail": {
      url: "/v-detailed/k0l0",
      title: "Detailed Listing",
      price: { amount: 10000, currency: "CAD", type: "FIXED" },
      type: "OFFER",
      status: "ACTIVE",
    },
  };
  const nextData = JSON.stringify({
    props: { pageProps: { __APOLLO_STATE__: apolloState } },
  });
  const html = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${nextData}
      </script>
    </html>
  `;

  const parsed = await parseDetailedListing(html, "https://www.kijiji.ca");

  expect(parsed).toEqual(
    expect.objectContaining({ title: "Detailed Listing" }),
  );
});
// Verifies that parsing a listing with seller details never has more than one
// request to the "/anvil/api" endpoint in flight at the same time.
test("fetchSellerDetails does not fire concurrent GraphQL requests", async () => {
  // Detail page whose listing carries a posterInfo.posterId.
  const html = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: {
            pageProps: {
              __APOLLO_STATE__: {
                "Listing:detail": {
                  url: "/v-test/k0l0",
                  title: "Test Listing",
                  price: { amount: 10000, currency: "CAD", type: "FIXED" },
                  type: "OFFER",
                  status: "ACTIVE",
                  posterInfo: { posterId: "123" },
                },
              },
            },
          },
        })}
      </script>
    </html>
  `;
  // Number of "/anvil/api" requests currently in flight, and the highest value seen.
  let activeAnvilRequests = 0;
  let maxActiveAnvilRequests = 0;
  global.fetch = mock(async (input: string | URL | Request) => {
    const url = typeof input === "string" ? input : input.toString();
    if (url.includes("/anvil/api")) {
      activeAnvilRequests++;
      maxActiveAnvilRequests = Math.max(
        maxActiveAnvilRequests,
        activeAnvilRequests,
      );
      // Hold the request open for 50 ms so that any overlapping request would
      // raise the in-flight counter above 1 before this one finishes.
      await new Promise((resolve) => setTimeout(resolve, 50));
      activeAnvilRequests--;
      return {
        ok: true,
        json: () => Promise.resolve({ data: { user: {} } }),
        headers: { get: () => null },
        url,
      };
    }
    // Any other URL is a test failure.
    throw new Error(`Unexpected URL: ${url}`);
  }) as unknown as typeof fetch;
  await parseDetailedListing(html, "https://www.kijiji.ca", {
    includeClientSideData: true,
    sellerDataDepth: "detailed",
  });
  expect(maxActiveAnvilRequests).toBe(1);
});
test("returns results and unstableResults when unstable mode is enabled", async () => {
  // Embeds an Apollo state object in the __NEXT_DATA__ script tag of a bare page.
  const pageWithApolloState = (apolloState: Record<string, unknown>) => `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: { pageProps: { __APOLLO_STATE__: apolloState } },
        })}
      </script>
    </html>
  `;

  // One row per listing: it appears on the search page and has its own detail page.
  const fixtures = [
    { title: "Stable Listing One", amount: 10000, slug: "v-stable-one/k0l0" },
    { title: "Stable Listing Two", amount: 11000, slug: "v-stable-two/k0l0" },
    { title: "Unstable Listing", amount: 7000, slug: "v-unstable/k0l0" },
  ];

  const searchHtml = pageWithApolloState(
    Object.fromEntries(
      fixtures.map(({ slug, title }, index) => [
        `Listing:${index + 1}`,
        { url: `/${slug}`, title },
      ]),
    ),
  );

  const detailHtml = (title: string, amount: number, slug: string) =>
    pageWithApolloState({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  // Minimal stand-in for the parts of a fetch Response this test supplies.
  const respondWith = (url: string, renderBody: () => string) =>
    Promise.resolve({
      ok: true,
      text: () => Promise.resolve(renderBody()),
      headers: { get: () => null },
      url,
    });

  global.fetch = mock((input: string | URL | Request) => {
    const url = typeof input === "string" ? input : input.toString();

    if (url.includes("/k0c0l1700272")) {
      return respondWith(url, () => searchHtml);
    }

    const fixture = fixtures.find(({ slug }) => url.endsWith(`/${slug}`));
    if (fixture === undefined) {
      throw new Error(`Unexpected URL: ${url}`);
    }
    return respondWith(url, () =>
      detailHtml(fixture.title, fixture.amount, fixture.slug),
    );
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1 },
    {},
    { hideUnstableResults: true },
  );

  // The lowest-priced listing is expected in its own bucket, not dropped.
  expect(results).toEqual({
    results: [
      expect.objectContaining({ title: "Stable Listing One" }),
      expect.objectContaining({ title: "Stable Listing Two" }),
    ],
    unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
  });
});
});
describe("parseSearch", () => {
  test("ignores SearchListingCard noise keys", () => {
    // The card entry comes first so a parser that accepted it would surface it.
    const apolloState = {
      "SearchListingCard:1": {
        url: "/v-card-noise/k0l0",
        title: "Card Noise",
      },
      "Listing:1": {
        url: "/v-real-result/k0l0",
        title: "Real Result",
      },
    };
    const nextData = JSON.stringify({
      props: { pageProps: { __APOLLO_STATE__: apolloState } },
    });
    const html = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${nextData}
        </script>
      </html>
    `;

    const parsed = parseSearch(html, "https://www.kijiji.ca");

    expect(parsed).toEqual([
      {
        listingLink: "https://www.kijiji.ca/v-real-result/k0l0",
        name: "Real Result",
      },
    ]);
  });
});

View File

@@ -13,7 +13,7 @@ describe("HTML Parsing Integration", () => {
// Mock fetch for all tests // Mock fetch for all tests
global.fetch = mock(() => { global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests"); throw new Error("fetch should be mocked in individual tests");
}); }) as unknown as typeof fetch;
}); });
afterEach(() => { afterEach(() => {
@@ -111,7 +111,7 @@ describe("HTML Parsing Integration", () => {
`; `;
const results = parseSearch(mockHtml, "https://www.kijiji.ca"); const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results[0].listingLink).toBe( expect(results[0]?.listingLink).toBe(
"https://www.kijiji.ca/v-iphone/k0l0", "https://www.kijiji.ca/v-iphone/k0l0",
); );
}); });
@@ -146,7 +146,49 @@ describe("HTML Parsing Integration", () => {
const results = parseSearch(mockHtml, "https://www.kijiji.ca"); const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].name).toBe("iPhone 13 Pro"); expect(results[0]?.name).toBe("iPhone 13 Pro");
});
test("should parse current StandardListing search records", () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
ROOT_QUERY: { test: "value" },
"StandardListing:123": {
__typename: "StandardListing",
url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
title: "iPhone 13",
},
"StandardListing:456": {
__typename: "StandardListing",
url: "/v-cell-phone/city-of-toronto/iphone-14/456",
title: "iPhone 14",
},
},
},
},
})}
</script>
</html>
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toEqual([
{
name: "iPhone 13",
listingLink:
"https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
},
{
name: "iPhone 14",
listingLink:
"https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-14/456",
},
]);
}); });
test("should return empty array for invalid HTML", () => { test("should return empty array for invalid HTML", () => {
@@ -303,6 +345,118 @@ describe("HTML Parsing Integration", () => {
expect(result).toBeNull(); expect(result).toBeNull();
}); });
test("should parse current StandardListing detail records", async () => {
  const listingUrl =
    "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123";

  // Raw Apollo record as it appears under the "StandardListing:123" key.
  const standardListing = {
    __typename: "StandardListing",
    url: listingUrl,
    title: "iPhone 13",
    description: "Lightly used iPhone 13",
    price: {
      __typename: "AmountPrice",
      amount: 45000,
      currency: "CAD",
      type: "FIXED",
    },
    type: "OFFER",
    status: "ACTIVE",
    activationDate: "2026-04-20T10:00:00.000Z",
    metrics: { views: "12" },
    location: {
      id: 1700273,
      name: "City of Toronto",
      address: "Toronto, ON",
      coordinates: {
        latitude: 43.6532,
        longitude: -79.3832,
      },
    },
    imageUrls: ["https://media.kijiji.ca/api/v1/image1.jpg"],
    categoryId: 760,
    adSource: "ORGANIC",
    flags: {
      topAd: false,
      priceDrop: false,
    },
    posterInfo: {
      posterId: "user123",
      rating: 4.5,
    },
    attributes: {
      __typename: "StandardListingAttributes",
      all: [
        {
          __typename: "ListingAttributeV2",
          canonicalName: "forsaleby",
          canonicalValues: ["ownr"],
        },
        {
          __typename: "ListingAttributeV2",
          canonicalName: "phonebrand",
          canonicalValues: ["apple"],
        },
      ],
    },
  };

  const nextData = JSON.stringify({
    props: {
      pageProps: {
        __APOLLO_STATE__: { "StandardListing:123": standardListing },
      },
    },
  });
  const mockHtml = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${nextData}
      </script>
    </html>
  `;

  // Shape the parser is expected to produce from the record above.
  const expectedListing = {
    url: listingUrl,
    title: "iPhone 13",
    description: "Lightly used iPhone 13",
    listingPrice: {
      amountFormatted: "$450.00",
      cents: 45000,
      currency: "CAD",
    },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    creationDate: "2026-04-20T10:00:00.000Z",
    endDate: undefined,
    numberOfViews: 12,
    address: "Toronto, ON",
    images: ["https://media.kijiji.ca/api/v1/image1.jpg"],
    categoryId: 760,
    adSource: "ORGANIC",
    flags: {
      topAd: false,
      priceDrop: false,
    },
    attributes: {
      forsaleby: ["ownr"],
      phonebrand: ["apple"],
    },
    location: {
      id: 1700273,
      name: "City of Toronto",
      coordinates: {
        latitude: 43.6532,
        longitude: -79.3832,
      },
    },
    sellerInfo: {
      posterId: "user123",
      rating: 4.5,
    },
  };

  const parsed = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");

  expect(parsed).toEqual(expectedListing);
});
test("should handle missing optional fields", async () => { test("should handle missing optional fields", async () => {
const mockHtml = ` const mockHtml = `
<html> <html>

View File

@@ -0,0 +1,35 @@
import { describe, expect, test } from "bun:test";
import fetchEbayItems from "../../src/scrapers/ebay";
const LIVE_RESULT_LIMIT = 3;
const LIVE_TEST_TIMEOUT_MS = 30_000;
// Live smoke test: calls the eBay scraper without a fetch mock and checks the
// basic shape of every returned listing.
describe("eBay live parser", () => {
  test(
    "scrapes live search results into listing details",
    async () => {
      const listings = await fetchEbayItems("iphone", 1, {
        maxItems: LIVE_RESULT_LIMIT,
      });
      expect(listings.length).toBeGreaterThan(0);

      for (const item of listings) {
        const price = item.listingPrice;

        // Explicit throws name the offending URL in the failure message.
        if (!price) {
          throw new Error(`Expected listing price for ${item.url}`);
        }
        if (typeof price.cents !== "number") {
          throw new Error(`Expected listing cents for ${item.url}`);
        }
        if (!price.currency) {
          throw new Error(`Expected listing currency for ${item.url}`);
        }

        expect(item.url).toStartWith("https://");
        expect(item.title.length).toBeGreaterThan(0);
        expect(price.cents).toBeGreaterThanOrEqual(0);
        expect(price.currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,44 @@
import { describe, expect, test } from "bun:test";
import fetchFacebookItems from "../../src/scrapers/facebook";
const LIVE_RESULT_LIMIT = 3;
const LIVE_TEST_TIMEOUT_MS = 30_000;
// Live smoke test: calls the Facebook Marketplace scraper without a fetch mock
// and checks the basic shape of every returned listing.
describe("Facebook live parser", () => {
  test(
    "scrapes live marketplace search results into listing details",
    async () => {
      // NOTE(review): the commit log for this change set states that Facebook
      // Marketplace no longer requires authentication cookies — confirm
      // whether this precondition is still needed.
      const cookie = process.env.FACEBOOK_COOKIE?.trim();
      if (!cookie) {
        throw new Error("FACEBOOK_COOKIE is required for Facebook live tests");
      }

      const listings = await fetchFacebookItems(
        "iphone",
        1,
        "toronto",
        LIVE_RESULT_LIMIT,
      );
      expect(listings.length).toBeGreaterThan(0);

      for (const item of listings) {
        const price = item.listingPrice;

        // Explicit throws name the offending URL in the failure message.
        if (!price) {
          throw new Error(`Expected listing price for ${item.url}`);
        }
        if (typeof price.cents !== "number") {
          throw new Error(`Expected listing cents for ${item.url}`);
        }
        if (!price.currency) {
          throw new Error(`Expected listing currency for ${item.url}`);
        }

        expect(item.url).toStartWith(
          "https://www.facebook.com/marketplace/item/",
        );
        expect(item.title.length).toBeGreaterThan(0);
        expect(price.cents).toBeGreaterThanOrEqual(0);
        expect(price.currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,38 @@
import { describe, expect, test } from "bun:test";
import fetchKijijiItems from "../../src/scrapers/kijiji";
const LIVE_TEST_TIMEOUT_MS = 30_000;
// Live smoke test: calls the Kijiji scraper without a fetch mock and checks
// the basic shape of every returned listing.
describe("Kijiji live parser", () => {
  test(
    "scrapes live search results into detailed listings",
    async () => {
      const listings = await fetchKijijiItems(
        "iphone",
        1,
        "https://www.kijiji.ca",
        { maxPages: 1 },
        { includeImages: false, sellerDataDepth: "basic" },
      );
      expect(listings.length).toBeGreaterThan(0);

      for (const item of listings) {
        const price = item.listingPrice;

        // Explicit throws name the offending URL in the failure message.
        if (!price) {
          throw new Error(`Expected listing price for ${item.url}`);
        }
        if (typeof price.cents !== "number") {
          throw new Error(`Expected listing cents for ${item.url}`);
        }
        if (!price.currency) {
          throw new Error(`Expected listing currency for ${item.url}`);
        }

        expect(item.url).toStartWith("https://www.kijiji.ca/");
        expect(item.title.length).toBeGreaterThan(0);
        expect(price.cents).toBeGreaterThanOrEqual(0);
        expect(price.currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,29 @@
import { afterEach, describe, expect, mock, test } from "bun:test";
// Checks that the shared logger stays silent while NODE_ENV is "test".
describe("logger", () => {
  const originalNodeEnv = process.env.NODE_ENV;
  const originalConsoleLog = console.log;
  const originalConsoleWarn = console.warn;

  afterEach(() => {
    // Assigning `undefined` to a process.env key stores the string
    // "undefined" instead of unsetting it, so an originally-absent NODE_ENV
    // has to be restored with `delete`.
    if (originalNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = originalNodeEnv;
    }
    console.log = originalConsoleLog;
    console.warn = originalConsoleWarn;
  });

  test("suppresses log and warn output during tests", async () => {
    process.env.NODE_ENV = "test";
    const logMock = mock(() => {});
    const warnMock = mock(() => {});
    console.log = logMock;
    console.warn = warnMock;

    // Imported after the console methods are swapped out.
    const { logger } = await import("../src/utils/logger");
    logger.log("hidden log");
    logger.warn("hidden warn");

    expect(logMock).not.toHaveBeenCalled();
    expect(warnMock).not.toHaveBeenCalled();
  });
});

View File

@@ -1,11 +1,6 @@
// Test setup for Bun test runner global.fetch = Object.assign(
// This file is loaded before any tests run due to bunfig.toml preload () => {
throw new Error("Tests must mock fetch explicitly");
// Mock fetch globally for tests },
global.fetch = { preconnect: fetch.preconnect },
global.fetch || ) as typeof fetch;
(() => {
throw new Error("fetch is not available in test environment");
});
// Add any global test utilities here

View File

@@ -0,0 +1,101 @@
import { describe, expect, test } from "bun:test";
import type { ListingDetails } from "../src/types/common";
import { classifyUnstableListings } from "../src/utils/unstable";
/**
 * Fixture shape for these tests: the shared ListingDetails fields plus an
 * `id` that the assertions use to identify which listings ended up where.
 */
interface TestListing extends ListingDetails {
  id: string;
}
/**
 * Builds a listing fixture for the given id and price.
 *
 * @param id - Used as the listing id, its title, and the last URL segment.
 * @param cents - Price in cents; also rendered as a two-decimal dollar string.
 * @returns A listing with fixed currency, type and status values.
 */
function makeListing(id: string, cents: number): TestListing {
  const dollars = (cents / 100).toFixed(2);
  const listing: TestListing = {
    id,
    url: `https://example.com/${id}`,
    title: id,
    listingPrice: {
      amountFormatted: `$${dollars}`,
      cents,
      currency: "CAD",
    },
    listingType: "test",
    listingStatus: "active",
  };
  return listing;
}
// Bucket-assignment expectations for classifyUnstableListings, compared by
// listing id in output order.
describe("classifyUnstableListings", () => {
  const idsOf = (items: readonly { id: string }[]) =>
    items.map(({ id }) => id);

  test("moves listings below 80% of median into unstableResults", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("stable-1", 100_00),
      makeListing("stable-2", 110_00),
      makeListing("unstable", 70_00),
    ]);

    expect(idsOf(results)).toEqual(["stable-1", "stable-2"]);
    expect(idsOf(unstableResults)).toEqual(["unstable"]);
  });

  test("uses the midpoint median for even-sized priced inputs", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("low", 79_00),
      makeListing("mid-low", 100_00),
      makeListing("mid-high", 120_00),
      makeListing("high", 140_00),
    ]);

    expect(idsOf(results)).toEqual(["mid-low", "mid-high", "high"]);
    expect(idsOf(unstableResults)).toEqual(["low"]);
  });

  test("keeps non-positive prices in results and excludes them from the median input", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("zero", 0),
      makeListing("negative", -500),
      makeListing("stable-1", 100_00),
      makeListing("stable-2", 120_00),
      makeListing("unstable", 70_00),
    ]);

    expect(idsOf(results)).toEqual([
      "zero",
      "negative",
      "stable-1",
      "stable-2",
    ]);
    expect(idsOf(unstableResults)).toEqual(["unstable"]);
  });

  test("returns all listings in results when fewer than two valid prices are present", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("zero", 0),
      makeListing("negative", -100),
      makeListing("only-valid", 150_00),
    ]);

    expect(idsOf(results)).toEqual(["zero", "negative", "only-valid"]);
    expect(unstableResults).toEqual([]);
  });
});

View File

@@ -1,13 +1,9 @@
{ {
"extends": "../../tsconfig.json",
"compilerOptions": { "compilerOptions": {
"lib": ["dom"],
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"paths": { "paths": {
"@/*": ["./src/*"] "@/*": ["./src/*"]
}, }
"strict": true, },
"noEmit": true "include": ["./src", "./test", "../../types/**/*.d.ts"]
}
} }

View File

@@ -21,5 +21,6 @@
## Verify ## Verify
- `bun test packages/mcp-server/test`
- `bun run --cwd packages/mcp-server build` - `bun run --cwd packages/mcp-server build`
- `bun run ci` - `bun run ci`

View File

@@ -2,18 +2,22 @@
"name": "@marketplace-scrapers/mcp-server", "name": "@marketplace-scrapers/mcp-server",
"version": "1.0.0", "version": "1.0.0",
"type": "module", "type": "module",
"module": "./src/index.ts", "exports": {
".": "./src/index.ts"
},
"private": true, "private": true,
"scripts": { "scripts": {
"start": "bun ./src/index.ts", "start": "bun ./src/index.ts",
"dev": "bun --watch ./src/index.ts", "dev": "bun --watch ./src/index.ts",
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp" "build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp",
"typecheck": "bun tsgo"
}, },
"dependencies": { "dependencies": {
"@marketplace-scrapers/core": "workspace:*" "@marketplace-scrapers/core": "workspace:*",
"@typescript/native-preview": "catalog:"
}, },
"devDependencies": { "devDependencies": {
"@types/bun": "latest" "@types/bun": "catalog:"
}, },
"peerDependencies": { "peerDependencies": {
"typescript": "^5" "typescript": "^5"

View File

@@ -1,3 +1,4 @@
import { logger } from "./logger";
import { handleMcpRequest } from "./protocol/handler"; import { handleMcpRequest } from "./protocol/handler";
import { serverCard } from "./protocol/metadata"; import { serverCard } from "./protocol/metadata";
@@ -33,4 +34,4 @@ const server = Bun.serve({
}, },
}); });
console.log(`MCP Server running on ${server.hostname}:${server.port}`); logger.log(`MCP Server running on ${server.hostname}:${server.port}`);

View File

@@ -0,0 +1,10 @@
/** True while NODE_ENV is exactly "test"; read on every call, not cached. */
const isTest = (): boolean => {
  return process.env.NODE_ENV === "test";
};

/**
 * Console wrapper that drops output while running under NODE_ENV === "test"
 * and otherwise forwards all arguments unchanged to the matching console
 * method.
 */
export const logger = {
  log: (...args: Parameters<typeof console.log>) => {
    if (isTest()) {
      return;
    }
    console.log(...args);
  },
  error: (...args: Parameters<typeof console.error>) => {
    if (isTest()) {
      return;
    }
    console.error(...args);
  },
};

View File

@@ -1,7 +1,33 @@
import { logger } from "../logger";
import { tools } from "./tools"; import { tools } from "./tools";
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api"; const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000;
/**
 * Calls `${API_BASE_URL}/${marketplace}` with the given query parameters and
 * returns the parsed JSON body.
 *
 * The wait for the response is bounded by API_TIMEOUT. When the timeout fires
 * first, the in-flight request is aborted; when the response arrives first,
 * the timer is cleared so it does not linger after the call completes.
 *
 * @param marketplace - Path segment appended to the API base URL.
 * @param params - Query parameters serialized into the request URL.
 * @returns The response body parsed as JSON.
 * @throws Error when the request times out or the API answers with a non-OK status.
 */
async function callMarketplaceApi(
  marketplace: string,
  params: URLSearchParams,
): Promise<unknown> {
  const url = `${API_BASE_URL}/${marketplace}?${params.toString()}`;
  logger.log(`[MCP] Calling ${marketplace} API`);

  const controller = new AbortController();
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      // Reject before aborting so callers see the timeout message rather
      // than the abort error raised by the cancelled request.
      reject(new Error(`Request timed out after ${API_TIMEOUT}ms`));
      controller.abort();
    }, API_TIMEOUT);
  });

  const response = await Promise.race([
    fetch(url, { signal: controller.signal }),
    timeout,
  ]).finally(() => clearTimeout(timer));

  if (!response.ok) {
    const errorText = await response.text();
    logger.error(
      `[MCP] ${marketplace} API error ${response.status}: ${errorText}`,
    );
    throw new Error(`API returned ${response.status}: ${errorText}`);
  }
  return response.json();
}
/** /**
* Handle MCP JSON-RPC 2.0 protocol requests * Handle MCP JSON-RPC 2.0 protocol requests
@@ -115,9 +141,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("priceMin", args.priceMin.toString()); params.append("priceMin", args.priceMin.toString());
if (args.priceMax) if (args.priceMax)
params.append("priceMax", args.priceMax.toString()); params.append("priceMax", args.priceMax.toString());
if (args.cookies) params.append("cookies", args.cookies); if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString());
console.log( logger.log(
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`, `[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
); );
const response = await Promise.race([ const response = await Promise.race([
@@ -133,13 +160,20 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (!response.ok) { if (!response.ok) {
const errorText = await response.text(); const errorText = await response.text();
console.error( logger.error(
`[MCP] Kijiji API error ${response.status}: ${errorText}`, `[MCP] Kijiji API error ${response.status}: ${errorText}`,
); );
throw new Error(`API returned ${response.status}: ${errorText}`); let errorMessage = `API returned ${response.status}: ${errorText}`;
try {
const errorJson = JSON.parse(errorText) as { message?: string };
if (errorJson.message) errorMessage = errorJson.message;
} catch {
// not JSON — use raw text
}
throw new Error(errorMessage);
} }
result = await response.json(); result = await response.json();
console.log( logger.log(
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`, `[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
); );
} else if (name === "search_facebook") { } else if (name === "search_facebook") {
@@ -155,32 +189,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (args.location) params.append("location", args.location); if (args.location) params.append("location", args.location);
if (args.maxItems) if (args.maxItems)
params.append("maxItems", args.maxItems.toString()); params.append("maxItems", args.maxItems.toString());
if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString());
console.log( result = await callMarketplaceApi("facebook", params);
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] Facebook API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else if (name === "search_ebay") { } else if (name === "search_ebay") {
const query = args.query; const query = args.query;
if (!query) { if (!query) {
@@ -207,32 +219,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("canadaOnly", args.canadaOnly.toString()); params.append("canadaOnly", args.canadaOnly.toString());
if (args.maxItems) if (args.maxItems)
params.append("maxItems", args.maxItems.toString()); params.append("maxItems", args.maxItems.toString());
if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString());
console.log( result = await callMarketplaceApi("ebay", params);
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
console.error(
`[MCP] eBay API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
console.log(
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else { } else {
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",

View File

@@ -11,7 +11,11 @@ export const tools = [
properties: { properties: {
query: { query: {
type: "string", type: "string",
description: "Search query for Kijiji listings", description:
"Search query for Kijiji listings. " +
"Kijiji requires ALL words to appear in the listing title — keep queries short and use terms sellers actually write. " +
"Avoid marketing/brand phrases sellers don't use (e.g. use 'macbook air m1' not 'macbook air m1 apple silicon'). " +
"If the search returns no results, try a shorter or more common query.",
}, },
location: { location: {
type: "string", type: "string",
@@ -46,16 +50,16 @@ export const tools = [
}, },
priceMin: { priceMin: {
type: "number", type: "number",
description: "Minimum price in cents", description: "Minimum price in dollars",
}, },
priceMax: { priceMax: {
type: "number", type: "number",
description: "Maximum price in cents", description: "Maximum price in dollars",
}, },
cookies: { unstableFilter: {
type: "string", type: "boolean",
description: description:
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')", "optional: when enabled, listings priced more than 20% below the median are moved into an `unstableResults` bucket. Changes the response shape from a plain list to an object with `results` and `unstableResults`.",
}, },
}, },
required: ["query"], required: ["query"],
@@ -81,6 +85,11 @@ export const tools = [
description: "Maximum number of items to return", description: "Maximum number of items to return",
default: 5, default: 5,
}, },
unstableFilter: {
type: "boolean",
description:
"optional: when enabled, listings priced more than 20% below the median are moved into an `unstableResults` bucket. Changes the response shape from a plain list to an object with `results` and `unstableResults`.",
},
}, },
required: ["query"], required: ["query"],
}, },
@@ -98,11 +107,11 @@ export const tools = [
}, },
minPrice: { minPrice: {
type: "number", type: "number",
description: "Minimum price filter", description: "Minimum price in dollars",
}, },
maxPrice: { maxPrice: {
type: "number", type: "number",
description: "Maximum price filter", description: "Maximum price in dollars",
}, },
strictMode: { strictMode: {
type: "boolean", type: "boolean",
@@ -134,6 +143,11 @@ export const tools = [
description: "Maximum number of items to return", description: "Maximum number of items to return",
default: 5, default: 5,
}, },
unstableFilter: {
type: "boolean",
description:
"optional: when enabled, listings priced more than 20% below the median are moved into an `unstableResults` bucket. Changes the response shape from a plain list to an object with `results` and `unstableResults`.",
},
}, },
required: ["query"], required: ["query"],
}, },

View File

@@ -8,25 +8,20 @@ describe("MCP protocol cookie inputs", () => {
beforeEach(() => { beforeEach(() => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve(new Response(JSON.stringify([]), { status: 200 })), Promise.resolve(new Response(JSON.stringify([]), { status: 200 })),
) as typeof fetch; ) as unknown as typeof fetch;
}); });
afterEach(() => { afterEach(() => {
global.fetch = originalFetch; global.fetch = originalFetch;
}); });
test("search tools should not expose Facebook or eBay cookie inputs", () => { test("search tools should not expose cookie inputs", () => {
const searchFacebookTool = tools.find( const toolNames = ["search_kijiji", "search_facebook", "search_ebay"];
(tool) => tool.name === "search_facebook", for (const toolName of toolNames) {
); const tool = tools.find((candidate) => candidate.name === toolName);
const searchEbayTool = tools.find((tool) => tool.name === "search_ebay"); expect(tool?.inputSchema.properties).not.toHaveProperty("cookies");
expect(tool?.inputSchema.properties).not.toHaveProperty("cookiesSource");
expect(searchFacebookTool?.inputSchema.properties).not.toHaveProperty( }
"cookiesSource",
);
expect(searchEbayTool?.inputSchema.properties).not.toHaveProperty(
"cookies",
);
}); });
test("search_facebook should not forward cookies query parameters", async () => { test("search_facebook should not forward cookies query parameters", async () => {
@@ -48,9 +43,245 @@ describe("MCP protocol cookie inputs", () => {
}), }),
); );
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0]; .calls[0]?.[0];
expect(String(calledUrl)).toContain("/facebook?q=laptop"); expect(String(calledUrl)).toContain("/facebook?q=laptop");
expect(String(calledUrl)).not.toContain("cookies="); expect(String(calledUrl)).not.toContain("cookies=");
}); });
test("search_kijiji should not forward cookies query parameters", async () => {
  // A caller-supplied `cookies` argument must not reach the outgoing API URL.
  const rpcCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_kijiji",
      arguments: {
        query: "laptop",
        cookies: "s=1",
      },
    },
  };
  const request = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(rpcCall),
  });

  await handleMcpRequest(request);

  const fetchMock = global.fetch as unknown as ReturnType<typeof mock>;
  const calledUrl = String(fetchMock.mock.calls[0]?.[0]);
  expect(calledUrl).toContain("/kijiji?q=laptop");
  expect(calledUrl).not.toContain("cookies=");
});
});
describe("MCP protocol unstableFilter", () => {
beforeEach(() => {
global.fetch = mock(() =>
Promise.resolve(new Response(JSON.stringify([]), { status: 200 })),
) as unknown as typeof fetch;
});
afterEach(() => {
global.fetch = originalFetch;
});
test("all search tools should document the unstableFilter property", () => {
  // Phrases every unstableFilter description must mention, checked in order.
  const requiredPhrases = ["optional", "20%", "median", "unstableResults"];

  for (const toolName of ["search_kijiji", "search_facebook", "search_ebay"]) {
    const tool = tools.find((t) => t.name === toolName);
    expect(tool).toBeDefined();
    expect(tool?.inputSchema.properties).toHaveProperty("unstableFilter");

    const prop = tool?.inputSchema.properties.unstableFilter as {
      type: string;
      description: string;
    };
    expect(prop.type).toBe("boolean");
    for (const phrase of requiredPhrases) {
      expect(prop.description).toContain(phrase);
    }
  }
});
test("handler should forward unstableFilter=true for search_kijiji", async () => {
  const rpcCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_kijiji",
      arguments: {
        query: "laptop",
        unstableFilter: true,
      },
    },
  };
  const request = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(rpcCall),
  });

  await handleMcpRequest(request);

  // The first argument of the first fetch call is the outgoing API URL.
  const fetchMock = global.fetch as unknown as ReturnType<typeof mock>;
  const calledUrl = String(fetchMock.mock.calls[0]?.[0]);
  expect(calledUrl).toContain("unstableFilter=true");
});
// The search_kijiji schema must describe both price filters in dollars.
test("search_kijiji should document price filters as dollars", () => {
  const tool = tools.find((candidate) => candidate.name === "search_kijiji");
  // Assert the tool and both properties exist first, so a missing entry is
  // reported as a failed expectation rather than a TypeError on `.description`.
  expect(tool).toBeDefined();
  const properties = tool?.inputSchema.properties;
  expect(properties).toHaveProperty("priceMin");
  expect(properties).toHaveProperty("priceMax");

  const priceMin = properties?.priceMin as {
    description: string;
  };
  const priceMax = properties?.priceMax as {
    description: string;
  };
  expect(priceMin.description).toContain("dollars");
  expect(priceMax.description).toContain("dollars");
});
// Price bounds passed to search_kijiji must appear unchanged (fractional
// values included) as `priceMin` / `priceMax` on the URL handed to fetch.
test("handler should forward Kijiji dollar price filters to API", async () => {
  const toolCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_kijiji",
      arguments: {
        query: "macbook",
        priceMin: 999.99,
        priceMax: 1000,
      },
    },
  };
  const rpcRequest = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(toolCall),
  });

  await handleMcpRequest(rpcRequest);

  const fetchSpy = global.fetch as unknown as ReturnType<typeof mock>;
  const requestedUrl = String(fetchSpy.mock.calls[0]?.[0]);
  expect(requestedUrl).toContain("priceMin=999.99");
  expect(requestedUrl).toContain("priceMax=1000");
});
// Calling search_facebook with `unstableFilter: true` must put
// `unstableFilter=true` on the URL handed to fetch.
test("handler should forward unstableFilter=true for search_facebook", async () => {
  const toolCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_facebook",
      arguments: {
        query: "laptop",
        unstableFilter: true,
      },
    },
  };
  const rpcRequest = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(toolCall),
  });

  await handleMcpRequest(rpcRequest);

  const fetchSpy = global.fetch as unknown as ReturnType<typeof mock>;
  const requestedUrl = String(fetchSpy.mock.calls[0]?.[0]);
  expect(requestedUrl).toContain("unstableFilter=true");
});
// The JSON body returned by the (mocked) API must come back as the first
// content entry of the tools/call result, typed "text" and holding that JSON.
test("tools/call returns API JSON as text content", async () => {
  // Override the shared stub so the API answers with a single listing.
  const singleItemFetch = mock(() => {
    const apiResponse = new Response(JSON.stringify([{ title: "item" }]), {
      status: 200,
    });
    return Promise.resolve(apiResponse);
  });
  global.fetch = singleItemFetch as unknown as typeof fetch;

  const toolCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_facebook",
      arguments: { query: "laptop" },
    },
  };
  const rpcRequest = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(toolCall),
  });

  const response = await handleMcpRequest(rpcRequest);
  const body = await response.json();

  expect(body.result.content[0].type).toBe("text");
  const decodedText = JSON.parse(body.result.content[0].text);
  expect(decodedText).toEqual([{ title: "item" }]);
});
// Calling search_ebay with `unstableFilter: true` must put
// `unstableFilter=true` on the URL handed to fetch.
test("handler should forward unstableFilter=true for search_ebay", async () => {
  const toolCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_ebay",
      arguments: {
        query: "laptop",
        unstableFilter: true,
      },
    },
  };
  const rpcRequest = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(toolCall),
  });

  await handleMcpRequest(rpcRequest);

  const fetchSpy = global.fetch as unknown as ReturnType<typeof mock>;
  const requestedUrl = String(fetchSpy.mock.calls[0]?.[0]);
  expect(requestedUrl).toContain("unstableFilter=true");
});
// The search_ebay schema must describe both price filters in dollars.
test("search_ebay should document price filters as dollars", () => {
  const tool = tools.find((candidate) => candidate.name === "search_ebay");
  // Assert the tool and both properties exist first, so a missing entry is
  // reported as a failed expectation rather than a TypeError on `.description`.
  expect(tool).toBeDefined();
  const properties = tool?.inputSchema.properties;
  expect(properties).toHaveProperty("minPrice");
  expect(properties).toHaveProperty("maxPrice");

  const minPrice = properties?.minPrice as {
    description: string;
  };
  const maxPrice = properties?.maxPrice as {
    description: string;
  };
  expect(minPrice.description).toContain("dollars");
  expect(maxPrice.description).toContain("dollars");
});
// Price bounds passed to search_ebay must appear unchanged (fractional
// values included) as `minPrice` / `maxPrice` on the URL handed to fetch.
test("handler should forward eBay dollar price filters to API", async () => {
  const toolCall = {
    jsonrpc: "2.0",
    id: 1,
    method: "tools/call",
    params: {
      name: "search_ebay",
      arguments: {
        query: "macbook",
        minPrice: 999.99,
        maxPrice: 1000,
      },
    },
  };
  const rpcRequest = new Request("http://localhost", {
    method: "POST",
    body: JSON.stringify(toolCall),
  });

  await handleMcpRequest(rpcRequest);

  const fetchSpy = global.fetch as unknown as ReturnType<typeof mock>;
  const requestedUrl = String(fetchSpy.mock.calls[0]?.[0]);
  expect(requestedUrl).toContain("minPrice=999.99");
  expect(requestedUrl).toContain("maxPrice=1000");
});
}); });

View File

@@ -1,13 +1,9 @@
{ {
"extends": "../../tsconfig.json",
"compilerOptions": { "compilerOptions": {
"lib": ["dom"],
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"paths": { "paths": {
"@/*": ["./src/*"] "@/*": ["./src/*"]
}, }
"strict": true, },
"noEmit": true "include": ["./src", "./test", "../../types/**/*.d.ts"]
}
} }

25
tsconfig.json Normal file
View File

@@ -0,0 +1,25 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["dom", "ESNext"],
"target": "ESNext",
"module": "preserve",
"moduleResolution": "bundler",
"strict": true,
"noEmit": true,
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false,
"types": ["@types/bun"]
}
}

14
turbo.json Normal file
View File

@@ -0,0 +1,14 @@
{
"$schema": "https://turbo.build/schema.json",
"tasks": {
"typecheck": {},
"build": {
"dependsOn": ["^build"],
"outputs": ["../../dist/**"]
},
"test": {
"dependsOn": ["^build"],
"outputs": []
}
}
}

25
types/argon2-wasm-pro/index.d.ts vendored Normal file
View File

@@ -0,0 +1,25 @@
/**
 * Ambient type declarations for the `argon2-wasm-pro` package.
 *
 * Only the default export and its `hash` function are described here.
 * NOTE(review): these typings are hand-maintained; confirm field names and
 * units against the package's own documentation before relying on them.
 */
declare module "argon2-wasm-pro" {
/** Parameters accepted by `hash`. */
interface Argon2Options {
/** Secret input to hash (presumably the password), as text or raw bytes. */
pass: string | Uint8Array;
/** Salt bytes. */
salt: Uint8Array;
/** NOTE(review): presumably the iteration count (time cost) — confirm. */
time: number;
/** NOTE(review): presumably the memory cost; unit not visible here (KiB?) — confirm. */
mem: number;
/** NOTE(review): presumably the requested hash length in bytes — confirm. */
hashLen: number;
/** NOTE(review): presumably the degree of parallelism (lanes) — confirm. */
parallelism: number;
/** Numeric selector; NOTE(review): presumably the Argon2 variant (d / i / id) — confirm the mapping. */
type: number;
}
/** Value the promise returned by `hash` resolves to. */
interface Argon2Result {
/** Hash as raw bytes. */
hash: Uint8Array;
/** NOTE(review): presumably the same hash, hex-encoded — confirm. */
hashHex: string;
/** NOTE(review): presumably the full encoded hash string including parameters and salt — confirm. */
encoded: string;
}
/** Computes a hash for the given options; the result is delivered asynchronously. */
function hash(options: Argon2Options): Promise<Argon2Result>;
/** Shape of the package's default export: an object exposing `hash`. */
const argon2: {
hash: typeof hash;
};
export default argon2;
}