feat: port upstream scraper improvements to monorepo

Kijiji improvements:
- Add error classes: NetworkError, ParseError, RateLimitError, ValidationError
- Add exponential backoff with jitter for retries
- Add request timeout (30s abort)
- Add pagination support (SearchOptions.maxPages)
- Add location/category mappings and resolution functions
- Add enhanced DetailedListing interface with images, seller info, attributes
- Add GraphQL client for seller details

Facebook improvements:
- Add parseFacebookCookieString() for parsing cookie strings
- Add ensureFacebookCookies() with env var fallback
- Add extractFacebookItemData() with multiple extraction paths
- Add fetchFacebookItem() for individual item fetching
- Add extraction metrics and API stability monitoring
- Add vehicle-specific field extraction
- Improve error handling with specific guidance for auth errors

Shared utilities:
- Update http.ts with new error classes and improved fetchHtml

Documentation:
- Port KIJIJI.md, FMARKETPLACE.md, AGENTS.md from upstream

Tests:
- Port kijiji-core, kijiji-integration, kijiji-utils tests
- Port facebook-core, facebook-integration tests
- Add test setup file

Scripts:
- Port parse-facebook-cookies.ts script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-23 00:34:50 -05:00
parent 497c7995a2
commit 50d56201af
14 changed files with 4687 additions and 179 deletions

View File

@@ -0,0 +1,54 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
describe("Utility Functions", () => {
describe("slugify", () => {
test("should convert basic strings to slugs", () => {
expect(slugify("Hello World")).toBe("hello-world");
expect(slugify("iPhone 13 Pro")).toBe("iphone-13-pro");
});
test("should handle special characters", () => {
expect(slugify("Café & Restaurant")).toBe("cafe-restaurant");
expect(slugify("100% New")).toBe("100-new");
});
test("should handle empty and edge cases", () => {
expect(slugify("")).toBe("");
expect(slugify(" ")).toBe("-");
expect(slugify("---")).toBe("-");
});
test("should preserve numbers and valid characters", () => {
expect(slugify("iPhone 13")).toBe("iphone-13");
expect(slugify("item123")).toBe("item123");
});
});
describe("formatCentsToCurrency", () => {
test("should format valid cent values", () => {
expect(formatCentsToCurrency(100)).toBe("$1.00");
expect(formatCentsToCurrency(1999)).toBe("$19.99");
expect(formatCentsToCurrency(0)).toBe("$0.00");
});
test("should handle string inputs", () => {
expect(formatCentsToCurrency("100")).toBe("$1.00");
expect(formatCentsToCurrency("1999")).toBe("$19.99");
});
test("should handle null/undefined inputs", () => {
expect(formatCentsToCurrency(null)).toBe("");
expect(formatCentsToCurrency(undefined)).toBe("");
});
test("should handle invalid inputs", () => {
expect(formatCentsToCurrency("invalid")).toBe("");
expect(formatCentsToCurrency(Number.NaN)).toBe("");
});
test("should use en-US locale formatting", () => {
expect(formatCentsToCurrency(123456)).toBe("$1,234.56");
});
});
});