refactor: add facebook html fallbacks

This commit is contained in:
2026-04-22 11:36:47 -04:00
parent 63ca006696
commit 7ddc96dfdf
2 changed files with 290 additions and 1 deletions

View File

@@ -404,6 +404,60 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result?.marketplace_listing_title).toBe("Vintage Chair");
});
// Item-page fallback: the fixture carries the permalink controller marker but
// its bootstrap <script> is not valid JSON, so the test expects the listing
// fields to be recovered from the rendered elements instead.
test("falls back to rendered item HTML when permalink bootstrap payloads are undecodable", () => {
  // NOTE: the fixture is a runtime string; its content is kept verbatim.
  const pageMarkup = `
<html><body>
<script>"XCometMarketplacePermalinkController"</script>
<script>{invalid: json}</script>
<h1>Vintage Chair</h1>
<span>CA$80</span>
<div>Toronto, ON</div>
<div>Description</div>
<div>Solid wood chair</div>
<a href="/marketplace/item/123/">View listing</a>
</body></html>
`;

  const item = extractFacebookItemData(pageMarkup);

  // A non-null result is required before any field is inspected.
  expect(item).not.toBeNull();

  // The id is expected to match the /marketplace/item/123/ link in the fixture.
  expect(item?.id).toBe("123");

  // Title, price, location and description mirror the fixture's visible text.
  expect(item?.marketplace_listing_title).toBe("Vintage Chair");
  expect(item?.formatted_price?.text).toBe("CA$80");
  expect(item?.location_text?.text).toBe("Toronto, ON");
  expect(item?.redacted_description?.text).toBe("Solid wood chair");
});
// Canonical-link disambiguation: the fixture links to a related item (999)
// before the real listing (123), and its <link rel="canonical"> points at 123.
// The test expects the fallback to report the canonical item's id and text.
test("uses canonical permalink context instead of earlier related links in item HTML fallback", () => {
  // NOTE: the fixture is a runtime string; its content is kept verbatim.
  const pageMarkup = `
<html>
<head>
<link rel="canonical" href="https://www.facebook.com/marketplace/item/123/" />
</head>
<body>
<script>"XCometMarketplacePermalinkController"</script>
<script>{invalid: json}</script>
<a href="/marketplace/item/999/">
<span>Related Chair</span>
</a>
<h1>Vintage Chair</h1>
<span>CA$80</span>
<div>Toronto, ON</div>
<div>Message seller</div>
<div>Seller details</div>
<div>Description</div>
<div>Solid wood chair</div>
<a href="/marketplace/item/123/">View listing</a>
</body>
</html>
`;

  const item = extractFacebookItemData(pageMarkup);

  expect(item).not.toBeNull();

  // Must be the canonical id (123), not the earlier related link's id (999).
  expect(item?.id).toBe("123");

  // Title comes from the <h1>, not the "Related Chair" span inside the decoy link.
  expect(item?.marketplace_listing_title).toBe("Vintage Chair");

  // The description is expected to be the text following the "Description"
  // label, with the "Message seller" / "Seller details" blocks not captured.
  expect(item?.redacted_description?.text).toBe("Solid wood chair");
});
test("prefers the canonical permalink target over earlier decoy items", () => {
const html = `
<html><body>
@@ -584,6 +638,33 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
);
});
// Search-page fallback: the fixture carries the search controller marker but
// its bootstrap <script> is not valid JSON, so the test expects a single
// result built from the one rendered listing anchor.
test("falls back to rendered search HTML when search bootstrap payloads are undecodable", () => {
  // NOTE: the fixture is a runtime string; its content is kept verbatim.
  const searchMarkup = `
<html><body>
<script>"XCometMarketplaceSearchController"</script>
<script>{invalid: json}</script>
<a href="/marketplace/item/987654321/">
<span>Vintage Bike</span>
<span>CA$120</span>
<span>Toronto, ON</span>
</a>
</body></html>
`;

  const edges = extractFacebookMarketplaceData(searchMarkup);

  // Exactly one entry is expected, matching the single anchor in the fixture.
  expect(edges).not.toBeNull();
  expect(edges).toHaveLength(1);

  // The id is expected to match the anchor's /marketplace/item/<id>/ href.
  expect(edges?.[0].node.listing.id).toBe("987654321");
  expect(edges?.[0].node.listing.marketplace_listing_title).toBe(
    "Vintage Bike",
  );

  // The displayed "CA$120" is expected as a normalised amount and currency
  // code alongside the original formatted text.
  expect(edges?.[0].node.listing.listing_price).toEqual({
    amount: "120.00",
    formatted_amount: "CA$120",
    currency: "CAD",
  });
});
test("should handle empty search results", () => {
const mockData = {
require: [