refactor: add facebook html fallbacks
This commit is contained in:
@@ -404,6 +404,60 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
expect(result?.marketplace_listing_title).toBe("Vintage Chair");
|
||||
});
|
||||
|
||||
// Item-page fallback case: the fixture's <script> tags contain only a bare
// controller-name string and the non-JSON text `{invalid: json}`, so none of
// the expected field values exist anywhere except in the rendered markup.
test("falls back to rendered item HTML when permalink bootstrap payloads are undecodable", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>{invalid: json}</script>
|
||||
<h1>Vintage Chair</h1>
|
||||
<span>CA$80</span>
|
||||
<div>Toronto, ON</div>
|
||||
<div>Description</div>
|
||||
<div>Solid wood chair</div>
|
||||
<a href="/marketplace/item/123/">View listing</a>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
// Expected values below mirror the fixture: id from the /marketplace/item/123/
// link, title from the <h1>, price from the <span>, location from the first
// <div>, and description from the <div> that follows the "Description" label.
// NOTE(review): how the extractor locates each field is not visible here —
// confirm against the implementation.
const result = extractFacebookItemData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.id).toBe("123");
|
||||
expect(result?.marketplace_listing_title).toBe("Vintage Chair");
|
||||
expect(result?.formatted_price?.text).toBe("CA$80");
|
||||
expect(result?.location_text?.text).toBe("Toronto, ON");
|
||||
expect(result?.redacted_description?.text).toBe("Solid wood chair");
|
||||
|
||||
});
|
||||
|
||||
// Item-page fallback with a decoy: the fixture places a link to item 999
// ("Related Chair") before the real listing content, while the
// <link rel="canonical"> in <head> points at item 123. The assertions require
// the result to describe item 123, not the earlier 999 link.
test("uses canonical permalink context instead of earlier related links in item HTML fallback", () => {
|
||||
const html = `
|
||||
<html>
|
||||
<head>
|
||||
<link rel="canonical" href="https://www.facebook.com/marketplace/item/123/" />
|
||||
</head>
|
||||
<body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>{invalid: json}</script>
|
||||
<a href="/marketplace/item/999/">
|
||||
<span>Related Chair</span>
|
||||
</a>
|
||||
<h1>Vintage Chair</h1>
|
||||
<span>CA$80</span>
|
||||
<div>Toronto, ON</div>
|
||||
<div>Message seller</div>
|
||||
<div>Seller details</div>
|
||||
<div>Description</div>
|
||||
<div>Solid wood chair</div>
|
||||
<a href="/marketplace/item/123/">View listing</a>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// The id must match the canonical URL (123) and the title must be the <h1>
// text, not the "Related Chair" span inside the earlier 999 anchor. The
// description is still expected even though "Message seller" / "Seller details"
// blocks sit between the location and the "Description" label.
const result = extractFacebookItemData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.id).toBe("123");
|
||||
expect(result?.marketplace_listing_title).toBe("Vintage Chair");
|
||||
expect(result?.redacted_description?.text).toBe("Solid wood chair");
|
||||
|
||||
});
|
||||
|
||||
test("prefers the canonical permalink target over earlier decoy items", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
@@ -584,6 +638,33 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Search-page fallback case: the fixture's <script> tags contain only a bare
// controller-name string and the non-JSON text `{invalid: json}`; the single
// listing is present only as an <a> card with three <span> children.
test("falls back to rendered search HTML when search bootstrap payloads are undecodable", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplaceSearchController"</script>
|
||||
<script>{invalid: json}</script>
|
||||
<a href="/marketplace/item/987654321/">
|
||||
<span>Vintage Bike</span>
|
||||
<span>CA$120</span>
|
||||
<span>Toronto, ON</span>
|
||||
</a>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
// Results are asserted in a `[{ node: { listing: {...} } }]` shape, with the
// one anchor in the fixture yielding exactly one entry.
const result = extractFacebookMarketplaceData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result?.[0].node.listing.id).toBe("987654321");
|
||||
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
|
||||
"Vintage Bike",
|
||||
);
|
||||
// The fixture only contains the display string "CA$120"; the expected
// amount "120.00" and currency "CAD" are derived values.
// NOTE(review): presumably parsed from the display string by the extractor —
// confirm against the implementation.
expect(result?.[0].node.listing.listing_price).toEqual({
|
||||
amount: "120.00",
|
||||
formatted_amount: "CA$120",
|
||||
currency: "CAD",
|
||||
});
|
||||
});
|
||||
|
||||
test("should handle empty search results", () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
|
||||
Reference in New Issue
Block a user