refactor: rewrite facebook item parser for comet bootstrap

This commit is contained in:
2026-04-22 02:44:17 -04:00
parent c90ee54cc1
commit 63ca006696
2 changed files with 156 additions and 155 deletions

View File

@@ -369,43 +369,80 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
describe("Data Extraction", () => {
describe("extractFacebookItemData", () => {
test("should extract item data from standard require structure", () => {
const mockItemData = {
id: "123456",
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Test Item",
formatted_price: { text: "$100.00" },
listing_price: { amount: "100.00", currency: "CAD" },
is_live: true,
};
const mockData = {
require: [
[
null,
null,
null,
{
__bbox: {
result: {
data: {
viewer: {
marketplace_product_details_page: {
target: mockItemData,
},
},
test("extracts item details from Comet permalink bootstrap candidates", () => {
const html = `
<html><body>
<script>"XCometMarketplacePermalinkController"</script>
<script>
${JSON.stringify({
payload: {
listing: {
id: "123",
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Vintage Chair",
formatted_price: { text: "CA$80" },
listing_price: {
amount: "80.00",
currency: "CAD",
amount_with_offset: "80.00",
},
redacted_description: { text: "Solid wood chair" },
location_text: { text: "Toronto, ON" },
marketplace_listing_seller: { id: "seller-1", name: "Alex" },
condition: "USED",
is_live: true,
},
},
},
],
],
};
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
})}
</script>
</body></html>
`;
const result = extractFacebookItemData(html);
expect(result).not.toBeNull();
expect(result?.id).toBe("123456");
expect(result?.marketplace_listing_title).toBe("Test Item");
expect(result?.id).toBe("123");
expect(result?.marketplace_listing_title).toBe("Vintage Chair");
});
test("prefers the canonical permalink target over earlier decoy items", () => {
  // Listing nested under recommendation_units; it is serialized before `target`.
  const decoyListing = {
    id: "decoy-1",
    __typename: "GroupCommerceProductItem",
    marketplace_listing_title: "Recommended Chair",
    is_live: true,
  };

  // Listing stored under `target`; the assertions below expect this one back.
  const canonicalListing = {
    id: "real-123",
    __typename: "GroupCommerceProductItem",
    marketplace_listing_title: "Canonical Chair",
    formatted_price: { text: "CA$120" },
    listing_price: {
      amount: "120.00",
      currency: "CAD",
      amount_with_offset: "120.00",
    },
    is_live: true,
  };

  // Key order matters for the scenario: the decoy must come first in the JSON.
  const bootstrapJson = JSON.stringify({
    payload: {
      recommendation_units: [{ listing: decoyListing }],
      target: canonicalListing,
    },
  });

  // Template lines are kept flush-left so the generated markup is unchanged.
  const markup = `
<html><body>
<script>"XCometMarketplacePermalinkController"</script>
<script>
${bootstrapJson}
</script>
</body></html>
`;

  const item = extractFacebookItemData(markup);

  expect(item).not.toBeNull();
  expect(item?.id).toBe("real-123");
  expect(item?.marketplace_listing_title).toBe("Canonical Chair");
});
test("should handle missing item data", () => {