refactor: rewrite facebook item parser for comet bootstrap
This commit is contained in:
@@ -369,43 +369,80 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
|
||||
describe("Data Extraction", () => {
|
||||
describe("extractFacebookItemData", () => {
|
||||
test("should extract item data from standard require structure", () => {
|
||||
const mockItemData = {
|
||||
id: "123456",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Test Item",
|
||||
formatted_price: { text: "$100.00" },
|
||||
listing_price: { amount: "100.00", currency: "CAD" },
|
||||
is_live: true,
|
||||
};
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {
|
||||
target: mockItemData,
|
||||
},
|
||||
},
|
||||
test("extracts item details from Comet permalink bootstrap candidates", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
listing: {
|
||||
id: "123",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Vintage Chair",
|
||||
formatted_price: { text: "CA$80" },
|
||||
listing_price: {
|
||||
amount: "80.00",
|
||||
currency: "CAD",
|
||||
amount_with_offset: "80.00",
|
||||
},
|
||||
redacted_description: { text: "Solid wood chair" },
|
||||
location_text: { text: "Toronto, ON" },
|
||||
marketplace_listing_seller: { id: "seller-1", name: "Alex" },
|
||||
condition: "USED",
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const result = extractFacebookItemData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.id).toBe("123456");
|
||||
expect(result?.marketplace_listing_title).toBe("Test Item");
|
||||
expect(result?.id).toBe("123");
|
||||
expect(result?.marketplace_listing_title).toBe("Vintage Chair");
|
||||
});
|
||||
|
||||
// Regression test: when the bootstrap JSON holds more than one
// GroupCommerceProductItem, the parser must return the object stored under
// `payload.target`, not the one under `payload.recommendation_units[0].listing`.
// The decoy is declared first in the object literal, so it also appears first
// in the JSON.stringify output embedded in the page.
// NOTE(review): the "XCometMarketplacePermalinkController" script is presumably
// the marker the parser uses to recognise a Comet permalink page; confirm
// against extractFacebookItemData.
test("prefers the canonical permalink target over earlier decoy items", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
recommendation_units: [
|
||||
{
|
||||
listing: {
|
||||
id: "decoy-1",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Recommended Chair",
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
],
|
||||
target: {
|
||||
id: "real-123",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Canonical Chair",
|
||||
formatted_price: { text: "CA$120" },
|
||||
listing_price: {
|
||||
amount: "120.00",
|
||||
currency: "CAD",
|
||||
amount_with_offset: "120.00",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
// Parse the synthetic page; the result must be the `target` item, identified
// here by its id and title, and never the recommendation decoy.
const result = extractFacebookItemData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.id).toBe("real-123");
|
||||
expect(result?.marketplace_listing_title).toBe("Canonical Chair");
|
||||
});
|
||||
|
||||
test("should handle missing item data", () => {
|
||||
|
||||
Reference in New Issue
Block a user