refactor: rewrite facebook search parser for comet bootstrap
This commit is contained in:
@@ -727,6 +727,151 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const candidates = extractFacebookBootstrapCandidates(html);
|
||||
expect(candidates.map((c) => c.marker)).toEqual(["first", "second"]);
|
||||
});
|
||||
|
||||
test("extracts search results from Comet bootstrap candidates", () => {
  // One edge wrapping a listing with title, price, location and is_live set.
  const bikeEdge = {
    node: {
      listing: {
        id: "1",
        marketplace_listing_title: "Bike",
        listing_price: {
          amount: "120.00",
          formatted_amount: "CA$120",
          currency: "CAD",
        },
        location: {
          reverse_geocode: {
            city_page: { display_name: "Toronto" },
          },
        },
        is_live: true,
      },
    },
  };

  // The edge sits under payload.resultGroups[0].edges in the serialized script.
  const bootstrap = {
    payload: {
      resultGroups: [{ edges: [bikeEdge] }],
    },
  };

  // The first <script> carries only the controller-name string; the second
  // carries the serialized bootstrap object.
  const html = `
<html><body>
<script>"XCometMarketplaceSearchController"</script>
<script>
${JSON.stringify(bootstrap)}
</script>
</body></html>
`;

  const ads = extractFacebookMarketplaceData(html);

  expect(ads).toHaveLength(1);
  const firstTitle = ads?.[0].node.listing.marketplace_listing_title;
  expect(firstTitle).toBe("Bike");
});
|
||||
|
||||
test("prefers the strongest marketplace edge set when multiple edges arrays exist", () => {
  // Builds one edge around a live CAD listing; key order matches the fixture
  // so the serialized JSON is unchanged.
  const listingEdge = (id, title, amount, formattedAmount) => ({
    node: {
      listing: {
        id,
        marketplace_listing_title: title,
        listing_price: {
          amount,
          formatted_amount: formattedAmount,
          currency: "CAD",
        },
        is_live: true,
      },
    },
  });

  // Two separate `edges` arrays live in the same script: a decoy under
  // incidental.feed_units (serialized first) and the one under
  // payload.resultGroups that the assertions below expect to be returned.
  const bootstrap = {
    incidental: {
      feed_units: {
        edges: [listingEdge("wrong-1", "Wrong Listing", "1.00", "CA$1")],
      },
    },
    payload: {
      resultGroups: [
        {
          edges: [listingEdge("right-1", "Right Listing", "250.00", "CA$250")],
        },
      ],
    },
  };

  const html = `
<html><body>
<script>"XCometMarketplaceSearchController"</script>
<script>
${JSON.stringify(bootstrap)}
</script>
</body></html>
`;

  const ads = extractFacebookMarketplaceData(html);

  expect(ads).toHaveLength(1);
  const firstId = ads?.[0].node.listing.id;
  expect(firstId).toBe("right-1");
});
|
||||
|
||||
test("rejects mixed edge arrays that contain non-listing entries", () => {
  // An edge whose node carries a `listing`.
  const listingEdge = {
    node: {
      listing: {
        id: "1",
        marketplace_listing_title: "Bike",
        listing_price: {
          amount: "120.00",
          formatted_amount: "CA$120",
          currency: "CAD",
        },
        is_live: true,
      },
    },
  };

  // An edge whose node carries a `story` instead of a `listing`.
  const storyEdge = {
    node: {
      story: {
        id: "not-a-listing",
      },
    },
  };

  // Both edges share a single `edges` array; the test expects the extractor
  // to return null for this input.
  const bootstrap = {
    payload: {
      resultGroups: [{ edges: [listingEdge, storyEdge] }],
    },
  };

  const html = `
<html><body>
<script>"XCometMarketplaceSearchController"</script>
<script>
${JSON.stringify(bootstrap)}
</script>
</body></html>
`;

  const ads = extractFacebookMarketplaceData(html);

  expect(ads).toBeNull();
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -27,77 +27,40 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
|
||||
describe("Main Search Function", () => {
|
||||
test("should successfully fetch search results", async () => {
|
||||
const mockSearchData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
marketplace_search: {
|
||||
feed_units: {
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "iPhone 13 Pro",
|
||||
listing_price: {
|
||||
amount: "800.00",
|
||||
formatted_amount: "$800.00",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: {
|
||||
reverse_geocode: {
|
||||
city_page: { display_name: "Toronto" },
|
||||
},
|
||||
},
|
||||
creation_time: 1640995200,
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "2",
|
||||
marketplace_listing_title: "Samsung Galaxy",
|
||||
listing_price: {
|
||||
amount: "600.00",
|
||||
formatted_amount: "$600.00",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: {
|
||||
reverse_geocode: {
|
||||
city_page: { display_name: "Mississauga" },
|
||||
},
|
||||
},
|
||||
creation_time: 1640995300,
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "iPhone 13",
|
||||
listing_price: {
|
||||
amount: "500.00",
|
||||
formatted_amount: "CA$500",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: {
|
||||
reverse_geocode: {
|
||||
city_page: { display_name: "Toronto" },
|
||||
},
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
},
|
||||
})}</script></body></html>`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(
|
||||
`<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
|
||||
),
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
@@ -105,9 +68,8 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
);
|
||||
|
||||
const results = await fetchFacebookItems("iPhone", 1, "toronto", 25);
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results[0].title).toBe("iPhone 13 Pro");
|
||||
expect(results[1].title).toBe("Samsung Galaxy");
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("iPhone 13");
|
||||
});
|
||||
|
||||
test("should filter out items without price", async () => {
|
||||
|
||||
Reference in New Issue
Block a user