fix: tighten item price and pacing behavior
This commit is contained in:
@@ -984,13 +984,13 @@ export function parseFacebookItem(
|
||||
const url = `https://www.facebook.com/marketplace/item/${item.id}`;
|
||||
|
||||
// Extract price information
|
||||
let cents = 0;
|
||||
let cents: number | undefined;
|
||||
let currency = "CAD"; // Default
|
||||
let amountFormatted = item.formatted_price?.text || "FREE";
|
||||
let amountFormatted = item.formatted_price?.text;
|
||||
|
||||
if (item.listing_price) {
|
||||
currency = item.listing_price.currency || "CAD";
|
||||
if (item.listing_price.amount && item.listing_price.amount !== "0.00") {
|
||||
if (item.listing_price.amount != null) {
|
||||
const amount = Number.parseFloat(item.listing_price.amount);
|
||||
if (!Number.isNaN(amount)) {
|
||||
cents = Math.round(amount * 100);
|
||||
@@ -1037,6 +1037,13 @@ export function parseFacebookItem(
|
||||
listingType = "vehicle";
|
||||
}
|
||||
|
||||
if (cents == null || !amountFormatted) {
|
||||
if (!listingStatus || listingStatus === "ACTIVE") return null;
|
||||
|
||||
cents = 0;
|
||||
amountFormatted = item.formatted_price?.text || "PRICE_UNAVAILABLE";
|
||||
}
|
||||
|
||||
const listingDetails: FacebookListingDetails = {
|
||||
url,
|
||||
title,
|
||||
|
||||
@@ -889,15 +889,19 @@ export default async function fetchKijijiItems(
|
||||
progressBar?.start(totalProgress, currentProgress);
|
||||
|
||||
// Process in batches for controlled concurrency
|
||||
const CONCURRENT_REQUESTS = 1;
|
||||
const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond));
|
||||
const results: (DetailedListing | null)[] = [];
|
||||
|
||||
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
||||
const batchPromises = batch.map(async (link) => {
|
||||
const batchPromises = batch.map(async (link, batchIndex) => {
|
||||
try {
|
||||
const html = await fetchHtml(link, DELAY_MS, {
|
||||
// Per-request delay keeps detail fetches within REQUESTS_PER_SECOND.
|
||||
if (batchIndex > 0) {
|
||||
await new Promise((resolve) => setTimeout(resolve, DELAY_MS * batchIndex));
|
||||
}
|
||||
|
||||
const html = await fetchHtml(link, 0, {
|
||||
// Staggered starts keep request pacing within REQUESTS_PER_SECOND.
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
@@ -936,6 +940,10 @@ export default async function fetchKijijiItems(
|
||||
const batchResults = await Promise.all(batchPromises);
|
||||
results.push(...batchResults);
|
||||
|
||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||
await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
allListings.push(
|
||||
|
||||
@@ -1532,10 +1532,21 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.title).toBe("Minimal Item");
|
||||
expect(result?.description).toBeUndefined();
|
||||
expect(result?.seller).toBeUndefined();
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
// Verifies that parseFacebookItem returns null for an item whose
// listing_price.amount is present but is not a parseable number.
test("returns null when item price data is present but unparseable", () => {
|
||||
// Fixture carries no listing status field; only id, typename, title and price data.
const item = {
|
||||
id: "456b",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Broken Price Item",
|
||||
// Display text is non-empty, so the rejection is presumably driven by the
// unparseable amount below rather than by a missing formatted price.
formatted_price: { text: "price unavailable" },
|
||||
// Number.parseFloat("not-a-number") yields NaN.
listing_price: { amount: "not-a-number", currency: "CAD" },
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
|
||||
// The parser is expected to drop the item instead of returning listing details.
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should identify vehicle listings", () => {
|
||||
|
||||
@@ -428,6 +428,104 @@ describe("fetchKijijiItems", () => {
|
||||
expect(maxActiveDetailRequests).toBe(1);
|
||||
});
|
||||
|
||||
// Verifies that fetchKijijiItems, called with 4 as its second argument, has
// more than one but at most four detail-page requests in flight at once.
// The mocked fetch serves one search page listing two items and counts how
// many detail requests overlap.
test("allows bounded concurrency to scale with REQUESTS_PER_SECOND", async () => {
|
||||
// Search results page stub: two listings embedded in the __NEXT_DATA__ script.
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:1": { url: "/v-one/k0l0", title: "One" },
|
||||
"Listing:2": { url: "/v-two/k0l0", title: "Two" },
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// Builds a detail page stub for one listing with the given title and URL slug;
// price, type and status are fixed values.
const listingHtml = (title: string, slug: string) => `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:detail": {
|
||||
url: `/${slug}`,
|
||||
title,
|
||||
price: { amount: 10000, currency: "CAD", type: "FIXED" },
|
||||
type: "OFFER",
|
||||
status: "ACTIVE",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// Current and peak number of detail requests that are in flight in the mock.
let activeDetailRequests = 0;
|
||||
let maxActiveDetailRequests = 0;
|
||||
|
||||
// Replace the global fetch with a mock for the duration of the test.
// NOTE(review): global.fetch is not restored inside this test; presumably a
// shared hook resets it — confirm.
global.fetch = mock(async (input: string | URL | Request) => {
|
||||
// NOTE(review): for a Request object, toString() does not return the URL;
// this is fine only if the code under test passes a string or URL — confirm.
const url = typeof input === "string" ? input : input.toString();
|
||||
|
||||
// The search page is answered immediately and is not counted as a detail request.
if (url.includes("/k0c0l1700272")) {
|
||||
return {
|
||||
ok: true,
|
||||
text: () => Promise.resolve(searchHtml),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// Any other URL is treated as a detail request: bump the in-flight count
// and record the peak.
activeDetailRequests++;
|
||||
maxActiveDetailRequests = Math.max(
|
||||
maxActiveDetailRequests,
|
||||
activeDetailRequests,
|
||||
);
|
||||
|
||||
// Hold the request open for 300 ms so overlapping requests can be observed.
// NOTE(review): the "greater than 1" assertion below only holds if the second
// request starts within this window — timing-sensitive; confirm against the
// pacing delay used by fetchKijijiItems.
await new Promise((resolve) => setTimeout(resolve, 300));
|
||||
|
||||
activeDetailRequests--;
|
||||
|
||||
if (url.endsWith("/v-one/k0l0")) {
|
||||
return {
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("One", "v-one/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-two/k0l0")) {
|
||||
return {
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Two", "v-two/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// Fail loudly if the code under test requests a URL this mock does not know.
throw new Error(`Unexpected URL: ${url}`);
|
||||
}) as typeof fetch;
|
||||
|
||||
// The second argument (4) is presumably the requests-per-second setting named
// in the test title — confirm against the fetchKijijiItems signature.
const results = await fetchKijijiItems(
|
||||
"phone",
|
||||
4,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1 },
|
||||
);
|
||||
|
||||
// Both listings are returned, and the observed peak concurrency is above 1
// and no higher than 4.
expect(results).toHaveLength(2);
|
||||
expect(maxActiveDetailRequests).toBeGreaterThan(1);
|
||||
expect(maxActiveDetailRequests).toBeLessThanOrEqual(4);
|
||||
});
|
||||
|
||||
test("classifies the filtered Kijiji result set in unstable mode", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
|
||||
Reference in New Issue
Block a user