fix: align scraper unstable mode behavior
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import { parseHTML } from "linkedom";
|
||||
import type {
|
||||
HTMLString,
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
@@ -114,7 +115,7 @@ function parseEbayListings(
|
||||
if (!href.startsWith("http")) {
|
||||
href = href.startsWith("//")
|
||||
? `https:${href}`
|
||||
: `https://www.ebay.com${href}`;
|
||||
: `https://www.ebay.ca${href}`;
|
||||
}
|
||||
|
||||
// Find the container - go up several levels to find the item container
|
||||
@@ -397,6 +398,8 @@ export default async function fetchEbayItems(
|
||||
} = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
|
||||
|
||||
const finalizeResults = (
|
||||
listings: EbayListingDetails[],
|
||||
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
|
||||
@@ -436,7 +439,7 @@ export default async function fetchEbayItems(
|
||||
|
||||
const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`;
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
console.log(`Fetching eBay search: ${searchUrl}`);
|
||||
|
||||
|
||||
@@ -1086,6 +1086,8 @@ export default async function fetchFacebookItems(
|
||||
MAX_ITEMS = 25,
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
|
||||
|
||||
const finalizeResults = (
|
||||
listings: FacebookListingDetails[],
|
||||
): FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails> => {
|
||||
@@ -1093,7 +1095,11 @@ export default async function fetchFacebookItems(
|
||||
return listings.slice(0, MAX_ITEMS);
|
||||
}
|
||||
|
||||
return classifyUnstableListings(listings.slice(0, MAX_ITEMS));
|
||||
const classified = classifyUnstableListings(listings);
|
||||
return {
|
||||
results: classified.results.slice(0, MAX_ITEMS),
|
||||
unstableResults: classified.unstableResults,
|
||||
};
|
||||
};
|
||||
|
||||
const cookies = await ensureFacebookCookies();
|
||||
@@ -1107,7 +1113,7 @@ export default async function fetchFacebookItems(
|
||||
);
|
||||
}
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
// Encode search query for URL
|
||||
const encodedQuery = encodeURIComponent(SEARCH_QUERY);
|
||||
|
||||
@@ -725,6 +725,8 @@ export default async function fetchKijijiItems(
|
||||
listingOptions: ListingFetchOptions = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
|
||||
|
||||
const finalizeResults = (
|
||||
listings: DetailedListing[],
|
||||
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
|
||||
@@ -735,7 +737,7 @@ export default async function fetchKijijiItems(
|
||||
return classifyUnstableListings(listings);
|
||||
};
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||
const cookies = await loadCookiesOptional(
|
||||
@@ -824,7 +826,7 @@ export default async function fetchKijijiItems(
|
||||
progressBar?.start(totalProgress, currentProgress);
|
||||
|
||||
// Process in batches for controlled concurrency
|
||||
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing
|
||||
const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond * 2)); // 2x rate for faster processing
|
||||
const results: (DetailedListing | null)[] = [];
|
||||
|
||||
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||
|
||||
Reference in New Issue
Block a user