in

Can not web scrape aliexpress product description using puppeteer or cheerio


Hi I am trying to scrape the product description off this aliexpress product page:

https://www.aliexpress.com/item/1005003413518415.html

There is a class called: origin-part

This class is located at the OVERVIEW tab

I tried using cheerio and puppeteer javascript I have no problem scraping other webpages static webpages like wikipedia or guardian. But for aliexpress it wont work.

Here is some off the code I wrote.

const puppeteer = require('puppeteer');
const cheerio = require('cheerio');

const url="https://www.aliexpress.com/item/1005003413518415.html";

async function configureBrowser() {
  const browser = await puppeteer.launch({
    headless: true,
    ignoreDefaultArgs: ['--disable-extensions']
  });

  const page = await browser.newPage();
  await page.setDefaultNavigationTimeout(0); // Why? My computer is super slow!!

  await page.goto(url);

  const obj = {
    browser,
    page
  }
  return obj;
}

async function checkDescription(page) {
  await page.reload({ waitUntil: 'networkidle0' }); // no idea what this is read somewhere so i put that in there lol

  // let html = await page.evaluate(() => document.body.innerHTML);
  let html = await page.evaluate(() => document.querySelectorAll('.origin-part'));

  console.log('html');
  console.log(html); // returns an empty object
  console.log('html');

   // trying cheerio here
  const $ = cheerio.load(html);

  $('.origin-part', html).each(function() {
    let words = $(this).text(); // the innerText wont come up
    console.log(words);
  })
}


async function monitor() {
  const config = await configureBrowser();
  await checkDescription(config.page);
  await config.browser.close();
}

monitor();



Source: https://stackoverflow.com/questions/70553143/can-not-web-scrape-aliexpress-product-description-using-puppeteer-or-cheerio

Concurrent modification during iteration: Instance(length:41) of ‘_GrowableList’ error Flutter

RDS MySQL DNS hostname incorrect