MyDataProvider » Blog » Let's scrape Amazon product data via the Rainforest API

Let's scrape Amazon product data via the Rainforest API

  • by

The Rainforest API provides an excellent API for scraping Amazon product data.

Below is sample code from our solution for scraping Amazon products:


using CatalogLoader;
using CatalogLoader.Utils;
using CatalogLoaderCommon;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace MyScrapers.Amazon_Com_Rainforest
{
    /// <summary>
    /// Scraper script that obtains Amazon product and category data by calling the
    /// Rainforest API (<c>https://api.rainforestapi.com/request</c>) and mapping the
    /// returned JSON onto the framework's product/category objects.
    /// </summary>
    public class Amazon_Com_Rainforest : CustomScriptBase
    {
        /// <summary>Endpoint that every Rainforest request in this script is sent to.</summary>
        const string ApiEndpoint = "https://api.rainforestapi.com/request";

        // Value of the "ApiKey" user parameter; read in Login.
        string _apiKey = "";

        // Page loader obtained from the proxy in Login and reused for every API call.
        HtmlPageLoader _htmlPageLoader = null;

        /// <summary>
        /// Runs the base login, then caches the "ApiKey" user parameter and an empty
        /// page loader for use by the other script callbacks.
        /// </summary>
        /// <param name="p">Login parameters supplied by the host.</param>
        public override void Login(LoginScriptParameters p)
        {
            base.Login(p);
            _apiKey = p.State.GrabberSettings.UserParameterGet("ApiKey");
            _htmlPageLoader = p.State.Proxy.GetHtmlPageLoaderEmpty();
        }

        /// <summary>
        /// Returns the base grabber settings with this script's overrides applied:
        /// 5 request attempts, caching enabled, a request timeout of 100000, and the
        /// user-parameter declaration that exposes <c>ApiMode</c> and <c>ApiKey</c>.
        /// </summary>
        /// <returns>The adjusted settings object.</returns>
        public override GrabberSettings GetGrabberSettings()
        {
            var r = base.GetGrabberSettings();
            r.Settings.RequestAttempts = 5;
            // NOTE(review): "[next]" looks like the host's separator between user
            // parameter declarations - confirm against the framework documentation.
            r.Settings.UserParameters = "ApiMode=true[next]ApiKey";
            r.Settings.CacheEnabled = true;
            // NOTE(review): presumably milliseconds (100 s) - confirm the unit.
            r.Settings.RequestTimeout = 100000;
            return r;
        }

        /// <summary>
        /// Loads a single product through a <c>type=product</c> API request and copies
        /// the returned fields, images and attributes onto <c>p.Product</c>.
        /// </summary>
        /// <param name="p">Parameters carrying the product to fill; its <c>Url</c> is the Amazon page to query.</param>
        /// <exception cref="InvalidOperationException">
        /// The API returned an empty body or a payload without a <c>product</c> object
        /// (previously this surfaced as a <see cref="NullReferenceException"/>).
        /// </exception>
        public override void RunProduct(RunProductScriptParameters p)
        {
            base.RunProduct(p);

            var apiCall = BuildApiUrl("product", p.Product.Url);
            var amz_out = LoadJson<product_output>(apiCall, p.Product.Url);
            var ap = amz_out.product;
            if (ap == null)
            {
                // The message names the product URL only; the request URL would leak the API key.
                throw new InvalidOperationException(
                    $"Rainforest API response for '{p.Product.Url}' contains no \"product\" object.");
            }

            p.Product.Art = ap.asin;
            p.Product.Name = ap.title;
            p.Product.Weight = ap.weight;
            p.Product.MNP = ap.model_number;
            p.Product.Manufacturer = ap.brand;
            p.Product.FullDescription = ap.description;
            p.Product.Price = ap.buybox_winner?.price?.value;
            p.Product.Currency = ap.buybox_winner?.price?.currency;
            // Stock is reduced to a flag: "1" when the buybox reports "in_stock", otherwise "0".
            p.Product.Quantity = ap.buybox_winner?.availability?.type == "in_stock" ? "1" : "0";

            // The lists can be null when the JSON carries an explicit null for the key.
            foreach (var i in ap.images ?? Enumerable.Empty<image>())
            {
                if (!string.IsNullOrWhiteSpace(i?.link))
                {
                    p.Product.ImageAdd(i.link);
                }
            }

            // Merge attributes and specifications by name; specifications are applied
            // last, so they win when both lists contain the same name.
            var f2v = new Dictionary<string, string>();
            var attributes = ap.attributes ?? Enumerable.Empty<attribute>();
            var specifications = ap.specifications ?? Enumerable.Empty<attribute>();
            foreach (var a in attributes.Concat(specifications))
            {
                // A null name would throw from the dictionary indexer; a blank one is unusable.
                if (a == null || string.IsNullOrWhiteSpace(a.name))
                {
                    continue;
                }
                f2v[a.name] = a.value;
            }

            foreach (var i in f2v)
            {
                p.Product.AddDynamicAttribute(i.Key, i.Value);
            }

            // Variant (combination) import is currently disabled; kept for reference.
            //if (ap.variants.Count > 1)
            //{
            //    var cmbs = new List<Combination>();
            //    foreach (var v in ap.variants)
            //    {
            //        var c = new Combination();
            //        cmbs.Add(c);
            //        c.Art = v.asin;
            //        foreach (var d in v.dimensions)
            //        {
            //            c.AddDynamicAttribute(d.name, d.value);
            //        }
            //        foreach (var i in v.images)
            //        {
            //            c.ImageAdd(i.link);
            //        }
            //    }
            //    p.Product.CombinationsAdd(cmbs);
            //}
        }

        /// <summary>
        /// No-op override: neither the base implementation nor any source-URL
        /// registration is executed (both calls are commented out below).
        /// </summary>
        /// <param name="p">Category parameters supplied by the host (unused).</param>
        public override void RunCategory(RunCategoryScriptParameters p)
        {
            //base.RunCategory(p);
            //p.Category.SourceUrlAdd();
        }

        /// <summary>
        /// Collects product links for a category through a <c>type=category</c> API
        /// request and appends them to <c>p.Category.ProductLinks</c>. Does nothing
        /// beyond the base call when the category has no source URL.
        /// </summary>
        /// <param name="p">Parameters carrying the category whose <c>SourceUrl</c> is queried.</param>
        /// <exception cref="InvalidOperationException">The API returned an empty body.</exception>
        public override void GetProductLinksForCategory(GetProductLinksForCategoryScriptParameters p)
        {
            base.GetProductLinksForCategory(p);
            if (string.IsNullOrWhiteSpace(p.Category.SourceUrl))
            {
                return;
            }

            var apiCall = BuildApiUrl("category", p.Category.SourceUrl);
            var catOutput = LoadJson<category_output>(apiCall, p.Category.SourceUrl);
            // To inspect the raw response while debugging, log _htmlPageLoader.Content:
            //p.Process.m_ti.AddLogInfo(_htmlPageLoader.Content);

            // NOTE(review): only this single response is read; confirm whether the
            // category listing needs additional paged requests.
            foreach (var r in catOutput.category_results ?? Enumerable.Empty<category_result>())
            {
                if (!string.IsNullOrWhiteSpace(r?.link))
                {
                    p.Category.ProductLinks.Add(r.link);
                }
            }
        }

        /// <summary>
        /// Builds the request URL for the given request type and target Amazon URL.
        /// Both the API key and the target URL are URL-encoded so that reserved
        /// characters cannot corrupt the query string.
        /// </summary>
        /// <param name="requestType">Value of the <c>type</c> query parameter ("product" or "category").</param>
        /// <param name="targetUrl">Amazon page URL passed as the <c>url</c> query parameter.</param>
        /// <returns>The complete request URL.</returns>
        private string BuildApiUrl(string requestType, string targetUrl)
        {
            var enKey = WebUtility.UrlEncode(_apiKey);
            var enUrl = WebUtility.UrlEncode(targetUrl);
            return $"{ApiEndpoint}?api_key={enKey}&type={requestType}&url={enUrl}";
        }

        /// <summary>
        /// Loads <paramref name="apiCall"/> with the cached page loader and deserializes
        /// the response body into <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">DTO type describing the expected JSON document.</typeparam>
        /// <param name="apiCall">Full request URL (contains the API key, so it is never put into messages).</param>
        /// <param name="sourceUrl">Amazon URL being queried; used only for error messages.</param>
        /// <returns>The deserialized, non-null document.</returns>
        /// <exception cref="InvalidOperationException">The body is empty or deserializes to null.</exception>
        private T LoadJson<T>(string apiCall, string sourceUrl) where T : class
        {
            _htmlPageLoader.Load(apiCall);
            var strJson = _htmlPageLoader.Content;
            if (string.IsNullOrWhiteSpace(strJson))
            {
                throw new InvalidOperationException(
                    $"Rainforest API returned an empty response for '{sourceUrl}'.");
            }

            var result = Newtonsoft.Json.JsonConvert.DeserializeObject<T>(strJson);
            if (result == null)
            {
                // DeserializeObject yields null for a literal JSON "null" document.
                throw new InvalidOperationException(
                    $"Rainforest API returned a null document for '{sourceUrl}'.");
            }
            return result;
        }
    }

    /// <summary>
    /// Top-level shape of the JSON document returned for a <c>type=product</c>
    /// request; RunProduct deserializes the response body into this type.
    /// Member names mirror the JSON keys they are bound to.
    /// </summary>
    class product_output
    {
        // The "product" object of the response; RunProduct reads all product data from it.
        public product product;
    }

    /// <summary>
    /// The <c>product</c> object of a product response. RunProduct copies these
    /// values onto the framework product; member names mirror the JSON keys.
    /// </summary>
    class product
    {
        // Copied to Product.Name.
        public string title;
        // Copied to Product.Art.
        public string asin;
        // Copied to Product.Manufacturer.
        public string brand;
        // Copied to Product.FullDescription.
        public string description;
        // Copied to Product.Weight as-is (kept as text, not parsed).
        public string weight;
        // Deserialized but not read by the code visible in this file.
        public string dimensions;
        // Copied to Product.MNP.
        public string model_number;

        // The lists start out empty so they are non-null when the key is absent.
        // NOTE(review): an explicit JSON null for the key would presumably still
        // overwrite the list with null - confirm against Json.NET null handling.

        // Only referenced by the commented-out combination code in RunProduct.
        public List<variant> variants = new List<variant>();
        // Each entry's link is passed to Product.ImageAdd.
        public List<image> images = new List<image>();
        // Merged by name with specifications into the product's dynamic attributes.
        public List<attribute> attributes = new List<attribute>();
        // Applied after attributes, so a specification wins on a duplicate name.
        public List<attribute> specifications = new List<attribute>();

        // Source of price, currency and stock status; accessed null-conditionally in RunProduct.
        public buybox_winner buybox_winner;
    }

    /// <summary>
    /// The <c>buybox_winner</c> object of a product; RunProduct takes the price,
    /// currency and availability from it.
    /// </summary>
    class buybox_winner
    {
        // Supplies Product.Price (value) and Product.Currency (currency).
        public price price;
        // Deserialized but not read by the code visible in this file.
        public price rrp;
        // Its type is compared with "in_stock" to derive Product.Quantity.
        public availability availability;
    }

    /// <summary>Availability information of the buybox offer.</summary>
    class availability
    {
        // RunProduct maps "in_stock" to quantity "1" and any other value (or null) to "0".
        public string type;
    }

    /// <summary>A price as returned by the API; used for the buybox price and the rrp.</summary>
    class price
    {
        // Assigned unchanged to Product.Currency.
        public string currency;
        // Held as text and assigned unchanged to Product.Price (no numeric parsing here).
        public string value;
    }
    /// <summary>
    /// A name/value pair; the element type of a product's attributes and
    /// specifications and of a variant's dimensions.
    /// </summary>
    class attribute
    {
        // Used by RunProduct as the dictionary key when merging attributes.
        public string name;
        // Stored under that key and passed to AddDynamicAttribute.
        public string value;
    }
    /// <summary>
    /// One entry of a product's <c>variants</c> list. Currently only consumed by
    /// the commented-out combination code in RunProduct.
    /// </summary>
    class variant
    {
        // ASIN of the variant (would become the combination's Art).
        public string asin;
        // Images of the variant; starts out empty so it is non-null when the key is absent.
        public List<image> images = new List<image>();
        // Name/value pairs describing the variant; starts out empty as well.
        public List<attribute> dimensions = new List<attribute>();
    }

    /// <summary>An image entry of a product or variant.</summary>
    class image
    {
        // Image URL; RunProduct passes it to Product.ImageAdd.
        public string link;
    }

    /// <summary>
    /// Top-level shape of the JSON document returned for a <c>type=category</c>
    /// request; GetProductLinksForCategory deserializes the response into this type.
    /// </summary>
    class category_output
    {
        // Listing entries; starts out empty so it is non-null when the key is absent.
        public List<category_result> category_results = new List<category_result>();
    }

    /// <summary>One entry of a category listing.</summary>
    class category_result
    {
        // Product page URL; GetProductLinksForCategory adds it to Category.ProductLinks.
        public string link;
    }
}