MyDataProvider » Blog » Let's scrape Amazon product data via Rainforest API

Let's scrape Amazon product data via Rainforest API

  • by

Rainforest API provides an excellent API for scraping Amazon product data.

Below are code samples from our solution for scraping Amazon products:

[code lang="csharp"]

using CatalogLoader;
using CatalogLoader.Utils;
using CatalogLoaderCommon;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace MyScrapers.Amazon_Com_Rainforest
{
public class Amazon_Com_Rainforest : CustomScriptBase
{
// Rainforest API key; overwritten in Login with the "ApiKey" user parameter.
private string _apiKey = "";

// Page loader used for every API request; obtained from the proxy in Login.
private HtmlPageLoader _htmlPageLoader = null;
/// <summary>
/// Runs the base login, then caches the Rainforest API key and a page loader
/// that the other overrides use for their API requests.
/// </summary>
/// <param name="p">Login parameters; <c>p.State</c> provides the grabber settings and the proxy.</param>
public override void Login(LoginScriptParameters p)
{
    base.Login(p);

    var state = p.State;

    // "ApiKey" is the user parameter named in the UserParameters string of GetGrabberSettings.
    _apiKey = state.GrabberSettings.UserParameterGet("ApiKey");
    _htmlPageLoader = state.Proxy.GetHtmlPageLoaderEmpty();
}
/// <summary>
/// Returns the base grabber settings with the values this scraper needs.
/// </summary>
/// <returns>The settings object produced by the base class, modified in place.</returns>
public override GrabberSettings GetGrabberSettings()
{
    var grabberSettings = base.GetGrabberSettings();

    // Request behaviour. NOTE(review): the timeout is presumably in milliseconds - confirm.
    grabberSettings.Settings.RequestAttempts = 5;
    grabberSettings.Settings.RequestTimeout = 100000;

    // Declares the user parameters; Login reads "ApiKey" back via UserParameterGet.
    // NOTE(review): "[next]" looks like the separator between parameter declarations - confirm.
    grabberSettings.Settings.UserParameters = "ApiMode=true[next]ApiKey";

    grabberSettings.Settings.CacheEnabled = true;

    return grabberSettings;
}

/// <summary>
/// Requests the Rainforest API "product" endpoint for <c>p.Product.Url</c> and copies the
/// returned fields (ASIN, title, brand, price, availability, images, attributes) onto
/// <c>p.Product</c>.
/// </summary>
/// <param name="p">Script parameters; <c>p.Product.Url</c> is the product page URL to look up.</param>
/// <exception cref="InvalidOperationException">
/// The response body is empty or does not contain a <c>product</c> object
/// (for example when the API answers with an error payload).
/// </exception>
public override void RunProduct(RunProductScriptParameters p)
{
    base.RunProduct(p);

    var enUrl = WebUtility.UrlEncode(p.Product.Url);
    var apiCall = $"https://api.rainforestapi.com/request?api_key={_apiKey}&type=product&url={enUrl}";
    _htmlPageLoader.Load(apiCall);
    var strJson = _htmlPageLoader.Content;

    // An empty body deserializes to null and an error payload has no "product" key;
    // fail with a descriptive error instead of a NullReferenceException further down.
    var amz_out = string.IsNullOrWhiteSpace(strJson)
        ? null
        : Newtonsoft.Json.JsonConvert.DeserializeObject<product_output>(strJson);
    var ap = amz_out?.product;
    if (ap == null)
    {
        // The request URL is deliberately not included: it contains the API key.
        throw new InvalidOperationException(
            $"Rainforest API returned no product data for '{p.Product.Url}'.");
    }

    p.Product.Art = ap.asin;
    p.Product.Name = ap.title;
    p.Product.Weight = ap.weight;
    p.Product.MNP = ap.model_number;
    p.Product.Manufacturer = ap.brand;
    p.Product.FullDescription = ap.description;

    // buybox_winner and its children are optional in the response.
    p.Product.Price = ap.buybox_winner?.price?.value;
    p.Product.Currency = ap.buybox_winner?.price?.currency;
    p.Product.Quantity = ap.buybox_winner?.availability?.type == "in_stock" ? "1" : "0";

    // The list fields default to empty, but an explicit JSON null overwrites that default.
    foreach (var i in ap.images ?? Enumerable.Empty<image>())
    {
        if (!string.IsNullOrWhiteSpace(i?.link))
        {
            p.Product.ImageAdd(i.link);
        }
    }

    // Merge attributes and specifications by name; on a name clash the specification wins
    // because it is written last.
    var f2v = new Dictionary<string, string>();
    var namedValues = (ap.attributes ?? Enumerable.Empty<attribute>())
        .Concat(ap.specifications ?? Enumerable.Empty<attribute>());
    foreach (var a in namedValues)
    {
        // A null name cannot be used as a dictionary key.
        if (a?.name == null)
        {
            continue;
        }
        f2v[a.name] = a.value;
    }

    foreach (var i in f2v)
    {
        p.Product.AddDynamicAttribute(i.Key, i.Value);
    }

    // Variant import is disabled; kept as a reference for mapping variants to combinations.
    //if (ap.variants.Count > 1)
    //{
    //    var cmbs = new List<Combination>();
    //    foreach (var v in ap.variants)
    //    {
    //        var c = new Combination();
    //        cmbs.Add(c);
    //        c.Art = v.asin;
    //        foreach (var d in v.dimensions)
    //        {
    //            c.AddDynamicAttribute(d.name, d.value);
    //        }
    //        foreach (var i in v.images)
    //        {
    //            c.ImageAdd(i.link);
    //        }
    //    }
    //    p.Product.CombinationsAdd(cmbs);
    //}
}
/// <summary>
/// Empty override: the base implementation is not invoked (the call is commented out),
/// so this method performs no work for the category.
/// </summary>
/// <param name="p">Category script parameters; unused.</param>
public override void RunCategory(RunCategoryScriptParameters p)
{
// Both statements below are disabled, leaving the method body empty.
//base.RunCategory(p);
//p.Category.SourceUrlAdd();
}

/// <summary>
/// Requests the Rainforest API "category" endpoint for <c>p.Category.SourceUrl</c> and adds
/// the link of every returned result to <c>p.Category.ProductLinks</c>.
/// Does nothing beyond the base call when the category has no source URL.
/// </summary>
/// <param name="p">Script parameters; <c>p.Category</c> supplies the source URL and receives the links.</param>
/// <exception cref="InvalidOperationException">The response body is empty or deserializes to null.</exception>
public override void GetProductLinksForCategory(GetProductLinksForCategoryScriptParameters p)
{
    base.GetProductLinksForCategory(p);
    if (string.IsNullOrWhiteSpace(p.Category.SourceUrl))
    {
        return;
    }

    var enUrl = WebUtility.UrlEncode(p.Category.SourceUrl);
    var apiCall = $"https://api.rainforestapi.com/request?api_key={_apiKey}&type=category&url={enUrl}";

    _htmlPageLoader.Load(apiCall);
    var strJson = _htmlPageLoader.Content;

    // An empty body deserializes to null; report it instead of failing with a NullReferenceException.
    var catOutput = string.IsNullOrWhiteSpace(strJson)
        ? null
        : Newtonsoft.Json.JsonConvert.DeserializeObject<category_output>(strJson);
    if (catOutput == null)
    {
        // The request URL is deliberately not included: it contains the API key.
        throw new InvalidOperationException(
            $"Rainforest API returned no category data for '{p.Category.SourceUrl}'.");
    }

    // category_results defaults to an empty list, but an explicit JSON null overwrites that default.
    foreach (var r in catOutput.category_results ?? Enumerable.Empty<category_result>())
    {
        // Skip entries without a usable link rather than queueing null/blank product URLs.
        if (string.IsNullOrWhiteSpace(r?.link))
        {
            continue;
        }
        p.Category.ProductLinks.Add(r.link);
    }
}
}

/// <summary>
/// Root object that RunProduct deserializes the product response into.
/// Member names must stay identical to the JSON keys they bind to.
/// </summary>
internal class product_output
{
    /// <summary>The "product" object of the response; null when the key is absent.</summary>
    public product product;
}

/// <summary>
/// Product details as read from the "product" object of the API response.
/// Member names must stay identical to the JSON keys they bind to.
/// </summary>
internal class product
{
    // Scalar fields; RunProduct copies most of them onto the catalog product.
    public string title;
    public string asin;
    public string brand;
    public string description;
    public string weight;
    public string dimensions;
    public string model_number;

    // Collections start out empty so a missing JSON key leaves an empty list, not null.
    public List<variant> variants = new List<variant>();
    public List<image> images = new List<image>();
    public List<attribute> attributes = new List<attribute>();
    public List<attribute> specifications = new List<attribute>();

    /// <summary>Offer data (price, availability); null when the key is absent.</summary>
    public buybox_winner buybox_winner;
}

/// <summary>
/// The "buybox_winner" object of a product; RunProduct reads its price and availability.
/// </summary>
internal class buybox_winner
{
    /// <summary>Price of the offer; source of the product's price and currency.</summary>
    public price price;

    /// <summary>The "rrp" price object. NOTE(review): presumably the recommended retail price - confirm.</summary>
    public price rrp;

    /// <summary>Availability of the offer.</summary>
    public availability availability;
}

/// <summary>
/// The "availability" object of an offer.
/// </summary>
internal class availability
{
    /// <summary>Availability code; RunProduct treats the value "in_stock" as quantity 1.</summary>
    public string type;
}

/// <summary>
/// A price object of the response; both members are kept as strings.
/// </summary>
internal class price
{
    /// <summary>Currency of the price, copied to the product's Currency by RunProduct.</summary>
    public string currency;

    /// <summary>Amount of the price, copied to the product's Price by RunProduct.</summary>
    public string value;
}
/// <summary>
/// A name/value pair; used for product attributes, specifications and variant dimensions.
/// </summary>
internal class attribute
{
    /// <summary>Name of the pair; RunProduct uses it as the dynamic attribute key.</summary>
    public string name;

    /// <summary>Value of the pair.</summary>
    public string value;
}
/// <summary>
/// One entry of a product's "variants" list.
/// </summary>
internal class variant
{
    /// <summary>ASIN of the variant.</summary>
    public string asin;

    // Collections start out empty so a missing JSON key leaves an empty list, not null.
    public List<image> images = new List<image>();
    public List<attribute> dimensions = new List<attribute>();
}

/// <summary>
/// One entry of an "images" list.
/// </summary>
internal class image
{
    /// <summary>Image URL; RunProduct passes it to the product's ImageAdd.</summary>
    public string link;
}

/// <summary>
/// Root object that GetProductLinksForCategory deserializes the category response into.
/// Member names must stay identical to the JSON keys they bind to.
/// </summary>
internal class category_output
{
    /// <summary>Results of the category request; empty when the key is absent.</summary>
    public List<category_result> category_results = new List<category_result>();
}

/// <summary>
/// One entry of the "category_results" list.
/// </summary>
internal class category_result
{
    /// <summary>Product URL; GetProductLinksForCategory adds it to the category's product links.</summary>
    public string link;
}
}

[/code]