MyDataProvider » Blog » Let's scrape Amazon product data via Rainforest API

Let's scrape Amazon product data via Rainforest API

Rainforest API provides an excellent API for scraping Amazon product data.

Below is a code sample from our solution for scraping Amazon products:

[code lang="csharp"]

using CatalogLoader;
using CatalogLoader.Utils;
using CatalogLoaderCommon;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace MyScrapers.Amazon_Com_Rainforest
{
/// <summary>
/// Scraper script that fills products and category product links from the
/// JSON returned by the Rainforest API (<c>api.rainforestapi.com/request</c>).
/// </summary>
public class Amazon_Com_Rainforest : CustomScriptBase
{
    private const string ApiEndpoint = "https://api.rainforestapi.com/request";

    // Value of the "ApiKey" user parameter; read in Login.
    private string _apiKey = "";

    // Page loader obtained from the proxy in Login; used for every API call.
    private HtmlPageLoader _htmlPageLoader = null;

    /// <summary>
    /// Runs the base login, then caches the "ApiKey" user parameter and an
    /// empty page loader for later API requests.
    /// </summary>
    public override void Login(LoginScriptParameters p)
    {
        base.Login(p);
        _apiKey = p.State.GrabberSettings.UserParameterGet("ApiKey");
        _htmlPageLoader = p.State.Proxy.GetHtmlPageLoaderEmpty();
    }

    /// <summary>
    /// Returns the base settings with request attempts, user parameters,
    /// caching and request timeout overridden for this script.
    /// </summary>
    public override GrabberSettings GetGrabberSettings()
    {
        var r = base.GetGrabberSettings();
        r.Settings.RequestAttempts = 5;
        r.Settings.UserParameters = "ApiMode=true[next]ApiKey";
        r.Settings.CacheEnabled = true;
        r.Settings.RequestTimeout = 100000;
        return r;
    }

    /// <summary>
    /// Requests <c>type=product</c> data for <c>p.Product.Url</c> and copies the
    /// returned fields, images and attributes/specifications onto the product.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The response body is empty or contains no <c>product</c> object
    /// (for example when the API answers with an error payload).
    /// </exception>
    public override void RunProduct(RunProductScriptParameters p)
    {
        base.RunProduct(p);

        var amzOut = RequestJson<product_output>("product", p.Product.Url);
        var ap = amzOut?.product;
        if (ap == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException.
            throw new InvalidOperationException(
                "Rainforest API response contains no 'product' object for url: " + p.Product.Url);
        }

        p.Product.Art = ap.asin;
        p.Product.Name = ap.title;
        p.Product.Weight = ap.weight;
        p.Product.MNP = ap.model_number;
        p.Product.Manufacturer = ap.brand;
        p.Product.FullDescription = ap.description;
        p.Product.Price = ap.buybox_winner?.price?.value;
        p.Product.Currency = ap.buybox_winner?.price?.currency;
        p.Product.Quantity = ap.buybox_winner?.availability?.type == "in_stock" ? "1" : "0";

        // The list is replaced by null when the JSON carries an explicit null.
        if (ap.images != null)
        {
            foreach (var img in ap.images)
            {
                if (!string.IsNullOrWhiteSpace(img?.link))
                {
                    p.Product.ImageAdd(img.link);
                }
            }
        }

        // Attributes first, then specifications: a specification with the
        // same name overwrites the attribute value.
        var f2v = new Dictionary<string, string>();
        MergeAttributes(f2v, ap.attributes);
        MergeAttributes(f2v, ap.specifications);
        foreach (var kv in f2v)
        {
            p.Product.AddDynamicAttribute(kv.Key, kv.Value);
        }

        // NOTE: ap.variants is deserialized but is not converted into
        // product combinations by this script.
    }

    /// <summary>
    /// Intentionally empty: neither calls the base implementation nor adds
    /// source URLs to the category.
    /// </summary>
    public override void RunCategory(RunCategoryScriptParameters p)
    {
    }

    /// <summary>
    /// Requests <c>type=category</c> data for <c>p.Category.SourceUrl</c> and adds
    /// every returned result link to <c>p.Category.ProductLinks</c>.
    /// Does nothing when the category has no source URL.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The response body is empty or could not be deserialized to an object.
    /// </exception>
    public override void GetProductLinksForCategory(GetProductLinksForCategoryScriptParameters p)
    {
        base.GetProductLinksForCategory(p);
        if (string.IsNullOrWhiteSpace(p.Category.SourceUrl))
        {
            return;
        }

        var catOutput = RequestJson<category_output>("category", p.Category.SourceUrl);
        if (catOutput == null)
        {
            throw new InvalidOperationException(
                "Rainforest API category response could not be read for url: " + p.Category.SourceUrl);
        }

        if (catOutput.category_results == null)
        {
            return;
        }

        foreach (var r in catOutput.category_results)
        {
            if (!string.IsNullOrWhiteSpace(r?.link))
            {
                p.Category.ProductLinks.Add(r.link);
            }
        }
    }

    // Calls the API with the given request type and target URL and
    // deserializes the JSON body to T. The API key is never put into
    // exception messages.
    private T RequestJson<T>(string requestType, string targetUrl) where T : class
    {
        var apiCall = ApiEndpoint
            + "?api_key=" + WebUtility.UrlEncode(_apiKey)
            + "&type=" + requestType
            + "&url=" + WebUtility.UrlEncode(targetUrl);

        _htmlPageLoader.Load(apiCall);
        var strJson = _htmlPageLoader.Content;
        if (string.IsNullOrWhiteSpace(strJson))
        {
            throw new InvalidOperationException(
                "Rainforest API returned an empty response (type=" + requestType + ") for url: " + targetUrl);
        }

        return Newtonsoft.Json.JsonConvert.DeserializeObject<T>(strJson);
    }

    // Copies name/value pairs into target, skipping null entries and entries
    // without a name (a null key would make the dictionary indexer throw).
    private static void MergeAttributes(Dictionary<string, string> target, List<attribute> source)
    {
        if (source == null)
        {
            return;
        }

        foreach (var a in source)
        {
            if (a == null || string.IsNullOrEmpty(a.name))
            {
                continue;
            }

            target[a.name] = a.value;
        }
    }
}

/// <summary>
/// Root object of the JSON deserialized for a <c>type=product</c> request.
/// </summary>
internal class product_output
{
    /// <summary>The <c>product</c> node of the response; null when absent.</summary>
    public product product;
}

/// <summary>
/// Product node of the API response. Field names mirror the JSON keys.
/// </summary>
internal class product
{
    // --- scalar values ---

    /// <summary>ASIN of the product.</summary>
    public string asin;

    /// <summary>Product title.</summary>
    public string title;

    /// <summary>Brand name.</summary>
    public string brand;

    /// <summary>Model number.</summary>
    public string model_number;

    /// <summary>Description text.</summary>
    public string description;

    /// <summary>Weight, kept as the raw string from the response.</summary>
    public string weight;

    /// <summary>Dimensions, kept as the raw string from the response.</summary>
    public string dimensions;

    // --- nested objects ---

    /// <summary>Buy-box offer (price / availability); may be null.</summary>
    public buybox_winner buybox_winner;

    // --- collections (start out empty) ---

    /// <summary>Product images.</summary>
    public List<image> images = new List<image>();

    /// <summary>Name/value attributes.</summary>
    public List<attribute> attributes = new List<attribute>();

    /// <summary>Name/value specifications.</summary>
    public List<attribute> specifications = new List<attribute>();

    /// <summary>Product variants.</summary>
    public List<variant> variants = new List<variant>();
}

/// <summary>
/// <c>buybox_winner</c> node of a product: pricing and availability of the offer.
/// </summary>
internal class buybox_winner
{
    /// <summary>Availability information of the offer.</summary>
    public availability availability;

    /// <summary>Offer price.</summary>
    public price price;

    /// <summary>The <c>rrp</c> price node (presumably the recommended retail price — confirm against the API docs).</summary>
    public price rrp;
}

/// <summary>
/// <c>availability</c> node of a buy-box offer.
/// </summary>
internal class availability
{
    /// <summary>Availability type string from the response, e.g. <c>"in_stock"</c>.</summary>
    public string type;
}

/// <summary>
/// A price node: amount plus currency, both kept as strings.
/// </summary>
internal class price
{
    /// <summary>Price amount as a string.</summary>
    public string value;

    /// <summary>Currency of the amount.</summary>
    public string currency;
}
/// <summary>
/// Generic name/value pair used for attributes, specifications and
/// variant dimensions.
/// </summary>
internal class attribute
{
    /// <summary>Name of the pair.</summary>
    public string name;

    /// <summary>Value of the pair.</summary>
    public string value;
}
/// <summary>
/// One entry of a product's <c>variants</c> list.
/// </summary>
internal class variant
{
    /// <summary>ASIN of the variant.</summary>
    public string asin;

    /// <summary>Name/value pairs from the variant's <c>dimensions</c> list.</summary>
    public List<attribute> dimensions = new List<attribute>();

    /// <summary>Images of the variant.</summary>
    public List<image> images = new List<image>();
}

/// <summary>
/// One image entry of a product or variant.
/// </summary>
internal class image
{
    /// <summary>Value of the image's <c>link</c> key.</summary>
    public string link;
}

/// <summary>
/// Root object of the JSON deserialized for a <c>type=category</c> request.
/// </summary>
internal class category_output
{
    /// <summary>Entries of the <c>category_results</c> list; starts out empty.</summary>
    public List<category_result> category_results = new List<category_result>();
}

/// <summary>
/// One entry of a category response's <c>category_results</c> list.
/// </summary>
internal class category_result
{
    /// <summary>Value of the result's <c>link</c> key.</summary>
    public string link;
}
}

[/code]