CoronaScraper v1
C# .NET
using System;
using System.Collections.Generic;
using System.Text;
using HtmlAgilityPack;
namespace CoronaScraper
{
/// <summary>
/// Downloads the page at <see cref="Url"/> and converts the rows of the table
/// identified by <see cref="TableId"/> into <see cref="CountryData"/> objects.
/// </summary>
class Scraper
{
    // NOTE(review): the "fbclid" query parameter looks like a leftover click-tracking id
    // from a shared link and is probably not required — confirm before removing it.
    const string Url = "https://www.worldometers.info/coronavirus/?fbclid=IwAR2hjFNZpDQ5GqxLBv6z5HQXophgSMGvj4VnYZsmUdvZ7mGSxWwMe46caJY";
    const string TableId = "main_table_countries_today";

    // The page writes numbers with ',' as the thousands separator, so parsing is pinned
    // to en-US regardless of the machine's current culture. Shared by all instances.
    private static readonly System.Globalization.CultureInfo EnUsCulture = System.Globalization.CultureInfo.GetCultureInfo("en-US");

    // Integer includes leading/trailing whitespace and a leading sign (e.g. "+123");
    // AllowThousands additionally accepts "1,234".
    private const System.Globalization.NumberStyles CellNumberStyle =
        System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands;

    /// <summary>
    /// Loads the page and extracts one <see cref="CountryData"/> per table row
    /// ("Scrapovat" is Czech for "to scrape").
    /// </summary>
    /// <returns>
    /// The parsed rows in document order. The list is empty when the table or its rows
    /// cannot be found; rows without any <c>td</c> cell are skipped.
    /// </returns>
    public List<CountryData> Scrapovat()
    {
        var web = new HtmlWeb();
        var doc = web.Load(Url);
        var rows = doc.DocumentNode.SelectNodes($"//table[@id='{TableId}']/tbody/tr");
        var result = new List<CountryData>();

        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (rows == null)
        {
            return result;
        }

        foreach (var row in rows)
        {
            var rowData = ExtractData(row);
            if (rowData != null)
            {
                result.Add(rowData);
            }
        }

        return result;
    }

    /// <summary>
    /// Converts a single table row into a <see cref="CountryData"/>.
    /// </summary>
    /// <param name="tr">The <c>tr</c> node to read.</param>
    /// <returns>
    /// The extracted data, or <c>null</c> when the row has no <c>td</c> cells.
    /// Numeric cells that are missing or not parseable become <c>null</c> values.
    /// </returns>
    private CountryData ExtractData(HtmlNode tr)
    {
        var tds = tr.SelectNodes("td");
        if (tds == null || tds.Count == 0)
        {
            // e.g. a row made only of header cells — nothing to extract.
            return null;
        }

        // Decode HTML entities and strip layout whitespace so the name is plain text.
        var country = HtmlEntity.DeEntitize(tds[0].InnerText).Trim();

        int? getValueFromCell(int index)
        {
            // A shorter-than-expected row yields "not available" instead of throwing.
            if (index >= tds.Count)
            {
                return null;
            }

            return ParseCellText(tds[index].InnerText);
        }

        return new CountryData(
            countryName: country,
            totalCases: getValueFromCell(1),
            newCases: getValueFromCell(2),
            totalDeaths: getValueFromCell(3),
            newDeaths: getValueFromCell(4),
            totalRecovered: getValueFromCell(5),
            activeCases: getValueFromCell(6),
            serious: getValueFromCell(7),
            topCases: getValueFromCell(8)
        );
    }

    /// <summary>
    /// Parses the text of a numeric cell.
    /// </summary>
    /// <param name="text">Raw cell text, e.g. <c>"1,234"</c> or <c>"+56"</c>.</param>
    /// <returns>The parsed value, or <c>null</c> when the text is not a valid integer (for example an empty cell).</returns>
    private static int? ParseCellText(string text)
    {
        return int.TryParse(text, CellNumberStyle, EnUsCulture, out int result) ? result : (int?)null;
    }
}
/// <summary>
/// Immutable snapshot of the figures reported for a single country.
/// </summary>
/// <remarks>
/// <para>The numbers are stored as <see cref="int"/>. This is a potential problem should more than about 2 billion people ever be infected.</para>
/// <para>A value is <c>null</c> when it is not available.</para>
/// </remarks>
class CountryData
{
    /// <summary>
    /// Creates a new record.
    /// </summary>
    /// <param name="countryName">Name of the country; must not be <c>null</c>.</param>
    /// <param name="totalCases">Total number of cases, or <c>null</c> if unavailable.</param>
    /// <param name="newCases">Number of new cases, or <c>null</c> if unavailable.</param>
    /// <param name="totalDeaths">Total number of deaths, or <c>null</c> if unavailable.</param>
    /// <param name="newDeaths">Number of new deaths, or <c>null</c> if unavailable.</param>
    /// <param name="totalRecovered">Total number of recovered, or <c>null</c> if unavailable.</param>
    /// <param name="activeCases">Number of active cases, or <c>null</c> if unavailable.</param>
    /// <param name="serious">Number of serious cases, or <c>null</c> if unavailable.</param>
    /// <param name="topCases">Stored as <see cref="TopCases"/>; its exact meaning is not evident from this file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="countryName"/> is <c>null</c>.</exception>
    public CountryData(string countryName, int? totalCases, int? newCases, int? totalDeaths, int? newDeaths, int? totalRecovered, int? activeCases, int? serious, int? topCases)
    {
        CountryName = countryName ?? throw new ArgumentNullException(nameof(countryName));
        TotalCases = totalCases;
        NewCases = newCases;
        TotalDeaths = totalDeaths;
        NewDeaths = newDeaths;
        TotalRecovered = totalRecovered;
        ActiveCases = activeCases;
        Serious = serious;
        TopCases = topCases;
    }

    /// <summary>Name of the country; never <c>null</c>.</summary>
    public string CountryName { get; }

    /// <summary>Total number of cases, or <c>null</c> if unavailable.</summary>
    public int? TotalCases { get; }

    /// <summary>Number of new cases, or <c>null</c> if unavailable.</summary>
    public int? NewCases { get; }

    /// <summary>Total number of deaths, or <c>null</c> if unavailable.</summary>
    public int? TotalDeaths { get; }

    /// <summary>Number of new deaths, or <c>null</c> if unavailable.</summary>
    public int? NewDeaths { get; }

    /// <summary>Total number of recovered, or <c>null</c> if unavailable.</summary>
    public int? TotalRecovered { get; }

    /// <summary>Number of active cases, or <c>null</c> if unavailable.</summary>
    public int? ActiveCases { get; }

    /// <summary>Number of serious cases, or <c>null</c> if unavailable.</summary>
    public int? Serious { get; }

    /// <summary>Value passed as <c>topCases</c>, or <c>null</c> if unavailable. NOTE(review): meaning not evident from this file — confirm against the data source.</summary>
    public int? TopCases { get; }

    /// <summary>
    /// Composite format used by <see cref="ToString"/>: country name (left-aligned, 20 wide),
    /// total cases, total deaths and mortality percentage (right-aligned).
    /// </summary>
    public const string FormatString = "{0,-20} | {1,10} | {2,10} | {3,6}";

    /// <summary>
    /// Formats the record as one table line: country, total cases, total deaths and
    /// mortality in percent (deaths / cases * 100).
    /// </summary>
    /// <returns>The formatted line; unavailable values are rendered by <c>ValueOrNa</c>.</returns>
    public override string ToString()
    {
        // Guard the divisor: with zero cases the ratio is undefined (the division would
        // give NaN or Infinity). Zero deaths is a valid input and simply gives 0.0 %.
        double? mortalityPercent =
            TotalCases.HasValue && TotalDeaths.HasValue && TotalCases.Value != 0
                ? 100.0 * TotalDeaths.Value / TotalCases.Value
                : (double?)null;

        return string.Format(
            FormatString,
            CountryName,
            TotalCases.ValueOrNa(),
            TotalDeaths.ValueOrNa(),
            mortalityPercent.ValueOrNa());
    }
}
/// <summary>
/// Extension methods that render optional numbers as text.
/// </summary>
static class Helper
{
    // Text shown in place of a missing value.
    private const string NA = "n/a";

    /// <summary>
    /// Formats the value with the "N0" numeric format, or returns "n/a" when it is <c>null</c>.
    /// </summary>
    /// <param name="n">The optional value to format.</param>
    /// <returns>The formatted number or the "n/a" placeholder.</returns>
    public static string ValueOrNa(this int? n)
    {
        if (!n.HasValue)
        {
            return NA;
        }

        return n.Value.ToString("N0");
    }

    /// <summary>
    /// Formats the value with the "N1" numeric format, or returns "n/a" when it is <c>null</c>.
    /// </summary>
    /// <param name="n">The optional value to format.</param>
    /// <returns>The formatted number or the "n/a" placeholder.</returns>
    public static string ValueOrNa(this double? n)
    {
        if (!n.HasValue)
        {
            return NA;
        }

        return n.Value.ToString("N1");
    }
}
}
Neformátovaný
Přidáno: 22.3.2020
Expirace: Neuvedeno