NOVINKA: Získej 40 hodin praktických dovedností s AI – ZDARMA ke každému akreditovanému kurzu!
Hledáme nové posily do ITnetwork týmu. Podívej se na volné pozice a přidej se do nejagilnější firmy na trhu - Více informací.

CoronaScraper v1

C# .NET

using System;
using System.Collections.Generic;
using System.Text;
using HtmlAgilityPack;

namespace CoronaScraper
{
  /// <summary>
  /// Downloads the page at <see cref="Url"/> and converts the body rows of the table
  /// with id <see cref="TableId"/> into <see cref="CountryData"/> objects.
  /// </summary>
  class Scraper
  {

    // NOTE(review): the "fbclid" query parameter looks like a leftover tracking token from a
    // shared link; presumably the page loads the same without it - confirm before removing.
    const string Url = "https://www.worldometers.info/coronavirus/?fbclid=IwAR2hjFNZpDQ5GqxLBv6z5HQXophgSMGvj4VnYZsmUdvZ7mGSxWwMe46caJY";

    const string TableId = "main_table_countries_today";

    // Culture used when parsing cell numbers ("," as the thousands separator).
    // Static because it carries no per-instance state.
    private static readonly System.Globalization.CultureInfo EnUsCulture = System.Globalization.CultureInfo.GetCultureInfo("en-US");

    /// <summary>
    /// Loads the page and extracts one <see cref="CountryData"/> per table row that
    /// contains at least one <c>td</c> cell.
    /// </summary>
    /// <returns>
    /// The extracted rows in document order; an empty list when the table (or its rows)
    /// cannot be found in the downloaded document.
    /// </returns>
    public List<CountryData> Scrapovat()
    {
      var web = new HtmlWeb();
      var doc = web.Load(Url);
      var result = new List<CountryData>();

      // SelectNodes returns null (not an empty collection) when nothing matches,
      // e.g. when the page layout changed or an error page was served.
      var rows = doc.DocumentNode.SelectNodes($"//table[@id='{TableId}']/tbody/tr");
      if (rows == null)
      {
        return result;
      }

      foreach (var row in rows)
      {
        var rowData = ExtractData(row);
        if (rowData != null)
        {
          result.Add(rowData);
        }
      }
      return result;
    }

    /// <summary>
    /// Converts a single table row into a <see cref="CountryData"/>.
    /// </summary>
    /// <param name="tr">The <c>tr</c> node to read.</param>
    /// <returns>
    /// The parsed row, or <c>null</c> when the row has no <c>td</c> cells.
    /// Cells that are missing or do not contain an integer yield <c>null</c> values.
    /// </returns>
    private CountryData ExtractData(HtmlNode tr)
    {
      var tds = tr.SelectNodes("td");
      if (tds == null || tds.Count == 0)
      {
        // Row without data cells (for example a header-only row): nothing to extract.
        return null;
      }

      // Decode HTML entities and strip surrounding whitespace so the name fits
      // the fixed-width column used when the data is printed.
      var country = HtmlEntity.DeEntitize(tds[0].InnerText).Trim();

      // Reads the numeric value of the cell at the given index; null when the row is
      // shorter than expected or the cell does not hold an integer.
      int? GetValueFromCell(int index)
      {
        return index < tds.Count ? ParseCellText(tds[index].InnerText) : null;
      }

      // NOTE(review): the column order (1..8) is assumed to match the page layout at the
      // time of writing - verify against the current table before relying on it.
      return new CountryData(
        countryName: country,
        totalCases: GetValueFromCell(1),
        newCases: GetValueFromCell(2),
        totalDeaths: GetValueFromCell(3),
        newDeaths: GetValueFromCell(4),
        totalRecovered: GetValueFromCell(5),
        activeCases: GetValueFromCell(6),
        serious: GetValueFromCell(7),
        topCases: GetValueFromCell(8)
       );
    }

    /// <summary>
    /// Parses a cell text such as "1,234" or "+56" as an integer using the en-US culture.
    /// </summary>
    /// <param name="text">Raw cell text.</param>
    /// <returns>The parsed number, or <c>null</c> when the text is not a valid integer.</returns>
    private static int? ParseCellText(string text)
    {
      const System.Globalization.NumberStyles style =
        System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands;
      return int.TryParse(text, style, EnUsCulture, out int result) ? result : (int?)null;
    }

  }

  /// <summary>
  /// Immutable set of values describing one country row.
  /// </summary>
  /// <remarks>
  /// <para>Numbers are stored as int. Potential problem if more than 2 billion people become infected.</para>
  /// <para>A value is null when it is not available.</para>
  /// </remarks>
  class CountryData
  {
    /// <summary>
    /// Creates a new record from already parsed values.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="countryName"/> is null.</exception>
    public CountryData(string countryName, int? totalCases, int? newCases, int? totalDeaths, int? newDeaths, int? totalRecovered, int? activeCases, int? serious, int? topCases)
    {
      CountryName = countryName ?? throw new ArgumentNullException(nameof(countryName));
      TotalCases = totalCases;
      NewCases = newCases;
      TotalDeaths = totalDeaths;
      NewDeaths = newDeaths;
      TotalRecovered = totalRecovered;
      ActiveCases = activeCases;
      Serious = serious;
      TopCases = topCases;
    }

    public string CountryName { get; }

    public int? TotalCases { get; }

    public int? NewCases { get; }

    public int? TotalDeaths { get; }

    public int? NewDeaths { get; }

    public int? TotalRecovered { get; }

    public int? ActiveCases { get; }

    public int? Serious { get; }

    // NOTE(review): the exact meaning of this column is not visible here - confirm against the data source.
    public int? TopCases { get; }

    /// <summary>
    /// Composite format used by <see cref="ToString"/>:
    /// country name, total cases, total deaths, mortality in percent.
    /// </summary>
    public const string FormatString = "{0,-20} | {1,10} | {2,10} | {3,6}";

    /// <summary>
    /// Formats the record as one fixed-width line according to <see cref="FormatString"/>.
    /// </summary>
    /// <returns>
    /// The formatted line. The mortality column is "n/a" when cases or deaths are unknown,
    /// when deaths are zero, or when total cases are zero.
    /// </returns>
    public override string ToString()
    {
      double? mortalityPercent = null;

      // The divisor (TotalCases) must be non-zero, otherwise the double division would
      // yield Infinity instead of a meaningful percentage. Zero deaths keep printing "n/a"
      // as before.
      if (TotalCases.HasValue && TotalDeaths.HasValue
          && TotalDeaths.Value != 0 && TotalCases.Value != 0)
      {
        mortalityPercent = 100.0 * (double)TotalDeaths.Value / (double)TotalCases.Value;
      }

      return string.Format(FormatString, CountryName, TotalCases.ValueOrNa(), TotalDeaths.ValueOrNa(), mortalityPercent.ValueOrNa());
    }

  }

  /// <summary>
  /// Formatting extensions for nullable numbers.
  /// </summary>
  static class Helper
  {

    // Placeholder text returned when a value is missing.
    private const string NA = "n/a";

    /// <summary>
    /// Formats the number with the "N0" format, or returns "n/a" when it is null.
    /// </summary>
    public static string ValueOrNa(this int? n)
    {
      if (!n.HasValue)
      {
        return NA;
      }
      return n.Value.ToString("N0");
    }

    /// <summary>
    /// Formats the number with the "N1" format, or returns "n/a" when it is null.
    /// </summary>
    public static string ValueOrNa(this double? n)
    {
      if (!n.HasValue)
      {
        return NA;
      }
      return n.Value.ToString("N1");
    }

  }

}

Neformátovaný

Přidáno: 22.3.2020
Expirace: Neuvedeno

Avatar
Autor: Bugmaster
Aktivity