ilk commit

This commit is contained in:
hOLOlu
2026-05-04 01:19:04 +03:00
commit 5f33557f2d
2072 changed files with 75437 additions and 0 deletions

View File

@@ -0,0 +1,154 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using HtmlAgilityPack;
namespace DownloadManager.Core.Grabber;
/// <summary>
/// Settings that describe a single crawl: where it starts, how far it goes and
/// which links count as downloadable files.
/// </summary>
public class GrabberOptions
{
    /// <summary>Absolute URL of the page the crawl starts from (depth 0).</summary>
    public string StartUrl { get; set; } = "";

    /// <summary>
    /// Maximum link depth. Links are only followed from pages whose depth is
    /// below this value; the start page has depth 0.
    /// </summary>
    public int MaxDepth { get; set; } = 2;

    /// <summary>
    /// Intended to keep the crawl on the host of <see cref="StartUrl"/>.
    /// NOTE(review): confirm that the consumer actually reads this flag.
    /// </summary>
    public bool StayOnDomain { get; set; } = true;

    /// <summary>
    /// File extensions to accept, written without a leading dot (for example <c>zip</c>).
    /// An empty array means "no extension filter".
    /// </summary>
    public string[] FileExtensions { get; set; } = Array.Empty<string>();

    /// <summary>
    /// Optional URL filter pattern.
    /// NOTE(review): pattern syntax and where it is applied are not defined here - confirm with the consumer.
    /// </summary>
    public string? UrlPattern { get; set; }

    /// <summary>
    /// Optional lower bound on file size, in bytes.
    /// NOTE(review): confirm how the consumer treats files of unknown size.
    /// </summary>
    public long? MinFileSizeBytes { get; set; }

    /// <summary>
    /// Optional upper bound on file size, in bytes.
    /// NOTE(review): confirm how the consumer treats files of unknown size.
    /// </summary>
    public long? MaxFileSizeBytes { get; set; }

    /// <summary>Upper bound on the number of results a crawl produces.</summary>
    public int MaxFileCount { get; set; } = 500;
}
/// <summary>One downloadable file discovered during a crawl.</summary>
/// <param name="Url">Absolute URL of the file.</param>
/// <param name="FileName">File name taken from the last segment of the URL path.</param>
/// <param name="SizeBytes">Size in bytes, or <c>null</c> when it could not be determined.</param>
/// <param name="Extension">Lower-cased file extension without the leading dot.</param>
public record GrabberResult(
    string Url,
    string FileName,
    long? SizeBytes,
    string Extension);
/// <summary>
/// Breadth-first crawler: starting at <see cref="GrabberOptions.StartUrl"/> it follows
/// <c>&lt;a href&gt;</c> links and streams every link that looks like a downloadable file.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="GrabberOptions.UrlPattern"/>, <see cref="GrabberOptions.MinFileSizeBytes"/>
/// and <see cref="GrabberOptions.MaxFileSizeBytes"/> are not applied by this class.
/// </remarks>
public class SiteGrabber
{
    private const string PageUserAgent =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0";

    private const string ProbeUserAgent =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

    // Reported sizes at or below this are treated as bogus (e.g. a tiny redirect/placeholder page).
    private const long MinPlausibleSizeBytes = 500;

    private static readonly TimeSpan ProbeTimeout = TimeSpan.FromSeconds(6);

    // Extensions that denote pages/text rather than downloads; such links are crawled, not reported.
    private static readonly string[] PageExtensions = { "html", "htm", "php", "aspx", "jsp", "txt" };

    private readonly IHttpClientFactory _httpClientFactory;

    /// <summary>Creates a grabber that obtains its HTTP clients from <paramref name="httpClientFactory"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="httpClientFactory"/> is <c>null</c>.</exception>
    public SiteGrabber(IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
    }

    /// <summary>
    /// Crawls the site and yields one <see cref="GrabberResult"/> per distinct downloadable link,
    /// up to <see cref="GrabberOptions.MaxFileCount"/> results.
    /// </summary>
    /// <param name="opts">Crawl settings.</param>
    /// <param name="ct">Token that stops the crawl; the sequence then simply ends.</param>
    /// <returns>The discovered files, streamed in discovery order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="opts"/> is <c>null</c>.</exception>
    /// <exception cref="UriFormatException"><see cref="GrabberOptions.StartUrl"/> is not an absolute URI.</exception>
    public async IAsyncEnumerable<GrabberResult> GrabAsync(
        GrabberOptions opts,
        [EnumeratorCancellation] CancellationToken ct)
    {
        if (opts is null)
        {
            throw new ArgumentNullException(nameof(opts));
        }

        var startUri = new Uri(opts.StartUrl);
        var visitedPages = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        // Ordinal: URL paths may be case-sensitive, so never merge two files that differ only by case.
        var reportedFiles = new HashSet<string>(StringComparer.Ordinal);
        var queue = new Queue<(Uri Page, int Depth)>();
        queue.Enqueue((startUri, 0));
        var found = 0;

        // One factory-managed client for all page fetches instead of a new handler per request.
        // Redirect following is left to the factory's handler configuration.
        using var client = _httpClientFactory.CreateClient();
        client.DefaultRequestHeaders.Add("User-Agent", PageUserAgent);

        while (queue.Count > 0 && !ct.IsCancellationRequested && found < opts.MaxFileCount)
        {
            var (pageUri, depth) = queue.Dequeue();
            if (!visitedPages.Add(pageUri.AbsoluteUri))
            {
                continue;
            }

            var html = await TryGetHtmlAsync(client, pageUri, ct);
            if (html is null)
            {
                continue;
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors is null)
            {
                continue;
            }

            foreach (var anchor in anchors)
            {
                if (ct.IsCancellationRequested)
                {
                    break;
                }

                // The attribute text may still carry HTML entities (e.g. "&amp;"); decode before resolving.
                var href = System.Net.WebUtility.HtmlDecode(anchor.GetAttributeValue("href", ""));
                if (string.IsNullOrWhiteSpace(href))
                {
                    continue;
                }

                // Relative links are relative to the page they appear on, not to the start URL.
                if (!Uri.TryCreate(pageUri, href, out var target))
                {
                    continue;
                }

                // Skip mailto:, javascript:, tel:, ... - only web links can be crawled or downloaded.
                if (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps)
                {
                    continue;
                }

                var withoutFragment = target.GetLeftPart(UriPartial.Query);
                var extension = GetExtension(target);

                if (IsDownloadTarget(extension, opts))
                {
                    // The same file is often linked from several pages; report it only once.
                    if (!reportedFiles.Add(withoutFragment))
                    {
                        continue;
                    }

                    found++;
                    long? size = await GetFileSizeRobustAsync(target.AbsoluteUri, ct);
                    yield return new GrabberResult(
                        target.AbsoluteUri,
                        Path.GetFileName(target.LocalPath),
                        size,
                        extension);
                }
                else if (depth < opts.MaxDepth
                         && (!opts.StayOnDomain || IsSameDomain(target, startUri.Host)))
                {
                    // Queue the fragment-free form so "#anchor" variants are not fetched again.
                    queue.Enqueue((new Uri(withoutFragment), depth + 1));
                }

                if (found >= opts.MaxFileCount)
                {
                    break;
                }
            }
        }
    }

    /// <summary>Downloads a page as text; returns <c>null</c> when the page cannot be fetched.</summary>
    private static async Task<string?> TryGetHtmlAsync(HttpClient client, Uri page, CancellationToken ct)
    {
        try
        {
            return await client.GetStringAsync(page, ct);
        }
        catch (Exception)
        {
            // Deliberate best-effort: an unreachable or broken page (or a cancelled request)
            // must not abort the crawl; the caller just moves on.
            return null;
        }
    }

    /// <summary>
    /// Tries to determine the size of the resource at <paramref name="url"/> without downloading it.
    /// </summary>
    /// <returns>The size in bytes, or <c>null</c> when no trustworthy size could be obtained.</returns>
    private async Task<long?> GetFileSizeRobustAsync(string url, CancellationToken ct)
    {
        try
        {
            using var client = _httpClientFactory.CreateClient();
            client.DefaultRequestHeaders.Add("User-Agent", ProbeUserAgent);
            client.Timeout = ProbeTimeout;

            // 1. Try HEAD first.
            using var headReq = new HttpRequestMessage(HttpMethod.Head, url);
            using var headResp = await client.SendAsync(headReq, HttpCompletionOption.ResponseHeadersRead, ct);

            // A size above the threshold is taken as real; anything smaller is
            // probably a fake redirect page.
            if (headResp.IsSuccessStatusCode && headResp.Content.Headers.ContentLength > MinPlausibleSizeBytes)
            {
                return headResp.Content.Headers.ContentLength;
            }

            // 2. HEAD failed or reported something tiny (e.g. 146 B): ask for the first byte
            //    only and read the total length from Content-Range.
            using var getReq = new HttpRequestMessage(HttpMethod.Get, url);
            getReq.Headers.Range = new System.Net.Http.Headers.RangeHeaderValue(0, 0);
            using var getResp = await client.SendAsync(getReq, HttpCompletionOption.ResponseHeadersRead, ct);

            if (getResp.Content.Headers.ContentRange?.HasLength == true)
            {
                return getResp.Content.Headers.ContentRange.Length;
            }

            // Server ignored the range: fall back to Content-Length, but never trust
            // the length of an error page.
            if (!getResp.IsSuccessStatusCode)
            {
                return null;
            }

            var fallback = getResp.Content.Headers.ContentLength;
            return fallback > MinPlausibleSizeBytes ? fallback : null;
        }
        catch (Exception)
        {
            // Best-effort probe: a failure only means the size stays unknown.
            return null;
        }
    }

    /// <summary>
    /// Returns the lower-cased extension (no leading dot) of the URI's path, ignoring query and fragment.
    /// </summary>
    private static string GetExtension(Uri uri)
    {
        // Invariant casing: under e.g. the Turkish culture "ZIP".ToLower() is "zıp", which never matches "zip".
        return Path.GetExtension(uri.LocalPath).TrimStart('.').ToLowerInvariant();
    }

    /// <summary>
    /// Decides whether a link with the given (already normalised) extension is a file to report
    /// rather than a page to crawl.
    /// </summary>
    private static bool IsDownloadTarget(string extension, GrabberOptions opts)
    {
        if (extension.Length == 0)
        {
            return false;
        }

        if (PageExtensions.Contains(extension, StringComparer.Ordinal))
        {
            return false;
        }

        var wanted = opts.FileExtensions;
        if (wanted is null || wanted.Length == 0)
        {
            return true;
        }

        // Accept "zip", "ZIP" and ".zip" alike.
        return wanted.Any(w =>
            w is not null
            && string.Equals(w.Trim().TrimStart('.'), extension, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>Checks whether <paramref name="uri"/> points at <paramref name="host"/> (case-insensitive).</summary>
    private static bool IsSameDomain(Uri uri, string host)
    {
        return string.Equals(uri.Host, host, StringComparison.OrdinalIgnoreCase);
    }
}