Skip to content

Commit

Permalink
Implement file downloading via FileDownloadController (very WIP)
Browse files Browse the repository at this point in the history
Thread now has its own GetHashCode impl.
ImageLink now has reference to its parent thread, includes thread hashcode in its hashcode.
TODO:
 - HTML download has been broken by this change; it probably needs its own download manager.
 - Implement fail callback on imagelink download, just like the success callback.
 - Generalise the download manager class with generics, so it can also be used for HTML downloading?
 - Normalise 404/other network error handling across 4chan/8chan thread implementations.
 - Add settings for the FileDownloadController that the user can customise (max concurrent downloads, currently at const 25).
  • Loading branch information
Issung committed Oct 6, 2023
1 parent 22b099a commit b68cbc1
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 91 deletions.
7 changes: 5 additions & 2 deletions GChan.UnitTest/ConcurrentDictionaryHashCodeKeyTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using FluentAssertions;
using GChan.Trackers;
using System.Collections.Concurrent;
using Xunit;

Expand All @@ -9,9 +10,11 @@ namespace GChan.UnitTest
/// </summary>
public class ConcurrentDictionaryHashCodeKeyTests
{
readonly static Thread thread = new Thread_4Chan("https://boards.4chan.org/hr/thread/123");

// Two different image link instances, with the same data.
readonly ImageLink il1 = new ImageLink(123, "http://test.com", "test123");
readonly ImageLink il2 = new ImageLink(123, "http://test.com", "test123");
readonly static ImageLink il1 = new ImageLink(123, "http://test.com", "test123", 1, thread);
readonly static ImageLink il2 = new ImageLink(123, "http://test.com", "test123", 1, thread);

[Fact]
public void TestAddAndRetrieveWithIndexOperator()
Expand Down
81 changes: 57 additions & 24 deletions GChan/Controllers/FileDownloadController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,92 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;

namespace GChan.Controllers
{
/// <summary>
/// Class that manages file downloading.
/// Class that manages a file download pool.
/// </summary>
public class FileDownloadController
{
private const int SkimCount = 10;
private const int ConcurrentCount = 25;

private static readonly Logger logger = LogManager.GetCurrentClassLogger();
private static readonly TimeSpan interval = TimeSpan.FromSeconds(10);
private static readonly TimeSpan interval = TimeSpan.FromSeconds(1);

private readonly ConcurrentDictionary<ImageLink, Thread> tasks = new();
private readonly ConcurrentQueue<ImageLink> queue = new();
private readonly ConcurrentDictionary<ImageLink, Thread> downloading = new();
private readonly ConcurrentQueue<ImageLink> waiting = new();
private readonly Timer timer;


public FileDownloadController()
{
timer = new(Skim, null, interval, interval);
timer = new(TimerTick, null, interval, interval);
}

/// <summary>
/// Adds a single link to the back of the waiting queue.
/// </summary>
/// <param name="link">The image link to enqueue.</param>
public void Queue(ImageLink link) => waiting.Enqueue(link);

/// <summary>
/// Adds every link in <paramref name="links"/> to the waiting queue,
/// logging each one as it is queued.
/// </summary>
/// <param name="links">The image links to enqueue, in enumeration order.</param>
public void Queue(IEnumerable<ImageLink> links)
{
    foreach (ImageLink imageLink in links)
    {
        logger.Log(LogLevel.Info, "Queueing {image_link} for download.", imageLink);

        // The single-link overload performs the actual enqueue.
        Queue(imageLink);
    }
}

/// <summary>
/// Take some links off of the queue,
/// A tick of the timer.
/// </summary>
/// <param name="state"></param>
/// <exception cref="NotImplementedException"></exception>
private void Skim(object state)
/// <param name="_">Unnecessary parameter.</param>
private void TimerTick(object _)
{
var chunk = new List<ImageLink>(SkimCount);
var skimCount = ConcurrentCount - downloading.Count;
logger.Log(LogLevel.Info, "Skimming {skim_count} images from queue.", skimCount);
var items = Skim(skimCount);

while (chunk.Count < SkimCount && queue.TryDequeue(out var item))
{
chunk.Add(item);
}
// TODO: If no images were found in queue set the timer to a slightly longer interval, to stop poll spamming.

foreach (var link in chunk)
{

foreach (var image in items)
{
var newThread = new Thread(() => image.Download(DownloadComplete));
newThread.Start();
downloading.TryAdd(image, newThread);
}

// TODO: Find all failed threads, remove them, set them back into the waiting list.
//foreach (...)
//{
//
//}
}

public void Queue(ImageLink link)
/// <summary>
/// Take some links off of the queue.
/// </summary>
private List<ImageLink> Skim(int amount)
{
queue.Enqueue(link);
var chunk = new List<ImageLink>(amount);

while (chunk.Count < amount && waiting.TryDequeue(out var item))
{
chunk.Add(item);
}

return chunk;
}

public bool Remove(ImageLink imageLink)
/// <summary>
/// Called when a download has completed successfully.<br/>
/// Removes <paramref name="imageLink"/> from the downloading list.
/// </summary>
private void DownloadComplete(ImageLink imageLink)
{
return tasks.TryRemove(imageLink, out var _);
logger.Log(LogLevel.Info, "Link {image_link} completed downloading succesfully.", imageLink);
downloading.TryRemove(imageLink, out var _);
}
}
}
10 changes: 7 additions & 3 deletions GChan/Controllers/MainController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
using System.Windows.Forms;
using SysThread = System.Threading.Thread;
using Thread = GChan.Trackers.Thread;
using Timer = System.Windows.Forms.Timer;
using Type = GChan.Trackers.Type;

namespace GChan.Controllers
Expand All @@ -29,7 +30,9 @@ class MainController

private SysThread scanThread = null;

private readonly System.Windows.Forms.Timer scanTimer = new System.Windows.Forms.Timer();
private readonly FileDownloadController fileDownloadController = new();

private readonly Timer scanTimer = new();

private readonly ILogger logger = LogManager.GetCurrentClassLogger();

Expand Down Expand Up @@ -329,8 +332,9 @@ private void ScanRoutine()
for (int i = 0; i < threads.Length; i++)
{
if (threads[i].Scraping)
{
ThreadPool.QueueUserWorkItem(new WaitCallback(threads[i].Download));
{
var links = threads[i].GetImageLinks();
fileDownloadController.Queue(links);
}
}
}
Expand Down
65 changes: 56 additions & 9 deletions GChan/ImageLink.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
using NLog;
using GChan.Properties;
using GChan.Trackers;
using NLog;
using System;
using System.IO;
using System.Linq;
using System.Net;
using System.Windows.Forms;

namespace GChan
{
Expand All @@ -24,19 +27,63 @@ public class ImageLink : IEquatable<ImageLink>
/// </summary>
public string UploadedFilename;

private readonly ILogger logger = LogManager.GetCurrentClassLogger();

/// <summary>
/// The ID of the post this image belongs to.
/// </summary>
public long No;

public ImageLink(long tim, string url, string uploadedFilename, long no)
/// <summary>
/// The thread this image is from.
/// </summary>
public Thread Thread;

private static readonly ILogger logger = LogManager.GetCurrentClassLogger();

/// <summary>
/// Creates a link to one image belonging to a post in <paramref name="thread"/>.
/// </summary>
/// <param name="tim">Value stored in <see cref="Tim"/>.</param>
/// <param name="url">Value stored in <see cref="Url"/>.</param>
/// <param name="uploadedFilename">Filename to store in <see cref="UploadedFilename"/>; it is sanitised first.</param>
/// <param name="no">The ID of the post this image belongs to.</param>
/// <param name="thread">The thread this image is from.</param>
public ImageLink(long tim, string url, string uploadedFilename, long no, Thread thread)
{
    // The filename is stored sanitised rather than verbatim.
    var sanitisedFilename = Utils.SanitiseFilename(uploadedFilename);

    Thread = thread;
    No = no;
    Tim = tim;
    Url = url;
    UploadedFilename = sanitisedFilename;
}

/// <summary>
/// Downloads the file at <see cref="Url"/> into the parent thread's save folder,
/// blocking the calling thread until the download finishes or fails.
/// </summary>
/// <remarks>
/// On success, <see cref="Tim"/> is added to the thread's saved IDs and
/// <paramref name="successCallback"/> is invoked with this link.<br/>
/// On failure, the exception is logged (permission errors are also shown in a message box)
/// and <paramref name="successCallback"/> is not invoked.
/// </remarks>
/// <param name="successCallback">Invoked with this link once the file has been saved.</param>
public void Download(Action<ImageLink> successCallback)
{
    try
    {
        // CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
        // It is inside the try so that a failure to create the folder (e.g. missing permissions)
        // reaches the handlers below instead of escaping this method unhandled.
        Directory.CreateDirectory(Thread.SaveTo);

        string destFilepath = Utils.CombinePathAndFilename(
            Thread.SaveTo,
            GenerateFilename((ImageFileNameFormat)Settings.Default.ImageFilenameFormat)
        );

        // TODO: Async/Taskify.
        using var webClient = new WebClient();
        webClient.DownloadFile(Url, destFilepath);

        // Only record the image as saved, and notify the caller, once the file is on disk.
        Thread.SavedIds.Add(Tim);
        successCallback(this);
    }
    catch (WebException ex)
    {
        logger.Error(ex, $"Error occured while downloading link {Url}.");
    }
    catch (UnauthorizedAccessException uaex)
    {
        MessageBox.Show(uaex.Message, $"No Permission to access folder {Thread.SaveTo}.");
        logger.Error(uaex);
    }
    catch (Exception ex)
    {
        logger.Error(ex);
    }
}

public string GenerateFilename(ImageFileNameFormat format)
Expand All @@ -55,8 +102,6 @@ public string GenerateFilename(ImageFileNameFormat format)
return result;
}

public bool Download()

public bool Equals(ImageLink other)
{
if (other == null)
Expand All @@ -65,7 +110,7 @@ public bool Equals(ImageLink other)
}

return Tim == other.Tim &&
URL == other.URL &&
Url == other.Url &&
UploadedFilename == other.UploadedFilename;
}

Expand All @@ -85,8 +130,10 @@ public override int GetHashCode()
{
int hash = 17;
hash = hash * 23 + Tim.GetHashCode();
hash = hash * 23 + (URL?.GetHashCode() ?? 0);
hash = hash * 23 + (Url?.GetHashCode() ?? 0);
hash = hash * 23 + (UploadedFilename?.GetHashCode() ?? 0);
hash = hash * 23 + No.GetHashCode();
hash = hash * 23 + Thread.GetHashCode();
return hash;
}
}
Expand Down
4 changes: 2 additions & 2 deletions GChan/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@
// Sie können alle Werte angeben oder die standardmäßigen Build- und Revisionsnummern
// übernehmen, indem Sie "*" eingeben:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("5.1.0")]
[assembly: AssemblyFileVersion("5.1.0")]
[assembly: AssemblyVersion("5.2.0")]
[assembly: AssemblyFileVersion("5.2.0")]
9 changes: 6 additions & 3 deletions GChan/Trackers/Sites/Thread_4Chan.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public static bool UrlIsThread(string url)
return Regex.IsMatch(url, THREAD_REGEX);
}

protected override ImageLink[] GetImageLinks(bool includeAlreadySaved = false)
protected override ImageLink[] GetImageLinksImpl(bool includeAlreadySaved = false)
{
var baseUrl = $"http://i.4cdn.org/{BoardCode}/";
var jsonUrl = $"http://a.4cdn.org/{BoardCode}/thread/{ID}.json";
Expand All @@ -62,7 +62,8 @@ protected override ImageLink[] GetImageLinks(bool includeAlreadySaved = false)
x[timPath].GetTimHashCode(),
baseUrl + Uri.EscapeDataString(x[timPath].Value<string>()) + x["ext"], // Require escaping for the flash files stored with arbitrary string names.
x["filename"].Value<string>(),
x["no"].Value<long>()
x["no"].Value<long>(),
this
)
)
.ToArray();
Expand Down Expand Up @@ -118,7 +119,9 @@ protected override void DownloadHTMLPage()
new ImageLink(post["tim"].Value<long>(),
old,
post["filename"].ToString(),
post["no"].Value<long>())
post["no"].Value<long>(),
this
)
.GenerateFilename((ImageFileNameFormat)Settings.Default.ImageFilenameFormat));

//Save thumbs for files that need it
Expand Down
4 changes: 2 additions & 2 deletions GChan/Trackers/Sites/Thread_8Kun.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static bool UrlIsThread(string url)
return Regex.IsMatch(url, threadRegex);
}

protected override ImageLink[] GetImageLinks(bool includeAlreadySaved = false)
protected override ImageLink[] GetImageLinksImpl(bool includeAlreadySaved = false)
{
string jsonUrl = $"http://8kun.top/{BoardCode}/res/{ID}.json"; // Thread JSON url

Expand Down Expand Up @@ -77,7 +77,7 @@ protected override ImageLink[] GetImageLinks(bool includeAlreadySaved = false)
Fpath1Url(tims[j], exts[j]); // "1"

// Save image link using reply no (number) as tim because 8kun tims have letters and numbers in them. The reply number will work just fine.
links.Add(new ImageLink(no, url, filenames[j], no));
links.Add(new ImageLink(no, url, filenames[j], no, this));
}
}
}
Expand Down
Loading

0 comments on commit b68cbc1

Please sign in to comment.