using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
/// <summary>
/// External merge sort for large text files: the input is split into sorted
/// part files named <c>{outputFile}part{n}.txt</c>, which are then k-way merged
/// into <c>outputFile</c>.
/// </summary>
public class BigFileMergeSort
{
    /// <summary>
    /// Number of characters accumulated in memory before a chunk is handed off
    /// (to the sorter while splitting, to the writer while merging).
    /// </summary>
    private const int ChunkCharLimit = 128_000_000;

    /// <summary>
    /// Ordering shared by the chunk sort and the merge. Both phases must use the
    /// same comparer, otherwise the merge would interleave the parts incorrectly.
    /// </summary>
    private static readonly IComparer<string> LineComparer = Comparer<string>.Default;

    /// <summary>
    /// Orders merge candidates by line and, for equal lines, by the position of
    /// their part file in the discovery order, so ties always resolve to the
    /// earliest part.
    /// </summary>
    private static readonly IComparer<(string Line, int Index)> HeadComparer =
        Comparer<(string Line, int Index)>.Create((a, b) =>
        {
            int byLine = LineComparer.Compare(a.Line, b.Line);
            return byLine != 0 ? byLine : a.Index.CompareTo(b.Index);
        });

    /// <summary>Builds the path of the part file with the given index.</summary>
    private static string PartFileName(string outputFile, int index) => $"{outputFile}part{index}.txt";

    /// <summary>
    /// Returns the existing part files that belong to <paramref name="outputFile"/>,
    /// i.e. files next to it named <c>{name}part{digits}.txt</c>.
    /// </summary>
    private static string[] FindPartFiles(string outputFile)
    {
        string? directory = Path.GetDirectoryName(outputFile);
        if (string.IsNullOrEmpty(directory))
        {
            // A bare file name means the parts live in the current directory.
            directory = ".";
        }

        string prefix = Path.GetFileName(outputFile) + "part";
        return Directory
            .GetFiles(directory, prefix + "*.txt")
            .Where(path => HasNumericSuffix(Path.GetFileNameWithoutExtension(path), prefix.Length))
            .ToArray();
    }

    /// <summary>
    /// True when <paramref name="name"/> has at least one character from
    /// <paramref name="start"/> onwards and all of them are ASCII digits.
    /// </summary>
    private static bool HasNumericSuffix(string name, int start)
    {
        if (name.Length <= start)
        {
            return false;
        }

        for (int i = start; i < name.Length; i++)
        {
            if (name[i] is < '0' or > '9')
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Reads <paramref name="inputFile"/> line by line, and every time more than
    /// <see cref="ChunkCharLimit"/> characters have been collected sorts the chunk
    /// and writes it to the next part file. Sorting/writing of one chunk runs on a
    /// background task while the next chunk is being read.
    /// </summary>
    /// <param name="inputFile">Path of the text file to sort.</param>
    /// <param name="outputFile">Path prefix used to name the part files.</param>
    private static void SplitAndSort(string inputFile, string outputFile)
    {
        // Parts left over from an earlier run with more chunks would otherwise be
        // merged into this run's result. Never delete the input itself.
        string inputFullPath = Path.GetFullPath(inputFile);
        foreach (string stale in FindPartFiles(outputFile))
        {
            if (!string.Equals(Path.GetFullPath(stale), inputFullPath, StringComparison.OrdinalIgnoreCase))
            {
                File.Delete(stale);
            }
        }

        var lines = new List<string>(100_000);
        var part = 0;
        var size = 0;
        Task blocker = Task.CompletedTask;

        using (var reader = new StreamReader(inputFile))
        {
            while (reader.ReadLine() is { } line)
            {
                lines.Add(line);
                size += line.Length;
                if (size > ChunkCharLimit)
                {
                    size = 0;

                    // Only one chunk may be in flight, so at most two chunks are in memory.
                    blocker.Wait();

                    // Hand the full list to the worker and start a fresh one instead of copying it.
                    List<string> chunk = lines;
                    string partPath = PartFileName(outputFile, part++);
                    blocker = Task.Run(() => SortAndWrite(chunk, partPath));
                    lines = new List<string>(chunk.Count);
                }
            }
        }

        blocker.Wait();
        if (lines.Count > 0)
        {
            SortAndWrite(lines, PartFileName(outputFile, part));
        }

        // Sorts one chunk in place and writes it out as a part file.
        static void SortAndWrite(List<string> toSort, string path)
        {
            toSort.Sort(LineComparer);
            File.WriteAllLines(path, toSort);
        }
    }

    /// <summary>One part file being merged: its reader and its current (unconsumed) line.</summary>
    public class FilePart
    {
        /// <summary>Reader over the part file.</summary>
        public required StreamReader S { get; set; }

        /// <summary>Current line, or <c>null</c> before the first read and after the end of the file.</summary>
        public required string? L { get; set; }

        /// <summary>Advances to the next line of the part file.</summary>
        public void Go() => L = S.ReadLine();

        /// <summary>True when there is no current line.</summary>
        public bool GameOver => L is null;
    }

    /// <summary>
    /// Merges all part files belonging to <paramref name="outputFile"/> into
    /// <paramref name="outputFile"/>, replacing any existing file. Output is
    /// buffered and appended in batches on a background task while merging
    /// continues. The part files are left on disk.
    /// </summary>
    /// <param name="outputFile">Path of the merged result; also the prefix of the part files.</param>
    private static void Merge(string outputFile)
    {
        // The result is built with AppendAllLines, so a file from an earlier run must go first.
        if (File.Exists(outputFile))
        {
            File.Delete(outputFile);
        }

        var parts = new List<FilePart>();
        try
        {
            foreach (string file in FindPartFiles(outputFile))
            {
                parts.Add(new FilePart { S = new StreamReader(file), L = null });
            }

            // Min-heap over the current line of every part that still has data.
            var heap = new PriorityQueue<FilePart, (string Line, int Index)>(parts.Count, HeadComparer);
            for (int i = 0; i < parts.Count; i++)
            {
                parts[i].Go();
                if (!parts[i].GameOver)
                {
                    heap.Enqueue(parts[i], (parts[i].L!, i));
                }
            }

            var flush = new List<string>();
            var size = 0;
            Task blocker = Task.CompletedTask;

            while (heap.TryDequeue(out var smallest, out var head))
            {
                flush.Add(head.Line);
                size += head.Line.Length;
                if (size > ChunkCharLimit)
                {
                    // Batches must be appended in order, so wait for the previous one.
                    blocker.Wait();
                    size = 0;
                    List<string> batch = flush;
                    blocker = Task.Run(() => File.AppendAllLines(outputFile, batch));
                    flush = new List<string>();
                }

                smallest.Go();
                if (!smallest.GameOver)
                {
                    heap.Enqueue(smallest, (smallest.L!, head.Index));
                }
            }

            blocker.Wait();
            if (flush.Count > 0)
            {
                File.AppendAllLines(outputFile, flush);
            }
        }
        finally
        {
            foreach (FilePart opened in parts)
            {
                opened.S.Dispose();
            }
        }
    }

    /// <summary>
    /// Sorts <c>C:\temp\sort\testfile.txt</c> into <c>C:\temp\sort\output</c> and
    /// prints the elapsed time in seconds.
    /// </summary>
    public static void Main()
    {
        string inputFile = "C:\\temp\\sort\\testfile.txt";
        string outputFile = "C:\\temp\\sort\\output";
        var sw = Stopwatch.StartNew();
        SplitAndSort(inputFile, outputFile);
        Merge(outputFile);
        Console.WriteLine(sw.Elapsed.TotalSeconds);
    }
}