Suponha que eu tenha o seguinte arquivo CSV:
Column1,Column2,Column3
C,3,1
B,2,2
A,3,3
C,3,10
B,2,20
A,2,30
C,3,100
B,1,200
A,1,300
Eu quero resolver isso
- Primeiro pela Coluna1
- Segundo pela Coluna2
- Terceiro pela Coluna3
Portanto o resultado deverá ser o seguinte:
Column1,Column2,Column3
A,1,300
A,2,30
A,3,3
B,1,200
B,2,2
B,2,20
C,3,1
C,3,10
C,3,100
Para classificar CSV por colunas em ordem crescente/decrescente, criei a seguinte classe:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace SikorskiLibMemoryLess
{
public enum SortTypeEnum
{
Ascending,
Descending
}
public static class ListExtensions
{
public static string ToCommaSeparatedString(this List<string> list)
{
if (list == null || !list.Any())
{
return string.Empty;
}
return string.Join(",", list);
}
}
public class CSVSorter
{
public List<string> Header { get; private set; }
public List<List<string>> Data { get; private set; }
public CSVSorter()
{
Data = new List<List<string>>();
}
public void LoadCSV(string filePath, bool hasHeader = true)
{
using (StreamReader sr = new StreamReader(filePath))
{
string line;
bool isFirstLine = true;
while ((line = sr.ReadLine()) != null)
{
List<string> columns = line.Split(',').Select(col => string.IsNullOrWhiteSpace(col) ? null : col).ToList();
if (isFirstLine && hasHeader)
{
Header = columns;
isFirstLine = false;
}
else
{
Data.Add(columns);
if (isFirstLine)
{
isFirstLine = false;
if (!hasHeader)
{
Header = Enumerable.Range(1, columns.Count).Select(i => "Column" + i).ToList();
}
}
}
}
}
}
public void LoadCSV(string fileName, string fileDir, bool hasHeader = true)
{
string filePath = Path.Combine(fileDir, fileName);
LoadCSV(filePath, hasHeader);
}
public void LoadCSV(IEnumerable<List<string>> csvData, bool hasHeader = true)
{
bool isFirstLine = true;
foreach (var columns in csvData)
{
var processedColumns = columns.Select(col => string.IsNullOrWhiteSpace(col) ? null : col).ToList();
if (isFirstLine && hasHeader)
{
Header = processedColumns;
isFirstLine = false;
}
else
{
Data.Add(processedColumns);
if (isFirstLine)
{
isFirstLine = false;
if (!hasHeader)
{
Header = Enumerable.Range(1, processedColumns.Count).Select(i => "Column" + i).ToList();
}
}
}
}
}
public void Sort(int[] columns, SortTypeEnum ascendingOrDescending)
{
try
{
IOrderedEnumerable<List<string>> sortedData = null;
if (ascendingOrDescending == SortTypeEnum.Ascending)
{
sortedData = Data.OrderBy(row => GetColumnValueSafe(row, columns[0]));
for (int i = 1; i < columns.Length; i++)
{
sortedData = sortedData.ThenBy(row => GetColumnValueSafe(row, columns[i]));
}
}
Data = sortedData.ToList();
}
catch (Exception ex)
{
Console.WriteLine($"Error during sorting: {ex.Message}");
Console.WriteLine(ex.StackTrace);
}
}
private string GetColumnValueSafe(List<string> row, int columnIndex)
{
return columnIndex < row.Count ? row[columnIndex] : null;
}
public IEnumerable<List<string>> Get()
{
yield return Header;
foreach (var row in Data)
{
yield return row;
}
}
public void SaveCSV(string filePath)
{
using (StreamWriter sw = new StreamWriter(filePath))
{
// Write the header
sw.WriteLine(string.Join(",", Header));
// Write the data rows
foreach (var row in Data)
{
sw.WriteLine(string.Join(",", row.Select(col => col ?? "")));
}
}
}
public void SaveCSV(string fileName, string fileDir)
{
string filePath = Path.Combine(fileDir, fileName);
SaveCSV(filePath);
}
}
class Program
{
static void Main()
{
// Example usage for CSV with potential missing data
CSVSorter sorter = new CSVSorter();
sorter.LoadCSV(@"C:\git\rouse_data~~\[PENULTIMATE]\OUTPUT_CSV\input.csv", true); // Adjust path as needed
sorter.Sort(new int[] { 0,1,2 }, SortTypeEnum.Ascending); // Sort by the second column (e.g., particleVolume) in ascending order
sorter.SaveCSV(@"C:\git\rouse_data~~\[PENULTIMATE]\OUTPUT_CSV\sorted___input.csv");
// Display sorted data
foreach (var row in sorter.Get())
{
Console.WriteLine(string.Join(", ", row));
}
}
}
}
Esta aula não está me dando o resultado desejado.
Column1,Column2,Column3
C,3,1
B,2,2
A,3,3
C,3,10
B,2,20
A,2,30
C,3,100
B,1,200
A,1,300
Existe alguma biblioteca de código aberto para classificar dados CSV por colunas?