Skip to content
Snippets Groups Projects
Commit d554eb3d authored by tobiglaser's avatar tobiglaser
Browse files

added csvParser

parent 57fcc8f7
No related branches found
No related tags found
No related merge requests found
...@@ -126,3 +126,16 @@ endif(WIN32) ...@@ -126,3 +126,16 @@ endif(WIN32)
# COMMAND ${CMAKE_COMMAND} -E copy_if_different # which executes "cmake - E copy_if_different..." # COMMAND ${CMAKE_COMMAND} -E copy_if_different # which executes "cmake - E copy_if_different..."
# "${PROJECT_SOURCE_DIR}/lib/cute.dll" # <--this is in-file # "${PROJECT_SOURCE_DIR}/lib/cute.dll" # <--this is in-file
# $<TARGET_FILE_DIR:TSP>) # <--this is out-file path # $<TARGET_FILE_DIR:TSP>) # <--this is out-file path
# Stand-alone executable built from the CSV parser example source.
add_executable(csvParserTest
examples/csvParserTest.cpp
)
# Put src on the include path of this target (presumably where csvParser.h lives - TODO confirm).
target_include_directories(csvParserTest PRIVATE
src
)
# Use this command to copy example.csv from the examples folder next to the freshly built csvParserTest binary, so the sample input ships alongside the executable.
add_custom_command(TARGET csvParserTest POST_BUILD # Adds a post-build event to csvParserTest
COMMAND ${CMAKE_COMMAND} -E copy_if_different # which executes "cmake -E copy_if_different..."
"${PROJECT_SOURCE_DIR}/examples/example.csv" # <--this is in-file
$<TARGET_FILE_DIR:csvParserTest>) # <--this is out-file path (directory of the built target)
#define TESTING_CSV
#include "csvParser.h"
id,value
1,3.14159
2,2.71828
3,-0.007
4,123456.789
5,1.0E-5
6,-42.42
7,0.0
8,9.8765
id,value
1,3.14159
2,2.71828
3,-0.007
4,123456.789
5,1.0E-5
6,-42.42
7,0.0
8,9.8765
9,ABC # Invalid: non-numeric string
10,12..34 # Invalid: misplaced decimal point
11,1e999 # Invalid: exponent too large (overflow)
12,NaN # Potentially invalid: depends on parser handling
13,inf # Potentially invalid: infinite value
14,-5.6.7 # Invalid: multiple decimal points
15,"4,567.89" # Invalid: comma used as thousands separator (without quotes)
#include "control.h" #include "control.h"
#include "algoThread.h" #include "algoThread.h"
#include "csvParser.h"
#include "cuteControl.h" #include "cuteControl.h"
#include "mainWindow.h" #include "mainWindow.h"
...@@ -194,7 +195,26 @@ void Control::generateProblem() ...@@ -194,7 +195,26 @@ void Control::generateProblem()
} }
else if (w->configPanel->stackedWidget->currentWidget() == w->configPanel->fileWidget) else if (w->configPanel->stackedWidget->currentWidget() == w->configPanel->fileWidget)
{ {
//TODO file.csv std::string filename = w->configPanel->fileLine->text().toStdString();
CSVParser csv(filename);
bool success = csv.parse();
if (!success)
{
std::string errorMessage = "Error parsing file: " + filename;
logMessage(errorMessage.c_str());
}
auto data = csv.getFloatRows();
for (auto&& row : data)
{
if (row.size() < 2)
continue;
else if (std::isnan(row[0]) || std::isnan(row[1]))
continue;
else if (std::isinf(row[0]) || std::isinf(row[1]))
continue;
else
problem.emplace_back(cute::Point{row[0], row[1]});
}
} }
} }
... ...
......
// Created by Copilot. Added some #ifdef to block out main() when not testing.
/*
Add a function to turn each field into a float. Add an option to skip the first line.
*/
/*
Below is an updated version of the CSV parser class. In this revision, we have:
1. **An option to skip the first (header) line.**
You can enable this behavior by passing a boolean flag (`skipHeader`) to the constructor. If enabled, the first line is read and discarded before parsing the data.
2. **A function to convert each field into a float.**
The new member function, `getFloatRows()`, converts every field from the parsed rows to a float value using `std::stof`. If a field cannot be parsed as a float—due to it being non-numeric or out-of-range—a `NaN` value (using `std::numeric_limits<float>::quiet_NaN()`) is inserted in its place.
Below is the complete, self-contained code:
*/
#include <cmath> // For std::isnan
#include <cstddef> // For std::size_t
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <stdexcept> // For std::invalid_argument, std::out_of_range
#include <string>
#include <vector>
// Minimal line-oriented CSV reader.
//
// Usage: construct with a file name, call parse(), then read the result via
// getRows() (raw strings) or getFloatRows() (numeric view, NaN for anything
// that is not a number).
//
// Limitations visible in this implementation: one record per physical line
// (a quoted field cannot contain a line break), and quote characters are
// always consumed as quoting syntax unless doubled inside a quoted section.
class CSVParser {
public:
    // filename   - path handed to std::ifstream when parse() is called.
    // delimiter  - field separator (default ',').
    // skipHeader - when true, the first line of the file is read and discarded.
    CSVParser(const std::string& filename, char delimiter = ',', bool skipHeader = false)
        : filename(filename), delimiter(delimiter), skipHeader(skipHeader) {}

    // Reads the whole file and replaces any previously parsed rows.
    // Returns false (after writing a message to std::cerr) when the file
    // cannot be opened, true otherwise. An empty file yields zero rows.
    bool parse() {
        rows.clear(); // Drop the result of an earlier parse() call.

        std::ifstream file(filename);
        if (!file.is_open()) {
            std::cerr << "Error opening file: " << filename << std::endl;
            return false;
        }

        std::string line;
        if (skipHeader && std::getline(file, line)) {
            // Header line intentionally discarded.
        }

        while (std::getline(file, line)) {
            // Files written with CRLF line endings leave a '\r' at the end of
            // the line on platforms that do not translate it; without this it
            // would end up inside the last field of every row.
            if (!line.empty() && line.back() == '\r') {
                line.pop_back();
            }
            rows.push_back(parseLine(line));
        }
        // The stream closes itself when it goes out of scope.
        return true;
    }

    // Returns the parsed rows as strings, one inner vector per line.
    const std::vector<std::vector<std::string>>& getRows() const {
        return rows;
    }

    // Returns a copy of the parsed rows with every field converted to float.
    // The result has the same shape as getRows(). A field becomes quiet NaN when
    //   - it does not start with a number (e.g. "ABC" or an empty field),
    //   - the number is out of range for float (e.g. "1e999"), or
    //   - anything other than whitespace follows the number
    //     (e.g. "12..34", "-5.6.7", "4,567.89").
    // Spellings that std::stof itself accepts, such as "inf" or "nan", are
    // passed through unchanged, so callers that need finite values must still
    // check with std::isnan / std::isinf.
    std::vector<std::vector<float>> getFloatRows() const {
        std::vector<std::vector<float>> floatRows;
        floatRows.reserve(rows.size());
        for (const auto& row : rows) {
            floatRows.emplace_back();
            std::vector<float>& fRow = floatRows.back();
            fRow.reserve(row.size());
            for (const auto& field : row) {
                fRow.push_back(toFloat(field));
            }
        }
        return floatRows;
    }

private:
    std::string filename;
    char delimiter;
    bool skipHeader;
    std::vector<std::vector<std::string>> rows;

    // Converts one field to float, or quiet NaN if the field as a whole is not
    // a single number.
    // NOTE(review): std::stof follows the C library's current LC_NUMERIC
    // locale for the decimal separator - confirm the application leaves it at
    // "C" if files always use '.'.
    static float toFloat(const std::string& field) {
        const float notANumber = std::numeric_limits<float>::quiet_NaN();
        try {
            std::size_t consumed = 0;
            const float value = std::stof(field, &consumed);
            // std::stof stops at the first character it cannot use and reports
            // success for the prefix; reject the field unless only whitespace
            // is left over.
            if (field.find_first_not_of(" \t\r\n\v\f", consumed) != std::string::npos) {
                return notANumber;
            }
            return value;
        } catch (const std::invalid_argument&) {
            // No leading number at all.
            return notANumber;
        } catch (const std::out_of_range&) {
            // Number does not fit into a float.
            return notANumber;
        }
    }

    // Splits a single line into fields.
    // A double quote toggles "quoted" mode and is not copied into the field;
    // inside a quoted section the delimiter is treated as an ordinary
    // character and a doubled quote ("") produces one literal quote.
    // Always returns at least one field (an empty line gives one empty field).
    std::vector<std::string> parseLine(const std::string& line) const {
        std::vector<std::string> tokens;
        std::string token;
        bool inQuotes = false;

        std::size_t i = 0;
        while (i < line.size()) {
            const char ch = line[i];

            if (ch == '"') {
                if (inQuotes && i + 1 < line.size() && line[i + 1] == '"') {
                    // Escaped quote: keep one quote, skip both characters.
                    token.push_back('"');
                    i += 2;
                } else {
                    // Opening or closing quote.
                    inQuotes = !inQuotes;
                    ++i;
                }
                continue;
            }

            if (ch == delimiter && !inQuotes) {
                // Unquoted delimiter ends the current field.
                tokens.push_back(token);
                token.clear();
            } else {
                token.push_back(ch);
            }
            ++i;
        }

        // Whatever is left is the last field of the line.
        tokens.push_back(token);
        return tokens;
    }
};
#ifdef TESTING_CSV
int main() {
// Create an instance of CSVParser with skipHeader enabled.
// Replace "example.csv" with your CSV file.
CSVParser parser("example.csv", ',', true);
// Parse the CSV file.
if (parser.parse()) {
// Display the parsed data as strings.
std::cout << "Parsed CSV (strings):" << std::endl;
const auto& rows = parser.getRows();
for (const auto& row : rows) {
for (const auto& field : row) {
std::cout << field << " | ";
}
std::cout << std::endl;
}
// Convert to floats and display.
std::cout << "\nParsed CSV (floats):" << std::endl;
const auto floatRows = parser.getFloatRows();
for (const auto& row : floatRows) {
for (const auto& num : row) {
if (std::isnan(num))
std::cout << "NaN" << " | ";
else
std::cout << num << " | ";
}
std::cout << std::endl;
}
} else {
std::cerr << "Failed to parse CSV file." << std::endl;
}
return 0;
}
#endif
/*
### Explanation
1. **Constructor & Members**
The constructor initializes the parser with the file path, the delimiter, and the flag that controls whether the first (header) line is skipped. The internal data structure (`rows`) is a vector of vectors of strings that holds the CSV data.
2. **`parse()` Method**
This method opens the file and reads it line by line. For each line, it calls the helper function `parseLine` and stores the resulting vector of tokens. If the file cannot be opened, it outputs an error message.
3. **`parseLine()` Method**
This helper method goes through the string character by character. It toggles a flag when it encounters quotes so that delimiters inside quotes are treated as literal characters. It also supports escaping a double quote by using two consecutive double quotes.
4. **`main()` Function**
In the `main()` function, an instance of `CSVParser` is created. After parsing, it prints each field separated by a bar (`|`) for clarity. This demonstration shows how to use the class; you’d replace `"example.csv"` with your actual file path.
This example should serve as a solid starting point. Depending on the CSV complexity, you might need to extend the functionality to handle things like newline characters inside quoted fields or different escape mechanisms.
If you’re intrigued by how CSV parsing varies with different formats or want to explore more advanced parsing techniques—perhaps using regular expressions, state machines, or even third-party libraries like [CSV for C++](https://github.com/ben-strasser/fast-cpp-csv-parser)—I’d be happy to dive deeper into those topics. Enjoy coding!
*/
/*
---
### How It Works
1. **Skipping the Header:**
The constructor now accepts a third parameter, `bool skipHeader`. In the `parse()` function, if `skipHeader` is true, we first perform a `std::getline` to skip over the header before processing the rest of the file.
2. **Converting Fields to Floats:**
The new function `getFloatRows()` iterates over each row and each field (stored as a string). It attempts to convert each field using `std::stof`. If conversion throws an exception (either because the field is not a valid number or it is out of range), a NaN is inserted instead. This way, the function always returns a consistent vector of float values for further processing.
---
### Further Ideas
- **Enhanced Error Handling:**
You might want to add logging or more sophisticated error handling to report which fields failed conversion.
- **Custom Conversion:**
Sometimes CSV files include cells that are empty strings or other special tokens. You could extend the conversion process to handle these cases gracefully.
- **Supporting Different Data Types:**
Consider adding more conversion functions (e.g., to int, double, or even date/time types) that can process certain columns.
- **Template-Based Parsing:**
For a more robust solution, especially with large CSV files or varying types, you might employ template-based functions or even leverage existing CSV libraries that offer advanced features like multi-threading or schema-based parsing.
This code provides a good foundation—and from here, you can fine-tune it to suit the specific needs of your projects. Enjoy exploring!
*/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment