added csvParser

d554eb3d · tobiglaser · 57fcc8f7 · d554eb3d · d554eb3d · d554eb3d
Commit d554eb3d authored 6 months ago by tobiglaser
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -126,3 +126,16 @@ endif(WIN32)
 #    COMMAND ${CMAKE_COMMAND} -E copy_if_different  # which executes "cmake - E copy_if_different..."
 #        "${PROJECT_SOURCE_DIR}/lib/cute.dll"   # <--this is in-file
 #        $<TARGET_FILE_DIR:TSP>)            # <--this is out-file path
+# Standalone test executable for the CSV parser; its only source is examples/csvParserTest.cpp.
+add_executable(csvParserTest
+    examples/csvParserTest.cpp
+)
+# csvParser.h is located in src/, so that directory must be on the include path of this target.
+target_include_directories(csvParserTest PRIVATE
+    src
+)
+# Copy the sample CSV from the examples folder next to the freshly built executable.
+# The test program opens "example.csv" by relative path, so it is presumably meant to be run from that directory.
+add_custom_command(TARGET csvParserTest POST_BUILD   # Adds a post-build event to csvParserTest
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different  # which executes "cmake -E copy_if_different ..."
+        "${PROJECT_SOURCE_DIR}/examples/example.csv"   # <--this is in-file
+        $<TARGET_FILE_DIR:csvParserTest>)            # <--this is out-file path
--- a/examples/csvParserTest.cpp
+++ b/examples/csvParserTest.cpp
+// csvParser.h wraps a demo main() in #ifdef TESTING_CSV. Defining the macro
+// BEFORE the include is what turns this otherwise empty translation unit into
+// the test executable; without it no main() would be compiled.
+#define TESTING_CSV
+#include "csvParser.h"
--- a/examples/example.csv
+++ b/examples/example.csv
+id,value
+1,3.14159
+2,2.71828
+3,-0.007
+4,123456.789
+5,1.0E-5
+6,-42.42
+7,0.0
+8,9.8765
+id,value
+1,3.14159
+2,2.71828
+3,-0.007
+4,123456.789
+5,1.0E-5
+6,-42.42
+7,0.0
+8,9.8765
+9,ABC  # Invalid: non-numeric string
+10,12..34  # Invalid: misplaced decimal point
+11,1e999  # Invalid: exponent too large (overflow)
+12,NaN  # Potentially invalid: depends on parser handling
+13,inf  # Potentially invalid: infinite value
+14,-5.6.7  # Invalid: multiple decimal points
+15,"4,567.89"  # Invalid: comma used as thousands separator (quoted so it stays one field)
--- a/src/control.cpp
+++ b/src/control.cpp
 #include "control.h"
 #include "algoThread.h"
+#include "csvParser.h"
 #include "cuteControl.h"
 #include "mainWindow.h"
@@ -194,7 +195,26 @@ void Control::generateProblem()
    }
    else if (w->configPanel->stackedWidget->currentWidget() == w->configPanel->fileWidget)
    {
-        //TODO file.csv
+        std::string filename = w->configPanel->fileLine->text().toStdString();
+        CSVParser   csv(filename);
+        bool        success = csv.parse();
+        if (!success)
+        {
+            std::string errorMessage = "Error parsing file: " + filename;
+            logMessage(errorMessage.c_str());
+        }
+        auto data = csv.getFloatRows();
+        for (auto&& row : data)
+        {
+            if (row.size() < 2)
+                continue;
+            else if (std::isnan(row[0]) || std::isnan(row[1]))
+                continue;
+            else if (std::isinf(row[0]) || std::isinf(row[1]))
+                continue;
+            else
+                problem.emplace_back(cute::Point{row[0], row[1]});
+        }
    }
 }


--- a/src/csvParser.h
+++ b/src/csvParser.h
+// Created by Copilot. Added some #ifdef to block out main() when not testing.
+/*
+Add a function to turn each field into a float. Add an option to skip the first line.
+*/
+/*
+Below is an updated version of the CSV parser class. In this revision, we have:
+1. **An option to skip the first (header) line.**  
+    You can enable this behavior by passing a boolean flag (`skipHeader`) to the constructor. If enabled, the first line is read and discarded before parsing the data.
+2. **A function to convert each field into a float.**  
+    The new member function, `getFloatRows()`, converts every field from the parsed rows to a float value using `std::stof`. If a field cannot be parsed as a float—due to it being non-numeric or out-of-range—a `NaN` value (using `std::numeric_limits<float>::quiet_NaN()`) is inserted in its place.
+Below is the complete, self-contained code:
+*/
+#pragma once
+
+#include <cmath>      // For std::isnan
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <sstream>
+#include <stdexcept>  // For std::invalid_argument / std::out_of_range
+#include <string>
+#include <utility>    // For std::move
+#include <vector>
+// Minimal CSV reader: parse() loads a whole file into rows of string fields,
+// getRows() exposes them, and getFloatRows() converts them to floats.
+//
+// Limitations of this implementation:
+//  - one record per physical line (a quoted field cannot contain a line break);
+//  - quotes are removed from the stored field, "" inside quotes yields one ";
+//  - an empty line produces a row with a single empty field.
+class CSVParser {
+public:
+    // filename   - path opened with std::ifstream each time parse() is called.
+    // delimiter  - field separator (default is comma).
+    // skipHeader - when true, parse() reads and discards the first line.
+    CSVParser(const std::string& filename, char delimiter = ',', bool skipHeader = false)
+        : filename(filename), delimiter(delimiter), skipHeader(skipHeader) {}
+
+    // Reads the file and replaces any previously parsed rows.
+    // Returns false (after printing a message to std::cerr) if the file cannot
+    // be opened; returns true otherwise, even when the file is empty.
+    bool parse() {
+        rows.clear();  // A second call must not append to the previous result.
+        std::ifstream file(filename);
+        if (!file.is_open()) {
+            std::cerr << "Error opening file: " << filename << std::endl;
+            return false;
+        }
+        std::string line;
+        if (skipHeader) {
+            // Header content is not needed; a failed read here simply means
+            // the loop below finds nothing to parse either.
+            std::getline(file, line);
+        }
+        while (std::getline(file, line)) {
+            // Files with CRLF line endings would otherwise keep a '\r' at the
+            // end of the last field of every row.
+            if (!line.empty() && line.back() == '\r') {
+                line.pop_back();
+            }
+            rows.push_back(parseLine(line));
+        }
+        // The stream closes itself when it goes out of scope.
+        return true;
+    }
+
+    // Returns the parsed CSV data as rows of string fields.
+    const std::vector<std::vector<std::string>>& getRows() const {
+        return rows;
+    }
+
+    // Returns the parsed data with every field converted to float.
+    // The result has the same shape as getRows(); a field that is not entirely
+    // a number (or is out of float range) becomes a quiet NaN.
+    std::vector<std::vector<float>> getFloatRows() const {
+        std::vector<std::vector<float>> floatRows;
+        floatRows.reserve(rows.size());
+        for (const auto& row : rows) {
+            std::vector<float> fRow;
+            fRow.reserve(row.size());
+            for (const auto& field : row) {
+                fRow.push_back(toFloat(field));
+            }
+            floatRows.push_back(std::move(fRow));
+        }
+        return floatRows;
+    }
+
+private:
+    std::string filename;
+    char delimiter;
+    bool skipHeader;
+    std::vector<std::vector<std::string>> rows;
+
+    // Converts one field to float, or to NaN if it is not a valid number.
+    // std::stof stops at the first character it cannot use and still succeeds,
+    // so "12..34" or "4,567.89" would silently become 12 and 4. The number of
+    // consumed characters is therefore checked: only whitespace may follow.
+    // Note: std::stof itself accepts textual "nan"/"inf"; those values are
+    // passed through unchanged and must be filtered by the caller if unwanted.
+    static float toFloat(const std::string& field) {
+        const float notANumber = std::numeric_limits<float>::quiet_NaN();
+        try {
+            std::size_t consumed = 0;
+            const float value = std::stof(field, &consumed);
+            const bool onlyWhitespaceLeft =
+                field.find_first_not_of(" \t\r\n\f\v", consumed) == std::string::npos;
+            return onlyWhitespaceLeft ? value : notANumber;
+        } catch (const std::invalid_argument&) {
+            // No leading numeric text at all (e.g. "ABC" or an empty field).
+            return notANumber;
+        } catch (const std::out_of_range&) {
+            // Numeric text that does not fit into a float (e.g. "1e999").
+            return notANumber;
+        }
+    }
+
+    // Splits a single line into fields.
+    // A double quote toggles "inside quotes" and is not stored; while inside
+    // quotes the delimiter is an ordinary character and "" stands for one
+    // literal quote. An unterminated quote simply runs to the end of the line.
+    std::vector<std::string> parseLine(const std::string& line) const {
+        std::vector<std::string> tokens;
+        std::string token;
+        bool inQuotes = false;
+        std::size_t i = 0;
+        while (i < line.size()) {
+            const char ch = line[i];
+            if (ch == '"') {
+                if (inQuotes && i + 1 < line.size() && line[i + 1] == '"') {
+                    // Escaped quote: keep one quote, skip both characters.
+                    token.push_back('"');
+                    i += 2;
+                } else {
+                    inQuotes = !inQuotes;
+                    ++i;
+                }
+                continue;
+            }
+            if (ch == delimiter && !inQuotes) {
+                // Unquoted delimiter ends the current field.
+                tokens.push_back(token);
+                token.clear();
+            } else {
+                token.push_back(ch);
+            }
+            ++i;
+        }
+        // The last field has no trailing delimiter, so add it explicitly.
+        tokens.push_back(token);
+        return tokens;
+    }
+};
+#ifdef TESTING_CSV
+int main() {
+    // Demo driver: reads "example.csv" from the current working directory,
+    // skipping its header line, and prints the result twice - once as the raw
+    // string fields and once converted to floats.
+    CSVParser parser("example.csv", ',', true);
+
+    // Nothing to show if the file could not be read; the exit status stays 0.
+    if (!parser.parse()) {
+        std::cerr << "Failed to parse CSV file." << std::endl;
+        return 0;
+    }
+
+    // First pass: the fields exactly as they were read.
+    std::cout << "Parsed CSV (strings):" << std::endl;
+    for (const auto& record : parser.getRows()) {
+        for (const auto& cell : record) {
+            std::cout << cell << " | ";
+        }
+        std::cout << std::endl;
+    }
+
+    // Second pass: the same table as floats, with NaN entries spelled out
+    // explicitly instead of relying on the stream's own formatting of NaN.
+    std::cout << "\nParsed CSV (floats):" << std::endl;
+    const auto numericRecords = parser.getFloatRows();
+    for (const auto& record : numericRecords) {
+        for (const float number : record) {
+            if (std::isnan(number)) {
+                std::cout << "NaN";
+            } else {
+                std::cout << number;
+            }
+            std::cout << " | ";
+        }
+        std::cout << std::endl;
+    }
+    return 0;
+}
+#endif
+/*
+### Explanation
+1. **Constructor & Members**  
+   The constructor initializes the parser with the file path, the delimiter, and the flag that controls whether the header line is skipped. The internal data structure (`rows`) is a vector of vectors of strings that holds the CSV data.
+2. **`parse()` Method**  
+   This method opens the file and reads it line by line. For each line, it calls the helper function `parseLine` and stores the resulting vector of tokens. If the file cannot be opened, it outputs an error message.
+3. **`parseLine()` Method**  
+   This helper method goes through the string character by character. It toggles a flag when it encounters quotes so that delimiters inside quotes are treated as literal characters. It also supports escaping a double quote by using two consecutive double quotes.
+4. **`main()` Function**  
+   In the `main()` function, an instance of `CSVParser` is created. After parsing, it prints each field separated by a bar (`|`) for clarity. This demonstration shows how to use the class; you’d replace `"example.csv"` with your actual file path.
+This example should serve as a solid starting point. Depending on the CSV complexity, you might need to extend the functionality to handle things like newline characters inside quoted fields or different escape mechanisms.
+If you’re intrigued by how CSV parsing varies with different formats or want to explore more advanced parsing techniques—perhaps using regular expressions, state machines, or even third-party libraries like [CSV for C++](https://github.com/ben-strasser/fast-cpp-csv-parser)—I’d be happy to dive deeper into those topics. Enjoy coding!
+*/
+/*
+---
+### How It Works
+1. **Skipping the Header:**  
+    The constructor now accepts a third parameter, `bool skipHeader`. In the `parse()` function, if `skipHeader` is true, we first perform a `std::getline` to skip over the header before processing the rest of the file.
+2. **Converting Fields to Floats:**  
+    The new function `getFloatRows()` iterates over each row and each field (stored as a string). It attempts to convert each field using `std::stof`. If conversion throws an exception (either because the field is not a valid number or it is out of range), a NaN is inserted instead. This way, the function always returns a consistent vector of float values for further processing.
+---
+### Further Ideas
+- **Enhanced Error Handling:**  
+    You might want to add logging or more sophisticated error handling to report which fields failed conversion.
+- **Custom Conversion:**  
+    Sometimes CSV files include cells that are empty strings or other special tokens. You could extend the conversion process to handle these cases gracefully.
+- **Supporting Different Data Types:**  
+    Consider adding more conversion functions (e.g., to int, double, or even date/time types) that can process certain columns.
+- **Template-Based Parsing:**  
+    For a more robust solution, especially with large CSV files or varying types, you might employ template-based functions or even leverage existing CSV libraries that offer advanced features like multi-threading or schema-based parsing.
+This code provides a good foundation—and from here, you can fine-tune it to suit the specific needs of your projects. Enjoy exploring!
+*/