const std = @import("std");
const File = std.fs.File;
const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;

/// Size in bytes of the reader's internal read buffer.
const BUFSIZE = 1024;

/// Streaming CSV reader that yields one record per call to `next`.
///
/// Parsing rules implemented here:
///   * `,` separates fields and `\n` terminates a record. A `\r` before the
///     `\n` is not stripped; it stays part of the last field.
///   * `"` toggles a quoted section in which `,` and `\n` are ordinary bytes.
///   * A `"` that directly follows a closing `"` is an escaped quote: it adds
///     one literal `"` to the field and re-enters the quoted section.
///   * Lines with no bytes at all are skipped.
///
/// The reader does not open or close `file`; the caller keeps ownership of it.
pub const CSVReader = struct {
    file: File,
    /// Set once a read returns zero bytes; `next` returns null from then on.
    done: bool,
    buf: [BUFSIZE]u8,
    /// The index in `buf` of the first unprocessed byte.
    buffered_start: usize,
    /// The number of bytes read but unprocessed.
    buffered_len: usize,
    /// Allocator used for every record and field returned by `next`.
    allocator: Allocator,

    /// Creates a reader over `file` that allocates records with `alloc`.
    /// Nothing is read until the first call to `next`.
    pub fn new(file: File, alloc: Allocator) CSVReader {
        return CSVReader{
            .file = file,
            .done = false,
            .buf = undefined,
            .buffered_start = 0,
            .buffered_len = 0,
            .allocator = alloc,
        };
    }

    /// Reads and returns the next record, or null once the input is exhausted.
    ///
    /// The caller owns the returned slice and every field slice inside it; all
    /// of them were allocated with the allocator passed to `new`. Release them
    /// with `freeRecord`.
    ///
    /// Errors: allocation failures and any error reported while reading the
    /// file. On error, everything allocated for the partial record is freed.
    /// If the input ends inside a quoted section, the bytes gathered so far
    /// are returned as the final field.
    pub fn next(self: *CSVReader) !?[][]u8 {
        if (self.done) return null;

        var record = ArrayList([]u8).init(self.allocator);
        // Both lists are empty after a successful `toOwnedSlice`, so these
        // unconditional deinits are no-ops on the success path.
        defer record.deinit();
        // Runs before the `defer` above: free the fields the record already owns.
        errdefer {
            for (record.items) |item| self.allocator.free(item);
        }
        var field = ArrayList(u8).init(self.allocator);
        defer field.deinit();

        var in_quotes = false;
        // True only for the byte directly after a quote that closed a quoted section.
        var after_closing_quote = false;
        // True once any byte of the current record (other than its terminator)
        // has been seen. Distinguishes a blank line from a line such as `""`.
        var saw_content = false;

        // Read until EOF or the end of a record.
        while (true) {
            // Process buffered data.
            const pending = self.buf[self.buffered_start..][0..self.buffered_len];
            for (pending, 0..) |byte, i| {
                if (byte == '"') {
                    saw_content = true;
                    if (in_quotes) {
                        // Either the end of the quoted section or the first half
                        // of an escaped quote; the next byte decides.
                        in_quotes = false;
                        after_closing_quote = true;
                    } else {
                        // A quote right after a closing quote is an escaped quote.
                        if (after_closing_quote) try field.append(byte);
                        in_quotes = true;
                        after_closing_quote = false;
                    }
                    continue;
                }
                after_closing_quote = false;

                if (in_quotes) {
                    // All bytes are regular inside quotes.
                    try field.append(byte);
                } else if (byte == '\n') {
                    // Blank line: skip it instead of ending iteration.
                    if (!saw_content) continue;

                    // Advance past the newline so that whatever is left in the
                    // buffer is used for the next record instead of skipped.
                    self.buffered_start += i + 1;
                    self.buffered_len -= i + 1;

                    try commitField(&record, &field);
                    return try record.toOwnedSlice();
                } else if (byte == ',') {
                    // End of field.
                    saw_content = true;
                    try commitField(&record, &field);
                } else {
                    // Part of field.
                    saw_content = true;
                    try field.append(byte);
                }
            }

            // All buffered data is processed and the record has not ended.
            // Mark the buffer empty first so a failed read leaves no stale bytes.
            self.buffered_start = 0;
            self.buffered_len = 0;
            self.buffered_len = try self.file.readAll(&self.buf);

            // There's no more.
            if (self.buffered_len == 0) {
                self.done = true;
                // Nothing was read for this record: the input simply ended.
                if (!saw_content) return null;
                // Otherwise complete the last field and return the record.
                try commitField(&record, &field);
                return try record.toOwnedSlice();
            }
        }
    }

    /// Frees a record previously returned by `next`, including all its fields.
    pub fn freeRecord(self: *const CSVReader, record: [][]u8) void {
        for (record) |item| self.allocator.free(item);
        self.allocator.free(record);
    }

    /// Moves the bytes gathered in `field` into `record` as a new owned slice,
    /// leaving `field` empty.
    fn commitField(record: *ArrayList([]u8), field: *ArrayList(u8)) !void {
        // Reserve the slot first so the append cannot fail after ownership of
        // the field bytes has been taken (which would leak them).
        try record.ensureUnusedCapacity(1);
        record.appendAssumeCapacity(try field.toOwnedSlice());
    }
};