Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ All notable changes to this project will be documented in this file.
result := df.Filter("bad_column", "==", 1)
```

- **`Head` and `Tail` panic on time columns** — The internal `slice()` function had no `TimeType` case, causing it to fall through to the error path on any DataFrame that contained a time column. Added the missing `case TimeType:` branch, consistent with the pattern already used in `selectRows()`.
- **`Head` and `Tail` panic on time columns** — The internal `slice()` function had no `TimeType` case, causing it to fall through to the error path on any DataFrame that contained a time column. Added the missing `case TimeType:` branch, consistent with the pattern already used in `SelectRows()`.

- **GroupBy key collisions on pipe characters** — Group keys were built by joining column values with `"|"`, meaning a value containing that character (e.g. `"a|b"`) would be indistinguishable from two separate values (`"a"` and `"b"`) in a multi-column group. Keys are now length-prefixed (`"3:foo"`) and separated by a null byte, making them unambiguous for all string content. The original values are stored directly on the group struct so no parsing of the key is needed when building the result.

Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

_Smooth, intelligent data processing for Go._

Otters is a high-performance DataFrame library for Go, inspired by Pandas but designed for Go's strengths: type safety, performance, and simplicity.
Otters is a high-performance DataFrame library for Go, inspired by Pandas but designed for Go's
strengths: type safety, performance, and simplicity.

[![Go Version](https://img.shields.io/badge/go-1.19+-blue.svg)](https://golang.org)
[![Go Report Card](https://goreportcard.com/badge/github.com/datumbrain/otters)](https://goreportcard.com/report/github.com/datumbrain/otters)
Expand Down Expand Up @@ -180,7 +181,7 @@ df, err := otters.ReadCSVWithOptions("data.csv", otters.CSVOptions{
})

// From data
df, err := otters.NewDataFrameFromMap(map[string]interface{}{
df, err := otters.NewDataFrameFromMap(map[string]any{
"name": []string{"Alice", "Bob", "Carol"},
"age": []int64{25, 30, 35},
"salary": []float64{50000, 60000, 70000},
Expand Down
14 changes: 7 additions & 7 deletions csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func ReadCSVWithOptions(filename string, options CSVOptions) (*DataFrame, error)
}

func skipRows(reader *csv.Reader, skipCount int, operation string) error {
for i := 0; i < skipCount; i++ {
for range skipCount {
if _, err := reader.Read(); err != nil {
if err == io.EOF {
return nil
Expand Down Expand Up @@ -112,7 +112,7 @@ func readCSVWithoutHeaders(reader *csv.Reader, options CSVOptions, operation str

func generateHeaders(count int) []string {
headers := make([]string, count)
for i := 0; i < count; i++ {
for i := range count {
headers[i] = fmt.Sprintf("Column_%d", i)
}
return headers
Expand All @@ -137,7 +137,7 @@ func readDataRows(reader *csv.Reader, headers []string, options CSVOptions, oper
}

rows = append(rows, row)
rowCount++
rowCount += 1

if options.MaxRows > 0 && rowCount >= options.MaxRows {
break
Expand Down Expand Up @@ -181,7 +181,7 @@ func (df *DataFrame) WriteCSVWithOptions(filename string, options CSVOptions) er
}

// Write data rows
for i := 0; i < df.length; i++ {
for i := range df.length {
var row []string
for _, colName := range df.order {
value, err := df.columns[colName].Get(i)
Expand Down Expand Up @@ -289,7 +289,7 @@ func buildDataFrameFromRows(headers []string, rows [][]string) (*DataFrame, erro
}

// convertStringSliceToType converts a slice of strings to the specified type
func convertStringSliceToType(values []string, targetType ColumnType) (interface{}, error) {
func convertStringSliceToType(values []string, targetType ColumnType) (any, error) {
switch targetType {
case StringType:
// Return a copy to avoid external modification
Expand Down Expand Up @@ -367,7 +367,7 @@ func cleanHeader(header string) string {
}

// formatValueForCSV formats a value for CSV output
func formatValueForCSV(value interface{}) string {
func formatValueForCSV(value any) string {
switch v := value.(type) {
case string:
return v
Expand Down Expand Up @@ -479,7 +479,7 @@ func ValidateCSV(filename string) (*CSVInfo, error) {
info.Rows+1, info.Columns, len(row)))
}

info.Rows++
info.Rows += 1
}

return info, nil
Expand Down
6 changes: 3 additions & 3 deletions csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Bob,30`
}

func TestWriteCSVEdgeCases(t *testing.T) {
data := map[string]interface{}{
data := map[string]any{
"col1": []int64{1, 2, 3},
"col2": []string{"a", "b", "c"},
}
Expand Down Expand Up @@ -398,7 +398,7 @@ func TestCSV_WriteCSV_PropagatesDataFrameError(t *testing.T) {
}

func TestCSV_WriteCSVWithOptions_WritesFile(t *testing.T) {
data := map[string]interface{}{
data := map[string]any{
"col1": []int64{1, 2, 3},
"col2": []float64{1.1, 2.2, 3.3},
"col3": []bool{true, false, true},
Expand All @@ -420,7 +420,7 @@ func TestCSV_WriteCSVWithOptions_WritesFile(t *testing.T) {

func TestCSV_WriteCSV_TimeColumn_WritesFile(t *testing.T) {
tm := time.Date(2023, 1, 1, 12, 30, 0, 0, time.UTC)
data := map[string]interface{}{
data := map[string]any{
"col1": []time.Time{tm, tm},
}
df, _ := NewDataFrameFromMap(data)
Expand Down
21 changes: 9 additions & 12 deletions df.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func NewDataFrameFromSeries(series ...*Series) (*DataFrame, error) {
}

// NewDataFrameFromMap creates a DataFrame from a map of column data
func NewDataFrameFromMap(data map[string]interface{}) (*DataFrame, error) {
func NewDataFrameFromMap(data map[string]any) (*DataFrame, error) {
if len(data) == 0 {
return NewDataFrame(), nil
}
Expand Down Expand Up @@ -157,7 +157,7 @@ func (df *DataFrame) Tail(n int) *DataFrame {
}

// Get returns the value at the specified row and column
func (df *DataFrame) Get(row int, column string) (interface{}, error) {
func (df *DataFrame) Get(row int, column string) (any, error) {
if df.err != nil {
return nil, df.err
}
Expand All @@ -174,7 +174,7 @@ func (df *DataFrame) Get(row int, column string) (interface{}, error) {
}

// Set updates the value at the specified row and column
func (df *DataFrame) Set(row int, column string, value interface{}) error {
func (df *DataFrame) Set(row int, column string, value any) error {
if df.err != nil {
return df.err
}
Expand Down Expand Up @@ -332,12 +332,9 @@ func (df *DataFrame) String() string {
sb.WriteString("\n")

// Write data (show first 10 rows max for display)
maxRows := df.length
if maxRows > 10 {
maxRows = 10
}
maxRows := min(df.length, 10)

for i := 0; i < maxRows; i++ {
for i := range maxRows {
var row []string
for _, colName := range df.order {
value, _ := df.columns[colName].Get(i)
Expand All @@ -348,7 +345,7 @@ func (df *DataFrame) String() string {
}

if df.length > 10 {
sb.WriteString(fmt.Sprintf("... (%d more rows)\n", df.length-10))
fmt.Fprintf(&sb, "... (%d more rows)\n", df.length-10)
}

return sb.String()
Expand All @@ -362,12 +359,12 @@ func (df *DataFrame) Info() string {

var sb strings.Builder
sb.WriteString("DataFrame Info:\n")
sb.WriteString(fmt.Sprintf(" Shape: (%d, %d)\n", df.length, len(df.columns)))
fmt.Fprintf(&sb, " Shape: (%d, %d)\n", df.length, len(df.columns))
sb.WriteString(" Columns:\n")

for _, colName := range df.order {
series := df.columns[colName]
sb.WriteString(fmt.Sprintf(" %s: %s\n", colName, series.Type.String()))
fmt.Fprintf(&sb, " %s: %s\n", colName, series.Type.String())
}

return sb.String()
Expand All @@ -394,7 +391,7 @@ func (df *DataFrame) slice(start, end int, operation string) *DataFrame {

for _, colName := range df.order {
series := df.columns[colName]
var newData interface{}
var newData any

// Slice the appropriate data type
switch series.Type {
Expand Down
Loading