From 90596b0dc1e3216e59a5c7c30afee9feb9267270 Mon Sep 17 00:00:00 2001 From: Diogo Ribeiro Date: Fri, 10 Apr 2026 14:55:53 +0100 Subject: [PATCH 1/2] Add perceptron classifier to machine learning section with documentation --- DIRECTORY.md | 5 + documentation/perceptron.md | 25 +++++ machine_learning/perceptron.r | 167 ++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 documentation/perceptron.md create mode 100644 machine_learning/perceptron.r diff --git a/DIRECTORY.md b/DIRECTORY.md index a23287b5..7ad001e3 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -80,6 +80,11 @@ ## Machine Learning * [Gradient Boosting](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/gradient_boosting.r) + * [K-NN](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/k-NN.r) + * [K-Medoids](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/k_medoids.r) + * [LSTM Time Series](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/lstm_time_series.r) + * [Naive Bayes](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/naive_bayes.r) + * [Perceptron](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/perceptron.r) ## Mathematics diff --git a/documentation/perceptron.md b/documentation/perceptron.md new file mode 100644 index 00000000..eef34221 --- /dev/null +++ b/documentation/perceptron.md @@ -0,0 +1,25 @@ +# Perceptron + +A simple linear classifier using the perceptron learning rule. This implementation supports binary and multiclass classification using one-vs-rest updates. + +``` r +library(R6) +source("../machine_learning/perceptron.r") + +# example data for binary classification +X <- matrix(c( + 0.1, 0.3, + 0.2, 0.1, + 0.9, 0.8, + 0.7, 0.9 +), ncol = 2, byrow = TRUE) + +y <- factor(c("class1", "class1", "class2", "class2")) + +model <- Perceptron$new(learning_rate = 0.1, n_epochs = 20, shuffle = FALSE, random_state = 42) +model$fit(X, y) + +predictions <- model$predict(X) +print(predictions) +print(model$score(X, y)) +``` diff --git a/machine_learning/perceptron.r b/machine_learning/perceptron.r new file mode 100644 index 00000000..d3f70578 --- /dev/null +++ b/machine_learning/perceptron.r @@ -0,0 +1,167 @@ +# perceptron.r +# Perceptron classifier implementation in R +# A simple linear classifier using the perceptron learning rule. +# Supports binary and multiclass classification via one-vs-rest updates. +# Time Complexity: O(n_epochs * n_samples * n_features) +# Space Complexity: O(n_classes * n_features) + +library(R6) + +Perceptron <- R6Class("Perceptron", + public = list( + learning_rate = NULL, + n_epochs = NULL, + shuffle = NULL, + fit_intercept = NULL, + random_state = NULL, + classes = NULL, + weights = NULL, + bias = NULL, + is_multiclass = NULL, + + initialize = function(learning_rate = 0.1, + n_epochs = 100, + shuffle = TRUE, + fit_intercept = TRUE, + random_state = NULL) { + self$learning_rate <- learning_rate + self$n_epochs <- n_epochs + self$shuffle <- shuffle + self$fit_intercept <- fit_intercept + self$random_state <- random_state + }, + + fit = function(X, y) { + if (is.data.frame(X)) X <- as.matrix(X) + if (!is.matrix(X)) stop("X must be a numeric matrix or data.frame.") + if (!is.numeric(X)) stop("X must contain numeric features.") + if (any(is.na(X))) stop("X must not contain missing values.") + + if (is.character(y)) y <- factor(y) + if (is.factor(y)) { + self$classes <- levels(y) + } else { + self$classes <- sort(unique(y)) + } + + if (length(y) != nrow(X)) stop("Length of y must match rows of X.") + if (length(self$classes) < 2) stop("Perceptron requires at least two classes.") + + X <- as.matrix(X) + n_samples <- nrow(X) + n_features <- ncol(X) + if (self$fit_intercept) { + X <- cbind(1, X) + n_features <- n_features + 1 + } + + if (length(self$classes) == 2) { + self$is_multiclass <- FALSE + self$weights <- rep(0, n_features) + self$bias <- 0 + } else { + self$is_multiclass <- TRUE + self$weights <- matrix(0, nrow = length(self$classes), ncol = n_features) + self$bias <- rep(0, length(self$classes)) + } + + if (!is.null(self$random_state)) { + set.seed(self$random_state) + } + + y_encoded <- self$encode_labels(y) + + for (epoch in seq_len(self$n_epochs)) { + indices <- seq_len(n_samples) + if (self$shuffle) { + indices <- sample(indices) + } + + for (i in indices) { + x_i <- X[i, ] + y_i <- y_encoded[i] + + if (self$is_multiclass) { + scores <- self$weights %*% x_i + predicted <- which.max(scores) + if (predicted != y_i) { + self$weights[y_i, ] <- self$weights[y_i, ] + self$learning_rate * x_i + self$weights[predicted, ] <- self$weights[predicted, ] - self$learning_rate * x_i + } + } else { + score <- sum(self$weights * x_i) + self$bias + if (y_i * score <= 0) { + self$weights <- self$weights + self$learning_rate * y_i * x_i + self$bias <- self$bias + self$learning_rate * y_i + } + } + } + } + + invisible(self) + }, + + predict = function(X_new) { + if (is.data.frame(X_new)) X_new <- as.matrix(X_new) + if (is.vector(X_new)) X_new <- matrix(X_new, nrow = 1) + if (!is.matrix(X_new)) stop("X_new must be a numeric matrix, data.frame, or vector.") + if (!is.numeric(X_new)) stop("X_new must contain numeric features.") + if (any(is.na(X_new))) stop("X_new must not contain missing values.") + + if (self$fit_intercept) { + X_new <- cbind(1, X_new) + } + + if (self$is_multiclass) { + scores <- X_new %*% t(self$weights) + predicted_idx <- apply(scores, 1, which.max) + return(self$classes[predicted_idx]) + } + + raw_scores <- as.numeric(X_new %*% self$weights + self$bias) + if (is.factor(self$classes)) { + labels <- c(self$classes[1], self$classes[2]) + } else { + labels <- self$classes + } + predictions <- ifelse(raw_scores >= 0, labels[2], labels[1]) + return(predictions) + }, + + score = function(X, y) { + predictions <- self$predict(X) + if (is.factor(y) || is.character(y)) { + y <- as.character(y) + predictions <- as.character(predictions) + } + mean(predictions == y) + }, + + encode_labels = function(y) { + if (self$is_multiclass) { + if (is.factor(y)) { + return(as.integer(y)) + } + return(match(y, self$classes)) + } + + if (is.factor(y)) { + y <- as.character(y) + } + labels <- sort(unique(y)) + if (length(labels) != 2) stop("Binary perceptron requires exactly two classes.") + self$classes <- labels + y_bin <- ifelse(y == labels[2], 1, -1) + return(y_bin) + } + ) +) + +# Example usage: +# data(iris) +# X <- as.matrix(iris[, 1:4]) +# y <- iris$Species +# model <- Perceptron$new(learning_rate = 0.1, n_epochs = 50, shuffle = TRUE) +# model$fit(X, y) +# preds <- model$predict(X) +# cat('Training accuracy:', model$score(X, y), '\n') From 49cdc59d5a8a702b3653736d1f250775c6adbdf5 Mon Sep 17 00:00:00 2001 From: Diogo Ribeiro Date: Fri, 10 Apr 2026 16:02:05 +0100 Subject: [PATCH 2/2] fix: address review comments on intercept/labels and align docs/directory --- DIRECTORY.md | 4 ---- documentation/perceptron.md | 2 +- machine_learning/perceptron.r | 34 +++++++++++++++++++--------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/DIRECTORY.md b/DIRECTORY.md index 7ad001e3..7f900dac 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -80,10 +80,6 @@ ## Machine Learning * [Gradient Boosting](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/gradient_boosting.r) - * [K-NN](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/k-NN.r) - * [K-Medoids](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/k_medoids.r) - * [LSTM Time Series](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/lstm_time_series.r) - * [Naive Bayes](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/naive_bayes.r) * [Perceptron](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/perceptron.r) diff --git a/documentation/perceptron.md b/documentation/perceptron.md index eef34221..a4649fb1 100644 --- a/documentation/perceptron.md +++ b/documentation/perceptron.md @@ -1,6 +1,6 @@ # Perceptron -A simple linear classifier using the perceptron learning rule. This implementation supports binary and multiclass classification using one-vs-rest updates. +A simple linear classifier using the perceptron learning rule. This implementation supports binary classification and multiclass classification with direct multiclass perceptron updates. ``` r library(R6) diff --git a/machine_learning/perceptron.r b/machine_learning/perceptron.r index d3f70578..a543cb40 100644 --- a/machine_learning/perceptron.r +++ b/machine_learning/perceptron.r @@ -1,7 +1,8 @@ # perceptron.r # Perceptron classifier implementation in R # A simple linear classifier using the perceptron learning rule. -# Supports binary and multiclass classification via one-vs-rest updates. +# Supports binary classification and multiclass classification +# using direct multiclass perceptron updates. # Time Complexity: O(n_epochs * n_samples * n_features) # Space Complexity: O(n_classes * n_features) @@ -16,7 +17,6 @@ Perceptron <- R6Class("Perceptron", random_state = NULL, classes = NULL, weights = NULL, - bias = NULL, is_multiclass = NULL, initialize = function(learning_rate = 0.1, @@ -58,11 +58,9 @@ Perceptron <- R6Class("Perceptron", if (length(self$classes) == 2) { self$is_multiclass <- FALSE self$weights <- rep(0, n_features) - self$bias <- 0 } else { self$is_multiclass <- TRUE self$weights <- matrix(0, nrow = length(self$classes), ncol = n_features) - self$bias <- rep(0, length(self$classes)) } if (!is.null(self$random_state)) { @@ -89,10 +87,9 @@ Perceptron <- R6Class("Perceptron", self$weights[predicted, ] <- self$weights[predicted, ] - self$learning_rate * x_i } } else { - score <- sum(self$weights * x_i) + self$bias + score <- sum(self$weights * x_i) if (y_i * score <= 0) { self$weights <- self$weights + self$learning_rate * y_i * x_i - self$bias <- self$bias + self$learning_rate * y_i } } } @@ -118,12 +115,8 @@ Perceptron <- R6Class("Perceptron", return(self$classes[predicted_idx]) } - raw_scores <- as.numeric(X_new %*% self$weights + self$bias) - if (is.factor(self$classes)) { - labels <- c(self$classes[1], self$classes[2]) - } else { - labels <- self$classes - } + raw_scores <- as.numeric(X_new %*% self$weights) + labels <- self$classes predictions <- ifelse(raw_scores >= 0, labels[2], labels[1]) return(predictions) }, @@ -148,9 +141,20 @@ Perceptron <- R6Class("Perceptron", if (is.factor(y)) { y <- as.character(y) } - labels <- sort(unique(y)) - if (length(labels) != 2) stop("Binary perceptron requires exactly two classes.") - self$classes <- labels + labels <- self$classes + if (is.factor(labels)) { + labels <- as.character(labels) + } + if (is.null(labels) || length(labels) == 0) { + labels <- unique(y) + if (length(labels) != 2) stop("Binary perceptron requires exactly two classes.") + self$classes <- labels + } else { + if (length(labels) != 2) stop("Binary perceptron requires exactly two classes.") + } + if (any(!y %in% labels)) { + stop("Binary perceptron received labels not present in self$classes.") + } y_bin <- ifelse(y == labels[2], 1, -1) return(y_bin) }