From bf464a264a33a6cd8623b07128394850af3a37da Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Wed, 27 Aug 2025 18:36:24 +0300 Subject: [PATCH 1/9] add ligr layers --- .../models/nn/transformers/ligr_layers.py | 177 ++++++++++++++++++ rectools/models/nn/transformers/net_blocks.py | 96 +++++++++- tests/models/nn/transformers/test_sasrec.py | 44 +++++ 3 files changed, 314 insertions(+), 3 deletions(-) create mode 100644 rectools/models/nn/transformers/ligr_layers.py diff --git a/rectools/models/nn/transformers/ligr_layers.py b/rectools/models/nn/transformers/ligr_layers.py new file mode 100644 index 00000000..cb18b4fd --- /dev/null +++ b/rectools/models/nn/transformers/ligr_layers.py @@ -0,0 +1,177 @@ +import typing as tp + +import torch +from torch import nn + +from rectools.models.nn.transformers.net_blocks import TransformerLayersBase + +from .net_blocks import init_feed_forward + + +class LiGRLayer(nn.Module): + """ + Transformer Layer as described in "From Features to Transformers: + Redefining Ranking for Scalable Impact" https://arxiv.org/pdf/2502.03417 + + Parameters + ---------- + n_factors: int + Latent embeddings size. + n_heads: int + Number of attention heads. + dropout_rate: float + Probability of a hidden unit to be zeroed. + ff_factors_multiplier: int, default 4 + Feed-forward layers latent embedding size multiplier. + bias_in_ff: bool, default ``False`` + Add bias in Linear layers of Feed Forward + ff_activation: {"swiglu", "relu", "gelu"}, default "swiglu" + Activation function to use. + """ + + def __init__( + self, + n_factors: int, + n_heads: int, + dropout_rate: float, + ff_factors_multiplier: int = 4, + bias_in_ff: bool = False, + ff_activation: str = "swiglu", + ): + super().__init__() + self.multi_head_attn = nn.MultiheadAttention(n_factors, n_heads, dropout_rate, batch_first=True) + self.layer_norm_1 = nn.LayerNorm(n_factors) + self.dropout_1 = nn.Dropout(dropout_rate) + self.layer_norm_2 = nn.LayerNorm(n_factors) + self.feed_forward = init_feed_forward(n_factors, ff_factors_multiplier, dropout_rate, ff_activation, bias_in_ff) + self.dropout_2 = nn.Dropout(dropout_rate) + + self.gating_linear_1 = nn.Linear(n_factors, n_factors) + self.gating_linear_2 = nn.Linear(n_factors, n_factors) + + def forward( + self, + seqs: torch.Tensor, + attn_mask: tp.Optional[torch.Tensor], + key_padding_mask: tp.Optional[torch.Tensor], + ) -> torch.Tensor: + """ + Forward pass through transformer block. + + Parameters + ---------- + seqs: torch.Tensor + User sequences of item embeddings. + attn_mask: torch.Tensor, optional + Optional mask to use in forward pass of multi-head attention as `attn_mask`. + key_padding_mask: torch.Tensor, optional + Optional mask to use in forward pass of multi-head attention as `key_padding_mask`. + + + Returns + ------- + torch.Tensor + User sequences passed through transformer layers. + """ + mha_input = self.layer_norm_1(seqs) + mha_output, _ = self.multi_head_attn( + mha_input, + mha_input, + mha_input, + attn_mask=attn_mask, + key_padding_mask=key_padding_mask, + need_weights=False, + ) + gated_skip = torch.nn.functional.sigmoid(self.gating_linear_1(seqs)) + seqs = seqs + torch.mul(gated_skip, self.dropout_1(mha_output)) + + ff_input = self.layer_norm_2(seqs) + ff_output = self.feed_forward(ff_input) + gated_skip = torch.nn.functional.sigmoid(self.gating_linear_2(seqs)) + seqs = seqs + torch.mul(gated_skip, self.dropout_2(ff_output)) + return seqs + + +class LiGRLayers(TransformerLayersBase): + """ + LiGR Transformer blocks. 
+ + Parameters + ---------- + n_blocks: int + Number of transformer blocks. + n_factors: int + Latent embeddings size. + n_heads: int + Number of attention heads. + dropout_rate: float + Probability of a hidden unit to be zeroed. + ff_factors_multiplier: int, default 4 + Feed-forward layers latent embedding size multiplier. Pass in ``transformer_layers_kwargs`` to override. + ff_activation: {"swiglu", "relu", "gelu"}, default "swiglu" + Activation function to use. Pass in ``transformer_layers_kwargs`` to override. + bias_in_ff: bool, default ``False`` + Add bias in Linear layers of Feed Forward. Pass in ``transformer_layers_kwargs`` to override. + """ + + def __init__( + self, + n_blocks: int, + n_factors: int, + n_heads: int, + dropout_rate: float, + ff_factors_multiplier: int = 4, + ff_activation: str = "swiglu", + bias_in_ff: bool = False, + ): + super().__init__() + self.n_blocks = n_blocks + self.n_factors = n_factors + self.n_heads = n_heads + self.dropout_rate = dropout_rate + self.ff_factors_multiplier = ff_factors_multiplier + self.ff_activation = ff_activation + self.bias_in_ff = bias_in_ff + self.transformer_blocks = nn.ModuleList([self._init_transformer_block() for _ in range(self.n_blocks)]) + + def _init_transformer_block(self) -> nn.Module: + return LiGRLayer( + self.n_factors, + self.n_heads, + self.dropout_rate, + self.ff_factors_multiplier, + bias_in_ff=self.bias_in_ff, + ff_activation=self.ff_activation, + ) + + def forward( + self, + seqs: torch.Tensor, + timeline_mask: torch.Tensor, + attn_mask: tp.Optional[torch.Tensor], + key_padding_mask: tp.Optional[torch.Tensor], + **kwargs: tp.Any, + ) -> torch.Tensor: + """ + Forward pass through transformer blocks. + + Parameters + ---------- + seqs: torch.Tensor + User sequences of item embeddings. + timeline_mask: torch.Tensor + Mask indicating padding elements. + attn_mask: torch.Tensor, optional + Optional mask to use in forward pass of multi-head attention as `attn_mask`. + key_padding_mask: torch.Tensor, optional + Optional mask to use in forward pass of multi-head attention as `key_padding_mask`. + + + Returns + ------- + torch.Tensor + User sequences passed through transformer layers. + """ + for block_idx in range(self.n_blocks): + seqs = self.transformer_blocks[block_idx](seqs, attn_mask, key_padding_mask) + return seqs diff --git a/rectools/models/nn/transformers/net_blocks.py b/rectools/models/nn/transformers/net_blocks.py index 7e56256a..d16da734 100644 --- a/rectools/models/nn/transformers/net_blocks.py +++ b/rectools/models/nn/transformers/net_blocks.py @@ -33,14 +33,18 @@ class PointWiseFeedForward(nn.Module): Probability of a hidden unit to be zeroed. activation: torch.nn.Module Activation function module. + bias: bool, default ``True`` + If ``True``, add bias to linear layers. 
""" - def __init__(self, n_factors: int, n_factors_ff: int, dropout_rate: float, activation: torch.nn.Module) -> None: + def __init__( + self, n_factors: int, n_factors_ff: int, dropout_rate: float, activation: torch.nn.Module, bias: bool = True + ) -> None: super().__init__() - self.ff_linear_1 = nn.Linear(n_factors, n_factors_ff) + self.ff_linear_1 = nn.Linear(n_factors, n_factors_ff, bias) self.ff_dropout_1 = torch.nn.Dropout(dropout_rate) self.ff_activation = activation - self.ff_linear_2 = nn.Linear(n_factors_ff, n_factors) + self.ff_linear_2 = nn.Linear(n_factors_ff, n_factors, bias) def forward(self, seqs: torch.Tensor) -> torch.Tensor: """ @@ -61,6 +65,92 @@ def forward(self, seqs: torch.Tensor) -> torch.Tensor: return fin +class SwigluFeedForward(nn.Module): + """ + Feed-Forward network to introduce nonlinearity into the transformer model. + This implementation is based on FuXi and LLama SwigLU https://arxiv.org/pdf/2502.03036, + LiGR https://arxiv.org/pdf/2502.03417 + + Parameters + ---------- + n_factors : int + Latent embeddings size. + n_factors_ff : int + How many hidden units to use in the network. + dropout_rate : float + Probability of a hidden unit to be zeroed. + bias: bool, default ``True`` + If ``True``, add bias to linear layers. + """ + + def __init__(self, n_factors: int, n_factors_ff: int, dropout_rate: float, bias: bool = True) -> None: + super().__init__() + self.ff_linear_1 = nn.Linear(n_factors, n_factors_ff, bias=bias) + self.ff_dropout_1 = torch.nn.Dropout(dropout_rate) + self.ff_activation = torch.nn.SiLU() + self.ff_linear_2 = nn.Linear(n_factors_ff, n_factors, bias=bias) + self.ff_linear_3 = nn.Linear(n_factors, n_factors_ff, bias=bias) + + def forward(self, seqs: torch.Tensor) -> torch.Tensor: + """ + Forward pass. + + Parameters + ---------- + seqs : torch.Tensor + User sequences of item embeddings. + + Returns + ------- + torch.Tensor + User sequence that passed through all layers. + """ + output = self.ff_activation(self.ff_linear_1(seqs)) * self.ff_linear_3(seqs) + fin = self.ff_linear_2(self.ff_dropout_1(output)) + return fin + + +def init_feed_forward( + n_factors: int, ff_factors_multiplier: int, dropout_rate: float, ff_activation: str, bias: bool = True +) -> nn.Module: + """ + Initialise Feed-Forward network with one of activation functions: "swiglu", "relu", "gelu". + + Parameters + ---------- + n_factors : int + Latent embeddings size. + ff_factors_multiplier : int + How many hidden units to use in the network. + dropout_rate : float + Probability of a hidden unit to be zeroed. + ff_activation : {"swiglu", "relu", "gelu"} + Activation function to use. + bias: bool, default ``True`` + If ``True``, add bias to linear layers. + + Returns + ------- + nn.Module + Feed-Forward network. 
+ """ + if ff_activation == "swiglu": + return SwigluFeedForward(n_factors, n_factors * ff_factors_multiplier, dropout_rate, bias=bias) + if ff_activation == "gelu": + return PointWiseFeedForward( + n_factors, n_factors * ff_factors_multiplier, dropout_rate, activation=torch.nn.GELU(), bias=bias + ) + if ff_activation == "relu": + return PointWiseFeedForward( + n_factors, + n_factors * ff_factors_multiplier, + dropout_rate, + activation=torch.nn.ReLU(), + bias=bias, + ) + raise ValueError(f"Unsupported ff_activation: {ff_activation}") + + class TransformerLayersBase(nn.Module): """Base class for transformer layers.""" diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index d1bd5911..1aa9c501 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -33,6 +33,7 @@ TrainerCallable, TransformerLightningModule, ) +from rectools.models.nn.transformers.ligr_layers import LiGRLayers from rectools.models.nn.transformers.negative_sampler import CatalogUniformSampler from rectools.models.nn.transformers.sasrec import SASRecDataPreparator, SASRecTransformerLayers from rectools.models.nn.transformers.similarity import DistanceSimilarityModule @@ -759,6 +760,49 @@ def test_torch_model(self, dataset: Dataset) -> None: model.fit(dataset) assert isinstance(model.torch_model, TransformerTorchBackbone) + @pytest.mark.parametrize( + "filter_viewed,expected", + ( + ( + True, + pd.DataFrame( + { + Columns.User: [10, 10, 30, 30, 30, 40, 40, 40], + Columns.Item: [17, 15, 17, 14, 13, 12, 14, 13], + Columns.Rank: [1, 2, 1, 2, 3, 1, 2, 3], + } + ), + ), + ( + False, + pd.DataFrame( + { + Columns.User: [10, 10, 10, 30, 30, 30, 40, 40, 40], + Columns.Item: [12, 17, 11, 12, 11, 17, 12, 17, 11], + Columns.Rank: [1, 2, 3, 1, 2, 3, 1, 2, 3], + } + ), + ), + ), + ) + def test_ligr_layers(self, dataset: Dataset, filter_viewed: bool, expected: pd.DataFrame) -> None: + model = SASRecModel( + transformer_layers_type=LiGRLayers, + transformer_layers_kwargs={ + "ff_factors_multiplier": 1, + "ff_activation": "swiglu", + "bias_in_ff": True, + }, + ) + model.fit(dataset=dataset) + users = np.array([10, 30, 40]) + actual = model.recommend(users=users, dataset=dataset, k=3, filter_viewed=filter_viewed) + pd.testing.assert_frame_equal(actual.drop(columns=Columns.Score), expected) + pd.testing.assert_frame_equal( + actual.sort_values([Columns.User, Columns.Score], ascending=[True, False]).reset_index(drop=True), + actual, + ) + class TestSASRecDataPreparator: From 8466584c8b7d77da76825cb01664162385ae2cc4 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Thu, 28 Aug 2025 12:59:32 +0300 Subject: [PATCH 2/9] fix ligr test --- CHANGELOG.md | 5 +++++ tests/models/nn/transformers/test_sasrec.py | 13 ++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4ea80f7..eca3564d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## Unreleased + +### Added +- LiGR transformer layers from "From Features to Transformers: Redefining Ranking for Scalable Impact" () + ## [0.16.0] - 27.07.2025 ### Added diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index dad367ca..e7f4c41e 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -769,7 +769,7 @@ def test_torch_model(self, dataset: Dataset) -> None: pd.DataFrame( { Columns.User: [10, 10, 30, 30, 30, 40, 40, 40], - Columns.Item: [17, 15, 17, 14, 13, 12, 14, 13], + Columns.Item: [17, 15, 17, 13, 14, 13, 14, 12], Columns.Rank: [1, 2, 1, 2, 3, 1, 2, 3], } ), @@ -779,14 +779,20 @@ def test_torch_model(self, dataset: Dataset) -> None: pd.DataFrame( { Columns.User: [10, 10, 10, 30, 30, 30, 40, 40, 40], - Columns.Item: [12, 17, 11, 12, 11, 17, 12, 17, 11], + Columns.Item: [11, 13, 17, 11, 17, 13, 11, 17, 13], Columns.Rank: [1, 2, 3, 1, 2, 3, 1, 2, 3], } ), ), ), ) - def test_ligr_layers(self, dataset: Dataset, filter_viewed: bool, expected: pd.DataFrame) -> None: + def test_ligr_layers( + self, + dataset: Dataset, + filter_viewed: bool, + expected: pd.DataFrame, + get_trainer_func: TrainerCallable, + ) -> None: model = SASRecModel( transformer_layers_type=LiGRLayers, transformer_layers_kwargs={ @@ -794,6 +800,7 @@ def test_ligr_layers(self, dataset: Dataset, filter_viewed: bool, expected: pd.D "ff_activation": "swiglu", "bias_in_ff": True, }, + get_trainer_func=get_trainer_func, ) model.fit(dataset=dataset) users = np.array([10, 30, 40]) From 24ff361e34d4df87de8411be9f6963bf46bf6574 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Thu, 28 Aug 2025 15:43:53 +0300 Subject: [PATCH 3/9] fix ligr tests --- CHANGELOG.md | 2 +- tests/models/nn/transformers/test_sasrec.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eca3564d..5d948d3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added -- LiGR transformer layers from "From Features to Transformers: Redefining Ranking for Scalable Impact" () +- LiGR transformer layers from "From Features to Transformers: Redefining Ranking for Scalable Impact" ([#295](https://github.com/MobileTeleSystems/RecTools/pull/295)) ## [0.16.0] - 27.07.2025 diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index e7f4c41e..7bb274cb 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -768,9 +768,9 @@ def test_torch_model(self, dataset: Dataset) -> None: True, pd.DataFrame( { - Columns.User: [10, 10, 30, 30, 30, 40, 40, 40], - Columns.Item: [17, 15, 17, 13, 14, 13, 14, 12], - Columns.Rank: [1, 2, 1, 2, 3, 1, 2, 3], + Columns.User: [30, 30, 30, 40, 40, 40], + Columns.Item: [17, 13, 14, 13, 14, 12], + Columns.Rank: [1, 2, 3, 1, 2, 3], } ), ), @@ -778,9 +778,9 @@ def test_torch_model(self, dataset: Dataset) -> None: False, pd.DataFrame( { - Columns.User: [10, 10, 10, 30, 30, 30, 40, 40, 40], - Columns.Item: [11, 13, 17, 11, 17, 13, 11, 17, 13], - Columns.Rank: [1, 2, 3, 1, 2, 3, 1, 2, 3], + Columns.User: [30, 30, 30, 40, 40, 40], + Columns.Item: [11, 17, 13, 11, 17, 13], + Columns.Rank: [1, 2, 3, 1, 2, 3], } ), ), @@ -803,7 +803,7 @@ def test_ligr_layers( get_trainer_func=get_trainer_func, ) model.fit(dataset=dataset) - users = np.array([10, 30, 40]) + 
users = np.array([30, 40]) actual = model.recommend(users=users, dataset=dataset, k=3, filter_viewed=filter_viewed) pd.testing.assert_frame_equal(actual.drop(columns=Columns.Score), expected) pd.testing.assert_frame_equal( From c88fc7f16188fdebec6cd3d988da0f6ca7f32af8 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Thu, 28 Aug 2025 16:29:36 +0300 Subject: [PATCH 4/9] fix ligr tests --- tests/models/nn/transformers/test_sasrec.py | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index 7bb274cb..bf9db666 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -768,9 +768,9 @@ def test_torch_model(self, dataset: Dataset) -> None: True, pd.DataFrame( { - Columns.User: [30, 30, 30, 40, 40, 40], - Columns.Item: [17, 13, 14, 13, 14, 12], - Columns.Rank: [1, 2, 3, 1, 2, 3], + Columns.User: [10, 10, 30, 30, 30, 40, 40, 40], + Columns.Item: [17, 15, 17, 13, 14, 13, 12, 14], + Columns.Rank: [1, 2, 1, 2, 3, 1, 2, 3], } ), ), @@ -778,9 +778,9 @@ def test_torch_model(self, dataset: Dataset) -> None: False, pd.DataFrame( { - Columns.User: [30, 30, 30, 40, 40, 40], - Columns.Item: [11, 17, 13, 11, 17, 13], - Columns.Rank: [1, 2, 3, 1, 2, 3], + Columns.User: [10, 10, 10, 30, 30, 30, 40, 40, 40], + Columns.Item: [13, 11, 17, 11, 17, 12, 17, 11, 13], + Columns.Rank: [1, 2, 3, 1, 2, 3, 1, 2, 3], } ), ), @@ -801,9 +801,18 @@ def test_ligr_layers( "bias_in_ff": True, }, get_trainer_func=get_trainer_func, + n_factors=32, + n_blocks=2, + session_max_len=3, + lr=0.001, + batch_size=4, + epochs=2, + deterministic=True, + item_net_block_types=(IdEmbeddingsItemNet,), + similarity_module_type=DistanceSimilarityModule, ) model.fit(dataset=dataset) - users = np.array([30, 40]) + users = np.array([10, 30, 40]) actual = model.recommend(users=users, dataset=dataset, k=3, filter_viewed=filter_viewed) pd.testing.assert_frame_equal(actual.drop(columns=Columns.Score), expected) pd.testing.assert_frame_equal( From df6f79ee3ba101a175cda60dd34bb4b6c0d8e617 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Thu, 28 Aug 2025 16:44:13 +0300 Subject: [PATCH 5/9] remove ligr test --- tests/models/nn/transformers/test_sasrec.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index bf9db666..457ac778 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -774,16 +774,6 @@ def test_torch_model(self, dataset: Dataset) -> None: } ), ), - ( - False, - pd.DataFrame( - { - Columns.User: [10, 10, 10, 30, 30, 30, 40, 40, 40], - Columns.Item: [13, 11, 17, 11, 17, 12, 17, 11, 13], - Columns.Rank: [1, 2, 3, 1, 2, 3, 1, 2, 3], - } - ), - ), ), ) def test_ligr_layers( From b1c7f6596e1981e221043f2908612ed7e52ef849 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Thu, 28 Aug 2025 17:29:22 +0300 Subject: [PATCH 6/9] fix coverage --- tests/models/nn/transformers/test_sasrec.py | 27 +++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index 457ac778..9f09d4ed 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -762,9 +762,21 @@ def test_torch_model(self, dataset: Dataset) -> None: assert 
isinstance(model.torch_model, TransformerTorchBackbone) @pytest.mark.parametrize( - "filter_viewed,expected", + "activation,filter_viewed,expected,", ( ( + "swiglu", + True, + pd.DataFrame( + { + Columns.User: [10, 10, 30, 30, 30, 40, 40, 40], + Columns.Item: [17, 15, 17, 13, 14, 13, 12, 14], + Columns.Rank: [1, 2, 1, 2, 3, 1, 2, 3], + } + ), + ), + ( + "gelu", True, pd.DataFrame( { @@ -778,6 +790,7 @@ def test_torch_model(self, dataset: Dataset) -> None: ) def test_ligr_layers( self, + activation: str, dataset: Dataset, filter_viewed: bool, expected: pd.DataFrame, @@ -787,7 +800,7 @@ def test_ligr_layers( transformer_layers_type=LiGRLayers, transformer_layers_kwargs={ "ff_factors_multiplier": 1, - "ff_activation": "swiglu", + "ff_activation": activation, "bias_in_ff": True, }, get_trainer_func=get_trainer_func, @@ -810,6 +823,16 @@ def test_ligr_layers( actual, ) + def test_raises_when_activation_is_not_supported(self, dataset: Dataset) -> None: + model = SASRecModel( + transformer_layers_type=LiGRLayers, + transformer_layers_kwargs={ + "ff_activation": "not_supported_activation", + }, + ) + with pytest.raises(ValueError): + model.fit(dataset) + class TestSASRecDataPreparator: From 3509291d8a1867662547b783114319060871bf82 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Thu, 28 Aug 2025 21:50:19 +0300 Subject: [PATCH 7/9] full coverage --- tests/models/nn/transformers/test_sasrec.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index 9f09d4ed..bb3a881d 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -786,6 +786,17 @@ def test_torch_model(self, dataset: Dataset) -> None: } ), ), + ( + "relu", + True, + pd.DataFrame( + { + Columns.User: [10, 10, 30, 30, 30, 40, 40, 40], + Columns.Item: [17, 15, 17, 13, 14, 13, 12, 14], + Columns.Rank: [1, 2, 1, 2, 3, 1, 2, 3], + } + ), + ), ), ) def test_ligr_layers( From 716b5015846df08fc9593777d995901d78aadbf6 Mon Sep 17 00:00:00 2001 From: Spirina Majya Aleksandrovna Date: Sat, 30 Aug 2025 11:56:06 +0300 Subject: [PATCH 8/9] acm in readme --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index b383177b..368935b4 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,15 @@ faster than ever before. - In [HSTU tutorial](examples/tutorials/transformers_HSTU_tutorial.ipynb) we show that original metrics reported for HSTU on public Movielens datasets may actually be **underestimated** - Configurable, customizable, callback-friendly, checkpoints-included, logs-out-of-the-box, custom-validation-ready, multi-gpu-compatible! See [Transformers Advanced Training User Guide](examples/tutorials/transformers_advanced_training_guide.ipynb) and [Transformers Customization Guide](examples/tutorials/transformers_customization_guide.ipynb) + +## ✨ Highlights: RecTools framework at ACM RecSys'25 ✨ + +**RecTools implementations are featured in ACM RecSys'25: ["eSASRec: Enhancing Transformer-based Recommendations in a Modular Fashion"](https://www.arxiv.org/abs/2508.06450):** +- The article presents a systematic benchmark of Transformer modifications using RecTools models. 
It offers a detailed evaluation of training objectives, Transformer architectures, loss functions, and negative sampling strategies in realistic, production-like settings
+- We introduce a new SOTA baseline, **eSASRec**, which combines SASRec's training objective with LiGR Transformer layers and Sampled Softmax loss, forming a simple yet powerful recipe
+- **eSASRec** shows a 23% boost over SOTA models, such as ActionPiece, on academic benchmarks
+- [LiGR](https://arxiv.org/pdf/2502.03417) Transformer layers used in **eSASRec** are now available in RecTools
+ Please note that we always compare the quality of our implementations to academic papers' results. [Public benchmarks for transformer models SASRec and BERT4Rec](https://github.com/blondered/bert4rec_repro?tab=readme-ov-file#rectools-transformers-benchmark-results) show that RecTools implementations achieve the highest scores on multiple datasets compared to other published results.
From f1928a80520065825cd0a2041c41d056529b7aa3 Mon Sep 17 00:00:00 2001
From: Spirina Majya Aleksandrovna
Date: Tue, 2 Sep 2025 16:31:53 +0300
Subject: [PATCH 9/9] ligr_layers.py -> ligr.py
---
 README.md | 2 +-
 rectools/models/nn/transformers/{ligr_layers.py => ligr.py} | 0
 tests/models/nn/transformers/test_sasrec.py | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename rectools/models/nn/transformers/{ligr_layers.py => ligr.py} (100%)
diff --git a/README.md b/README.md
index 368935b4..8c58ccc7 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,7 @@ The table below lists recommender models that are available in RecTools.
 | Model | Type | Description (🎏 for user/item features, 🔆 for warm inference, ❄️ for cold inference support) | Tutorials & Benchmarks |
 |---------------------|----|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------|
 | HSTU | Neural Network | `rectools.models.HSTUModel` - Sequential model with unidirectional pointwise aggregated attention mechanism, incorporating relative attention bias from positional and temporal information, introduced in ["Actions speak louder than words..."](https://arxiv.org/pdf/2402.17152), combined with "Shifted Sequence" training objective as in original public benchmarks
🎏 | πŸ““ [HSTU Theory & Practice](examples/tutorials/transformers_HSTU_tutorial.ipynb)
πŸ“• [Transformers Theory & Practice](examples/tutorials/transformers_tutorial.ipynb)
πŸ“— [Advanced training guide](examples/tutorials/transformers_advanced_training_guide.ipynb)
πŸš€ [Top performance on public datasets](examples/tutorials/transformers_HSTU_tutorial.ipynb) -| SASRec | Neural Network | `rectools.models.SASRecModel` - Transformer-based sequential model with unidirectional attention mechanism and "Shifted Sequence" training objective
🎏 | πŸ“• [Transformers Theory & Practice](examples/tutorials/transformers_tutorial.ipynb)
πŸ“— [Advanced training guide](examples/tutorials/transformers_advanced_training_guide.ipynb)
πŸ“˜ [Customization guide](examples/tutorials/transformers_customization_guide.ipynb)
πŸš€ [Top performance on public benchmarks](https://github.com/blondered/bert4rec_repro?tab=readme-ov-file#rectools-transformers-benchmark-results) | +| SASRec | Neural Network | `rectools.models.SASRecModel` - Transformer-based sequential model with unidirectional attention mechanism and "Shifted Sequence" training objective.
For the eSASRec variant, specify `rectools.models.nn.transformers.ligr.LiGRLayers` for `transformer_layers_type` and `sampled_softmax` for `loss`.
🎏 | πŸ“• [Transformers Theory & Practice](examples/tutorials/transformers_tutorial.ipynb)
πŸ“— [Advanced training guide](examples/tutorials/transformers_advanced_training_guide.ipynb)
πŸ“˜ [Customization guide](examples/tutorials/transformers_customization_guide.ipynb)
πŸš€ [Top performance on public benchmarks](https://github.com/blondered/bert4rec_repro?tab=readme-ov-file#rectools-transformers-benchmark-results) | | BERT4Rec | Neural Network | `rectools.models.BERT4RecModel` - Transformer-based sequential model with bidirectional attention mechanism and "MLM" (masked item) training objective
🎏 | πŸ“• [Transformers Theory & Practice](examples/tutorials/transformers_tutorial.ipynb)
πŸ“— [Advanced training guide](examples/tutorials/transformers_advanced_training_guide.ipynb)
πŸ“˜ [Customization guide](examples/tutorials/transformers_customization_guide.ipynb)
πŸš€ [Top performance on public benchmarks](https://github.com/blondered/bert4rec_repro?tab=readme-ov-file#rectools-transformers-benchmark-results) | | [implicit](https://github.com/benfred/implicit) ALS Wrapper | Matrix Factorization | `rectools.models.ImplicitALSWrapperModel` - Alternating Least Squares Matrix Factorizattion algorithm for implicit feedback.
🎏 | πŸ“™ [Theory & Practice](https://rectools.readthedocs.io/en/latest/examples/tutorials/baselines_extended_tutorial.html#Implicit-ALS)
πŸš€ [50% boost to metrics with user & item features](examples/5_benchmark_iALS_with_features.ipynb) | | [implicit](https://github.com/benfred/implicit) BPR-MF Wrapper | Matrix Factorization | `rectools.models.ImplicitBPRWrapperModel` - Bayesian Personalized Ranking Matrix Factorization algorithm. | πŸ“™ [Theory & Practice](https://rectools.readthedocs.io/en/latest/examples/tutorials/baselines_extended_tutorial.html#Bayesian-Personalized-Ranking-Matrix-Factorization-(BPR-MF)) | diff --git a/rectools/models/nn/transformers/ligr_layers.py b/rectools/models/nn/transformers/ligr.py similarity index 100% rename from rectools/models/nn/transformers/ligr_layers.py rename to rectools/models/nn/transformers/ligr.py diff --git a/tests/models/nn/transformers/test_sasrec.py b/tests/models/nn/transformers/test_sasrec.py index bb3a881d..d1040b89 100644 --- a/tests/models/nn/transformers/test_sasrec.py +++ b/tests/models/nn/transformers/test_sasrec.py @@ -33,7 +33,7 @@ TrainerCallable, TransformerLightningModule, ) -from rectools.models.nn.transformers.ligr_layers import LiGRLayers +from rectools.models.nn.transformers.ligr import LiGRLayers from rectools.models.nn.transformers.negative_sampler import CatalogUniformSampler from rectools.models.nn.transformers.sasrec import SASRecDataPreparator, SASRecTransformerLayers from rectools.models.nn.transformers.similarity import DistanceSimilarityModule