-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
89 lines (71 loc) · 3.17 KB
/
models.py
File metadata and controls
89 lines (71 loc) · 3.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import torch.nn as nn
import torch
# import copy
class MLPModel(nn.Module):
    """Position-wise MLP producing per-position logits over an alphabet.

    Every sequence position is embedded independently, conditioned on two
    time embeddings (``t`` and ``r``), and mapped to ``alphabet_size``
    unnormalized logits. No softmax is applied to the output.
    """

    def __init__(self, args, alphabet_size, dropout=0.1, input_expansion=2):
        """Build the time embedder, the input embedder and the core MLP.

        Args:
            args: Configuration object; only ``args.hidden_dim`` is read here.
            alphabet_size (int): Number of categories per sequence position.
            dropout (float): Dropout probability used inside the core MLP.
                Defaults to 0.1, the value that was previously hard-coded.
            input_expansion (int): Multiplier applied to ``alphabet_size`` to
                obtain the expected size of the last input dimension.
                Defaults to 2, the value that was previously hard-coded.
        """
        super().__init__()
        self.alphabet_size = alphabet_size
        self.args = args
        hidden_dim = args.hidden_dim

        # Shared embedder for both time inputs (t and r).
        self.time_embedder = nn.Sequential(
            GaussianFourierProjection(embed_dim=hidden_dim),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )

        # Projection of the (expanded) per-position input features.
        self.input_expansion = input_expansion
        self.embedder = nn.Linear(self.input_expansion * alphabet_size, hidden_dim)

        # Core MLP. Its input is the concatenation of the sequence features
        # and the two time embeddings (3 * hidden_dim); its output is one
        # logit per category. The layer order is kept stable so that existing
        # state_dict keys (mlp.0 ... mlp.8) remain valid.
        self.mlp = nn.Sequential(
            nn.Linear(3 * hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(hidden_dim, alphabet_size),
        )

    def _embed_time(self, time, batch_size):
        """Embed a time tensor and broadcast a single time over the batch."""
        if time.dim() == 0:
            # A scalar time is treated as one shared time value.
            time = time.reshape(1)
        embedded = self.time_embedder(time)  # [batch or 1, hidden_dim]
        if embedded.size(0) == 1 and batch_size != 1:
            embedded = embedded.expand(batch_size, -1)
        return embedded

    def forward(self, x, t, r, cls=None):
        """Compute per-position logits.

        Args:
            x: Tensor of shape [batch, seq_len, alphabet_size * input_expansion].
            t: Tensor of shape [batch] (current time). A 0-dim or
                single-element tensor is broadcast over the batch.
            r: Tensor of shape [batch] (previous time). Broadcast like ``t``.
            cls: Optional class labels. Accepted for interface compatibility
                but not used by this model.

        Returns:
            Tensor of shape [batch, seq_len, alphabet_size] holding logits.
        """
        batch_size, seq_len = x.size(0), x.size(1)

        t_embed = self._embed_time(t, batch_size)  # [batch, hidden_dim]
        r_embed = self._embed_time(r, batch_size)  # [batch, hidden_dim]

        # The time embeddings are added to the sequence features ...
        feat = self.embedder(x)  # [batch, seq_len, hidden_dim]
        feat = feat + t_embed.unsqueeze(1) + r_embed.unsqueeze(1)

        # ... and also concatenated, so the MLP sees them explicitly as well.
        mlp_input = torch.cat(
            [
                feat,
                t_embed.unsqueeze(1).expand(-1, seq_len, -1),
                r_embed.unsqueeze(1).expand(-1, seq_len, -1),
            ],
            dim=-1,
        )  # [batch, seq_len, 3 * hidden_dim]

        return self.mlp(mlp_input)  # [batch, seq_len, alphabet_size]
class GaussianFourierProjection(nn.Module):
    """Gaussian random Fourier features for encoding time steps.

    A 1-D tensor of times is multiplied by fixed random frequencies and
    mapped to ``[sin, cos]`` features. The output always has exactly
    ``embed_dim`` features: when ``embed_dim`` is odd, ``embed_dim // 2``
    frequencies are used and a single zero column is appended.
    """

    # Class-level default so instances created before this attribute existed
    # (e.g. restored from a whole-module pickle) keep the unpadded behaviour.
    _pad_width = 0

    def __init__(self, embed_dim, scale=30.):
        """Sample the fixed random frequencies.

        Args:
            embed_dim (int): Size of the produced embedding.
            scale (float): Standard deviation of the random frequencies.
        """
        super().__init__()
        # Randomly sample weights during initialization. These weights are
        # fixed during optimization and are not trainable.
        self.W = nn.Parameter(torch.randn(embed_dim // 2) * scale, requires_grad=False)
        # sin/cos pairs only cover an even width; remember whether one extra
        # zero feature is needed to reach embed_dim.
        self._pad_width = embed_dim % 2

    def forward(self, x):
        """Embed times.

        Args:
            x: Tensor of shape [batch].

        Returns:
            Tensor of shape [batch, embed_dim].
        """
        x_proj = x[:, None] * self.W[None, :] * 2 * torch.pi
        features = [torch.sin(x_proj), torch.cos(x_proj)]
        if self._pad_width:
            features.append(x_proj.new_zeros((x_proj.shape[0], self._pad_width)))
        return torch.cat(features, dim=-1)