-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGeneticCode.lean
More file actions
319 lines (277 loc) · 11.4 KB
/
GeneticCode.lean
File metadata and controls
319 lines (277 loc) · 11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
/-
Authors: Colin Jones
Last Updated: 07/25/2025
Description: Contains a function that allows the user to convert a coding strand of DNA into a
sequence of RNA or amino acids. Proves the injectivity of mapping DNA to RNA and the redundancy
(non-injectivity) of DNA and RNA to amino acid. Includes brief exploration of point mutations.
-/
import Mathlib.Algebra.Group.Nat.Defs
import Mathlib.Data.List.Basic
/-
# Genetic code
This file defines 2 functions called `codon` and `anticodon` that match a 3 letter string comprised
of 4 letters (`U`, `A`, `G`, or `C`) with their respective amino acid.
## Main Definitions
* `Redundant`: Not injective, not one-to-one
## Main Functions
* `dna_to_rna_template`: Converts a coding strand of DNA into RNA assuming the strand is template
* `dna_to_rna_coding`: Converts a coding strand of DNA into RNA assuming the strand is coding
* `rna_to_amino`: Takes a list of RNA triplets and converts them into a list of amino acids
* `dna_to_rna_template`: Converts a coding strand of DNA into a sequence of amino acids assuming
template strand is the input
* `dna_to_rna_coding`: Converts a coding strand of DNA into a sequence of amino acids assuming
coding strand is input
* `dna_to_amino_template`: Converts a template strand of DNA into its corresponding peptide chain
* `dna_to_amino_coding`: Converts a template strand of DNA into its corresponding peptide chain
## Main Proofs
* `template_coding_equivalence`: `dna_to_rna_template` applied to a list is equivalent to
`dna_to_rna` applied to that list reversed
* `length_conservation`: The length of the list is equal to the length of the output of
`dna_to_rna_template` applied to that list
* `injective_dna_to_rna_template`: The function `dna_to_rna_template` is injective or one-to-one
* `redundant_rna_to_amino`: The function `rna_to_amino` is redundant or not injective
* `redundant_genetic_code`: The function `dna_to_amino_template` representing the genetic code is
redundant
## Implementation Details
This file assumes that the reading frame begins at the first nucleotide always and all lists are
5' → 3'. The function `dna_to_rna_template` takes the template strand as an input, so it is
'read' backwards from the 3' → 5' direction e.g. `[A, G, T]` becomes `[A, C, U]`. Proof has to be
given that the input is a valid DNA base or contains DNA bases in the functions:
`dna_to_rna_singlet`, `dna_to_rna_template`, `dna_to_rna_coding`, `dna_to_amino_template`, and
`dna_to_amino_coding`. To prove this is true, include `(by aesop)` at the end of the `#eval`
function like this: `#eval dna_to_rna_template [A, G, T] (by aesop)`. That particular input will
output `[A, C, U]` in the Lean InfoView.
-/
open Function Classical List
variable (n : NucBase) (s : List NucBase) (i : ℕ)
/- # Definition of Nucleotide Base #
U := Uracil
A := Adenosine
G := Guanosine
C := Cytosine
T := Thymine
-/
inductive NucBase
| U | A | G | C | T
deriving DecidableEq, Repr
open NucBase
/- # Definition of Amino Acid #
Each amino acid is abbreviated by their single-letter abbreviation except for the stop codon
which is represented as `STOP`.
-/
inductive AminoAcid
| G | D | E | V | K | R | H | P | Q | F | T | W | Y | M | S | L | I | A | C | N | STOP
deriving DecidableEq, Repr
/- # General Definitions # -/
def NucBase.isRNABase : Bool := n matches U | A | G | C
def NucBase.isDNABase : Bool := n matches T | A | G | C
def Redundant {α β} (f : α → β) : Prop := ¬ Injective f
/- # DNA Function Definitions # -/
def T_to_U (n : NucBase) (_ : n.isDNABase) :=
if n = T then U else n
def dna_to_rna_singlet (n : NucBase) (hn : n.isDNABase) :=
match n with
| T => A
| A => U
| G => C
| C => G
def dna_to_dna_singlet (n : NucBase) (hn : n.isDNABase) :=
match n with
| T => A
| A => T
| G => C
| C => G
def dna_to_rna_template (hs : ∀ n ∈ s, n.isDNABase) : List NucBase :=
(s.reverse).pmap dna_to_rna_singlet (by aesop)
def dna_to_rna_coding (_ : ∀ n ∈ s, n.isDNABase) : List NucBase :=
s.pmap T_to_U (by aesop)
def dna_replication (hs : ∀ n ∈ s, n.isDNABase) : List NucBase :=
s.pmap dna_to_dna_singlet hs
/- # RNA Function Definitions # -/
def rna_to_amino_single_triplet (s : List NucBase) : AminoAcid :=
match s with
| [U, U, U] => AminoAcid.F
| [U, U, C] => AminoAcid.F
| [U, U, A] => AminoAcid.L
| [U, U, G] => AminoAcid.L
| [C, U, U] => AminoAcid.L
| [C, U, C] => AminoAcid.L
| [C, U, A] => AminoAcid.L
| [C, U, G] => AminoAcid.L
| [A, U, U] => AminoAcid.I
| [A, U, C] => AminoAcid.I
| [A, U, A] => AminoAcid.I
| [A, U, G] => AminoAcid.M
| [G, U, U] => AminoAcid.V
| [G, U, C] => AminoAcid.V
| [G, U, A] => AminoAcid.V
| [G, U, G] => AminoAcid.V
| [U, C, U] => AminoAcid.S
| [U, C, C] => AminoAcid.S
| [U, C, A] => AminoAcid.S
| [U, C, G] => AminoAcid.S
| [C, C, U] => AminoAcid.P
| [C, C, C] => AminoAcid.P
| [C, C, A] => AminoAcid.P
| [C, C, G] => AminoAcid.P
| [A, C, U] => AminoAcid.T
| [A, C, C] => AminoAcid.T
| [A, C, A] => AminoAcid.T
| [A, C, G] => AminoAcid.T
| [G, C, U] => AminoAcid.A
| [G, C, C] => AminoAcid.A
| [G, C, A] => AminoAcid.A
| [G, C, G] => AminoAcid.A
| [U, A, U] => AminoAcid.Y
| [U, A, C] => AminoAcid.Y
| [U, A, A] => AminoAcid.STOP
| [U, A, G] => AminoAcid.STOP
| [C, A, U] => AminoAcid.H
| [C, A, C] => AminoAcid.H
| [C, A, A] => AminoAcid.Q
| [C, A, G] => AminoAcid.Q
| [A, A, U] => AminoAcid.N
| [A, A, C] => AminoAcid.N
| [A, A, A] => AminoAcid.K
| [A, A, G] => AminoAcid.K
| [G, A, U] => AminoAcid.D
| [G, A, C] => AminoAcid.D
| [G, A, A] => AminoAcid.E
| [G, A, G] => AminoAcid.E
| [U, G, U] => AminoAcid.C
| [U, G, C] => AminoAcid.C
| [U, G, A] => AminoAcid.STOP
| [U, G, G] => AminoAcid.W
| [C, G, U] => AminoAcid.R
| [C, G, C] => AminoAcid.R
| [C, G, A] => AminoAcid.R
| [C, G, G] => AminoAcid.R
| [A, G, U] => AminoAcid.S
| [A, G, C] => AminoAcid.S
| [A, G, A] => AminoAcid.R
| [A, G, G] => AminoAcid.R
| [G, G, U] => AminoAcid.G
| [G, G, C] => AminoAcid.G
| [G, G, A] => AminoAcid.G
| [G, G, G] => AminoAcid.G
| _ => AminoAcid.STOP
def rna_to_amino (L : List (List NucBase)) : List AminoAcid :=
match L with
| [] => []
| y :: ys => [rna_to_amino_single_triplet y] ++ rna_to_amino ys
def rna_to_dna_singlet (n : NucBase) (hn : n.isRNABase) :=
match n with
| U => A
| A => T
| G => C
| C => G
def rna_to_dna (hs : ∀ n ∈ s, n.isRNABase) : List NucBase :=
s.pmap rna_to_dna_singlet hs
/- ### Main Functions ###
dna_to_amino: Takes a list of DNA nucleic acids and converts them into a sequence of amino acids
-/
def dna_to_amino_template (hs : ∀ n ∈ s, n.isDNABase) : List AminoAcid :=
(dna_to_rna_template s hs).toChunks 3 |> rna_to_amino
def dna_to_amino_coding (hs : ∀ n ∈ s, n.isDNABase) : List AminoAcid :=
(dna_to_rna_coding s hs).toChunks 3 |> rna_to_amino
/- # Mutations # -/
def point_mutation := s.set i n
/- # Proofs # -/
theorem length_conservation (hs : ∀ n ∈ s, n.isDNABase = True) :
s.length = (dna_to_rna_template s hs).length ∧ s.length = (dna_to_rna_coding s hs).length := by
constructor <;>
· induction s
· rfl
· simp only [mem_cons, or_true, decide_true, implies_true, forall_true_left, forall_eq_or_imp,
length_cons, map_cons, length_map, Nat.add_left_cancel_iff, dna_to_rna_coding,
dna_to_rna_template, reverse_cons, pmap_append, pmap_reverse, pmap_cons, pmap_nil,
length_append, length_reverse, length_pmap, length_nil, zero_add]
lemma injective_dna_to_rna_singlet :
Injective (fun n : {x // NucBase.isDNABase x} ↦ dna_to_rna_singlet n n.prop) := by
rintro ⟨n, hn⟩ ⟨m, hm⟩
cases n <;> cases m <;>
any_goals simp [isDNABase] at hn hm
all_goals simp [dna_to_rna_singlet]
lemma singlet_iff (n₁ n₂ : {x // NucBase.isDNABase x}) :
dna_to_rna_singlet n₁ n₁.prop = dna_to_rna_singlet n₂ n₂.prop ↔ n₁ = n₂ := by
obtain ⟨n₁, hn₁⟩ := n₁
obtain ⟨n₂, hn₂⟩ := n₂
constructor <;> simp only [Subtype.mk.injEq]
· have h' := injective_dna_to_rna_singlet
simp only [Injective, Subtype.forall, Subtype.mk.injEq] at h'
apply h' <;> assumption
· intro h
congr
theorem injective_dna_to_rna_template :
Injective (fun s : {x : List NucBase // ∀ n ∈ x, n.isDNABase} ↦ dna_to_rna_template s s.prop)
:= by
rintro ⟨s₁, hs₁⟩ ⟨s₂, hs₂⟩
simp only [Subtype.mk.injEq]
intro h
have hL : s₁.length = s₂.length := by
rw [(length_conservation s₁ hs₁).1, (length_conservation s₂ hs₂).1]
exact congrArg length h
apply Array.ext.extAux s₁ s₂ hL
intro i hi₁ hi₂
have hi₁' : i < s₁.reverse.length := by simp_all only [length_reverse]
have hi₂' : i < s₂.reverse.length := by simp_all only [length_reverse]
unfold dna_to_rna_template at h
rw [ext_get_iff] at h
obtain ⟨h1, h2⟩ := h
simp only [length_pmap, get_eq_getElem, getElem_pmap, forall_true_left] at h2
have hn := h2 i (by aesop) (by aesop)
have hi := singlet_iff
simp only [Subtype.forall, Subtype.mk.injEq] at hi
have hj : s₁.reverse[i] = s₂.reverse[i] := by simp_all only [pmap_reverse, length_reverse,
length_pmap, getElem_reverse, getElem_mem]
apply getElem_of_eq
have hj' := Array.ext.extAux s₁.reverse s₂.reverse (by aesop) (by aesop)
simp_all only [pmap_reverse, length_reverse, length_pmap, getElem_reverse, imp_self,
implies_true, reverse_inj]
lemma injective_T_to_U : Injective (fun n : {x // NucBase.isDNABase x} ↦ T_to_U n n.prop) := by
rintro ⟨n, hn⟩ ⟨m, hm⟩
cases n <;> cases m <;>
repeat (first | intro h; simp_all | contradiction)
theorem injective_dna_to_rna_coding :
Injective (fun s : {x : List NucBase // ∀ n ∈ x, n.isDNABase} ↦ dna_to_rna_coding s s.prop)
:= by
rintro ⟨s₁, hs₁⟩ ⟨s₂, hs₂⟩
simp only [Subtype.mk.injEq]
intro h
have hL : s₁.length = s₂.length := by
rw [(length_conservation s₁ hs₁).2, (length_conservation s₂ hs₂).2]
exact congrArg length h
apply Array.ext.extAux s₁ s₂ hL
intro i hi₁ hi₂
unfold dna_to_rna_coding at h
rw [ext_get_iff] at h
obtain ⟨h1, h2⟩ := h
simp only [length_pmap, get_eq_getElem, getElem_pmap, forall_true_left] at h2
have hn := by apply h2 i hi₁ hi₂
have hi := injective_T_to_U
unfold Injective at hi
simp only [length_pmap, forall_true_left, Subtype.forall, Subtype.mk.injEq, getElem_mem] at hi
apply hi
exact hn
repeat simp_all only [length_pmap, getElem_mem, forall_true_left]
theorem redundant_rna_to_amino : Redundant rna_to_amino := by
simp only [Redundant, Injective, not_forall]
use [[U, U, U]], [[U, U, C]]
simp only [cons.injEq, reduceCtorEq, and_true, not_false_eq_true, exists_prop]
exact ⟨rfl, of_decide_eq_false rfl⟩
theorem redundant_genetic_code_template :
Redundant (fun s : {x : List NucBase // ∀ n ∈ x, n.isDNABase} ↦ dna_to_amino_template s s.prop)
:= by
simp only [Redundant, Injective, not_forall]
use ⟨[A, A, A], by decide⟩, ⟨[G, A, A], by decide⟩
simp only [Subtype.mk.injEq, cons.injEq, reduceCtorEq, and_true, not_false_eq_true,
exists_prop]
rfl
theorem redundant_genetic_code_coding :
Redundant (fun s : {x : List NucBase // ∀ n ∈ x, n.isDNABase} ↦ dna_to_amino_coding s s.prop)
:= by
simp only [Redundant, Injective, not_forall]
use ⟨[A, A, A], by decide⟩, ⟨[A, A, G], by decide⟩
simp only [Subtype.mk.injEq, cons.injEq, reduceCtorEq, and_true, and_false, not_false_eq_true,
exists_prop]
rfl