Conversation
SidorinAnton
left a comment
There was a problem hiding this comment.
В целом неплохо!
Основной момент (по файлам) -- в контекстном менеджере (блоке with) можно открыть сразу несколько файлов через запятую.
| count = 0 | ||
| for line in seqs_file: | ||
| count += 1 | ||
| if count == 1 or (count - 1) % 4 == 0: | ||
| seqs[line.strip()] = [] | ||
| if count % 2 == 0 and count % 4 != 0: | ||
| seqs[list(seqs)[-1]].append(line.strip()) | ||
| if count % 4 == 0: | ||
| seqs[list(seqs)[-1]].append(line.strip()) |
There was a problem hiding this comment.
Очень сложно ))
Мы же можем обнулять счётчик и тогда явно сравнивать его с 0, 1 и 3.
| if verbose != True and verbose != False: | ||
| raise ValueError("Invalid *verbose* argument given") |
There was a problem hiding this comment.
Имхо, эта проверка не нужна. Более того, если хочется явно проверять на bool, то лучше использовать isinstance
| raise ValueError("Invalid quality sequence given") | ||
| if verbose != True and verbose != False: | ||
| raise ValueError("Invalid *verbose* argument given") | ||
| return seqs, gc_bounds, length_bounds, quality_threshold, verbose, output_filename |
There was a problem hiding this comment.
В таких случаях лучше возвращать словарь
| gc_result = is_gc_good(seq, gc_bounds, verbose) | ||
| if gc_result: | ||
| len_result = is_len_good(seq, length_bounds, verbose) | ||
| if len_result: | ||
| qual_result = is_qual_good(seq_qual, quality_threshold, verbose) | ||
| if qual_result: | ||
| seqs_filtered[seq_name] = seqs[seq_name] |
There was a problem hiding this comment.
- good? ))) Скорее тогда что-нибудь в духе passed.
- Тут не критично, но такая архитектура не очень, т.к. если добавится ещё 20 фильтров, то код далеко уедет.
| return seqs_filtered | ||
|
|
||
|
|
||
| NEW_LINE = "\n" # needed for output in f-strings |
There was a problem hiding this comment.
Мм? )))
В f-строках спокойно можно использовать \n ...
print(f"Lol{1234}\nKek")
There was a problem hiding this comment.
Спасибо! Я, кажется, гуглил, и, в общем, до 3.12 вроде как нельзя, поэтому сделал, как посоветовали:
https://stackoverflow.com/questions/44780357/how-can-i-use-newline-n-in-an-f-string-to-format-output
| def three_one_letter_code(sequences: (tuple[str] or list[str])) -> list: | ||
| """ | ||
| Reverse the protein sequences from one-letter to three-letter format and vice-versa | ||
|
|
||
| Case 1: get three-letter sequence\n | ||
| Use one-letter amino-acids sequences of any letter case | ||
|
|
||
| Case 2: get one-letter sequence\n | ||
| Use three-letter amino-acid separated by "-" sequences. | ||
| Please note that sequences without "-" are parsed as one-letter code sequences\n | ||
| Example: for sequence "Ala" function will return "Ala-leu-ala" | ||
|
|
||
| Arguments: | ||
| - sequences (tuple[str] or list[str]): protein sequences to convert\n | ||
| Example: ["WAG", "MkqRe", "msrlk", "Met-Ala-Gly", "Met-arg-asn-Trp-Ala-Gly", "arg-asn-trp"] | ||
|
|
||
| Return: | ||
| - list: one-letter/three-letter protein sequences\n | ||
| Example: ["Met-Ala-Gly", "Met-arg-asn-Trp-Ala-Gly", "arg-asn-trp", "WAG", "MkqRe", "rlk"] | ||
| """ | ||
| inversed_sequences = [] | ||
| for sequence in sequences: | ||
| inversed_sequence = [] | ||
| if "-" not in sequence: | ||
| for letter in sequence: | ||
| if letter.islower(): | ||
| inversed_sequence.append( | ||
| dictionaries.AMINO_ACIDS[letter.capitalize()].lower() | ||
| ) | ||
| else: | ||
| inversed_sequence.append(dictionaries.AMINO_ACIDS[letter]) | ||
| inversed_sequences.append("-".join(inversed_sequence)) | ||
| else: | ||
| aa_splitted = sequence.split("-") | ||
| for aa in aa_splitted: | ||
| aa_index = list(dictionaries.AMINO_ACIDS.values()).index( | ||
| aa.capitalize() | ||
| ) | ||
| if aa[0].islower(): | ||
| inversed_sequence.append( | ||
| list(dictionaries.AMINO_ACIDS.keys())[aa_index].lower() | ||
| ) | ||
| else: | ||
| inversed_sequence.append( | ||
| list(dictionaries.AMINO_ACIDS.keys())[aa_index] | ||
| ) | ||
| inversed_sequences.append("".join(inversed_sequence)) | ||
| return inversed_sequences |
There was a problem hiding this comment.
Архитектура не оч ))
Если хочется сделать конвертацию 1 -> 3 и 3 -> 1, то лучше написать 2 отдельные функции.
| - dictionary: sequences (str] as keys , starting positions for presented motif (list) as values\n | ||
| Example: {"AMGAGW": [2], "GAWSGRAGA": [0, 7]} | ||
| """ | ||
| new_line = "\n" |
| if nucl_acids == "RNA": | ||
| del nucl_acid_seqs["DNA"] | ||
| if nucl_acids == "DNA": | ||
| del nucl_acid_seqs["RNA"] |
There was a problem hiding this comment.
Зачем? )))
Если хочется возвращать только одну нуклеиновую кислоту, то тогда имеет смысл сразу возвращать значение (список).
| with open(input_fasta, "r") as input: | ||
| with open(output_path, "w"): | ||
| while True: | ||
| line = input.readline().strip() | ||
| print(line) | ||
| if not line: | ||
| break | ||
| if not line.startswith(">"): | ||
| line = line[shift:] + line[:shift] | ||
| with open(output_path, "a") as output: | ||
| output.write(line + "\n") |
There was a problem hiding this comment.
- Не очень понимаю, а почему нельзя было открыть файлы сразу на чтение и на запись, чтобы потом не открывать на дозапись?
  with open(...) as ..., open(..., "w") as ...:
- Зачем while? Почему просто не итерироваться по строкам? ))
| with open(input_fasta, "r") as input: | ||
| with open(output_path, "w"): | ||
| read = [] | ||
| while True: | ||
| line = input.readline().strip() | ||
| if not line: | ||
| break | ||
| if line.startswith(">"): | ||
| line += "\n" | ||
| if read: | ||
| with open(output_path, "a") as output: | ||
| output.write("".join(read) + "\n") | ||
| read = [line] | ||
| else: | ||
| read.append(line) | ||
| with open(output_path, "a") as output: | ||
| output.write("".join(read)) |
There was a problem hiding this comment.
Опять сложно ))
with open(INPUT) as inp_fa, open(OUTPUT, "w") as opt_fa:
for line in inp_fa:
if line.startswith(">"):
opt_fa.write("\n") # Можно аккуратнее сделать проверку на первую строку, тогда не будет \n вначале
opt_fa.write(line)
continue
opt_fa.write(line.strip())
No description provided.