提交饱和编辑的相关设计,及检验代码

This commit is contained in:
2026-02-26 14:02:42 +08:00
commit cb556b47c0
36 changed files with 5437 additions and 0 deletions

89
better_input_seq.py Normal file
View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import os
import gzip
from glob import glob
from tqdm import tqdm
import pandas as pd
def load_left_aa(ref):
    """Collect the ``<gene>_AA<aa_pos>`` labels listed in a reference CSV.

    Args:
        ref: Anything ``pandas.read_csv`` accepts. The table must provide
            ``gene`` and ``aa_pos`` columns.

    Returns:
        A set holding one ``f"{gene}_AA{aa_pos}"`` string per distinct
        (gene, aa_pos) combination found in the table.
    """
    table = pd.read_csv(ref)
    # Row-wise iteration is kept deliberately: each label is rendered from
    # the per-row values exactly as ``iterrows`` hands them out.
    return {
        f"{record['gene']}_AA{record['aa_pos']}"
        for _, record in table.iterrows()
    }
def reader(path):
    """Lazily yield the rows of a gzip-compressed CSV file as dicts.

    The file is decompressed in text mode and parsed with
    ``csv.DictReader``; iteration is wrapped in ``tqdm`` so progress is
    reported while rows are consumed.

    Args:
        path: Location of the gzip-compressed CSV file.

    Yields:
        One mapping per CSV record, as produced by ``csv.DictReader``.
    """
    # newline="" hands line endings to the csv module untouched, which is
    # what it needs to parse quoted fields containing line breaks correctly.
    with gzip.open(path, "rt", newline="") as handle:
        yield from tqdm(csv.DictReader(handle))
def process_seq(sequence: str):
    """Shrink a ``before(src/dst)after`` edit to the bases that really differ.

    The bracketed ``src``/``dst`` pair is compared position by position.
    Bases shared at the start of the pair are appended to ``before`` and
    bases shared at the end are prepended to ``after``, so only the
    differing stretch stays inside the parentheses.

    Args:
        sequence: Edit string containing exactly one ``(``, one ``)`` and
            one ``/`` inside the brackets, e.g. ``"AAA(ATG/ATC)TTT"``.

    Returns:
        The rewritten edit string when exactly one position differs, or
        when two adjacent positions differ. ``None`` in every other case
        (no difference, first and last differ around an equal middle, or
        more than two differences); such edits are not returned unchanged.

    Raises:
        ValueError: If ``sequence`` does not split into the expected parts.
    """
    upstream, remainder = sequence.split("(")
    remainder, downstream = remainder.split(")")
    src, dst = remainder.split("/")

    n_diff = sum(1 for a, b in zip(src, dst) if a != b)

    if n_diff == 1:
        if src[:2] == dst[:2]:
            # Only the last base changes: the first two move upstream.
            pieces = (upstream + src[:2], src[-1], dst[-1], downstream)
        elif src[1:] == dst[1:]:
            # Only the first base changes: the rest moves downstream.
            pieces = (upstream, src[0], dst[0], src[1:] + downstream)
        else:
            # The middle base changes: one base moves to each flank.
            pieces = (upstream + src[0], src[1], dst[1], src[-1] + downstream)
    elif n_diff == 2:
        if src[0] == dst[0]:
            # Shared first base moves upstream; the tail stays bracketed.
            pieces = (upstream + src[0], src[1:], dst[1:], downstream)
        elif src[-1] == dst[-1]:
            # Shared last base moves downstream; the head stays bracketed.
            pieces = (upstream, src[:2], dst[:2], src[-1] + downstream)
        else:
            return None
    else:
        return None

    return "{}({}/{}){}".format(*pieces)
def main(ref, infile, outfile):
    """Keep the edits at reference positions and write them in reduced form.

    Every gzip CSV matched by ``infile`` is scanned. A row is kept when the
    first two ``_``-separated fields of its ``sequence_name`` form a label
    present in the set returned by ``load_left_aa(ref)`` and
    ``process_seq`` returns a non-empty result for its ``editseq``. Kept
    rows carry the rewritten ``editseq``, lose their ``strategy`` and
    ``mutation_type`` columns, and are written to ``outfile`` as a
    gzip-compressed CSV whose header is taken from the first kept row.

    Args:
        ref: Reference CSV handed to ``load_left_aa``.
        infile: Glob pattern selecting the gzip CSV inputs.
        outfile: Destination path of the gzip-compressed CSV.

    Raises:
        ValueError: If no row survives the filtering. ``outfile`` is not
            opened in that case, so an existing file is left untouched.
        KeyError: If a kept row lacks ``strategy`` or ``mutation_type``.
    """
    wanted = load_left_aa(ref)
    kept = []
    for path in glob(infile):
        for row in reader(path):
            label = "_".join(row["sequence_name"].split("_")[:2])
            if label not in wanted:
                continue
            edited = process_seq(row["editseq"])
            if not edited:
                # process_seq could not reduce this edit; drop the row.
                continue
            row["editseq"] = edited
            row.pop("strategy")
            row.pop("mutation_type")
            kept.append(row)

    # Fail before the output is opened: the header comes from the first
    # kept row, and opening for writing would truncate an existing file.
    if not kept:
        raise ValueError(
            f"no rows from {infile!r} matched the reference positions; "
            f"nothing written to {outfile!r}"
        )

    # newline="" stops the text layer from translating the line endings
    # that the csv writer emits itself.
    with gzip.open(outfile, "wt", newline="") as sink:
        writer = csv.DictWriter(sink, fieldnames=list(kept[0]))
        writer.writeheader()
        # Write the data rows.
        writer.writerows(kept)
if __name__ == "__main__":
    # Imported here rather than at module level, so importing this module
    # does not require the `fire` package.
    import fire

    # Hand `main` to Fire, which drives it from the command line.
    fire.Fire(main)