提交饱和编辑的相关设计,及检验代码
This commit is contained in:
89
better_input_seq.py
Normal file
89
better_input_seq.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import csv
|
||||
import os
|
||||
import gzip
|
||||
from glob import glob
|
||||
from tqdm import tqdm
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def load_left_aa(ref):
    """Build the set of amino-acid identifiers listed in a reference CSV.

    Args:
        ref: Path (or file-like object) readable by ``pandas.read_csv``.
            The table must contain ``gene`` and ``aa_pos`` columns.

    Returns:
        set[str]: One ``"{gene}_AA{aa_pos}"`` string per distinct
        (gene, aa_pos) pair found in the table.

    Raises:
        KeyError: If the ``gene`` or ``aa_pos`` column is missing.
    """
    df = pd.read_csv(ref)

    # Walk the two columns directly instead of DataFrame.iterrows():
    # iterrows() builds a Series per row and upcasts mixed numeric columns
    # to float, which would render an integer position 12 as "12.0".
    return {
        f"{gene}_AA{pos}"
        for gene, pos in zip(df["gene"], df["aa_pos"])
    }
|
||||
|
||||
|
||||
def reader(path):
    """Yield the rows of a gzip-compressed CSV file as dictionaries.

    Args:
        path: Path of the gzip-compressed CSV file. Its first line is used
            by ``csv.DictReader`` as the header.

    Yields:
        dict: One mapping of column name to string value per data row.
        Iteration is wrapped in ``tqdm`` so progress is reported while the
        file is consumed.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires, so newlines inside quoted fields survive.
    with gzip.open(path, "rt", newline="") as handle:
        yield from tqdm(csv.DictReader(handle))
|
||||
|
||||
|
||||
|
||||
def process_seq(sequence: str):
    """Shrink a codon substitution to the bases that actually change.

    ``sequence`` has the form ``"<before>(<src>/<dst>)<after>"``. Bases
    that are identical between ``src`` and ``dst`` at either end of the
    codon are moved out of the parentheses into the flanking sequence.

    Args:
        sequence: Edit sequence with exactly one ``(src/dst)`` group.

    Returns:
        The rewritten sequence when one base differs, or when two adjacent
        bases differ. ``None`` in every other case (no difference, first
        and last base differ, all three differ), i.e. when nothing can be
        trimmed.

    Raises:
        ValueError: If ``sequence`` does not contain exactly one ``(``,
            one ``)`` and one ``/`` in the expected places.
    """
    upstream, remainder = sequence.split("(")
    change, downstream = remainder.split(")")
    ref_codon, alt_codon = change.split("/")

    n_diff = sum(1 for a, b in zip(ref_codon, alt_codon) if a != b)

    if n_diff == 1:
        if ref_codon[:2] == alt_codon[:2]:
            # Only the last base changes: push the shared prefix upstream.
            return (
                f"{upstream}{ref_codon[:2]}"
                f"({ref_codon[-1]}/{alt_codon[-1]}){downstream}"
            )
        if ref_codon[1:] == alt_codon[1:]:
            # Only the first base changes: push the shared suffix downstream.
            return (
                f"{upstream}({ref_codon[0]}/{alt_codon[0]})"
                f"{ref_codon[1:]}{downstream}"
            )
        # Otherwise the middle base changes: split both flanks off.
        return (
            f"{upstream}{ref_codon[0]}"
            f"({ref_codon[1]}/{alt_codon[1]})"
            f"{ref_codon[-1]}{downstream}"
        )

    if n_diff == 2:
        if ref_codon[0] == alt_codon[0]:
            # First base is shared: keep the trailing two bases in the group.
            return (
                f"{upstream}{ref_codon[0]}"
                f"({ref_codon[1:]}/{alt_codon[1:]}){downstream}"
            )
        if ref_codon[-1] == alt_codon[-1]:
            # Last base is shared: keep the leading two bases in the group.
            return (
                f"{upstream}({ref_codon[:2]}/{alt_codon[:2]})"
                f"{ref_codon[-1]}{downstream}"
            )

    return None
|
||||
|
||||
|
||||
def main(ref, infile, outfile):
    """Filter design rows to the reference set and trim their edit sequences.

    Args:
        ref: Reference CSV handed to ``load_left_aa``.
        infile: Glob pattern of gzip-compressed CSV files. Each row must
            provide ``sequence_name``, ``editseq``, ``strategy`` and
            ``mutation_type`` columns.
        outfile: Path of the gzip-compressed CSV to write. The columns are
            those of the first kept row, minus ``strategy`` and
            ``mutation_type``.

    Raises:
        KeyError: If a kept row lacks one of the required columns.
    """
    import sys

    wanted = load_left_aa(ref)

    data = []
    # glob() returns paths in arbitrary order; sort so the output row order
    # (and the file that defines the header) is reproducible.
    for path in sorted(glob(infile)):
        for row in reader(path):
            # The first two "_"-separated tokens of sequence_name form the
            # key that is compared against the reference identifiers.
            seq_name = "_".join(row["sequence_name"].split("_")[:2])

            # NOTE(review): the indentation of this function was lost in
            # the source it was recovered from. Rows whose key is not in
            # the reference set are assumed to be skipped entirely --
            # confirm against the original file.
            if seq_name not in wanted:
                continue

            row["editseq"] = process_seq(row["editseq"])
            if not row["editseq"]:
                # process_seq returned None: nothing could be trimmed.
                continue

            row.pop("strategy")
            row.pop("mutation_type")
            data.append(row)

    # newline="" lets the csv module control line endings itself.
    with gzip.open(outfile, "wt", newline="") as w:
        if data:
            dict_writer = csv.DictWriter(w, fieldnames=data[0].keys())
            dict_writer.writeheader()

            # Write the data rows.
            dict_writer.writerows(data)

    if not data:
        # Previously this case crashed with IndexError on data[0]; now an
        # empty (but valid) gzip file is produced and the user is told.
        print(
            f"No rows matched the reference; wrote empty file {outfile}",
            file=sys.stderr,
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported here so that merely importing this module does not require
    # the `fire` package; Fire(main) runs main() from the command line.
    import fire

    fire.Fire(main)
|
||||
Reference in New Issue
Block a user