90 lines
2.2 KiB
Python
90 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
import csv
|
|
import os
|
|
import gzip
|
|
from glob import glob
|
|
from tqdm import tqdm
|
|
|
|
import pandas as pd
|
|
|
|
|
|
def load_left_aa(ref):
    """Collect the ``<gene>_AA<aa_pos>`` keys listed in a reference CSV.

    Args:
        ref: Path or file-like object accepted by ``pandas.read_csv``. The
            table must provide ``gene`` and ``aa_pos`` columns.

    Returns:
        set[str]: One ``f"{gene}_AA{aa_pos}"`` string for every distinct
        (gene, aa_pos) pair in the table; empty when the table has no rows.

    Raises:
        KeyError: If the ``gene`` or ``aa_pos`` column is missing.
    """
    df = pd.read_csv(ref)

    # Read the two columns directly rather than through ``iterrows()``:
    # iterrows builds one Series per row, which is slow and does not keep
    # per-column dtypes (an integer position can be rendered as "12.0" when
    # the row gets upcast to float).
    return {f"{gene}_AA{pos}" for gene, pos in zip(df["gene"], df["aa_pos"])}
|
|
|
|
|
|
def reader(path):
    """Lazily yield the rows of a gzip-compressed CSV file as dictionaries.

    Args:
        path: Location of the gzip file to read. The first CSV record is
            used as the header (``csv.DictReader`` default).

    Yields:
        dict: Column name -> string value for each data row. Iteration is
        wrapped in ``tqdm`` so a progress counter is shown while reading.
    """
    # newline="" passes line endings through untranslated, as the csv module
    # requires, so line breaks embedded in quoted fields are kept intact.
    with gzip.open(path, "rt", newline="") as handle:
        yield from tqdm(csv.DictReader(handle))
|
|
|
|
|
|
|
|
def process_seq(sequence: str):
    """Shrink a ``before(SRC/DST)after`` annotation to the bases that differ.

    The text between the parentheses is split on ``/`` into a source and a
    destination string, which are compared position by position. Characters
    shared by both sides are moved out of the parentheses into the flanking
    sequence, so only the differing part stays annotated.

    NOTE(review): the slicing is hard-coded for three-character codons;
    confirm that callers never pass longer or shorter ones.

    Args:
        sequence: Text with exactly one ``(``, exactly one ``)`` after it,
            and exactly one ``/`` between the two.

    Returns:
        The rewritten string when exactly one position differs, or when
        exactly two positions differ and either the first or the last
        character is shared. ``None`` in every other case (no difference,
        first and last differ while the middle matches, all differ).

    Raises:
        ValueError: If ``sequence`` cannot be unpacked around ``(``, ``)``
            and ``/`` as described above.
    """
    prefix, remainder = sequence.split("(")
    inner, suffix = remainder.split(")")
    ref, alt = inner.split("/")

    # Number of aligned positions whose characters differ.
    n_diff = sum(a != b for a, b in zip(ref, alt))

    if n_diff == 1:
        if ref[:2] == alt[:2]:
            # Only the last base changes: the shared leading pair goes left.
            return f"{prefix}{ref[:2]}({ref[-1]}/{alt[-1]}){suffix}"
        if ref[1:] == alt[1:]:
            # Only the first base changes: the shared tail goes right.
            return f"{prefix}({ref[0]}/{alt[0]}){ref[1:]}{suffix}"
        # Otherwise the middle base changes: one shared base on each side.
        return f"{prefix}{ref[0]}({ref[1]}/{alt[1]}){ref[-1]}{suffix}"

    if n_diff == 2:
        if ref[0] == alt[0]:
            # Shared first base goes left; the rest stays annotated.
            return f"{prefix}{ref[0]}({ref[1:]}/{alt[1:]}){suffix}"
        if ref[-1] == alt[-1]:
            # Shared last base goes right; the leading pair stays annotated.
            return f"{prefix}({ref[:2]}/{alt[:2]}){ref[-1]}{suffix}"

    return None
|
|
|
|
|
|
def main(ref, infile, outfile):
    """Filter gzip CSV rows against a reference and write one gzip CSV.

    For every file matching ``infile``, a row is kept when the first two
    ``_``-separated tokens of its ``sequence_name`` (re-joined with ``_``)
    appear in the key set built by ``load_left_aa(ref)`` and
    ``process_seq`` returns a non-empty value for its ``editseq``. Kept rows
    have ``editseq`` replaced by that value and lose their ``strategy`` and
    ``mutation_type`` columns.

    Args:
        ref: Reference CSV handed to ``load_left_aa``.
        infile: Glob pattern selecting the gzip CSV inputs.
        outfile: Path of the gzip CSV to create (overwritten if present).

    Returns:
        None. The header of ``outfile`` is taken from the keys of the first
        kept row. When no row is kept, a warning is printed to stderr and
        ``outfile`` is left as an empty gzip stream.
    """
    import sys

    wanted = load_left_aa(ref)

    data = []
    # glob() gives no ordering guarantee; sorting keeps the output stable
    # from run to run.
    for path in sorted(glob(infile)):
        for row in reader(path):
            seq_name = "_".join(row["sequence_name"].split("_")[:2])
            if seq_name not in wanted:
                continue

            row["editseq"] = process_seq(row["editseq"])
            if not row["editseq"]:
                continue

            # Tolerate inputs that already lack these columns.
            row.pop("strategy", None)
            row.pop("mutation_type", None)
            data.append(row)

    # newline="" lets the csv writer control line endings itself.
    with gzip.open(outfile, "wt", newline="") as w:
        if not data:
            # No header can be derived without at least one row.
            print(f"no rows kept; {outfile} is left empty", file=sys.stderr)
            return

        dict_writer = csv.DictWriter(w, fieldnames=data[0].keys())
        dict_writer.writeheader()

        # Write the data rows
        dict_writer.writerows(data)
|
|
|
|
|
|
if __name__ == "__main__":
    # python-fire turns main()'s parameters into command-line arguments.
    # The import sits inside the guard, so it only runs when this file is
    # executed as a script.
    import fire

    fire.Fire(main)
|