#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
from glob import glob

import pandas as pd


# Standard genetic code as DNA codons, keyed by one-letter amino-acid code.
# '*' holds the three stop codons. Built once instead of on every call.
_GENETIC_CODE = {
    'A': ('GCT', 'GCC', 'GCA', 'GCG'),
    'R': ('CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'),
    'N': ('AAT', 'AAC'),
    'D': ('GAT', 'GAC'),
    'C': ('TGT', 'TGC'),
    'E': ('GAA', 'GAG'),
    'Q': ('CAA', 'CAG'),
    'G': ('GGT', 'GGC', 'GGA', 'GGG'),
    'H': ('CAT', 'CAC'),
    'I': ('ATT', 'ATC', 'ATA'),
    'L': ('TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'),
    'K': ('AAA', 'AAG'),
    'M': ('ATG',),
    'F': ('TTT', 'TTC'),
    'P': ('CCT', 'CCC', 'CCA', 'CCG'),
    'S': ('TCT', 'TCC', 'TCA', 'TCG', 'AGT', 'AGC'),
    'T': ('ACT', 'ACC', 'ACA', 'ACG'),
    'W': ('TGG',),
    'Y': ('TAT', 'TAC'),
    'V': ('GTT', 'GTC', 'GTA', 'GTG'),
    '*': ('TAA', 'TAG', 'TGA'),
}


def amino_acid_to_codon(amino_acid):
    """
    Return the DNA codons that encode a one-letter amino-acid code.

    Args:
        amino_acid (str): One-letter amino-acid code, case-insensitive.
            ``'*'`` selects the stop codons.

    Returns:
        list: A new list of codon strings. The list is empty when the code is
        not in the table or when ``amino_acid`` is not a string (for example
        ``None`` or the NaN that pandas produces for an empty cell).
    """
    # Non-string values have no .upper(); treat them like any unknown code
    # instead of raising AttributeError.
    if not isinstance(amino_acid, str):
        return []

    # Copy into a fresh list so callers may mutate the result freely without
    # touching the shared table.
    return list(_GENETIC_CODE.get(amino_acid.upper(), ()))


def main(ref, infile, outfile):
    """
    Copy the rows of a gzipped CSV whose name matches a reference key.

    The second sheet of the ``ref`` workbook is read. For each of its rows the
    value in the second column is expanded with ``amino_acid_to_codon`` and
    every resulting codon is combined with the value in the third column as
    ``"<codon>_<value>"``; these strings form the set of accepted keys.

    ``infile`` is then read line by line. Its first line is the header and is
    always copied to ``outfile``. For every following line the field under
    ``sequence_name`` (or ``Target_name`` when the header has no
    ``sequence_name``) is split on ``"_"``, its first two pieces are dropped,
    surrounding double quotes are stripped, and the line is copied only when
    the remainder is an accepted key. Blank lines are skipped.

    Args:
        ref: Excel workbook, passed to ``pandas.read_excel``.
        infile: Path of the gzip-compressed, comma-separated input file.
        outfile: Path of the gzip-compressed output file; missing parent
            directories are created.

    Raises:
        ValueError: If a data line is reached and the header contains neither
            ``sequence_name`` nor ``Target_name``.
    """
    print(infile, outfile)
    df = pd.read_excel(ref, sheet_name=1)

    # Columns are addressed by position (second and third), not by name.
    keys = {
        f"{codon}_{row[2]}"
        for row in df.itertuples(index=False, name=None)
        for codon in amino_acid_to_codon(row[1])
    }

    out_dir = os.path.dirname(outfile)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with gzip.open(outfile, "wt") as w, gzip.open(infile, "rt") as r:
        header_line = next(r, None)
        if header_line is None:
            # Empty input: leave an empty output file behind.
            return

        header_line = header_line.strip()
        w.write(header_line + "\n")

        # NOTE: fields are split on bare commas, so quoted fields that contain
        # a comma are not supported.
        header = header_line.split(",")

        # Resolve the name column once instead of once per data line.
        if "sequence_name" in header:
            target = header.index("sequence_name")
        elif "Target_name" in header:
            target = header.index("Target_name")
        else:
            target = None

        for line in r:
            line = line.strip()
            if not line:
                # A blank line has no fields to index.
                continue

            # Raised lazily so a header-only file is still copied as before.
            if target is None:
                raise ValueError(
                    "header has neither a 'sequence_name' nor a "
                    f"'Target_name' column: {header_line!r}"
                )

            name = line.split(",")[target]
            # Drop the first two "_"-separated pieces; what is left is compared
            # against the "<codon>_<value>" keys.
            key = "_".join(name.split("_")[2:]).strip('"')

            if key in keys:
                w.write(line + "\n")


if __name__ == "__main__":
    # Hand main() to python-fire to run as the command-line entry point. The
    # import stays inside the guard so importing this module does not need fire.
    import fire

    fire.Fire(main)