Files
procas12f/merge_results.py

54 lines
1.2 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import polars as pd
from glob import glob
from tqdm import tqdm
from multiprocessing import Pool
def read_file(args):
    """Load one result CSV into a polars DataFrame tagged with a confidence.

    Args:
        args: A ``(path, nicking)`` pair, packed into a single sequence so the
            function can be mapped over a process pool. ``path`` is the file
            to read. When ``nicking`` is truthy, a ``sequence_name`` column is
            added holding the file's base name up to ``"_nicking"``.

    Returns:
        The DataFrame with an extra ``conf`` column (``"low"`` when
        ``"low_conf"`` occurs anywhere in ``path``, otherwise ``"high"``), or
        ``None`` when the file is empty, excluded by name, or unreadable.
    """
    path, nicking = args
    if os.path.getsize(path) < 1:
        return None

    # Match the exclusion prefixes against the base name: ``path`` may carry a
    # directory prefix, in which case testing the full path never matches.
    # NOTE(review): "FABCI" looks like a typo for "FANCI" -- confirm before
    # changing, since it alters which files are skipped.
    name = os.path.basename(path)
    if name.startswith(("FANCD2", "BRIP1", "RAD51C", "FABCI", "FANCA")):
        return None

    try:
        df = pd.read_csv(path)
        if nicking:
            key = name.split("_nicking")[0]
            df = df.with_columns(sequence_name=pd.lit(key))
    except Exception:
        # Best effort: report the offending file and let the merge continue.
        print(path)
        return None

    conf = "low" if "low_conf" in path else "high"
    return df.with_columns(conf=pd.lit(conf))
def main(indir, output, nicking=False, processes=6):
    """Merge every readable result file in ``indir`` into one gzipped CSV.

    Args:
        indir: Directory whose direct entries are passed to ``read_file``.
        output: Path of the gzip-compressed CSV to write.
        nicking: Forwarded to ``read_file`` for every file.
        processes: Number of worker processes used to read the files.

    Raises:
        ValueError: If no file in ``indir`` produced a DataFrame.
    """
    print(indir, output, nicking)

    # glob returns entries in arbitrary order; sort so the merged rows come
    # out in the same order on every run.
    fs = sorted(glob(os.path.join(indir, "*")))

    with Pool(processes) as p:
        jobs = [(x, nicking) for x in fs]
        dfs = list(tqdm(p.imap(read_file, jobs), total=len(fs)))

    frames = [x for x in dfs if x is not None]
    if not frames:
        # Concatenating an empty list fails with an opaque message; say which
        # directory produced nothing instead.
        raise ValueError(f"no readable result files found in {indir!r}")

    df = pd.concat(frames)
    with gzip.open(output, "wb") as w:
        df.write_csv(w)
if __name__ == "__main__":
    # Expose ``main`` as a command-line interface through python-fire.
    import fire

    fire.Fire(main)