1234567891011121314151617181920212223242526272829303132333435363738394041424344 |
- import math
- import os
- from functools import reduce
- from typing import Optional
- import pandas as pd
class Data:
    """Load tab-separated (``.tsv``) tables found under a base directory.

    The most recently loaded table is kept on the instance as ``df`` so that
    other methods (for example ``get_columns``) can inspect it.
    """

    def __init__(self, base_path):
        """Store the directory to read from; no file is accessed here.

        Args:
            base_path: Directory that holds the ``.tsv`` files and/or the
                sub-folders passed to ``read_data_old``.
        """
        # Root directory every read is resolved against.
        self.base_path = base_path
        # Table loaded by the last successful read; None until then.
        self.df: Optional[pd.DataFrame] = None
        # File name last handed to read_data ('' until it is called).
        self.current_file_name = ''

    @staticmethod
    def _list_tsv(directory):
        """Return the entry names in *directory* that end with ``.tsv``."""
        return [name for name in os.listdir(directory) if name.endswith('.tsv')]

    def _read_tsv(self, relative_path):
        """Read one tab-separated file below ``base_path``.

        The first column of the file is used as the DataFrame index.
        """
        return pd.read_csv(f'{self.base_path}/{relative_path}', index_col=0, sep='\t')

    def read_folder(self):
        """Return the ``.tsv`` file names in ``base_path``, sorted descending."""
        return sorted(self._list_tsv(f'{self.base_path}'), reverse=True)

    def read_data(self, file_name):
        """Load ``base_path/file_name`` into ``self.df``.

        Args:
            file_name: Name of a tab-separated file inside ``base_path``.
                ``None`` is ignored and leaves the instance untouched.

        Returns:
            None. The result is stored on ``self.df`` and the name on
            ``self.current_file_name``.
        """
        if file_name is None:
            return
        self.current_file_name = file_name
        self.df = self._read_tsv(file_name)

    def read_data_old(self, path):
        """Load every ``.tsv`` file in ``base_path/path`` and outer-join them.

        The frames are joined on their index (outer join, result sorted by
        index). Files are processed in ascending name order so the column
        order of the merged table does not depend on the order in which the
        operating system happens to list the directory.

        Args:
            path: Sub-folder of ``base_path``. ``None`` is ignored.

        Returns:
            None. The merged table is stored on ``self.df``; when the folder
            holds no ``.tsv`` file, ``self.df`` is left unchanged.
        """
        if path is None:
            return
        # os.listdir gives no ordering guarantee, so sort for reproducibility.
        file_names = sorted(self._list_tsv(f'{self.base_path}/{path}'))
        frames = [self._read_tsv(f'{path}/{name}') for name in file_names]
        if not frames:
            return
        # With a single frame reduce() simply returns it, so no special case.
        self.df = reduce(
            lambda left, right: left.merge(
                right,
                left_index=True,
                right_index=True,
                sort=True,
                how='outer',
            ),
            frames,
        )

    def get_columns(self):
        """Return the column labels of the loaded table, or ``[]`` if none."""
        if self.df is None:
            return []
        return list(self.df.columns)
|