data.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import math
  2. import os
  3. from functools import reduce
  4. from typing import Optional
  5. import pandas as pd
  6. class Data:
  7. def __init__(self, base_path):
  8. self.base_path = base_path
  9. self.df: Optional[pd.DataFrame] = None
  10. self.current_file_name = ''
  11. def read_folder(self):
  12. l = [file for file in os.listdir(f'{self.base_path}') if file.endswith('.tsv')]
  13. return sorted(l, reverse=True)
  14. def read_data(self, file_name):
  15. if file_name is None:
  16. return None
  17. self.current_file_name = file_name
  18. self.df = pd.read_csv(f'{self.base_path}/{file_name}', index_col=0, sep='\t')
  19. def read_data_old(self, path):
  20. if path is None:
  21. return None
  22. files_names = [file for file in os.listdir(f'{self.base_path}/{path}') if file.endswith('.tsv')]
  23. frames = [pd.read_csv(f'{self.base_path}/{path}/{f}', index_col=0, sep='\t') for f in files_names]
  24. if frames is None or len(frames) == 0:
  25. return None
  26. elif len(frames) == 1:
  27. self.df = frames[0]
  28. else:
  29. self.df = reduce(
  30. lambda left, right: left.merge(right, left_index=True, right_index=True,
  31. sort=True,
  32. how='outer'), frames[1:], frames[0])
  33. def get_columns(self):
  34. if self.df is None:
  35. return []
  36. else:
  37. return list(self.df.columns)