data.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import math
  2. import os
  3. from functools import reduce
  4. from typing import Optional
  5. import pandas as pd
  class Data:
      """Load tab-separated tables from folders under a base directory.

      The instance remembers the folder and file it was last asked to read and
      keeps the most recently loaded table in ``df``.
      """

      def __init__(self, base_path):
          """Remember *base_path*; no file is touched until a read method runs."""
          self.base_path = base_path
          # Table loaded by the latest successful read; None until then.
          self.df: Optional[pd.DataFrame] = None
          # Folder / file names passed to the latest read_folder / read_data call.
          self.current_folder_name = ''
          self.current_file_name = ''

      def _list_tsv_files(self, path) -> list:
          """Return the ``.tsv`` file names found in ``base_path/path``, sorted.

          ``os.listdir`` gives names in arbitrary order; sorting keeps both the
          listing and the merge order in ``read_data_old`` reproducible.
          """
          return sorted(
              name
              for name in os.listdir(f'{self.base_path}/{path}')
              if name.endswith('.tsv')
          )

      def read_folder(self, path) -> list:
          """Select folder *path* and return the ``.tsv`` file names it contains.

          The folder name is recorded even when *path* is ``None``; in that case
          an empty list is returned and the file system is not accessed.
          """
          self.current_folder_name = path
          if path is None:
              return []
          return self._list_tsv_files(path)

      def read_data(self, file_name) -> None:
          """Load *file_name* from the currently selected folder into ``df``.

          The file is parsed as tab-separated text with its first column used as
          the index. Nothing happens when *file_name* is ``None``.
          """
          if file_name is None:
              return None
          self.current_file_name = file_name
          self.df = pd.read_csv(
              f'{self.base_path}/{self.current_folder_name}/{file_name}',
              index_col=0,
              sep='\t',
          )
          return None

      def read_data_old(self, path) -> None:
          """Load every ``.tsv`` file in folder *path* and merge them into ``df``.

          Files are read in sorted name order and combined with an outer join on
          their index, with the result sorted by index. ``df`` is left untouched
          when *path* is ``None`` or the folder holds no ``.tsv`` files.
          """
          if path is None:
              return None
          frames = [
              pd.read_csv(f'{self.base_path}/{path}/{name}', index_col=0, sep='\t')
              for name in self._list_tsv_files(path)
          ]
          if not frames:
              return None
          first, *rest = frames
          # With a single frame ``rest`` is empty and reduce returns ``first``
          # unchanged, so no separate one-file branch is needed.
          self.df = reduce(
              lambda left, right: left.merge(
                  right,
                  left_index=True,
                  right_index=True,
                  sort=True,
                  how='outer',
              ),
              rest,
              first,
          )
          return None

      def get_columns(self) -> list:
          """Return the column labels of ``df``, or an empty list if none is loaded."""
          if self.df is None:
              return []
          return list(self.df.columns)