Source code for gitlab2pandas.processing


import human_id
import pandas as pd
from gitlab2pandas.core import Core

class Processing(Core):
    """
    Post-processing of the stored pandas data frames.

    Currently this offers the pseudonymisation of user ids: every user id
    found in the known user-id columns is replaced by a human-id string.
    """

    # Columns that hold a single user id per row.
    _USER_ID_COLUMNS = (
        "user_id",
        "author_id",
        "owner_id",
        "assignee_id",
        "closed_by_id",
        "merge_user_id",
        "resolved_by_id",
    )

    # Columns that hold a list of user ids per row.
    _USER_ID_LIST_COLUMNS = (
        "assignees_ids",
        "reviewers_ids",
    )

    def replace_user_id(self) -> None:
        """
        Replace user_ids with a pseudonym generated by human-id.

        The users data frame gets an additional ``uuid`` column holding the
        pseudonym of each user and is saved again. Afterwards every other
        feature file is scanned for the known user-id columns and the ids
        found there are replaced by the matching pseudonym.

        There might be some user_ids in commits that are not connected to a
        User.

        Nothing happens when the users data frame is missing or empty.
        """
        users_df = self.get_pandas_data_frame(Core.Features.USERS)
        if users_df is None or users_df.empty:
            return

        # NOTE(review): the pseudonym is seeded with the user's web_url, so it
        # is presumably reproducible for the same user -- confirm against the
        # human_id documentation.
        users = {}
        uuids = []
        # Walk the two needed columns directly instead of using iterrows(),
        # which builds a Series per row and may coerce the id's dtype.
        for user_id, web_url in zip(users_df["id"], users_df["web_url"]):
            uuid = human_id.generate_id(seed=web_url)
            uuids.append(uuid)
            users[user_id] = uuid

        users_df = users_df.assign(uuid=uuids)
        self.save_as_pandas(self.Features.USERS, users_df)

        for filename in self.Features.to_list():
            if filename == "Users":
                # The users file keeps its ids; it only gained the uuid column.
                continue
            for column in self._USER_ID_COLUMNS:
                self.__try_replace_id_with_uuid(filename, users, column)
            for column in self._USER_ID_LIST_COLUMNS:
                self.__try_replace_list_id_with_uuid(filename, users, column)

    def __try_replace_id_with_uuid(self, filename, users: dict, column: str) -> None:
        """
        Replaces all user_ids for a standard column in a pandas file with a
        human-id.

        Args:
            filename: Name of the pandas file to load and save again.
            users: Mapping from user id to its pseudonym.
            column: Name of the column holding one user id per row.

        Values that are not a key of ``users`` are left unchanged. The file
        is left untouched when it does not exist or lacks ``column``.
        """
        df = self.get_pandas_data_frame(filename)
        if df is None or column not in df:
            return

        def pseudonymise(value):
            try:
                return users.get(value, value)
            except TypeError:
                # Unhashable cell (e.g. a list): cannot be a key of ``users``.
                return value

        # Map the values in one pass instead of assigning strings into the
        # existing column through ``.loc`` once per user: recent pandas
        # versions deprecate setting values of an incompatible dtype
        # (e.g. a str into an int64 column).
        df = df.assign(**{column: df[column].map(pseudonymise)})
        self.save_as_pandas(filename, df)

    def __try_replace_list_id_with_uuid(self, filename, users: dict, column: str) -> None:
        """
        Replaces all user_ids for a list column in a pandas file with a
        human-id.

        Args:
            filename: Name of the pandas file to load and save again.
            users: Mapping from user id to its pseudonym.
            column: Name of the column holding a list of user ids per row.

        Ids that are not a key of ``users`` are dropped from the list. A cell
        that is not iterable (e.g. a missing value) becomes an empty list.
        The file is left untouched when it does not exist or lacks ``column``.
        """
        df = self.get_pandas_data_frame(filename)
        if df is None or column not in df:
            return

        def pseudonymise_all(user_ids):
            try:
                candidates = iter(user_ids)
            except TypeError:
                # None / NaN instead of a list: there is no id to translate.
                return []
            return [users[user_id] for user_id in candidates if user_id in users]

        # Only the one column is rewritten; rebuilding the frame from its rows
        # would drop all columns for a frame without rows and re-infer the
        # dtypes of the other columns.
        df = df.assign(**{column: df[column].map(pseudonymise_all)})
        self.save_as_pandas(filename, df)