我从数据库中选出了一部分数值列,希望遍历这些列:每次选定其中一列作为 target_column,并将它与数据框中另外两列之间的数值运算结果进行比较。但是,我不确定该如何比较这些结果(例如判断 col1 * col2 = target_column 是否成立)。
# Walk every unordered pair of numeric columns; the pair supplies the two
# operands of the arithmetic relationship being tested.
for col1, col2 in combinations(numeric_cols, 2):
    # Every numeric column is a candidate result (target) for the pair.
    for target_column in numeric_cols:
        # A column cannot be both an operand and the result, so skip it.
        if target_column in (col1, col2):
            continue
        # NOTE: the comparison of an operation on col1/col2 against
        # target_column belongs here; this snippet stops before it.
编辑:我已经解决了其中一部分问题,但仍然不确定这是否是最高效的做法:
def analyse_relationships(df, threshold=0.001):
    """Find numeric columns that equal a simple operation on two others.

    For every unordered pair of numeric columns ``(col1, col2)`` (in column
    order) the element-wise product, sum and difference ``col1 - col2`` are
    compared against each remaining numeric column using ``np.allclose``.

    Args:
        df: DataFrame to inspect; only columns selected by
            ``select_dtypes(include=[np.number])`` are considered.
        threshold: Relative tolerance passed to ``np.allclose`` as ``rtol``
            (the default absolute tolerance of ``np.allclose`` still applies).

    Returns:
        A list of strings such as ``"a * b = c"``, one per (pair, target)
        match. For a given pair and target only the first matching operation
        is reported, checked in the order ``*``, ``+``, ``-``.
    """
    numeric_cols = df.select_dtypes(include=[np.number])
    relationships = []

    # With zero rows (or zero numeric columns) there is nothing to compare.
    # np.allclose on zero-length inputs is vacuously True, which would
    # otherwise report every triple as a product relationship.
    if numeric_cols.empty:
        return relationships

    for col1, col2 in combinations(numeric_cols, 2):
        left = numeric_cols[col1]
        right = numeric_cols[col2]
        # These results depend only on the pair, so compute them once here
        # instead of once per target column.
        candidates = (
            ("*", left * right),
            ("+", left + right),
            ("-", left - right),
        )

        for target_column in numeric_cols:
            # Skip if the target column is one of the relationship columns.
            if target_column in (col1, col2):
                continue

            target = numeric_cols[target_column]
            for symbol, values in candidates:
                if np.allclose(values, target, rtol=threshold):
                    relationships.append(
                        f"{col1} {symbol} {col2} = {target_column}"
                    )
                    # Keep first-match semantics: one operation per triple.
                    break

    return relationships