I modified the function plot_dq_scatter_dropdown to use the layout's shapes
argument instead of the add_vline and add_hline methods. Here is the revised function:
def _median_line_shapes(median_x, median_y):
    """Return the vertical and horizontal dashed median-line shape dicts."""
    line_style = {"color": "orange", "dash": "dash", "width": 2}
    return [
        # Vertical median line for rows: x is in data units, y spans the
        # whole plot area because it is expressed in paper coordinates.
        {
            "type": "line",
            "x0": median_x,
            "x1": median_x,
            "y0": 0,
            "y1": 1,
            "yref": "paper",
            "line": dict(line_style),
        },
        # Horizontal median line for the selected Y variable: y is in data
        # units, x spans the whole plot area in paper coordinates.
        {
            "type": "line",
            "x0": 0,
            "x1": 1,
            "xref": "paper",
            "y0": median_y,
            "y1": median_y,
            "line": dict(line_style),
        },
    ]


def _median_annotations(median_x, y_col, median_y):
    """Return the text annotation dicts that label the two median lines."""
    return [
        # Label pinned to the top of the vertical median line.
        {
            "x": median_x,
            "y": 1,
            "xref": "x",
            "yref": "paper",
            "text": "Median rows",
            "showarrow": False,
            "xanchor": "left",
            "yanchor": "bottom",
        },
        # Label pinned to the left end of the horizontal median line.
        {
            "x": 0,
            "y": median_y,
            "xref": "paper",
            "yref": "y",
            "text": f"Median {y_col}: {median_y:.2f}",
            "showarrow": False,
            "xanchor": "left",
            "yanchor": "bottom",
        },
    ]


def plot_dq_scatter_dropdown(df):
    """Show a scatter plot of "rows" against a Y column chosen from a dropdown.

    Every column of ``df`` other than "rows" and "outlier_prob" becomes one
    dropdown entry. Points are coloured by "outlier_prob". Dashed orange lines
    mark the median of "rows" and the median of the currently selected Y
    column; selecting another entry swaps the Y data, the title, the Y-axis
    title and the median lines/annotations in a single "update" call.

    Args:
        df: DataFrame with a "rows" column, an "outlier_prob" column and at
            least one further column. The further columns must support
            ``.median()`` with a result that can be formatted as ``:.2f``.

    Raises:
        ValueError: If ``df`` has no column besides "rows" and
            "outlier_prob", i.e. there is nothing to put on the Y axis.

    Note:
        Relies on ``go`` being available at file level (presumably
        ``plotly.graph_objects`` -- confirm against the file's imports).
        The figure is displayed via ``fig.show()``; nothing is returned.
    """
    # Columns offered on the Y axis (everything except the X and colour columns).
    y_columns = [col for col in df.columns if col not in ("rows", "outlier_prob")]

    # Median of rows is the same for every dropdown entry.
    median_x = df["rows"].median()

    # Fail early with a clear message instead of an IndexError further down.
    if not y_columns:
        raise ValueError(
            "df has no columns to plot besides 'rows' and 'outlier_prob'"
        )

    # Compute each Y median once; reused by the buttons and the initial view.
    medians = {col: df[col].median() for col in y_columns}

    # One dropdown button per Y column.
    buttons = []
    for y_col in y_columns:
        median_y = medians[y_col]
        buttons.append(
            dict(
                label=y_col,
                method="update",
                args=[
                    # Trace updates
                    {
                        "y": [df[y_col]],
                        "x": [df["rows"]],
                        "marker.color": [df["outlier_prob"]],
                    },
                    # Layout updates
                    {
                        "title": f"Scatter Plot: rows vs {y_col}",
                        "yaxis.title": y_col,
                        "shapes": _median_line_shapes(median_x, median_y),
                        "annotations": _median_annotations(
                            median_x, y_col, median_y
                        ),
                    },
                ],
            )
        )

    # The first Y column is what the figure shows before any selection.
    initial_y = y_columns[0]
    initial_median_y = medians[initial_y]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df["rows"],
            y=df[initial_y],
            mode="markers",
            marker=dict(
                color=df["outlier_prob"],
                colorscale="viridis",
                showscale=True,
                colorbar=dict(title="Outlier Probability"),
            ),
            hoverinfo="text",
            text=df.index,
            showlegend=False,
        )
    )

    # Dropdown menu plus the median lines/annotations for the initial column.
    fig.update_layout(
        title=f"Scatter Plot: rows vs {initial_y}",
        xaxis_title="rows",
        yaxis_title=initial_y,
        updatemenus=[
            {
                "buttons": buttons,
                "direction": "down",
                "showactive": True,
                "x": 0.17,
                "y": 1.15,
                "type": "dropdown",
            }
        ],
        shapes=_median_line_shapes(median_x, initial_median_y),
        annotations=_median_annotations(median_x, initial_y, initial_median_y),
    )

    # Show the plot
    fig.show()
This should calculate the median value for each column and include it in the corresponding button configuration. It also annotates the lines accordingly. I am attaching some images of the resulting output.