Skip to content

Commit 53db0b8

Browse files
author
miranov25
committed
feat(DataFrameUtils): Enhance docstrings and error handling for scatter plots
- Added NumPy-style docstrings to df_draw_scatter and drawExample
1 parent 161f0f0 commit 53db0b8

File tree

1 file changed

+106
-42
lines changed

1 file changed

+106
-42
lines changed

UTILS/dfextensions/DataFrameUtils.py

Lines changed: 106 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,47 @@ def df_draw_scatter(
2020
jitter=False,
2121
show=True # if False, don't plt.show(); always return (fig, ax)
2222
):
23+
"""
24+
Create a scatter plot from a DataFrame with optional color, marker size, and jitter.
25+
26+
Parameters
27+
----------
28+
df : pandas.DataFrame
29+
Input DataFrame containing the data.
30+
expr : str
31+
Expression in 'y:x' format specifying y-axis and x-axis columns (e.g., 'sigma:pTmin').
32+
selection : str, bool array, or callable, optional
33+
Filter to apply. Can be a pandas query string (engine='python'), a boolean mask,
34+
or a callable returning a mask (default: None, uses full df).
35+
color : str, optional
36+
Column name for color mapping (continuous or categorical, default: None).
37+
marker : str, optional
38+
Column name for marker size mapping (numeric, default: None).
39+
cmap : str, optional
40+
Colormap name (e.g., 'tab10', default: 'tab10').
41+
jitter : bool, optional
42+
Add small random jitter to x and y coordinates (default: False).
43+
show : bool, optional
44+
Display the plot if True (default: True); always returns (fig, ax).
45+
46+
Returns
47+
-------
48+
tuple
49+
(fig, ax) : matplotlib Figure and Axes objects for further customization.
50+
51+
Raises
52+
------
53+
ValueError
54+
If expr is not in 'y:x' format or selection query fails.
55+
TypeError
56+
If selection is neither str, bool array, nor callable.
57+
58+
Notes
59+
-----
60+
- Filters NA values from x and y before plotting.
61+
- Jitter helps visualize quantized data (x: ±0.1, y: ±2e-4).
62+
- Colorbar is added for continuous color; categorical colors use the first color for NA.
63+
"""
2364
# --- parse "y:x"
2465
try:
2566
y_col, x_col = expr.split(":")
@@ -144,46 +185,69 @@ def df_draw_scatter_categorical(
144185
show: bool = False,
145186
):
146187
"""
147-
Scatter plot with categorical COLOR and MARKER SHAPE; flexible size control. Returns (fig, ax).
148-
149-
Parameters
150-
----------
151-
expr : str
152-
ROOT-like "y:x" expression, e.g. "sigma:pTmin".
153-
selection : str, optional
154-
pandas query string evaluated with engine="python".
155-
Example: "productionId.str.contains(r'(?:LHC25b8a|LHC24)', regex=True, na=False)".
156-
color : str, optional
157-
Categorical column used for colors (legend #1).
158-
marker_style : str, optional
159-
Categorical column used for marker shapes (legend #2).
160-
marker_size : None | "" | number | str, optional
161-
- None or "" → constant default size (150 pt^2).
162-
- number → fixed size (pt^2) for all points.
163-
- str (column):
164-
* numeric → min–max normalize to [100, 400] pt^2
165-
* non-numeric → map categories to sizes (150, 220, 290, …)
166-
jitter : bool, default False
167-
Add small uniform jitter to x and y.
168-
top_k_color, other_label_color, order_color :
169-
control color categories (reduce tail to 'Other', set order).
170-
top_k_marker, other_label_marker, order_marker :
171-
control marker-shape categories.
172-
palette : list, optional
173-
Colors to cycle through; defaults to repeating 'tab20'.
174-
markers : list, optional
175-
Marker shapes; defaults to ["o","s","^","D","P","X","v","<",">","h","H","*","p"].
176-
legend_outside : bool, default True
177-
Reserve right margin and place legends outside so they aren’t clipped.
178-
legend_cols_color, legend_cols_marker : int
179-
Number of columns for each legend block.
180-
show : bool, default True
181-
If True, plt.show() is called. Function always returns (fig, ax).
182-
183-
Raises
184-
------
185-
ValueError / TypeError on malformed expr or failed selection.
186-
"""
188+
Create a scatter plot with categorical colors and marker shapes from a DataFrame.
189+
190+
Parameters
191+
----------
192+
df : pandas.DataFrame
193+
Input DataFrame containing the data.
194+
expr : str
195+
Expression in 'y:x' format specifying y-axis and x-axis columns (e.g., 'sigma:pTmin').
196+
selection : str, optional
197+
Pandas query string (engine='python') to filter data (e.g., "productionId.str.contains(...)").
198+
color : str, optional
199+
Column name for categorical color mapping (legend #1, default: None).
200+
marker_style : str, optional
201+
Column name for categorical marker shape mapping (legend #2, default: None).
202+
marker_size : None | "" | number | str, optional
203+
- None or "" : Constant size (150 pt²).
204+
- number : Fixed size (pt²) for all points.
205+
- str : Column name; numeric values normalized to [100, 400] pt², categorical cycled (150, 220, ...).
206+
jitter : bool, default False
207+
Add small uniform jitter to x and y coordinates.
208+
top_k_color : int, optional
209+
Keep top-K color categories, others mapped to `other_label_color` (default: None).
210+
other_label_color : str, default "Other"
211+
Label for non-top-K color categories.
212+
order_color : list, optional
213+
Explicit order for color legend categories (default: by frequency).
214+
top_k_marker : int, optional
215+
Keep top-K marker categories, others mapped to `other_label_marker` (default: None).
216+
other_label_marker : str, default "Other"
217+
Label for non-top-K marker categories.
218+
order_marker : list, optional
219+
Explicit order for marker legend categories (default: by frequency).
220+
palette : list, optional
221+
List of color specs to cycle (default: repeats 'tab20').
222+
markers : list, optional
223+
List of marker styles (default: ["o", "s", "^", ...]).
224+
legend_outside : bool, default True
225+
Place legends outside plot, reserving right margin.
226+
legend_cols_color : int, default 1
227+
Number of columns in color legend.
228+
legend_cols_marker : int, default 1
229+
Number of columns in marker legend.
230+
show : bool, default True
231+
Display the plot if True (default: True); always returns (fig, ax).
232+
233+
Returns
234+
-------
235+
tuple
236+
(fig, ax) : matplotlib Figure and Axes objects.
237+
238+
Raises
239+
------
240+
ValueError
241+
If expr is not 'y:x' format or selection query fails.
242+
TypeError
243+
If selection is not a string or marker_size is invalid.
244+
245+
Notes
246+
-----
247+
- Designed for ALICE data visualization (e.g., D0 resolution plots).
248+
- Filters NA values and handles categorical data robustly.
249+
- Legends are added outside to avoid clipping; adjust `bbox_to_anchor` if needed.
250+
"""
187251
# --- parse "y:x"
188252
try:
189253
y_col, x_col = expr.split(":")
@@ -385,15 +449,15 @@ def drawExample():
385449
marker_size=100, # pt²
386450
)
387451
fig.savefig("out.png", dpi=200, bbox_inches="tight")
388-
452+
##
389453
fig, ax = df_draw_scatter_categorical(
390454
df, "sigma:pTmin",
391455
selection="productionId.str.contains(r'(?:LHC24|LHC25a5)', regex=True, na=False)",
392456
color="productionId",
393457
marker_style="centmin",
394458
marker_size=100, # pt²
395459
)
396-
fig.savefig("resol_LHC24_LHC25a5.png", dpi=200, bbox_inches="tight")
460+
fig.savefig("resol_LHC24_LHC25a5.png", dpi=200)
397461

398462
fig, ax = df_draw_scatter_categorical(
399463
df, "sigma:pTmin",

0 commit comments

Comments
 (0)