Advanced features: group_by
This example demonstrates advanced table formatting features in rtflite,
focusing on the group_by functionality that provides enhanced readability
by suppressing duplicate values within groups.
Overview
The group_by feature is particularly useful for clinical trial listings
where multiple rows belong to the same subject or treatment group.
Instead of repeating identical values in every row, group_by displays
the value only once per group, leaving subsequent rows blank for better
visual organization.
Key benefits
- Improved readability: Reduces visual clutter by eliminating redundant information
- Clinical standards compliance: Follows pharmaceutical industry conventions for listing formats
- Hierarchical grouping: Supports multiple columns with nested group relationships
Imports
from importlib.resources import files
import polars as pl
import rtflite as rtf
Load and prepare adverse events data
Load the adverse events dataset and create a subset for demonstration:
# Locate the adverse events parquet file inside the rtflite.data package
# and read it into a polars DataFrame
data_path = files("rtflite.data").joinpath("adae.parquet")
df = pl.read_parquet(data_path)

# Keep a 60-row window starting at row offset 200 (rows 200-259)
ae_subset = df.slice(offset=200, length=60)
Create additional columns for a more comprehensive listing format:
# Derived text columns for the listing, one named expression each

# Subline header text: study identifier followed by site number
subline_text = (
    pl.lit("Trial Number: ")
    + pl.col("STUDYID")
    + pl.lit(", Site Number: ")
    + pl.col("SITEID").cast(pl.String)
).alias("SUBLINEBY")

# Subject line: subject ID, gender, race, age and treatment in one string
subject_text = (
    pl.lit("Subject ID = ")
    + pl.col("USUBJID")
    + pl.lit(", Gender = ")
    + pl.col("SEX")
    + pl.lit(", Race = ")
    + pl.col("RACE")
    + pl.lit(", AGE = ")
    + pl.col("AGE").cast(pl.String)
    + pl.lit(" Years")
    + pl.lit(", TRT = ")
    + pl.col("TRTA")
).alias("SUBJLINE")

# Adverse event term converted to title case
ae_term = pl.col("AEDECOD").str.to_titlecase().alias("AEDECD1")

# Duration string: ADURN value, a space, then ADURU
duration_text = (
    pl.col("ADURN").cast(pl.String) + pl.lit(" ") + pl.col("ADURU")
).alias("DUR")

# Columns kept for the listing, in output order
listing_columns = [
    "SUBLINEBY",
    "TRTA",
    "SUBJLINE",
    "USUBJID",
    "ASTDY",
    "AEDECD1",
    "DUR",
    "AESEV",
    "AESER",
    "AEREL",
    "AEACN",
    "AEOUT",
]

# Sort keys that place related events next to each other
listing_sort_keys = ["SUBLINEBY", "TRTA", "SUBJLINE", "USUBJID", "ASTDY"]

# Add the derived columns, keep the listing columns, then sort
ae_t1 = (
    ae_subset.with_columns(subline_text, subject_text, ae_term, duration_text)
    .select(listing_columns)
    .sort(listing_sort_keys)
)
Demonstrate single column group_by
Start with a simple example using a single column for grouping:
# Input data: four listing columns, first 15 rows, ordered by subject ID
# and then adverse event term
single_data = (
    ae_t1.select(["USUBJID", "AEDECD1", "AESEV", "AESER"])
    .head(15)
    .sort(["USUBJID", "AEDECD1"])
)

# Two-line title
single_title = rtf.RTFTitle(
    text=["Adverse Events Listing", "Example 1: Single Column group_by"],
    text_convert=False,
)

# Bold column headers, one per data column
single_header = rtf.RTFColumnHeader(
    text=["Subject ID", "Adverse Event", "Severity", "Serious"],
    text_format="b",
    text_justification=["l", "l", "c", "c"],
)

# Body grouped on subject ID and adverse event.
# NOTE(review): the title says "Single Column" but group_by lists two
# columns; confirm which one is intended.
single_body = rtf.RTFBody(
    group_by=["USUBJID", "AEDECD1"],
    col_rel_width=[3, 4, 2, 2],
    text_justification=["l", "l", "c", "c"],
)

single_footnote = rtf.RTFFootnote(
    text="Note: Subject ID and Adverse Event values are shown only once per group for better readability",
    text_convert=False,
)

# Assemble the document from its parts
doc_single = rtf.RTFDocument(
    df=single_data,
    rtf_title=single_title,
    rtf_column_header=single_header,
    rtf_body=single_body,
    rtf_footnote=single_footnote,
)

# Write the RTF output file
doc_single.write_rtf("advanced-group-by-single.rtf")
Multi-page example with group context
Demonstrate how group_by works with pagination, including context restoration:
# Take up to 100 rows for the multi-page demonstration. head() returns
# fewer rows when the frame is shorter, and ae_t1 derives from a 60-row slice.
ae_large = ae_t1.head(100)

# Input data: five listing columns ordered by subject ID and study day
multipage_data = ae_large.select(
    ["USUBJID", "ASTDY", "AEDECD1", "AESEV", "AESER"]
).sort(["USUBJID", "ASTDY"])

# Page setup with nrow=25 to force pagination
multipage_page = rtf.RTFPage(nrow=25)

multipage_title = rtf.RTFTitle(
    text=["Adverse Events Listing", "Example 3: Multi-page with group_by"],
    text_convert=False,
)

# Bold column headers, one per data column
multipage_header = rtf.RTFColumnHeader(
    text=["Subject ID", "Study Day", "Adverse Event", "Severity", "Serious"],
    text_format="b",
    text_justification=["l", "c", "l", "c", "c"],
)

# Body grouped on subject ID and study day
multipage_body = rtf.RTFBody(
    group_by=["USUBJID", "ASTDY"],
    col_rel_width=[3, 1, 4, 2, 2],
    text_justification=["l", "c", "l", "c", "c"],
)

multipage_footnote = rtf.RTFFootnote(
    text=[
        "Note: In multi-page listings, group context is automatically restored",
        "at the beginning of each new page for better readability.",
    ],
    text_convert=False,
)

# Assemble the document from its parts
doc_multipage = rtf.RTFDocument(
    df=multipage_data,
    rtf_page=multipage_page,
    rtf_title=multipage_title,
    rtf_column_header=multipage_header,
    rtf_body=multipage_body,
    rtf_footnote=multipage_footnote,
)

# Write the RTF output file
doc_multipage.write_rtf("advanced-group-by-multipage.rtf")
Combining group_by with new_page (treatment separation)
Demonstrate the powerful combination of group_by and new_page for clinical trial reporting:
# Keep rows from two treatment arms so the output has several treatment groups
treatment_arms = ["Placebo", "Xanomeline High Dose"]
treatment_rows = ae_t1.filter(pl.col("TRTA").is_in(treatment_arms))

# Keep the listing columns, cap at 40 rows, then order by treatment,
# subject ID and study day
ae_with_treatments = (
    treatment_rows.select(["TRTA", "USUBJID", "ASTDY", "AEDECD1", "AESEV"])
    .head(40)
    .sort(["TRTA", "USUBJID", "ASTDY"])
)

treatment_title = rtf.RTFTitle(
    text=[
        "Adverse Events Listing",
        "Example 5: group_by + new_page (Treatment Separation)",
    ],
    text_convert=False,
)

# Bold headers for the four columns other than TRTA
treatment_header = rtf.RTFColumnHeader(
    text=["Subject ID", "Study Day", "Adverse Event", "Severity"],
    col_rel_width=[3, 1, 4, 2],
    text_format="b",
    text_justification=["l", "c", "l", "c"],
)

# NOTE(review): the body attributes have five entries (one per data column,
# TRTA included) while the header has four; confirm this is what rtflite
# expects when page_by is used.
treatment_body = rtf.RTFBody(
    # Separate pages by treatment
    page_by=["TRTA"],
    # Force a new page for each treatment
    new_page=True,
    # Display treatment as a spanning row and remove the column
    pageby_row="first_row",
    # Suppress duplicates within each treatment page
    group_by=["USUBJID", "ASTDY"],
    col_rel_width=[2, 3, 1, 4, 2],
    border_top=["single", "", "", "", ""],
    border_bottom=["single", "", "", "", ""],
    text_justification=["l", "l", "c", "l", "c"],
    # Repeat headers on each treatment page
    pageby_header=True,
)

treatment_footnote = rtf.RTFFootnote(
    text=[
        "Example of group_by + new_page combination:",
        "- Each treatment group gets its own page(s) (new_page=True)",
        "- Within each treatment, USUBJID and ASTDY are suppressed when duplicate (group_by)",
        "- Headers are repeated on each treatment page (pageby_header=True)",
    ],
    text_convert=False,
)

# Assemble the document from its parts
doc_treatment_separated = rtf.RTFDocument(
    df=ae_with_treatments,
    rtf_title=treatment_title,
    rtf_column_header=treatment_header,
    rtf_body=treatment_body,
    rtf_footnote=treatment_footnote,
)

# Write the RTF output file
doc_treatment_separated.write_rtf("advanced-group-by-group-newpage.rtf")
Demonstrating subline_by with subheader generation
The subline_by feature creates visually distinct subheader rows that
group related data, making listings easier to read and follow:
# Rows from two treatment arms, capped at 30, ordered so that records with
# the same SUBLINEBY value sit together
subline_arms = ["Placebo", "Xanomeline High Dose"]
ae_subline_data = (
    ae_t1.filter(pl.col("TRTA").is_in(subline_arms))
    .head(30)
    .sort(["SUBLINEBY", "TRTA", "USUBJID"])
)

# Input data: the subline column plus four listing columns
subline_data = ae_subline_data.select(
    ["SUBLINEBY", "USUBJID", "AEDECD1", "AESEV", "AESER"]
)

subline_title = rtf.RTFTitle(
    text=[
        "Adverse Events Listing",
        "Example 6: subline_by with Subheader Generation",
    ],
    text_convert=False,
)

# Headers for the columns that remain after SUBLINEBY removal
subline_header = rtf.RTFColumnHeader(
    text=["Subject ID", "Adverse Event", "Severity", "Serious"],
    col_rel_width=[3, 2, 4, 2],
    text_format="b",
    text_justification=["l", "l", "c", "c"],
)

# subline_by creates subheader rows from SUBLINEBY values.
# NOTE(review): col_rel_width has four entries but text_justification has
# five; confirm which length rtflite expects when subline_by is used.
subline_body = rtf.RTFBody(
    subline_by=["SUBLINEBY"],
    col_rel_width=[3, 2, 4, 2],
    text_justification=["l", "l", "l", "c", "c"],
)

subline_footnote = rtf.RTFFootnote(
    text=[
        "Note: subline_by creates subheader rows that span all columns",
        "- SUBLINEBY column values become bold subheader text",
        "- Original SUBLINEBY column is removed from table data",
        "- Subheaders provide clear visual grouping of related records",
    ],
    text_convert=False,
)

# Assemble the document from its parts
doc_subline = rtf.RTFDocument(
    df=subline_data,
    rtf_title=subline_title,
    rtf_column_header=subline_header,
    rtf_body=subline_body,
    rtf_footnote=subline_footnote,
)

# Write the RTF output file
doc_subline.write_rtf("advanced-group-by-subline.rtf")
Advanced combination - subline_by with group_by
Demonstrate the powerful combination of subline_by and group_by for
comprehensive clinical listings:
# Row position modulo 3, used to cycle through the three visit labels
visit_cycle = pl.int_range(pl.len()) % 3

# Label each row "Visit 1", "Visit 2" or "Visit 3" by its row position
visit_label = (
    pl.when(visit_cycle == 0)
    .then(pl.lit("Visit 1"))
    .when(visit_cycle == 1)
    .then(pl.lit("Visit 2"))
    .otherwise(pl.lit("Visit 3"))
    .alias("VISIT")
)

# First 40 rows with the VISIT column added, ordered by subline, subject
# ID and visit
ae_comprehensive = (
    ae_t1.head(40)
    .with_columns(visit_label)
    .sort(["SUBLINEBY", "USUBJID", "VISIT"])
)

# Input data: the subline column plus four listing columns
comprehensive_data = ae_comprehensive.select(
    ["SUBLINEBY", "USUBJID", "VISIT", "AEDECD1", "AESEV"]
)

comprehensive_title = rtf.RTFTitle(
    text=[
        "Adverse Events Listing",
        "Example 7: subline_by + group_by Comprehensive",
    ],
    text_convert=False,
)

# Headers for the columns that remain after SUBLINEBY removal
comprehensive_header = rtf.RTFColumnHeader(
    text=["Subject ID", "Visit", "Adverse Event", "Severity"],
    col_rel_width=[3, 2, 4, 2],
    text_format="b",
    text_justification=["l", "c", "l", "c"],
)

# NOTE(review): col_rel_width has four entries but text_justification has
# five; confirm which length rtflite expects when subline_by is used.
comprehensive_body = rtf.RTFBody(
    # Creates trial/site subheaders
    subline_by=["SUBLINEBY"],
    # Suppresses duplicate subject IDs
    group_by=["USUBJID"],
    col_rel_width=[3, 2, 4, 2],
    text_justification=["l", "l", "c", "l", "c"],
)

comprehensive_footnote = rtf.RTFFootnote(
    text=[
        "Advanced example combining subline_by and group_by:",
        "- SUBLINEBY creates bold subheader rows for trial/site information",
        "- group_by suppresses duplicate USUBJID values within each group",
        "- Result: Clear visual hierarchy with minimal redundancy",
    ],
    text_convert=False,
)

# Assemble the document from its parts
doc_comprehensive = rtf.RTFDocument(
    df=comprehensive_data,
    rtf_title=comprehensive_title,
    rtf_column_header=comprehensive_header,
    rtf_body=comprehensive_body,
    rtf_footnote=comprehensive_footnote,
)

# Write the RTF output file
doc_comprehensive.write_rtf("advanced-group-by-comprehensive.rtf")
page_by with divider row filtering
A common scenario in clinical reporting is data that includes divider rows marked with "-----" to visually separate sections. The page_by feature automatically filters these divider rows to create clean output while preserving all associated data.
Example: Data with divider rows
# Column data for the example; the first row uses "-----" as its section
# value to act as a divider row
divider_columns = {
    "section": ["-----", "Age", "Age"],
    "item": ["Participant in Population", " <60", " >=60"],
    "value": [55, 25, 30],
}

# Build the example frame (this rebinds the name df) and display it
df = pl.DataFrame(divider_columns)
df
When using page_by on data containing "-----" divider rows, rtflite automatically filters out the divider rows while preserving the data associated with them:
# Body paged by the "section" column of the divider example data.
# NOTE(review): col_rel_width has two entries while text_justification and
# the border lists have three; confirm which length rtflite expects when
# page_by is used.
divider_body = rtf.RTFBody(
    page_by="section",
    col_rel_width=[1, 1],
    text_justification=["l", "l", "c"],
    border_top=["single", "", ""],
    border_bottom=["single", "", ""],
)

# Assemble the document from the example frame and the body settings
doc_divider = rtf.RTFDocument(df=df, rtf_body=divider_body)

# Write the RTF output file
doc_divider.write_rtf("advanced-group-by-divider-filtering.rtf")