@@ -1051,18 +1051,39 @@ impl ColumnChunkMetaData {
10511051 self . geo_statistics . as_deref ( )
10521052 }
10531053
1054- /// Returns the page encoding stats,
1055- /// or `None` if no page encoding stats are available.
1054+ /// Returns the page encoding statistics, or `None` if no page encoding statistics
1055+ /// are available.
10561056 pub fn page_encoding_stats ( & self ) -> Option < & Vec < PageEncodingStats > > {
// `as_ref` converts the stored `Option<Vec<_>>` into `Option<&Vec<_>>`, so the caller
// borrows the list held by `self`; nothing is cloned or moved out.
10571057 self . encoding_stats . as_ref ( )
10581058 }
10591059
1060- /// Returns the page encoding stats reduced to a bitmask.
1060+ /// Returns the page encoding statistics reduced to a bitmask, or `None` if statistics are
1061+ /// not available.
10611062 ///
1062- /// Decoding the full page encoding statistics can be costly, and is not always necessary.
1063- /// This field contains a mask of all encodings used for data pages. This can still support
1064- /// some uses of the full statistics, such as determining if all data pages are dictionary
1065- /// encoded.
1063+ /// The [`PageEncodingStats`] struct was added to the Parquet specification specifically to
1064+ /// enable fast determination of whether all pages in a column chunk are dictionary encoded
1065+ /// (see <https://github.com/apache/parquet-format/pull/16>).
1066+ /// Decoding the full page encoding statistics, however, can be very costly, and is not
1067+ /// necessary to support the aforementioned use case. As an alternative, this crate can
1068+ /// distill the list of `PageEncodingStats` down to a bitmask of just the encodings
1069+ /// used for data pages
1070+ /// (see [`ParquetMetaDataOptions::set_encoding_stats_as_mask`]).
1071+ /// To test for an all-dictionary-encoded chunk, one could use this bitmask in the following way:
1072+ ///
1073+ /// ```rust
1074+ /// use parquet::basic::Encoding;
1075+ /// use parquet::file::metadata::ColumnChunkMetaData;
1076+ /// // test if all data pages in the column chunk are dictionary encoded
1077+ /// fn is_all_dictionary_encoded(col_meta: &ColumnChunkMetaData) -> bool {
1078+ /// // check that dictionary encoding was used
1079+ /// col_meta.dictionary_page_offset().is_some()
1080+ /// && col_meta.page_encoding_stats_mask().is_some_and(|mask| {
1081+ /// // mask should only have one bit set, either for PLAIN_DICTIONARY or
1082+ /// // RLE_DICTIONARY
1083+ /// mask.is_only(Encoding::PLAIN_DICTIONARY) || mask.is_only(Encoding::RLE_DICTIONARY)
1084+ /// })
1085+ /// }
1086+ /// ```
10661087 pub fn page_encoding_stats_mask ( & self ) -> Option < & EncodingMask > {
// Only borrows the mask already stored on `self` (if any); no mask is computed or
// copied by this accessor.
10671088 self . encoding_stats_mask . as_ref ( )
10681089 }
0 commit comments