#![deny(warnings)] //! [TSM.ID].[11031972] -- Platform X Ecosystem //! xcu-parquet -- Columnar Storage Engine (Parquet-like) pub mod blackbox; use std::collections::HashMap; #[derive(Debug)] pub enum ParquetError { ColumnNotFound(String), TypeMismatch(String), WriteError(String) } impl std::fmt::Display for ParquetError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::ColumnNotFound(e) => write!(f, "Column: {e}"), Self::TypeMismatch(e) => write!(f, "Type: {e}"), Self::WriteError(e) => write!(f, "Write: {e}") } } } impl std::error::Error for ParquetError {} #[derive(Debug, Clone)] pub enum ColumnValue { Int64(i64), Float64(f64), Str(String), Bool(bool), Null } #[derive(Debug, Clone)] pub struct ColumnSchema { pub name: String, pub col_type: String, pub nullable: bool } pub struct ColumnStore { schema: Vec, columns: HashMap>, row_count: usize, } impl ColumnStore { pub fn new(schema: Vec) -> Self { let mut columns = HashMap::new(); for col in &schema { columns.insert(col.name.clone(), Vec::new()); } Self { schema, columns, row_count: 0 } } /// Insert a row (HashMap of column_name → value) pub fn insert_row(&mut self, row: HashMap) -> Result<(), ParquetError> { for col in &self.schema { let val = row.get(&col.name).cloned().unwrap_or(ColumnValue::Null); if let ColumnValue::Null = val { if !col.nullable { return Err(ParquetError::TypeMismatch(format!("{} is not nullable", col.name))); } } self.columns.get_mut(&col.name) .ok_or_else(|| ParquetError::ColumnNotFound(col.name.clone()))? .push(val); } self.row_count += 1; Ok(()) } /// Read a column (full scan) pub fn read_column(&self, name: &str) -> Result<&[ColumnValue], ParquetError> { self.columns.get(name).map(|v| v.as_slice()) .ok_or_else(|| ParquetError::ColumnNotFound(name.into())) } /// Filter rows where column matches predicate pub fn filter(&self, column: &str, predicate: F) -> Result, ParquetError> where F: Fn(&ColumnValue) -> bool { let col = self.columns.get(column).ok_or_else(|| ParquetError::ColumnNotFound(column.into()))?; Ok(col.iter().enumerate().filter(|(_, v)| predicate(v)).map(|(i, _)| i).collect()) } /// Aggregate: sum of numeric column pub fn sum(&self, column: &str) -> Result { let col = self.columns.get(column).ok_or_else(|| ParquetError::ColumnNotFound(column.into()))?; let mut total = 0.0; for v in col { match v { ColumnValue::Int64(n) => total += *n as f64, ColumnValue::Float64(n) => total += n, _ => {} } } Ok(total) } /// Aggregate: count non-null pub fn count(&self, column: &str) -> Result { let col = self.columns.get(column).ok_or_else(|| ParquetError::ColumnNotFound(column.into()))?; Ok(col.iter().filter(|v| !matches!(v, ColumnValue::Null)).count()) } /// Compute min/max for numeric column pub fn min_max(&self, column: &str) -> Result<(f64, f64), ParquetError> { let col = self.columns.get(column).ok_or_else(|| ParquetError::ColumnNotFound(column.into()))?; let mut min = f64::INFINITY; let mut max = f64::NEG_INFINITY; for v in col { let val = match v { ColumnValue::Int64(n) => *n as f64, ColumnValue::Float64(n) => *n, _ => continue }; if val < min { min = val; } if val > max { max = val; } } Ok((min, max)) } pub fn row_count(&self) -> usize { self.row_count } pub fn column_count(&self) -> usize { self.schema.len() } } #[cfg(test)] mod tests { use super::*; fn make_store() -> ColumnStore { let schema = vec![ ColumnSchema { name: "id".into(), col_type: "int64".into(), nullable: false }, ColumnSchema { name: "value".into(), col_type: "float64".into(), nullable: true }, ColumnSchema { name: "name".into(), col_type: "string".into(), nullable: true }, ]; let mut store = ColumnStore::new(schema); for i in 0..10 { let mut row = HashMap::new(); row.insert("id".into(), ColumnValue::Int64(i)); row.insert("value".into(), ColumnValue::Float64(i as f64 * 1.5)); row.insert("name".into(), ColumnValue::Str(format!("item-{i}"))); store.insert_row(row).unwrap(); } store } #[test] fn test_sum() { let s = make_store(); assert_eq!(s.sum("id").unwrap(), 45.0); } #[test] fn test_filter() { let s = make_store(); let rows = s.filter("value", |v| matches!(v, ColumnValue::Float64(f) if *f > 10.0)).unwrap(); assert!(!rows.is_empty()); } #[test] fn test_min_max() { let s = make_store(); let (min, max) = s.min_max("id").unwrap(); assert_eq!(min, 0.0); assert_eq!(max, 9.0); } }