Files
multiverse/xcom-ultra/xcu-parquet/src/lib.rs
T

126 lines
5.0 KiB
Rust

#![deny(warnings)]
//! [TSM.ID].[11031972] -- Platform X Ecosystem
//! xcu-parquet -- Columnar Storage Engine (Parquet-like)
pub mod blackbox;
use std::collections::HashMap;
/// Errors returned by the columnar store.
///
/// Each variant carries a free-form detail string that is appended to a
/// variant-specific prefix when the error is displayed.
#[derive(Debug)]
pub enum ParquetError {
    /// A column lookup failed; the payload is the column name that was requested.
    ColumnNotFound(String),
    /// A value violated the schema (used in this file for nulls in non-nullable columns).
    TypeMismatch(String),
    /// A write failed. NOTE(review): never constructed in the code visible here.
    WriteError(String),
}

impl std::fmt::Display for ParquetError {
    /// Formats the error as `"<prefix>: <detail>"`, where the prefix is
    /// `Column`, `Type` or `Write` depending on the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (prefix, detail) = match self {
            Self::ColumnNotFound(detail) => ("Column", detail),
            Self::TypeMismatch(detail) => ("Type", detail),
            Self::WriteError(detail) => ("Write", detail),
        };
        write!(f, "{prefix}: {detail}")
    }
}

impl std::error::Error for ParquetError {}
/// A single cell held in a column.
#[derive(Debug, Clone)]
pub enum ColumnValue {
    /// Signed 64-bit integer cell.
    Int64(i64),
    /// 64-bit floating-point cell.
    Float64(f64),
    /// Owned string cell.
    Str(String),
    /// Boolean cell.
    Bool(bool),
    /// Absent value; also what a row stores for a column it does not mention.
    Null,
}
/// Declaration of one column of a store.
#[derive(Debug, Clone)]
pub struct ColumnSchema {
    /// Column name; rows and queries refer to the column by this string.
    pub name: String,
    /// Free-form type label (the tests in this file use "int64", "float64", "string").
    /// NOTE(review): the store code visible here keeps the label but never
    /// checks inserted values against it.
    pub col_type: String,
    /// Whether the column accepts `ColumnValue::Null`.
    pub nullable: bool,
}
/// In-memory columnar table: one vector of cells per schema column.
///
/// Every successful [`ColumnStore::insert_row`] appends exactly one cell to
/// each schema column, so index `i` refers to the same logical row in every
/// column. A rejected insert appends nothing.
pub struct ColumnStore {
    /// Column declarations, in the order passed to [`ColumnStore::new`].
    schema: Vec<ColumnSchema>,
    /// Cell vectors keyed by column name.
    columns: HashMap<String, Vec<ColumnValue>>,
    /// Number of rows accepted so far.
    row_count: usize,
}

impl ColumnStore {
    /// Creates an empty store holding one empty cell vector per schema entry.
    ///
    /// NOTE(review): names are used as map keys, so duplicate names in
    /// `schema` share a single cell vector and each insert appends to it once
    /// per duplicate. Callers should pass unique names.
    pub fn new(schema: Vec<ColumnSchema>) -> Self {
        let columns: HashMap<String, Vec<ColumnValue>> = schema
            .iter()
            .map(|col| (col.name.clone(), Vec::new()))
            .collect();
        Self { schema, columns, row_count: 0 }
    }

    /// Appends one row, given as a map from column name to value.
    ///
    /// Schema columns missing from `row` are stored as [`ColumnValue::Null`].
    /// Keys in `row` that are not schema columns are ignored.
    ///
    /// The insert is all-or-nothing: the whole row is validated before any
    /// column is modified, so an error leaves the store exactly as it was.
    ///
    /// # Errors
    ///
    /// * [`ParquetError::TypeMismatch`] if a non-nullable column would receive
    ///   `Null` (explicitly, or because the row omits it).
    /// * [`ParquetError::ColumnNotFound`] if a schema column has no backing
    ///   cell vector.
    pub fn insert_row(&mut self, row: HashMap<String, ColumnValue>) -> Result<(), ParquetError> {
        // Phase 1: validate and stage every cell without touching the columns.
        let mut staged: Vec<ColumnValue> = Vec::with_capacity(self.schema.len());
        for col in &self.schema {
            let val = row.get(&col.name).cloned().unwrap_or(ColumnValue::Null);
            if matches!(val, ColumnValue::Null) && !col.nullable {
                return Err(ParquetError::TypeMismatch(format!("{} is not nullable", col.name)));
            }
            if !self.columns.contains_key(&col.name) {
                return Err(ParquetError::ColumnNotFound(col.name.clone()));
            }
            staged.push(val);
        }

        // Phase 2: commit. Every lookup was verified above, so this cannot
        // stop half-way and leave columns with different lengths.
        for (col, val) in self.schema.iter().zip(staged) {
            if let Some(cells) = self.columns.get_mut(&col.name) {
                cells.push(val);
            }
        }
        self.row_count += 1;
        Ok(())
    }

    /// Returns every cell of the named column, in insertion order.
    ///
    /// # Errors
    ///
    /// [`ParquetError::ColumnNotFound`] if no column has that name.
    pub fn read_column(&self, name: &str) -> Result<&[ColumnValue], ParquetError> {
        self.columns
            .get(name)
            .map(Vec::as_slice)
            .ok_or_else(|| ParquetError::ColumnNotFound(name.into()))
    }

    /// Returns the row indices whose cell in `column` satisfies `predicate`,
    /// in ascending order.
    ///
    /// # Errors
    ///
    /// [`ParquetError::ColumnNotFound`] if no column has that name.
    pub fn filter<F>(&self, column: &str, predicate: F) -> Result<Vec<usize>, ParquetError>
    where
        F: Fn(&ColumnValue) -> bool,
    {
        let cells = self.read_column(column)?;
        Ok(cells
            .iter()
            .enumerate()
            .filter(|(_, cell)| predicate(cell))
            .map(|(idx, _)| idx)
            .collect())
    }

    /// Sums the `Int64` and `Float64` cells of `column` as `f64`.
    ///
    /// Cells of any other variant are skipped; a column with no numeric cells
    /// sums to `0.0`.
    ///
    /// # Errors
    ///
    /// [`ParquetError::ColumnNotFound`] if no column has that name.
    pub fn sum(&self, column: &str) -> Result<f64, ParquetError> {
        let cells = self.read_column(column)?;
        // Explicit left-to-right fold seeded with 0.0 keeps the accumulation
        // order (and the empty-column result) fixed.
        Ok(cells.iter().fold(0.0, |total, cell| match cell {
            ColumnValue::Int64(n) => total + *n as f64,
            ColumnValue::Float64(x) => total + *x,
            ColumnValue::Str(_) | ColumnValue::Bool(_) | ColumnValue::Null => total,
        }))
    }

    /// Counts the cells of `column` that are not [`ColumnValue::Null`].
    ///
    /// # Errors
    ///
    /// [`ParquetError::ColumnNotFound`] if no column has that name.
    pub fn count(&self, column: &str) -> Result<usize, ParquetError> {
        let cells = self.read_column(column)?;
        Ok(cells
            .iter()
            .filter(|cell| !matches!(cell, ColumnValue::Null))
            .count())
    }

    /// Returns `(min, max)` over the `Int64` and `Float64` cells of `column`,
    /// converted to `f64`.
    ///
    /// Non-numeric cells are skipped. When the column has no numeric cells the
    /// result is `(f64::INFINITY, f64::NEG_INFINITY)`, i.e. `min > max`;
    /// callers that need to detect that case should check for it.
    ///
    /// # Errors
    ///
    /// [`ParquetError::ColumnNotFound`] if no column has that name.
    pub fn min_max(&self, column: &str) -> Result<(f64, f64), ParquetError> {
        let cells = self.read_column(column)?;
        let mut lowest = f64::INFINITY;
        let mut highest = f64::NEG_INFINITY;
        for cell in cells {
            let val = match cell {
                ColumnValue::Int64(n) => *n as f64,
                ColumnValue::Float64(x) => *x,
                ColumnValue::Str(_) | ColumnValue::Bool(_) | ColumnValue::Null => continue,
            };
            // Plain comparisons: a NaN cell compares false both ways and so
            // never replaces either bound.
            if val < lowest {
                lowest = val;
            }
            if val > highest {
                highest = val;
            }
        }
        Ok((lowest, highest))
    }

    /// Number of rows accepted by [`ColumnStore::insert_row`].
    pub fn row_count(&self) -> usize {
        self.row_count
    }

    /// Number of schema entries the store was created with.
    pub fn column_count(&self) -> usize {
        self.schema.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a store with columns `id` (non-nullable), `value` and `name`
    /// (both nullable) and ten rows: `id = i`, `value = i * 1.5`,
    /// `name = "item-{i}"` for `i` in `0..10`.
    fn make_store() -> ColumnStore {
        let schema = vec![
            ColumnSchema { name: "id".into(), col_type: "int64".into(), nullable: false },
            ColumnSchema { name: "value".into(), col_type: "float64".into(), nullable: true },
            ColumnSchema { name: "name".into(), col_type: "string".into(), nullable: true },
        ];
        let mut store = ColumnStore::new(schema);
        for i in 0..10 {
            let mut row = HashMap::new();
            row.insert("id".into(), ColumnValue::Int64(i));
            row.insert("value".into(), ColumnValue::Float64(i as f64 * 1.5));
            row.insert("name".into(), ColumnValue::Str(format!("item-{i}")));
            store.insert_row(row).unwrap();
        }
        store
    }

    #[test]
    fn test_sum() {
        let s = make_store();
        assert_eq!(s.sum("id").unwrap(), 45.0);
        // 1.5 * (0 + 1 + ... + 9); every partial sum is exactly representable.
        assert_eq!(s.sum("value").unwrap(), 67.5);
        // String cells contribute nothing.
        assert_eq!(s.sum("name").unwrap(), 0.0);
    }

    #[test]
    fn test_filter() {
        let s = make_store();
        let rows = s
            .filter("value", |v| matches!(v, ColumnValue::Float64(f) if *f > 10.0))
            .unwrap();
        // value = i * 1.5 exceeds 10.0 only for i = 7, 8, 9.
        assert_eq!(rows, vec![7, 8, 9]);
    }

    #[test]
    fn test_min_max() {
        let s = make_store();
        let (min, max) = s.min_max("id").unwrap();
        assert_eq!(min, 0.0);
        assert_eq!(max, 9.0);
    }

    #[test]
    fn test_counts() {
        let s = make_store();
        assert_eq!(s.row_count(), 10);
        assert_eq!(s.column_count(), 3);
        assert_eq!(s.count("id").unwrap(), 10);
        assert_eq!(s.read_column("name").unwrap().len(), 10);
    }

    #[test]
    fn test_unknown_column_is_an_error() {
        let s = make_store();
        assert_eq!(s.sum("nope").unwrap_err().to_string(), "Column: nope");
        assert!(s.read_column("nope").is_err());
        assert!(s.count("nope").is_err());
        assert!(s.min_max("nope").is_err());
        assert!(s.filter("nope", |_| true).is_err());
    }

    #[test]
    fn test_missing_nullable_cells_become_null() {
        let mut s = make_store();
        let mut row = HashMap::new();
        row.insert("id".into(), ColumnValue::Int64(10));
        s.insert_row(row).unwrap();

        assert_eq!(s.row_count(), 11);
        assert_eq!(s.read_column("value").unwrap().len(), 11);
        assert!(matches!(s.read_column("name").unwrap().last(), Some(ColumnValue::Null)));
        // Nulls are stored but not counted.
        assert_eq!(s.count("value").unwrap(), 10);
        assert_eq!(s.count("id").unwrap(), 11);
    }

    #[test]
    fn test_missing_non_nullable_cell_is_rejected() {
        let mut s = make_store();
        let mut row = HashMap::new();
        row.insert("value".into(), ColumnValue::Float64(1.0));
        let err = s.insert_row(row).unwrap_err();

        assert_eq!(err.to_string(), "Type: id is not nullable");
        assert_eq!(s.row_count(), 10);
        assert_eq!(s.read_column("id").unwrap().len(), 10);
    }
}