Skip to main content

satif_sdk.utils

ENCODING_SAMPLE_SIZE

Number of bytes to sample for encoding detection.

DELIMITER_SAMPLE_SIZE

Number of bytes to sample for delimiter detection.

sanitize_sql_identifier

def sanitize_sql_identifier(name: str, prefix: str = "item") -> str

Clean up a string so that it is a safe SQL identifier: replace problematic characters with underscores, ensure the result starts with a letter or underscore, and append an underscore if the result is a basic SQL keyword.

normalize_list_argument

def normalize_list_argument(arg_value: Optional[Union[T, List[Optional[T]]]],
arg_name_for_error: str,
expected_len: int) -> List[Optional[T]]

Normalizes an argument that can be a single item or a list into a list of a specific expected length.

If arg_value is a single item, it's repeated expected_len times. If arg_value is a list, its length must match expected_len. If arg_value is None, a list of Nones of expected_len is returned.

validate_skip_rows_config

def validate_skip_rows_config(
config: SkipRowsConfig,
file_name_for_error: Optional[str] = None) -> SkipRowsConfig

Validate types and values for skip_rows config.

validate_skip_columns_config

def validate_skip_columns_config(
config: SkipColumnsConfig,
file_name_for_error: Optional[str] = None) -> SkipColumnsConfig

Validate types and values for skip_columns config.

parse_skip_rows_config

def parse_skip_rows_config(
skip_rows_config: SkipRowsConfig) -> Union[int, Set[int]]

Parse a validated skip_rows config into an int (for initial skip) or a Set[int] (for indexed skip).

parse_skip_columns_config

def parse_skip_columns_config(
skip_columns_config: SkipColumnsConfig) -> Tuple[Set[int], Set[str]]

Parse a validated skip_columns config into separate sets of indices and names.

detect_file_encoding

def detect_file_encoding(file_path: Path,
sample_size: int = ENCODING_SAMPLE_SIZE) -> str

Detect file encoding using charset-normalizer.

detect_csv_delimiter

def detect_csv_delimiter(sample_text: str) -> str

Detect CSV delimiter using clevercsv.Sniffer.