Source code for quick_pp.las_handler

import mmap
import os
import re

import lasio
import numpy as np
import pandas as pd
import welly

from quick_pp import logger



[docs]
def read_las_files(las_files, depth_uom=None):
    """Read and merge data and headers from multiple LAS files.

    This function iterates through a list of LAS file objects, reads each one,
    and concatenates their curve data and header information into respective
    pandas DataFrames. It prioritizes reading with `welly` and uses a
    memory-mapped fallback for robustness.

    Args:
        las_files (list): A list of file-like objects, each opened in binary mode.
        depth_uom (str, optional): The unit of measurement for the depth index,
                                   to be used by `welly`. Defaults to None.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]:
            - A DataFrame containing the merged curve data from all LAS files.
            - A DataFrame containing the merged header information from all LAS files.
    """
    merged_data = pd.DataFrame()
    header_data = pd.DataFrame()

    for f in las_files:
        try:
            df, well_header = read_las_file_welly(f, depth_uom)
        except Exception as e:
            logger.error(f"[read_las_files] Exception for {f.name} | {e} ")
            df, well_header, _ = read_las_file_mmap(f)
        merged_data = pd.concat([merged_data, df], ignore_index=True)
        header_data = pd.concat([header_data, well_header], ignore_index=True)

    merged_data.reset_index(inplace=True, drop=True)

    return merged_data, header_data




[docs]
def read_las_file_mmap(file_object, required_sets=["PEP"]):  # noqa
    """Read a single LAS file using a memory-mapped approach for multi-set data.

    This function parses LAS files, particularly those containing multiple data
    sets (e.g., from different logging runs), which standard parsers might
    mishandle. It employs memory-mapping for efficient file access and allows
    for the selective extraction of specified data sets.

    Args:
        file_object (file): A file-like object opened in binary mode to be read.
        required_sets (list, optional): A list of data set identifiers to extract.
                                        Only data sets matching these identifiers
                                        will be processed. Defaults to ['PEP'].

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, welly.well.Well]: A tuple containing:
            - A DataFrame containing the curve data.
            - A DataFrame containing the header information.
            - A `welly.well.Well` object representing the well.
    """
    fileno = file_object.fileno()  # identifier for files
    parameter_line_numbers = []
    with mmap.mmap(fileno, length=0, access=mmap.ACCESS_READ) as mmap_obj:
        all_text = mmap_obj.read()
        set_count = len(re.findall(r"\b(SET)\s+", all_text.decode(), re.MULTILINE))
        if set_count > 1:
            well_count = 0
            dataset_count = 0
            line_number = 1
            parameter_count = 0
            mmap_obj.seek(0)
            pointer = 0
            while pointer < mmap_obj.size():
                text = mmap_obj.readline()
                pointer = mmap_obj.tell()  # show current position of pointer

                if b"~Well" in text:
                    well_count += 1
                if b"~Curve" in text:
                    dataset_count += 1
                if b"~P" in text or b"~Tops_Parameter" in text:
                    # Record parameter info in tuple:(parameter_count, parameter_set, pointer location, line number)
                    parameter_line_numbers.append(
                        (parameter_count, "", pointer, line_number)
                    )
                    parameter_count += 1
                if re.compile(r"^\b(SET)\s+").search(text.decode()):
                    parameter_set = re.split(
                        r"[\s+,.:]", text.decode().replace(" ", "")
                    )[1]
                    temp_list_from_tuple = list(
                        parameter_line_numbers[parameter_count - 1]
                    )
                    temp_list_from_tuple[1] = parameter_set
                    temp_list_from_tuple = tuple(temp_list_from_tuple)
                    parameter_line_numbers[parameter_count - 1] = temp_list_from_tuple
                line_number += 1

            # Record well header numbers in tuple: (0, '', pointer location, line number)
            well_header_line_numbers = [
                (0, "", 0, 1),
                (
                    0,
                    "",
                    parameter_line_numbers[0][2] - 1,
                    parameter_line_numbers[0][3] - 1,
                ),
            ]

            mmap_obj.seek(0)  # Reset the pointer location
            curves_df, header_df, welly_object = concat_datasets(
                file_object=mmap_obj.read(),
                header_line_numbers=well_header_line_numbers,
                parameter_line_numbers=parameter_line_numbers,
                required_sets=required_sets,
            )
        else:
            well_count = 1
            counter = 0
            pointer_list = []
            section_dict = {}
            mmap_obj.seek(0)
            pointer = 0
            while pointer < mmap_obj.size():
                text = mmap_obj.readline()
                pointer = mmap_obj.tell()
                if pointer not in pointer_list:
                    pointer_list.append(pointer)
                    counter += 1
                if b"~" in text:
                    section = (
                        text.decode().replace("~", "").rstrip().split(" ")[0].upper()
                    )
                    rename_set = {
                        "V": "VERSION",
                        "W": "WELL",
                        "P": "PARAMETER",
                        "C": "CURVE",
                        "O": "OTHER",
                        "A": "ASCII",
                    }
                    for initial, word in rename_set.items():
                        if section == initial:
                            section = section.replace(initial, word)
                    section_text = text
                    text = mmap_obj.readline()
                    pointer = mmap_obj.tell()
                    if pointer not in pointer_list:
                        pointer_list.append(pointer)
                        counter += 1
                    while b"~" not in text and len(text) > 0:
                        section_text = section_text + text
                        text = mmap_obj.readline()
                        pointer = mmap_obj.tell()
                        if pointer not in pointer_list:
                            pointer_list.append(pointer)
                            counter += 1
                    section_dict[section] = section_text
                    if len(text) > 0:
                        mmap_obj.seek(pointer_list[counter - 2])

            mmap_obj.seek(0)  # Reset the pointer location
            curves_df, header_df, welly_object = extract_dataset(section_dict)

    return curves_df, header_df, welly_object




[docs]
def read_las_file_welly(file_object, depth_uom=None):
    """Read a LAS file using the welly library.

    This function reads a LAS file into a `welly` object and then processes it
    to extract curve data and header information into pandas DataFrames.

    Args:
        file_object (file): A file-like object, whose `.name` attribute (file path)
                            is used by `welly`.
        depth_uom (str, optional): The unit of measurement for the depth index,
                                   passed to `welly` for index creation. Defaults to None.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]:
            - A DataFrame containing the processed curve data.
            - A DataFrame containing the header information.
    """
    welly_dataset = welly.las.from_las(file_object.name)
    welly_object = welly.well.Well.from_datasets(welly_dataset, index_units=depth_uom)
    df, well_header = pre_process(welly_object)
    return df, well_header




[docs]
def pre_process(welly_object):
    """Pre-process a `welly` object to extract and clean data.

    This function takes a `welly.well.Well` object and performs the following
    pre-processing steps:
    1. Converts the depth index into a 'DEPTH' column.
    2. Replaces the LAS-defined NULL value with `np.nan`.
    3. Inserts 'WELL_NAME' and 'UWI' columns at the beginning of the DataFrame.

    Args:
        welly_object (welly.well.Well): The welly object to process.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]:
            - A DataFrame containing the processed curve data.
            - A DataFrame containing the header information.
    """
    header_df = getattr(welly_object, "header", pd.DataFrame())

    # Guard against welly objects with no LAS data
    try:
        data_df = welly_object.las[0]
    except Exception as e:
        logger.error(f"[las_handler] pre_process: no las data in welly_object | {e}")
        return pd.DataFrame(), header_df

    data_df.index.rename("DEPTH", inplace=True)
    data_df = data_df.reset_index(drop=False)
    data_df["DEPTH"] = data_df["DEPTH"].round(4)

    # Safely determine NULL value from header, with sensible default
    try:
        null_rows = header_df[header_df.get("mnemonic", pd.Series()) == "NULL"]["value"]
        nullValue = (
            float(null_rows.values[0])
            if len(null_rows) and pd.notna(null_rows.values[0])
            else -999.25
        )
    except Exception:
        nullValue = -999.25

    data_df = data_df.where(data_df >= nullValue, np.nan)
    # Insert well name
    well_name = get_wellname_from_header(header_df)
    if "WELL_NAME" not in data_df.columns:
        data_df.insert(0, "WELL_NAME", well_name)
    # Insert UWI if available
    if "UWI" in header_df["mnemonic"].values:
        uwi = get_uwi_from_header(header_df)
        data_df.insert(0, "UWI", uwi)

    return data_df, header_df




[docs]
def get_wellname_from_header(header_df):
    """Extract the well name from the LAS header DataFrame.

    Args:
        header_df (pd.DataFrame): The LAS header data.

    Returns:
        str: The well name, with slashes and spaces replaced by hyphens.
    """
    try:
        descr = header_df.get("descr", pd.Series()).fillna("").str.upper()
        mask = (header_df.get("mnemonic", pd.Series()) == "WELL") | (descr == "WELL")
        if mask.any():
            val = header_df.loc[mask, "value"].values[0]
            if pd.isna(val) or str(val).strip() == "":
                return "UNKNOWN_WELL"
            return str(val).replace("/", "-").replace(" ", "-")
    except Exception as e:
        logger.error(f"[las_handler] get_wellname_from_header error | {e}")
    return "UNKNOWN_WELL"




[docs]
def get_uwi_from_header(header_df):
    """Extract the Unique Well Identifier (UWI) from the LAS header DataFrame.

    If the UWI is not found, it falls back to using the well name.

    Args:
        header_df (pd.DataFrame): The LAS header data.

    Returns:
        str: The UWI or well name, with slashes and spaces replaced by hyphens.
    """
    try:
        descr = header_df.get("descr", pd.Series()).fillna("").str.upper()
        mask = (header_df.get("mnemonic", pd.Series()) == "UWI") | (
            descr == "UNIQUE WELL ID"
        )
        if mask.any():
            val = header_df.loc[mask, "value"].values[0]
            if pd.isna(val) or str(val).strip() == "":
                return get_wellname_from_header(header_df)
            uwi = str(val).replace("/", "-").replace(" ", "-")
            return uwi
    except Exception as e:
        logger.error(f"[las_handler] get_uwi_from_header error | {e}")
    return get_wellname_from_header(header_df)




[docs]
def get_unit_from_header(header_df, mnemonic):
    """Extract the unit for a specific curve mnemonic from the LAS header.

    Args:
        header_df (pd.DataFrame): The LAS header data.
        mnemonic (str): The curve mnemonic to look for.

    Returns:
        str or None: The unit of the curve, or None if not found.
    """
    try:
        matches = header_df[
            header_df.get("mnemonic", pd.Series()).str.contains(mnemonic, na=False)
        ]
        return matches["unit"].values[0] if len(matches) else None
    except Exception:
        return None




[docs]
def get_descr_from_header(header_df, mnemonic):
    """Extract the description for a specific curve mnemonic from the LAS header.

    Args:
        header_df (pd.DataFrame): The LAS header data.
        mnemonic (str): The curve mnemonic to look for.

    Returns:
        str or None: The description of the curve, or None if not found.
    """
    try:
        matches = header_df[
            header_df.get("mnemonic", pd.Series()).str.contains(mnemonic, na=False)
        ]
        return matches["descr"].values[0] if len(matches) else None
    except Exception:
        return None




[docs]
def extract_dataset(section_dict):
    """Extract a single dataset from a dictionary of LAS file sections.

    This function is designed for LAS files with a single data set. It
    reconstructs the LAS file content from a dictionary of its sections,
    reads it using `lasio` and `welly`, and then applies pre-processing.

    Args:
        section_dict (dict): A dictionary where keys are LAS section names
                             (e.g., 'WELL', 'CURVE') and values are their content.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, welly.well.Well]: A tuple containing the processed curve data, header data, and the `welly` object.
    """
    header_bytes = section_dict["WELL"]
    data_bytes = b""
    for k, v in section_dict.items():
        if k in ["PARAMETER", "CURVE", "ASCII"]:
            data_bytes = data_bytes + v

    file_object = header_bytes.decode() + data_bytes.decode()
    las_object = lasio.read(file_object, read_policy=())

    # Fix las_object
    df = las_object.df()
    df = df.apply(pd.to_numeric, errors="coerce")
    las_object.set_data_from_df(df)

    welly_object = welly.Well.from_lasio(las_object)
    well_df = pre_process(welly_object)
    header_df = welly_object.header

    return well_df, header_df, welly_object




[docs]
def concat_datasets(
    file_object, header_line_numbers, parameter_line_numbers, required_sets=None
):
    """Extract and concatenate specified datasets from a multi-set LAS file.

    This function iterates through the parameter sections identified in a LAS file.
    For each section that matches the `required_sets`, it reconstructs a temporary
    single-set LAS file in memory, reads it, and concatenates the resulting data.

    Args:
        file_object (bytes): The complete content of the LAS file as a bytes object.
        header_line_numbers (list): A list of tuples defining the start and end pointers
                                    for the main well header section.
        parameter_line_numbers (list): A list of tuples, each containing metadata
                                       about a `~Parameter` section, including its
                                       set identifier and pointer location.
        required_sets (list, optional): A list of data set identifiers to extract and
                                        concatenate. Defaults to ['PEP'].

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, welly.well.Well]: A tuple containing the concatenated curve data, the last header data, and the last `welly` object.
    """
    required_sets = required_sets or ["PEP"]
    well_df = pd.DataFrame()
    header_df = pd.DataFrame()
    welly_object = welly.Well()
    for i, (_, param_set, pointer, _) in enumerate(parameter_line_numbers):
        # Currently only extracting one dataset: PEP
        if param_set in required_sets:
            well_info = file_object[
                header_line_numbers[0][2] : header_line_numbers[1][2] + 1
            ].decode()
            if i < len(parameter_line_numbers) - 1:
                temp_file_object = file_object[
                    pointer : parameter_line_numbers[i + 1][2]
                ].decode()
            else:
                temp_file_object = file_object[pointer:].decode()
            temp_file_object = well_info + temp_file_object
            las_object = lasio.read(temp_file_object, read_policy=())

            # Fix las_object
            df = las_object.df()
            df = df.apply(pd.to_numeric, errors="coerce")
            las_object.set_data_from_df(df)

            welly_object = welly.Well.from_lasio(las_object)
            temp_well_df = pre_process(welly_object)
            well_df = pd.concat([well_df, temp_well_df], axis=1)
            header_df = welly_object.header

    return well_df, header_df, welly_object




[docs]
def check_index_consistent(welly_object):
    """Check if the depth index of a welly object is consistent.

    A consistent index means it is monotonically increasing with a constant step.

    Args:
        welly_object (welly.well.Well): The welly object to check.

    Returns:
        bool: True if the index is consistent, False otherwise.
    """
    try:
        index_diff = np.diff(welly_object.las[0].index)
        if all(index_diff == index_diff[0]) and all(index_diff > 0):
            return True
        else:
            return False
    except Exception as e:
        logger.error(f"[las_handler] `check_index_consistent` Error | {e}")
        return False




[docs]
def export_to_las(well_data, well_name, folder="", vars_units=None):
    """Export a DataFrame of well data to a LAS file.

    This function takes a pandas DataFrame containing well log data, sets the
    'DEPTH' column as the index, and writes the data to a LAS 2.0 file.

    Args:
        well_data (pd.DataFrame): The DataFrame containing the well log data.
                                  It must include a 'DEPTH' column.
        well_name (str): The name of the well, used for the output filename.
        folder (str, optional): The directory to save the LAS file in. Defaults to ''.
        vars_units (dict, optional): A dictionary mapping curve mnemonics to their units.
                                     If not provided, units are inferred from the configuration.
    """
    from .config import Config

    units = vars_units if vars_units else Config.vars_units(well_data)
    well_data.set_index("DEPTH", inplace=True, drop=True)
    w = welly.Well().from_df(well_data, units=units, name=well_name)

    w = w.from_df(well_data, units=units, name=well_name)
    # Convert to lasio to handle index name
    las = w.to_lasio()
    las.curves[0].mnemonic = "DEPTH"
    # Write to LAS format
    well_path = os.path.join(folder, f"{well_name}.las")
    las.write(well_path)