#!/usr/bin/python
# coding: utf-8

"""Contains various functions to read particular datasets
"""

# $Id: io.py 7318 2012-02-17 17:44:08Z gerrit $

import ConfigParser
import os.path
import gzip

import numpy
from . import artsXML
from . import constants
from . import general

# Record layout of one level in the Chevalier ArtsXML matrices: nine
# float64 fields.  Built from an explicit list of (name, type) pairs so that
# it does not depend on zip() returning a list (it returns an iterator on
# Python 3, which numpy.dtype cannot interpret).
cheval_dtype = numpy.dtype([(name, numpy.float64) for name in
                            ("p", "T", "z", "VMR_H2O", "VMR_O3",
                             "CLW", "CIW", "Rain", "Snow")])

# Record layout of one level in the NICAM ArtsXML matrices: eight float64
# fields.  (The type list used to have nine entries for these eight names;
# the surplus entry was silently dropped by zip().)
nicam_dtype = numpy.dtype([(name, numpy.float64) for name in
                           "p T z CLW CIW Rain Snow VMR_H2O".split()])

def get_chevalier_file(tp, var):
    """Returns path to Chevalier data

    Requires constants.cheval_dir to be set (according to the module
    documentation this needs environment variable ARTS_XMLDATA_PATH).

    Does not verify existence of the file.

    Parameters
    ~~~~~~~~~~

    tp : string-like
        What type of Chevalier-data to read. Valid values: 'clear',
        'cloud', 'all'.
    var : string-like
        For what the Chevalier-data is maximised. Valid values: 'ccol',
        'oz', 'q', 'rcol', 't'.

    Returns
    ~~~~~~~

    Returns a string with the path to the Chevalier-data.

    Raises
    ~~~~~~

    general.PyARTSError if constants.cheval_dir is None.
    """
    if constants.cheval_dir is None:
        # NOTE(review): the message used to say ARTSXML_DATA_PATH while all
        # docstrings in this module say ARTS_XMLDATA_PATH; the latter is
        # used here -- confirm against constants.
        raise general.PyARTSError(
            "Cannot find Chevalier data; is environment ARTS_XMLDATA_PATH "
            "set?")
    filename = "chevallierl91_%s_%s.xml.gz" % (tp, var)
    return os.path.join(constants.cheval_dir, filename)

def get_nicam_file():
    """Look up the path of the NICAM ArtsXML file.

    The value is whatever general.get_config returns for the key "nicam";
    nothing is checked about the file itself.
    """
    nicam_path = general.get_config("nicam")
    return nicam_path

def get_chevalier_orig_file(var):
    """Build the path to an original (non-XML) Chevalier data file.

    The directory is the configuration value "cheval_orig" (obtained through
    general.get_config); the file name is "nwp_saf_<var>_sampled.atm" with
    *var* lower-cased.  Existence of the file is not checked.
    """
    directory = general.get_config("cheval_orig")
    basename = "nwp_saf_%s_sampled.atm" % var.lower()
    return os.path.join(directory, basename)

def get_garand_file():
    """Returns path to Garand data

    Requires constants.garand_dir to be set (according to the module
    documentation this needs environment variable ARTS_XMLDATA_PATH).

    Does not verify existence of the file.

    Raises
    ~~~~~~

    general.PyARTSError if constants.garand_dir is None.
    """

    # Same guard as get_chevalier_file: fail with a clear message instead of
    # the obscure error os.path.join gives when handed None.
    # NOTE(review): assumes constants.garand_dir is None when the data
    # directory could not be determined, like constants.cheval_dir -- confirm.
    if constants.garand_dir is None:
        raise general.PyARTSError(
            "Cannot find Garand data; is environment ARTS_XMLDATA_PATH "
            "set?")
    return os.path.join(constants.garand_dir, "garand_profiles.xml.gz")

def get_scattering_file(tp, shape):
    """Build the paths to the scattering data files for one particle shape.

    The directory is the configuration value stored under the key *tp*
    (looked up with general.get_config).  The files themselves are not
    checked for existence.

    Parameters
    ~~~~~~~~~~

    tp : string-like
        Configuration key / type of data, e.g. hong, yang
    shape : string-like
        Particle shape, e.g. rosette, bullet

    Returns
    ~~~~~~~

    2-tuple of strings: (<dir>/ssd_<shape>.xml.gz, <dir>/smd_<shape>.xml.gz),
    i.e. (single scattering data, scattering meta data).
    """

    base_dir = general.get_config(tp)
    ssd_path = os.path.join(base_dir, "ssd_" + shape + ".xml.gz")
    smd_path = os.path.join(base_dir, "smd_" + shape + ".xml.gz")
    return (ssd_path, smd_path)

def read_chevalier(tp):
    """Read Chevallier data, both "clear-sky" and cloudy fields.

    The 'clear' and 'cloud' files located by get_chevalier_file are loaded
    and joined along the field axis, after which every level is viewed as
    one cheval_dtype record.

    Parameters
    ~~~~~~~~~~

    tp : string-like
        For what the Chevalier-data is maximised (passed on as the 'var'
        argument of get_chevalier_file). Valid values: 'ccol',
        'oz', 'q', 'rcol', 't'.


    Returns
    ~~~~~~~

    nd-array, 5000 x 92, containing "clear-sky" and cloudy fields.
    Record names as for the Chevalier README:

    """

    M = []
    for s in ("clear", "cloud"):
        p = get_chevalier_file(s, tp)
        contents = artsXML.load(p)["Array"]
        # all matrices are taken to have the shape of the first one
        nl, nf = contents[0]["Matrix"].shape
        # cannot change list comprehension to generator comprehension,
        # concatenate needs to know size in advance
        stacked = numpy.concatenate([c["Matrix"] for c in contents], 0)
        M.append(stacked.reshape(-1, nl, nf))
    # copy data to ensure row-order, then view innermost dimension as
    # records
    return numpy.concatenate(M, 2).copy(order="C").view(dtype=cheval_dtype).squeeze()
# Append the record names to the docstring; docstrings are stripped (None)
# when running with -OO, so only do this when there is one.
if read_chevalier.__doc__ is not None:
    read_chevalier.__doc__ += str(cheval_dtype.names)

def read_nicam():
    """Read the NICAM ArtsXML file.

    Loads the file located by get_nicam_file and returns a list with one
    entry per element of its "Array": the element's "Matrix", copied to
    C-order, viewed with nicam_dtype records and squeezed.
    """
    matrices = artsXML.load(get_nicam_file())["Array"]
    profiles = []
    for entry in matrices:
        as_records = entry["Matrix"].copy(order="C").view(dtype=nicam_dtype)
        profiles.append(as_records.squeeze())
    return profiles

## yang_phase = "/storage4/home/mendrok/data/cloud/yang/P11/${shape}_P11_infr.dat"
## yang_ang = "/storage4/home/mendrok/data/cloud/yang/P11/angle.dat"
## yang_other = "/storage4/home/mendrok/data/cloud/yang/P11/optical/${shape}_total.dat"

def get_SSD_Yang_dtype(angshape):
    """Build the record dtype used for Yang single scattering data.

    angshape is the shape of the angular grid (e.g. (498,)); it becomes the
    sub-array shape of the "P11" field.  All fields are float64.
    """
    # SingleScatteringData props first (P11, K, A), then the
    # ScatteringMetaData props; P11 is the only non-scalar field.
    scalar_fields = ("K", "A",
                     "density", "d_max", "V", "A_projected", "aspect_ratio")
    layout = [("P11", numpy.float64, angshape)]
    layout.extend((field, numpy.float64) for field in scalar_fields)
    return numpy.dtype(layout)


def readYang2005(f):
    """Read Yang-2005 type formatted file and get SingleScatteringData ndarray.

    The file is expected to hold 49 wavelengths x 45 sizes records; the
    flattened data are reshaped accordingly.

    Parameters
    ~~~~~~~~~~

    f : string-like
        Path to Yang-data to be read

    Returns
    ~~~~~~~

    Returns (size, wavelength, angles), SSD, e.g. a two-element tuple, the
    first element having in turn three elements.

    size : 1D-array (sizes,)
        Sizes for the particles [m]

    wavelengths : 1D-array (wavelengths,)
        Wavelengths for the particles [m]

    angles : 1D-array (angles,)
        Zenith angles for the scattering [degrees]

    SSD : 2D-array (wavelengths, sizes)
        An array with shape (wavelengths, sizes), containing the fields:

        P11 : first element of the phase matrix [m^2] (angles,)

        A_projected : projected area [m^2]

        K : extinction cross section [m^2]

        A : absorption cross section [m^2]

        density : particle density [kg/m^3]

        d_max : maximum diameter [m]

        V : volume [m^3]

        aspect_ratio : particle aspect ratio [no unit]; always -1 (undefined)

    See also
    ~~~~~~~~

    /storage3/data/scattering_databases/yang/Yang_2005_IR/README
    """
    n_wavelength, n_size = 49, 45
    newshape = n_wavelength, n_size
    M = readYang2005_raw(f)
    # turn flattened array into (n_wavelength, n_size) matrix
    M.shape = newshape
    # angles are in the first line of the datafile, from column 9 onwards;
    # use a context manager so the file handle is closed again
    with open(f, "r") as fp:
        header = fp.readline()
    angles = numpy.array([float(x) for x in header.split()[9:]])

    SSD = numpy.empty(shape=newshape, dtype=get_SSD_Yang_dtype(angles.shape))
    SSD["A_projected"] = M["area"] * 1e-12 # um^2 -> m^2
    # compensate for delta transmission effect as per Yang2005-README
    Qe_adj = M["extioneff"]*(1-M["Fdelta"]*M["albedo"])
    albedo_adj = M["albedo"]*(1-M["Fdelta"])/(1-M["Fdelta"]*M["albedo"])
    # normalise by area
    SSD["K"] = Qe_adj * SSD["A_projected"] # extinction [m^2]
    # albedo = sca/ext = 1 - abs/ext --> abs = ext * (1 - albedo)
    SSD["A"] = SSD["K"] * (1 - albedo_adj) # absorption [m^2]
    # Yang2005 normalises to 4*pi; ARTS normalises to scattering cross section
    factor = (SSD["K"] - SSD["A"])/(4*numpy.pi)
    # add new-axis to enable correct broadcasting
    SSD["P11"] = M["scattering_phase_function"] * factor[..., numpy.newaxis]
    SSD["density"] = 920 # kg/m^3
    SSD["d_max"] = M["size"] * 1e-6 # um -> m
    SSD["V"] = M["volume"] * 1e-18 # um^3 -> m^3
    SSD["aspect_ratio"] = -1 # undefined but ARTS doesn't like nan as flag

    # the grids are read off the first row / first column of the matrix
    size = M["size"][0, :] * 1e-6 # um -> m
    wavelength = M["wl"][:, 0] * 1e-6 # um -> m

    return (size, wavelength, angles), SSD

# Layout of one data row of a Yang-2005 file: eight scalar float64 columns
# followed by the 498-element scattering phase function.
yang2005_dtype = numpy.dtype(
    list(zip("wl size volume area extioneff albedo GG Fdelta".split(),
             [numpy.float64] * 8))
    + [("scattering_phase_function", numpy.float64, (498,))])

def readYang2005_raw(f):
    """Load a Yang-2005 type formatted file without any processing.

    Mandatory input:

        f       string-like     path to file to read

    The first line of the file is skipped; the remaining rows are parsed by
    numpy.loadtxt into records of yang2005_dtype.
    """

    records = numpy.loadtxt(f, skiprows=1, dtype=yang2005_dtype)
    return records

def readYang_old(phase, opt, ang):
    """Read old Yang-type formatted files and build a SingleScatteringData ndarray.

    Input file format is defined in:

    /storage4/home/mendrok/data/cloud/yang/P11/README

    Mandatory input:

        phase   string-like     path to file with phase matrix P11 values (LW)
        opt     string-like     path to file with other parameters (total)
        ang     string-like     path to file containing corresponding angles

    OUT:
        tuple           (sizes, wavelengths, angles); sizes and wavelengths
                        converted from um to m
        ndarray         shape (wavelength, size) = (49, 45), with the dtype
                        returned by get_SSD_Yang_dtype
    """

    # The raw data are flattened: 2205 rows correspond to 49 wavelengths
    # times 45 sizes (first column wavelength, second column size).
    grid_shape = (49, 45)
    raw_phase, raw_optical = readYang_old_raw(phase, opt)
    angles = numpy.loadtxt(ang)
    phase_grid = raw_phase.reshape(grid_shape)
    # first 720 elements are optical, rest is IR
    optical_grid = raw_optical[720:].reshape(grid_shape)

    SSD = numpy.empty(shape=grid_shape,
                      dtype=get_SSD_Yang_dtype(angles.shape))

    # scattering meta data
    SSD["density"] = 920 # kg/m^3
    SSD["aspect_ratio"] = -1 # undefined
    SSD["d_max"] = phase_grid["size"] * 1e-6 # um -> m
    SSD["V"] = optical_grid["volume"] * 1e-18 # um^3 -> m^3
    SSD["A_projected"] = optical_grid["A_proj"] * 1e-12 # um^2 -> m^2

    # efficiencies times projected area give the cross sections
    SSD["K"] = optical_grid["Qe"] * SSD["A_projected"]
    SSD["A"] = optical_grid["Qa"] * SSD["A_projected"]

    # Rescale from 4*pi to scattering cross section; the trailing new axis
    # lets the per-particle factor broadcast over the angles
    rescale = (SSD["K"] - SSD["A"])/(4*numpy.pi)
    SSD["P11"] = phase_grid["P11"] * rescale[..., numpy.newaxis]

    # grids are read off the first row / first column
    size_grid = phase_grid["size"][0, :] * 1e-6 # um -> m
    wavelength_grid = phase_grid["wl"][:, 0] * 1e-6 # um -> m

    return (size_grid, wavelength_grid, angles), SSD

# Row layout of the old Yang phase-function file: wavelength, size and the
# 498-element P11.
yang_old_phase_dtype = numpy.dtype([
    ("wl", numpy.float64),
    ("size", numpy.float64),
    ("P11", numpy.float64, (498,)),
])
# Row layout of the old Yang "other parameters" file: eight float64 columns.
yang_old_other_dtype = numpy.dtype(
    list(zip("wl size volume A_proj Qe Qa ssa asym".split(),
             [numpy.float64] * 8)))
def readYang_old_raw(phase, opt):
    """Load the old raw Yang-type formatted files without processing.

    Mandatory input:

        phase   string-like     path to file with phase matrix P11 values
        opt     string-like     path to file with other parameters

    OUT:

        2-tuple (phase ndarray, other ndarray), parsed by numpy.loadtxt
        with yang_old_phase_dtype and yang_old_other_dtype respectively
    """

    phase_records = numpy.loadtxt(phase, dtype=yang_old_phase_dtype)
    other_records = numpy.loadtxt(opt, dtype=yang_old_other_dtype)
    return (phase_records, other_records)

def readHong(p):
    """Read Hong-type formatted file and build a SingleScatteringData ndarray.

    The raw records from readHong_raw are arranged on a fixed grid of
    21 wavelengths x 38 sizes and converted to SI units.

    Parameters
    ~~~~~~~~~~

    p : string-like
        Path to Hong-data to be read

    Returns
    ~~~~~~~

    Returns (sizes, wavelengths, angles), SSD, e.g. a two-element tuple, the
    first element having in turn three elements.

    sizes : 1D-array (sizes,)
        Sizes for the particles [m]

    wavelengths : 1D-array (wavelengths,)
        Wavelengths for the particles [m]

    angles : 1D-array (angles,)
        Zenith angles for the scattering [degrees]

    SSD : 2D-array (wavelengths, sizes)
        An array with shape (wavelengths, sizes), containing the fields:

        Z : 8 elements of the phase matrix [m^2] (8, angles)
            P11, P12, P21, P22, P33, P34, P43, P44

        K : extinction cross section [m^2]

        A : absorption cross section [m^2]

        A_projected : projected area [m^2]

        density : particle density [kg/m^3]

        d_max : maximum diameter [m]

        V : volume [m^3]

        aspect_ratio : particle aspect ratio [no unit]; always -1 (undefined)
    """

    grid_shape = (21, 38) # (n_wavelength, n_size)
    angles, raw = readHong_raw(p)
    raw = raw.reshape(grid_shape)

    # SingleScatteringData props (Z, K, A) followed by the
    # ScatteringMetaData props; only Z is not a scalar
    layout = [("Z", numpy.float64, (8, angles.size))]
    layout.extend((field, numpy.float64) for field in
                  ("K", "A",
                   "density", "d_max", "V", "A_projected", "aspect_ratio"))
    SSD = numpy.empty(shape=grid_shape, dtype=numpy.dtype(layout))

    # copy and convert the meta data
    SSD["density"] = 920 # kg/m^3
    SSD["aspect_ratio"] = -1 # undefined
    SSD["d_max"] = raw["max_d"] * 1e-6 # um -> m
    SSD["V"] = raw["volume"] * 1e-18 # um^3 -> m^3
    SSD["A_projected"] = raw["projected_area"] * 1e-12 # um^2 -> m^2

    # efficiencies times projected area give the cross sections
    SSD["K"] = raw["extinction_efficiency"] * SSD["A_projected"]
    SSD["A"] = raw["absorption_efficiency"] * SSD["A_projected"]

    # scale the phase function with (K - A)/(4*pi); two trailing axes are
    # added so the per-particle factor broadcasts over (8, angles)
    scale = (SSD["K"] - SSD["A"])/(4*numpy.pi)
    SSD["Z"] = raw["scattering_phase_function"] * scale[..., numpy.newaxis, numpy.newaxis]

    # grids are read off the first row / first column
    wavelengths = raw["wavelength"][:, 0] * 1e-6 # um -> m
    sizes = SSD["d_max"][0, :]
    return (sizes, wavelengths, angles), SSD

# To verify, something like:
# abs(scipy.integrate.trapz(2*pi*sin(deg2rad(angles))*SSD["P11"][..., :], deg2rad(angles))/(SSD["K"] - SSD["A"])-1).max()

# Layout of one Hong record: ten scalar float64 values followed by the
# (8, 181) scattering phase function.
hong_dtype = numpy.dtype([(s, numpy.float64) for s in \
    ["freq", "wavelength", "max_d", "spherical_radius", "volume",
    "projected_area", "extinction_efficiency", "absorption_efficiency",
    "albedo", "asymmetry_factor"]] + \
    [("scattering_phase_function", numpy.float64, (8, 181))])


def readHong_raw(p):
    """Read raw Hong-type formatted file. Don't process.

    The first two lines of the file are ignored, the third line holds the
    angles and all remaining lines hold the records (whitespace separated
    numbers; line breaks carry no meaning).

    Mandatory input:

        p       string-like     path to file with Hong data

    OUT:

        angle, array with angles
        ndarray, complicated dtype (hong_dtype), one record per
        10 + 8*181 numbers, squeezed

    Raises ValueError if the amount of numbers is not a multiple of the
    record length, IndexError if the file has fewer than three lines.
    """

    # open() instead of the Python-2-only file() builtin; the context
    # manager makes sure the handle is closed again
    with open(p) as fp:
        S = fp.readlines()
    # number of float64 values per record, derived from the dtype so that
    # the two cannot get out of sync (10 + 8*181)
    n_per_record = hong_dtype.itemsize // numpy.dtype(numpy.float64).itemsize
    # obnoxious with some lines 10, some lines 181; read as flat, then
    # reshape
    tokens = " ".join([c.strip() for c in S[3:]]).split()
    flat = numpy.fromiter((float(t) for t in tokens), dtype=numpy.float64)
    perfreq = flat.reshape(-1, n_per_record)
    # read angles as well
    angles = numpy.fromiter((float(t) for t in S[2].split()),
                            dtype=numpy.float64)
    # copy to guarantee C-order before viewing each row as one record
    return angles, perfreq.copy(order="C").view(dtype=hong_dtype).squeeze()

def get_species_from_arrayofmatrixfile(fn):
    """Reads a Cheval/Garand ArrayOfMatrix to find species contained.

    Only the first ten lines are inspected.  The first of those containing
    "H2O" is taken; everything from "H2O" to the end of that line is split on
    whitespace and of every token the part before the first underscore is
    returned (the whole token if it contains no underscore).

    File may be gzipped (decided by the file name ending in "gz").

    Raises general.PyARTSError if none of the first ten lines contains
    "H2O".
    """

    if fn.endswith("gz"):
        fp = gzip.GzipFile(fn, "r")
    else:
        fp = open(fn, "r")
    try:
        for _ in range(10):
            line = fp.readline()
            # GzipFile yields bytes on Python 3; work on text throughout
            if not isinstance(line, str):
                line = line.decode("ascii", "replace")
            if "H2O" in line:
                spec = line[line.find("H2O"):]
                break
        else:
            raise general.PyARTSError("Cannot identify species from %s" % fn)
    finally:
        fp.close()
    # partition() keeps tokens without an underscore intact, whereas slicing
    # with x.find("_") == -1 would chop off their last character
    return [x.partition("_")[0] for x in spec.split()]
#
#     temp(:),     &! 1) Temperature [K]                          (1-91)
#     hum(:),      &! 2) Humidity [kg/kg]                         (92-182)
#     ozo(:),      &! 3) Ozone [kg/kg]                            (183-273)
#     cc(:),       &! 4) Cloud Cover [0-1]                        (274-364)
#     clw(:),      &! 5) C Liquid W [kg/kg]                       (365-455)
#     ciw(:),      &! 6) C Ice W [kg/kg]                          (456-546)
#     rain(:),     &! 7) Rain [kg/(m2 *s)]                        (547-637)
#     snow(:),     &! 8) Snow [kg/(m2 *s)]                        (638-728)
#     w(:),        &! 9) Vertical Velocity [Pa/s]                 (729-819)
#     lnpsurf,     &!10) Ln of Surf Pressure in [Pa]              (820)
#     z0,          &!11) Surface geopotential [m2/s2]             (821) 
#     tsurf,       &!12) Surface Skin Temperature [K]             (822)
#     t2m,         &!13) 2m Temperature [K]                       (823)
#     td2m,        &!14) 2m Dew point temperature [K]             (824)
#     hum2m,       &!15) 2m Specific Humidity [kg/kg]             (825)
#     u10,         &!16) 10m wind speed U component [m/s]         (826)
#     v10,         &!17) 10m wind speed V component [m/s]         (827)
#     stratrsrf,   &!18) Stratiform rain at surface [kg/(m2 *s)]  (828)
#     convrsrf,    &!19) Convective rain at surface [kg/(m2 *s)]  (829)
#     snowsurf,    &!20) Snow at surface [kg/(m2 *s)]             (830)
#     lsm,         &!21) Land/sea Mask [0-1]                      (831)
#     lat,         &!22) Latitude [deg]                           (832)
#     long,        &!23) Longitude [deg]                          (833)
#     year,        &!24) Year                                     (834)
#     month,       &!25) Month                                    (835)
#     day,         &!26) Day                                      (836)
#     step,        &!27) Step                                     (837)
#     gpoint,      &!28) Grid point [1-843490]                    (838)
#     ind           !29) Index (rank-sorted)                      (839) 

# Field names, in file order, of one record of the original Chevalier data
# (see the table in the comment block preceding these definitions).
chev_orig_dtype_names = ("temp hum ozo cc clw ciw rain snow w lnpsurf z0 "
                         "tsurf t2m td2m hum2m u10 v10 stratrsrf convrsrf "
                         "snowsurf lsm lat long year month day step gpoint "
                         "ind".split())
# The first nine fields are profiles of 91 values, the other twenty are
# single values.
chev_orig_dtype_sizes = [91] * 9 + [1] * 20
# Everything is float64 (including lsm, index 20) except the integer-valued
# fields at the end of the record.
chev_orig_dtype_types = [numpy.float64] * 29
for _idx in (23, 24, 25, 26, 28): # year, month, day, step, ind
    chev_orig_dtype_types[_idx] = numpy.uint16
chev_orig_dtype_types[27] = numpy.uint32 # gpoint
# List of field specifications accepted by numpy.dtype / numpy.loadtxt.
# Built as a real list (zip() is an iterator on Python 3); single-valued
# fields are given as (name, type) because numpy deprecates (name, type, 1)
# as a spelling for a scalar field.
chev_orig_dtype = [
    (_name, _type, (_size,)) if _size > 1 else (_name, _type)
    for (_name, _type, _size) in zip(chev_orig_dtype_names,
                                     chev_orig_dtype_types,
                                     chev_orig_dtype_sizes)]

def read_chevalier_orig(f):
    """Load original Chevalier data from the text file *f*.

    The file is parsed by numpy.loadtxt using the record layout given by the
    module-level chev_orig_dtype.
    """
    records = numpy.loadtxt(f, dtype=chev_orig_dtype)
    return records

def get_f_grid(sat, sensor, channel, mode):
    """Find and load the frequency grid file for a satellite sensor.

    Every directory listed in environment variable ARTS_INCLUDE_PATH
    (colon-separated) is searched, in order, for
    <dir>/<sensor, lower case>/<SAT>_<SENSOR>.f_grid_fast.xml; the first
    file that exists is loaded with artsXML.load.

    Parameters
    ~~~~~~~~~~

    sat : string-like
        Satellite name (upper-cased for the file name)
    sensor : string-like
        Sensor name (lower-cased for the directory, upper-cased for the
        file name)
    channel :
        Only used in the error message; does not influence the search
    mode : string-like
        Must be "fast"

    Returns
    ~~~~~~~

    Whatever artsXML.load returns for the file found.

    Raises
    ~~~~~~

    NotImplementedError if mode is not "fast"; RuntimeError if
    ARTS_INCLUDE_PATH is not set or no matching file exists.
    """
    if mode == "fast":
        suf = "f_grid_fast"
    else:
        raise NotImplementedError("Only implemented for fast grids")

    includes = os.getenv("ARTS_INCLUDE_PATH")
    if includes is None:
        # without this guard the split below fails with an AttributeError
        raise RuntimeError(
            "Environment variable ARTS_INCLUDE_PATH is not set; cannot "
            "look for f-grid for %s / %s / %s / %s"
            % (sat, sensor, channel, mode))

    # the relative location is the same for every include directory
    subdir = sensor.lower()
    filename = sat.upper() + "_" + sensor.upper() + "." + suf + ".xml"
    for inc in includes.split(":"):
        p = os.path.join(inc, subdir, filename)
        if os.path.exists(p):
            break
    else:
        raise RuntimeError("No f-grid found for %s / %s / %s / %s" % (sat, sensor, channel, mode))
    return artsXML.load(p)

