# Source code for pvlib.iotools.psm3

"""
Retrieve and read NSRDB PSM3 weather data (single-year, typical-year (TMY),
and 5-minute datasets).
See https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/
"""

import io
import requests
import pandas as pd
from json import JSONDecodeError
from pvlib._deprecation import deprecated
from pvlib import tools

# Root of the NREL developer site; every PSM3 endpoint below hangs off it.
NSRDB_API_BASE = "https://developer.nrel.gov"
# Default endpoint: used when the request is neither a typical-year request
# nor a 5/15-minute request.
PSM_URL = "%s/api/nsrdb/v2/solar/psm3-2-2-download.csv" % NSRDB_API_BASE
# Endpoint used when ``names`` contains 'tmy', 'tgy' or 'tdy'.
TMY_URL = "%s/api/nsrdb/v2/solar/psm3-tmy-download.csv" % NSRDB_API_BASE
# Endpoint used when ``interval`` is 5 or 15 minutes.
PSM5MIN_URL = "%s/api/nsrdb/v2/solar/psm3-5min-download.csv" % NSRDB_API_BASE

# Fields requested by ``get_psm3`` when the caller does not pass
# ``attributes`` (names follow the PSM3 request convention).
ATTRIBUTES = (
    'air_temperature',
    'dew_point',
    'dhi',
    'dni',
    'ghi',
    'surface_albedo',
    'surface_pressure',
    'wind_direction',
    'wind_speed',
)
# Identification string: default ``full_name`` and ``affiliation`` of
# ``get_psm3`` and the value it sends as the request ``reason``.
PVLIB_PYTHON = 'pvlib python'

# Dictionary mapping PSM3 response names to pvlib names.
# ``read_psm3`` passes it to ``DataFrame.rename`` when ``map_variables`` is
# true, so only the column labels listed here are renamed.
VARIABLE_MAP = {
    # measured / modeled irradiance components
    'GHI': 'ghi',
    'DHI': 'dhi',
    'DNI': 'dni',
    # clear-sky irradiance components
    'Clearsky GHI': 'ghi_clear',
    'Clearsky DHI': 'dhi_clear',
    'Clearsky DNI': 'dni_clear',
    # solar position
    'Solar Zenith Angle': 'solar_zenith',
    # meteorological and surface quantities
    'Temperature': 'temp_air',
    'Dew Point': 'temp_dew',
    'Relative Humidity': 'relative_humidity',
    'Pressure': 'pressure',
    'Wind Speed': 'wind_speed',
    'Wind Direction': 'wind_direction',
    'Surface Albedo': 'albedo',
    'Precipitable Water': 'precipitable_water',
}

# Dictionary mapping pvlib names to PSM3 request names.
# Note: PSM3 uses different names for the same variables in the response
# and in the request, so this is not simply the inverse of the
# response-name mapping.
# ``get_psm3`` looks each requested attribute up here and passes through,
# unchanged, any name that is not a key of this dictionary.
REQUEST_VARIABLE_MAP = {
    # irradiance components
    'ghi': 'ghi',
    'dhi': 'dhi',
    'dni': 'dni',
    # clear-sky irradiance components
    'ghi_clear': 'clearsky_ghi',
    'dhi_clear': 'clearsky_dhi',
    'dni_clear': 'clearsky_dni',
    # solar position
    'solar_zenith': 'solar_zenith_angle',
    # meteorological and surface quantities
    'temp_air': 'air_temperature',
    'temp_dew': 'dew_point',
    'relative_humidity': 'relative_humidity',
    'pressure': 'surface_pressure',
    'wind_speed': 'wind_speed',
    'wind_direction': 'wind_direction',
    'albedo': 'surface_albedo',
    'precipitable_water': 'total_precipitable_water',
}



# [docs]
def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
             attributes=ATTRIBUTES, leap_day=True, full_name=PVLIB_PYTHON,
             affiliation=PVLIB_PYTHON, map_variables=True, url=None,
             timeout=30):
    """
    Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API. The NSRDB
    is described in [1]_ and the PSM3 API is described in [2]_, [3]_, and [4]_.

    .. versionchanged:: 0.9.0
       The function now returns a tuple where the first element is a dataframe
       and the second element is a dictionary containing metadata. Previous
       versions of this function had the return values switched.

    .. versionchanged:: 0.10.0
       The default endpoint for hourly single-year datasets is now v3.2.2.
       The previous datasets can still be accessed (for now) by setting
       the ``url`` parameter to the original API endpoint
       (``"https://developer.nrel.gov/api/nsrdb/v2/solar/psm3-download.csv"``).

    Parameters
    ----------
    latitude : float or int
        in decimal degrees, between -90 and 90, north is positive
    longitude : float or int
        in decimal degrees, between -180 and 180, east is positive
    api_key : str
        NREL Developer Network API key
    email : str
        NREL API uses this to automatically communicate messages back
        to the user only if necessary
    names : str, default 'tmy'
        PSM3 API parameter specifying year (e.g. ``2020``) or TMY variant
        to download (e.g. ``'tmy'`` or ``'tgy-2019'``).  The allowed values
        update periodically, so consult the NSRDB references below for the
        current set of options.
    interval : int, {60, 5, 15, 30}
        interval size in minutes, must be 5, 15, 30 or 60. Must be 60 for
        typical year requests (i.e., tmy/tgy/tdy).
    attributes : list of str, optional
        meteorological fields to fetch. If not specified, defaults to
        ``pvlib.iotools.psm3.ATTRIBUTES``. See references [2]_, [3]_, and [4]_
        for lists of available fields. Alternatively, pvlib names may also be
        used (e.g. 'ghi' rather than 'GHI'); see :const:`REQUEST_VARIABLE_MAP`.
        To retrieve all available fields, set ``attributes=[]``.
    leap_day : bool, default : True
        include leap day in the results. Only used for single-year requests
        (i.e., it is ignored for tmy/tgy/tdy requests).
    full_name : str, default 'pvlib python'
        optional
    affiliation : str, default 'pvlib python'
        optional
    map_variables : bool, default True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
    url : str, optional
        API endpoint URL.  If not specified, the endpoint is determined from
        the ``names`` and ``interval`` parameters.
    timeout : int, default 30
        time in seconds to wait for server response before timeout

    Returns
    -------
    data : pandas.DataFrame
        timeseries data from NREL PSM3
    metadata : dict
        metadata from NREL PSM3 about the record, see
        :func:`pvlib.iotools.read_psm3` for fields

    Raises
    ------
    requests.HTTPError
        if the request response status is not ok, then the ``'errors'`` field
        from the JSON response or any error message in the content will be
        raised as an exception, for example if the `api_key` was rejected or if
        the coordinates were not found in the NSRDB

    Notes
    -----
    The required NREL developer key, `api_key`, is available for free by
    registering at the `NREL Developer Network <https://developer.nrel.gov/>`_.

    .. warning:: The "DEMO_KEY" `api_key` is severely rate limited and may
        result in rejected requests.

    .. warning:: PSM3 is limited to data found in the NSRDB, please consult the
        references below for locations with available data. Additionally,
        querying data with < 30-minute resolution uses a different API endpoint
        with fewer available fields (see [4]_).

    See Also
    --------
    pvlib.iotools.read_psm3

    References
    ----------

    .. [1] `NREL National Solar Radiation Database (NSRDB)
       <https://nsrdb.nrel.gov/>`_
    .. [2] `Physical Solar Model (PSM) v3.2.2
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-2-2-download/>`_
    .. [3] `Physical Solar Model (PSM) v3 TMY
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-tmy-download/>`_
    .. [4] `Physical Solar Model (PSM) v3 - Five Minute Temporal Resolution
       <https://developer.nrel.gov/docs/solar/nsrdb/psm3-5min-download/>`_
    """
    # The well-known text (WKT) representation of geometry notation is
    # strict. A POINT object is a string with longitude first, then the
    # latitude, with four decimals each, and exactly one space between them.
    longitude = ('%9.4f' % longitude).strip()
    latitude = ('%8.4f' % latitude).strip()
    # TODO: make format_WKT(object_type, *args) in tools.py

    # convert to string to accommodate integer years being passed in
    names = str(names)

    # convert pvlib names in attributes to psm3 convention; names that are
    # not pvlib names are passed through unchanged
    attributes = [REQUEST_VARIABLE_MAP.get(a, a) for a in attributes]

    # required query-string parameters for request to PSM3 API
    params = {
        'api_key': api_key,
        'full_name': full_name,
        'email': email,
        'affiliation': affiliation,
        'reason': PVLIB_PYTHON,
        'mailing_list': 'false',
        'wkt': 'POINT(%s %s)' % (longitude, latitude),
        'names': names,
        'attributes':  ','.join(attributes),
        'leap_day': str(leap_day).lower(),
        'utc': 'false',
        'interval': interval
    }
    # request CSV download from NREL PSM3
    if url is None:
        # determine the endpoint that suits the user inputs
        if any(prefix in names for prefix in ('tmy', 'tgy', 'tdy')):
            url = TMY_URL
        elif interval in (5, 15):
            url = PSM5MIN_URL
        else:
            url = PSM_URL

    response = requests.get(url, params=params, timeout=timeout)
    if not response.ok:
        # if the API key is rejected, then the response status will be 403
        # Forbidden, and then the error is in the content and there is no JSON
        try:
            errors = response.json()['errors']
        except (ValueError, KeyError, TypeError):
            # ValueError: the body is not JSON. The concrete decode-error
            # class raised by ``response.json()`` can differ between
            # environments, but the known ones derive from ValueError.
            # KeyError / TypeError: the body is JSON but carries no
            # ``'errors'`` entry (or is not a mapping at all).
            # In every case fall back to the raw body so that the caller
            # still gets the documented HTTPError rather than an unrelated
            # exception.
            errors = response.content.decode('utf-8')
        raise requests.HTTPError(errors, response=response)
    # the CSV is in the response content as a UTF-8 bytestring
    # to use pandas we need to create a file buffer from the response
    fbuf = io.StringIO(response.content.decode('utf-8'))
    return read_psm3(fbuf, map_variables)




# [docs]
def read_psm3(filename, map_variables=True):
    """
    Parse an NSRDB PSM3 weather file in the SAM CSV layout. See [1]_ for the
    NSRDB itself and [2]_ for the SAM CSV format.

    .. versionchanged:: 0.9.0
       The function now returns a tuple where the first element is a dataframe
       and the second element is a dictionary containing metadata. Previous
       versions of this function had the return values switched.

    Parameters
    ----------
    filename: str, path-like, or buffer
        Filename or in-memory buffer of a file containing data to read.
    map_variables: bool, default True
        When true, renames columns of the Dataframe to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Returns
    -------
    data : pandas.DataFrame
        timeseries data from NREL PSM3
    metadata : dict
        metadata from NREL PSM3 about the record, see notes for fields

    Notes
    -----
    The result is a two-item tuple: the PSM3 timeseries as a dataframe,
    followed by a dictionary of metadata about the record.

    The metadata dictionary is built from the first two lines of the file
    and contains the following fields:

    * Source
    * Location ID
    * City
    * State
    * Country
    * Latitude
    * Longitude
    * Time Zone
    * Elevation
    * Local Time Zone
    * Clearsky DHI Units
    * Clearsky DNI Units
    * Clearsky GHI Units
    * Dew Point Units
    * DHI Units
    * DNI Units
    * GHI Units
    * Solar Zenith Angle Units
    * Temperature Units
    * Pressure Units
    * Relative Humidity Units
    * Precipitable Water Units
    * Wind Direction Units
    * Wind Speed Units
    * Cloud Type -15, and Cloud Type 0 through Cloud Type 12
    * Fill Flag 0 through Fill Flag 5
    * Surface Albedo Units
    * Version

    When ``map_variables`` is true, ``Latitude``, ``Longitude`` and
    ``Elevation`` are stored under the keys ``latitude``, ``longitude`` and
    ``altitude`` instead.

    Examples
    --------
    >>> # Read a local PSM3 file:
    >>> df, metadata = iotools.read_psm3("data.csv")  # doctest: +SKIP

    >>> # Read a file object or an in-memory buffer:
    >>> with open(filename, 'r') as f:  # doctest: +SKIP
    ...     df, metadata = iotools.read_psm3(f)  # doctest: +SKIP

    See Also
    --------
    pvlib.iotools.get_psm3

    References
    ----------
    .. [1] `NREL National Solar Radiation Database (NSRDB)
       <https://nsrdb.nrel.gov/>`_
    .. [2] `Standard Time Series Data File Format
       <https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
    """
    date_parts = ['Year', 'Month', 'Day', 'Hour', 'Minute']
    integer_columns = date_parts + ['Cloud Type', 'Fill Flag']

    with tools._file_context_manager(filename) as stream:
        # Lines 1 and 2 hold the metadata names and their values.
        header_keys = stream.readline().split(',')
        header_vals = stream.readline().split(',')
        # Line 3 names the data columns; they are needed to assign dtypes.
        column_names = stream.readline().split(',')
        column_names[-1] = column_names[-1].strip()  # drop trailing newline
        # The metadata header is wider than the data table, so files saved
        # from Excel carry blank column names here; discard them.
        column_names = [name for name in column_names if name]
        # Everything is a float except the date vector and the flag columns.
        column_dtypes = {
            **dict.fromkeys(column_names, float),
            **dict.fromkeys(integer_columns, int),
        }
        data = pd.read_csv(
            stream,
            header=None,
            names=column_names,
            usecols=column_names,
            dtype=column_dtypes,
            delimiter=',',
            lineterminator='\n',  # skip carriage returns \r
        )

    # Only the final entry of each header line can carry the line ending.
    header_keys[-1] = header_keys[-1].strip()
    header_vals[-1] = header_vals[-1].strip()
    metadata = dict(zip(header_keys, header_vals))

    # Header values arrive as text; give the numeric ones a numeric type.
    numeric_fields = (
        ('Local Time Zone', int),
        ('Time Zone', int),
        ('Latitude', float),
        ('Longitude', float),
        ('Elevation', int),
    )
    for field, caster in numeric_fields:
        metadata[field] = caster(metadata[field])

    # Assemble timestamps from the date-vector columns and attach a fixed
    # UTC offset. "Etc/GMT" zone names use the opposite sign of the offset,
    # hence the negation; integer offsets are assumed (true in the USA).
    timestamps = pd.to_datetime(data[date_parts])
    zone_name = 'Etc/GMT{:+d}'.format(-metadata['Time Zone'])
    data.index = pd.DatetimeIndex(timestamps).tz_localize(zone_name)

    if not map_variables:
        return data, metadata

    # Switch both the columns and the location metadata to pvlib naming.
    data = data.rename(columns=VARIABLE_MAP)
    renamed_keys = (
        ('Latitude', 'latitude'),
        ('Longitude', 'longitude'),
        ('Elevation', 'altitude'),
    )
    for psm3_key, pvlib_key in renamed_keys:
        metadata[pvlib_key] = metadata.pop(psm3_key)

    return data, metadata



# Deprecated alias kept for backward compatibility: ``parse_psm3`` is
# ``read_psm3`` wrapped by the ``deprecated`` helper, flagged as deprecated
# since 0.12.1 with ``read_psm3`` named as the replacement.
# NOTE(review): presumably the wrapper emits a deprecation warning on each
# call -- confirm against ``pvlib._deprecation``.
parse_psm3 = deprecated(since="0.12.1", name="parse_psm3",
                        alternative="read_psm3")(read_psm3)