From 88225246ac528cca98c22f693b8ac1718febeaf1 Mon Sep 17 00:00:00 2001 From: CalCraven Date: Sun, 16 Jun 2024 08:57:05 -0500 Subject: [PATCH 1/2] Adjust and move pandas dataframe conversion to external module --- gmso/core/topology.py | 77 ------ gmso/external/__init__.py | 1 + gmso/external/convert_dataframe.py | 381 +++++++++++++++++++++++++++ gmso/tests/test_convert_dataframe.py | 153 +++++++++++ gmso/tests/test_topology.py | 88 +------ 5 files changed, 536 insertions(+), 164 deletions(-) create mode 100644 gmso/external/convert_dataframe.py create mode 100644 gmso/tests/test_convert_dataframe.py diff --git a/gmso/core/topology.py b/gmso/core/topology.py index 856adaa7f..f3abd8a9a 100644 --- a/gmso/core/topology.py +++ b/gmso/core/topology.py @@ -1237,83 +1237,6 @@ def write_forcefield(self, filename, overwrite=False): ff = self.get_forcefield() ff.to_xml(filename=filename, overwrite=overwrite) - def to_dataframe(self, parameter="sites", site_attrs=None, unyts_bool=True): - """Return a pandas dataframe object for the sites in a topology - - Parameters - ---------- - parameter : str, default='sites' - A string determining what aspects of the gmso topology will be reported. - Options are: 'sites', 'bonds', 'angles', 'dihedrals', and 'impropers'. Defaults to 'sites'. - site_attrs : list of str, default=None - List of strings that are attributes of the topology site and can be included as entries in the pandas dataframe. - Examples of these can be found by printing `topology.sites[0].__dict__`. - See https://gmso.mosdef.org/en/stable/data_structures.html#gmso.Atom for additional information on labeling. - unyts_bool: bool, default=True - Determine if numerical values are saved as unyt quantities or floats. See - https://unyt.readthedocs.io/en/stable/usage.html - for more information about manipulating unyt quantities. - Default is True. 
- - Returns - ------- - Pandas Dataframe - A pandas.Dataframe object, see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html - for further information. - - Examples - ________ - >>> topology.to_dataframe(parameter = 'sites', site_attrs = ['charge']) - This will return a dataframe with a listing of the sites and include the charges that correspond to each site. - >>> topology.to_dataframe(parameter = 'dihedrals', site_attrs = ['positions']) - This will return a dataframe with a listing of the sites that make up each dihedral, the positions of each of - those sites, and the parameters that are associated with the dihedrals. - - Notes - ____ - A dataframe is easily manipulated. In order to change the rounding to two decimals places for a column named `label`: - >>> df['label'] = df['label'].round(2) - The column labels can also be easily modified. This line can take a dataframe `df` and rename a column labeled - `Atom0` to `newname` using a dictionary. - >>> df.rename(columns = {'Atom0':'newname'}) - See https://pandas.pydata.org/pandas-docs/stable/getting_started/intro_tutorials/index.html for further information. - """ - from gmso.utils.io import import_ - - pd = import_("pandas") - if not site_attrs: - site_attrs = [] - df = pd.DataFrame() - if not self.is_typed(): - raise GMSOError( - "This topology is not typed, please type this object before converting to a pandas dataframe" - ) - if parameter == "sites": - df["atom_types"] = list(site.atom_type.name for site in self.sites) - df["names"] = list(site.name for site in self.sites) - for attr in site_attrs: - df = self._parse_dataframe_attrs(df, attr, parameter, unyts_bool) - elif parameter in ["bonds", "angles", "dihedrals", "impropers"]: - if len(getattr(self, parameter)) == 0: - raise GMSOError( - f"There arent any {parameter} in the topology. The dataframe would be empty." 
- ) - df = self._pandas_from_parameters( - df, - parameter=parameter, - site_attrs=site_attrs, - unyts_bool=unyts_bool, - ) - df = self._parse_parameter_expression(df, parameter, unyts_bool) - else: - raise AttributeError( - "{} is not yet supported for outputting parameters to a dataframe. \ - Please use one of 'sites', 'bonds', 'angles', 'dihedrals', or \ - 'impropers'".format(str(parameter)) - ) - - return df - def get_forcefield(self): """Get an instance of gmso.ForceField out of this topology diff --git a/gmso/external/__init__.py b/gmso/external/__init__.py index bf94976d6..8a0451072 100644 --- a/gmso/external/__init__.py +++ b/gmso/external/__init__.py @@ -1,6 +1,7 @@ # ruff: noqa: F401 """Support for various in-memory representations of chemical systems.""" +from .convert_dataframe import to_dataframeDict from .convert_hoomd import ( to_gsd_snapshot, to_hoomd_forcefield, diff --git a/gmso/external/convert_dataframe.py b/gmso/external/convert_dataframe.py new file mode 100644 index 000000000..894f42f82 --- /dev/null +++ b/gmso/external/convert_dataframe.py @@ -0,0 +1,381 @@ +"""Module support for converting to/from Pandas DataFrame objects.""" + +import functools +import warnings +from collections.abc import Iterable + +import numpy as np +import unyt as u + +from gmso import Topology +from gmso.core.views import PotentialFilters +from gmso.exceptions import GMSOError +from gmso.utils.io import import_ + +pd = import_("pandas") +pfilter = PotentialFilters.UNIQUE_PARAMETERS + + +def to_dataframeDict( + topology: Topology, + parameters: str or list[str] = "all", + format: str = "default", + columns: list[str] = None, + handle_unyts: str = "to_headers", +) -> pd.DataFrame: + """Return a dictionary of pandas dataframe objects for a topology. 
+ + Parameters + ---------- + topology : gmso.Topology, required + Topology to use for converting values + parameters : str or list of str, optional, default='all' + A string determining what aspects of the gmso topology will be reported. + Options are: 'all', 'sites', 'bonds', 'angles', 'dihedrals', and 'impropers'. Defaults to 'all'. Can pass multiple strings as a list. + format : str, optional, default='default' + The output formatting style for the dataframe. + Options are 'default', 'specific_columns', 'publication', and 'remove_duplicates'. Defaults to 'default'. + 'default' will output default column values of ["name", "atom_type.name", "atom_type.parameters", "charge", "mass"], + and any additional attributes in the `columns` argument. + 'specific_columns' will only output the attributes from the `columns` argument. + 'publication' will use the default outputs, but remove duplicate values from the dataframes. It adds a column labeled + 'Atom Indices' to the `sites` dataframe, which enumerates the indices that the atom_type is a part of. + 'remove_duplicates' will remove duplicate rows from the dataframe. For sites, the default column is `atom_type.name`. + For connections, it is `<connection>_type.member_classes`, e.g. `bond_type.member_classes`. Unlike 'publication', no + `Atom Indices` column is added for this format. Because these methods + are specific to a given Topology element, the `parameters` argument must be one of + {"sites", "bonds", "angles", "dihedrals", "impropers"}, not {"all"}. + columns : list of str, optional, default=None + List of strings that are attributes of the topology site and can be included as entries in the pandas dataframe. + Examples of these can be found by printing `topology.sites[0].__dict__` or `topology.bonds[0].__dict__`. + See https://gmso.mosdef.org/en/stable/data_structures.html#gmso.Atom for additional information on labeling. 
+ handle_unyts : str, optional, default='to_headers' + The placement/recording of unyt quantities in dataframe. + Options are 'to_headers', 'with_data', 'no_unyts'. + Determines if numerical values in the DataFrame are saved as unyt quantities or floats. Default case, 'to_headers', + puts the unyts as strings to go with the column header of the dataframe. + `with_data` leaves any values alone, so any values in the Topology that are unyt quantities will stay that way. + `no_unyts` strips any unyt values and converts to a float in the associated element of the dataframe. + See https://unyt.readthedocs.io/en/stable/usage.html + for more information about manipulating unyt quantities. + + Returns + ------- + Dictionary of Pandas Dataframe + A Python dictionary of pandas.DataFrame objects, see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html + for further information. The keys of this dictionary are the attributes of the topology that are associated with each DataFrame. These + can be `sites`, `bonds`, `angles`, `dihedrals`, `impropers`, which are determined from the argument `parameters`. A value is None (and a warning is issued) when the topology has no elements of that kind. + + + Examples + -------- + # example topology to use + ``` python + >>> import gmso + >>> import mbuild as mb + >>> from gmso.parameterization import apply + >>> cpd = mb.load("C", smiles=True) + >>> top = cpd.to_gmso() + >>> ff = gmso.ForceField("oplsaa") + >>> ptop = apply(top, ff) + ``` + + + >>> gmso.external.convert_dataframe.to_dataframeDict(ptop, parameters='sites', columns=['charge'], handle_unyts="to_headers") + This will return a dictionary whose `sites` dataframe lists the sites and includes the charges that correspond to each site. 
+ ``` + {'sites': + name atom_type.name epsilon (kJ/mol) sigma (nm) charge (elementary_charge) mass (amu) + 0 C opls_138 0.276144 0.35 -0.24 12.011 + 1 H opls_140 0.125520 0.25 0.06 1.008 + 2 H opls_140 0.125520 0.25 0.06 1.008 + 3 H opls_140 0.125520 0.25 0.06 1.008 + 4 H opls_140 0.125520 0.25 0.06 1.008 + } + ``` + + >>> gmso.external.convert_dataframe.to_dataframeDict(topology, parameters='dihedrals') + For a typed topology that contains dihedrals, this will return a dictionary whose `dihedrals` dataframe lists, for each dihedral, + its name, the member classes of its dihedral type, and the parameters that are associated with the dihedral type. + + Notes + ----- + A dataframe is easily manipulated. In order to change the rounding to two decimal places for a column named `label`: + >>> df['label'] = df['label'].round(2) + The column labels can also be easily modified. This line can take a dataframe `df` and rename a column labeled + `Atom0` to `newname` using a dictionary. + >>> df.rename(columns = {'Atom0':'newname'}) + See https://pandas.pydata.org/pandas-docs/stable/getting_started/intro_tutorials/index.html for further information. 
+ """ + if columns is None: + columns = [] + if not topology.is_typed(): + raise GMSOError( + "This topology is not typed, please type this object before converting to a pandas dataframe" + ) + outDict = {} # dictionary of dataframes to write out + + # Get columns from format methods + columnsDict = {} + connectionsList = [ + "bonds", + "angles", + "dihedrals", + "impropers", + ] # these can be handled generally + remove_duplicatesBool = False # flag to remove duplicate parameters from the dataframe and put indices into a new column + if format == "default": + columnsDict = { + param: [ + "name", + f"{param[:-1]}_type.member_classes", + f"{param[:-1]}_type.parameters", + ] + for param in connectionsList + } + columnsDict["sites"] = [ + "name", + "atom_type.name", + "atom_type.parameters", + "charge", + "mass", + ] + if isinstance(columns, list): + columnsDict = { + key: columnsDict[key] + columns for key in columnsDict.keys() + } # add in any provided columns + elif format == "specific_columns": + assert parameters != "all", ( + f"When formatting for specific columns, please set parameter argument to be one of {['sites']+connectionsList}." + "Otherwise use a format of default." + ) + if isinstance(parameters, str): + parametersList = [parameters] + else: + parametersList = parameters + columnsDict = {parameter: columns for parameter in parametersList} + elif format == "publication": + columnsDict = { + param: [ + "name", + f"{param[:-1]}_type.member_classes", + f"{param[:-1]}_type.parameters", + ] + for param in connectionsList + } + columnsDict["sites"] = [ + "name", + "atom_type.name", + "atom_type.parameters", + "charge", + "mass", + ] + remove_duplicatesBool = True + elif format == "remove_duplicates": + assert parameters != "all", ( + f"When formatting for specific columns, please set parameter argument to be one of {['sites']+connectionsList}." + "Otherwise use a format of default." 
+ ) + if not columns and parameters == "sites": # default values + columns = ["atom_type.name"] + elif not columns and parameters in connectionsList: + columns = [f"{parameters[:-1]}_type.member_classes"] + columnsDict = {parameters: columns} + else: + raise ValueError( + f"Available options for format are 'default', 'specific_columns', 'publication', or 'remove_duplicates'. The incorrect argument passed was {format=}." + ) + + if parameters == "all": + parametersList = ["sites"] + connectionsList + elif parameters in connectionsList or parameters == "sites": + parametersList = [parameters] + elif isinstance(parameters, list) and all( + [parameter in connectionsList + ["sites"] for parameter in parameters] + ): + parametersList = parameters + else: + allowed_parameters = "', '".join(connectionsList) + raise ValueError( + f"Parameters argument {parameters} must be one of: 'all', 'sites', '{allowed_parameters}'." + ) + + for param in parametersList: + if not getattr(topology, f"n_{param}"): + warnings.warn( + UserWarning( + f"Topology {topology} has no {param}, so adding a None element to dictionary" + ) + ) + outDict[param] = None + continue + dataList, columns = _generate_component_lists( + topology, param, columnsDict.get(param) + ) + # handle unyts in values + dataList, columns = _parse_unyts(handle_unyts, dataList, columns) + dataDict = {col: data for col, data in zip(columns, dataList)} + outDict[param] = pd.DataFrame(dataDict) # create dataframe + + if ( + remove_duplicatesBool and topology.n_sites > 0 + ): # use flag to remove duplicates in sites + outDict["sites"] = _add_duplicate_indices_to_sites_dataframe(outDict["sites"]) + for param in connectionsList: + if not getattr(topology, f"n_{param}"): + continue + outDict[param] = _remove_duplicate_connections(outDict[param], param) + + if format == "remove_duplicates": + for df in outDict.values(): # remove duplicate values + df.drop("Atom Indices", errors="ignore") + df.drop_duplicates(inplace=True, 
ignore_index=True) + + return outDict + + +def _parse_unyts(handle_unyts, dataList, columnsList): + if handle_unyts == "to_headers": # move units to the header + columnsList = _parse_unyts_to_headers(dataList, columnsList) + dataList = _parse_unyts_no_unytss(dataList) + elif handle_unyts == "with_data": # leave units where they are + pass + elif handle_unyts == "no_unyts": # convert units to floats + dataList = _parse_unyts_no_unytss(dataList) + else: + raise ValueError( + f"Supplied the argument {handle_unyts=} of {type(handle_unyts)}, but must provide one of the arguments 'to_headers', 'with_data', or 'no_unyts'." + ) + return dataList, columnsList + + +def _parse_unyts_no_unytss(dataList) -> list: + for i in range(len(dataList)): + if isinstance(dataList[i][0], u.unyt_array): + dataList[i] = [float(x) for x in dataList[i]] # turn to float + return dataList + + +def _parse_unyts_to_headers(dataList, columns) -> list: + new_colsList = [] + for data, col in zip(dataList, columns): + if isinstance(data[0], u.unyt_array): + unit = str(data[0].units) # assumption that all data in List is same units + new_colsList.append(col + f" ({unit})") + else: + new_colsList.append(col) + return new_colsList + + +def _generate_component_lists(topology, parameter, columns) -> list: + outList = [] + columnsList = [] + for column in columns: + valuesList = _recursive_getattr(topology, parameter, column) + if isinstance(valuesList[0], dict): + # add keys to columnsList and values to outList + keys = list(valuesList[0].keys()) + values_dictList = [[value[key] for value in valuesList] for key in keys] + outList.extend(values_dictList) + columnsList.extend(keys) + elif isinstance(valuesList[0], u.unyt_array) and not isinstance( + valuesList[0], u.unyt_quantity + ): + outList.extend(np.array(valuesList).T) + if column == "position": + columnsList.extend(["x", "y", "z"]) + else: + columnsList.extend( + [f"{column}-({i})" for i in range(len(valuesList[0]))] + ) + elif 
isinstance(valuesList[0], tuple) or isinstance( + valuesList[0], list + ): # could be connection_members + outList.extend(np.array(valuesList).T) + if "connection_members" in column: + columnsList.extend( + [f"{parameter} member ({i})" for i in range(len(valuesList[0]))] + ) + else: + columnsList.extend( + [f"{column}-({i})" for i in range(len(valuesList[0]))] + ) + + # handle positions? + # handle connection_members + pass + else: + outList.append(valuesList) + columnsList.append(column) + return outList, columnsList + + +def _recursive_getattr(topology, attr, attr_attr): + """Parse a topology to get a list of attributes from an iterable.""" + + def _getattr(obj, attr1): + try: + return getattr(obj, attr1) + except AttributeError: + raise AttributeError( + f"The GMSO Topology is missing the requested attribute {attr1} from {obj}.{attr_attr}" + ) + + iteritems = getattr(topology, attr) + + def _parseFunction(x): + return functools.reduce(_getattr, [x] + attr_attr.split(".")) + + return list(map(_parseFunction, iteritems)) + + +def _add_duplicate_indices_to_sites_dataframe(df: pd.DataFrame) -> pd.DataFrame: + unique_col = "atom_type.name" # use to grab what is considered `unique`, may be able to make this a variable in the future + df["Atom Indices"] = df[unique_col].apply( + lambda x: ", ".join(str(v) for v in df.index[df[unique_col] == x].to_list()) + ) + keep = df[~df.duplicated(subset=unique_col)] + return keep.reset_index() + + +def _remove_duplicate_connections(df: pd.DataFrame, parameter) -> pd.DataFrame: + # dset connection members length + membersMap = {"bonds": 2, "angles": 3, "dihedrals": 4, "impropers": 4} + # drop duplicate rows in df + n_atoms = membersMap[parameter] + df = df.drop_duplicates( + subset=[f"{parameter[:-1]}_type.member_classes-({i})" for i in range(n_atoms)] + ) + # remove columns for indexing + # df = df.drop(labels=[f"Atom{i}" for i in range(n_atoms)], axis=1) + return df.reset_index(drop=True) + + +def 
multi_topology_dataframe(topologies: list) -> pd.DataFrame: + """Take an iterable of topologies and create a combined dataframe to encompass all parameters.""" + assert isinstance(topologies, Iterable) + assert isinstance(next(iter(topologies)), Topology) + topList = list(topologies) + dictList = [] + for top in topList: + dictList.append(to_dataframeDict(top, format="publication")) + concatDict = {} + for parameter in ["sites", "bonds", "angles", "dihedrals", "impropers"]: + dfsList = list(map(lambda x: x.get(parameter), dictList)) + if not any(elem is not None for elem in dfsList): + continue + dfout = pd.concat( + [ + df.drop("Atom Indices", errors="ignore") + for df in dfsList + if df is not None + ] + ) # remove missing dfs + # remove duplicates + concatDict[parameter] = dfout.drop_duplicates().reset_index() + + return concatDict + + +def generate_topology_report(topologies: list) -> pd.DataFrame: + """Generate information of 2D structure and parameters for an iterable of Topologies.""" diff --git a/gmso/tests/test_convert_dataframe.py b/gmso/tests/test_convert_dataframe.py new file mode 100644 index 000000000..a0103604d --- /dev/null +++ b/gmso/tests/test_convert_dataframe.py @@ -0,0 +1,153 @@ +import numpy as np +import pytest +import unyt as u + +from gmso.external.convert_dataframe import ( + _recursive_getattr, + multi_topology_dataframe, + to_dataframeDict, +) +from gmso.tests.base_test import BaseTest +from gmso.utils.io import has_pandas + + +class TestConvertDataFrame(BaseTest): + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_recursive_sites(self, typed_ethane): + out = list(_recursive_getattr(typed_ethane, "sites", "atom_type.atomclass")) + expected = [site.atom_type.atomclass for site in typed_ethane.sites] + assert out == expected + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_recursive_dihedrals(self, typed_ethane): + out = list( + _recursive_getattr(typed_ethane, 
"dihedrals", "dihedral_type.member_types") + ) + expected = [ + dihedral.dihedral_type.member_types for dihedral in typed_ethane.dihedrals + ] + assert out == expected + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_to_dataframeDict(self, typed_ethane): + expected_valuesList = [8, 7, 12, 9] + checkList = ["sites", "bonds", "angles", "dihedrals"] + for parameter, val in zip(checkList, expected_valuesList): + assert ( + len(to_dataframeDict(typed_ethane, parameters=parameter)[parameter]) + == val + ) + allDict = to_dataframeDict(typed_ethane, parameters="all") + dfList = [allDict.get(key) for key in checkList] + assert list(map(len, dfList)) == expected_valuesList + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_dataframe_impropers(self, benzeneTopology): + expected_valuesList = [12, 12, 18, 24, 6] + checkList = ["sites", "bonds", "angles", "dihedrals", "impropers"] + for parameter, val in zip(checkList, expected_valuesList): + assert ( + len(to_dataframeDict(benzeneTopology, parameters=parameter)[parameter]) + == val + ) + allDict = to_dataframeDict(benzeneTopology, parameters="all") + dfList = [allDict.get(key) for key in checkList] + assert list(map(len, dfList)) == expected_valuesList + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_dataframe_default_columns(self, typed_ethane): + expected_columns = set( + [ + "name", + "atom_type.name", + "sigma", + "epsilon", + "charge", + "mass", + ] + ) + assert np.all( + set( + to_dataframeDict(typed_ethane, "sites", handle_unyts="no_unyts")[ + "sites" + ].columns + ) + == expected_columns + ) + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_dataframe_specified_columns(self, typed_ethane): + input_columns = ["name", "position", "group"] + expected_columns = ["name", "x", "y", "z", "group"] + assert np.all( + list( + to_dataframeDict( + typed_ethane, + "sites", + 
columns=input_columns, + format="specific_columns", + )["sites"].columns + ) + == expected_columns + ) + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_dataframe_publication(self, benzeneTopology): + dfDict = to_dataframeDict(benzeneTopology, "all", format="publication") + df = dfDict["sites"] + assert len(df.index) == 2 + assert len(df.columns) == 8 + assert "Atom Indices" in df.columns + assert df["Atom Indices"].loc[0] == ", ".join(str(v) for v in np.arange(6)) + assert df["Atom Indices"].loc[1] == ", ".join(str(v) for v in np.arange(6, 12)) + + connectList = ["bonds", "angles", "dihedrals", "impropers"] + checkList = [2, 2, 3, 1] + for connect, check in zip(connectList, checkList): + df = dfDict[connect] + assert len(df.index) == check + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_dataframe_remove_duplicates(self, benzeneTopology): + dfDict = to_dataframeDict(benzeneTopology, "sites", format="remove_duplicates") + df = dfDict["sites"] + assert len(df.index) == 2 + assert len(df.columns) == 1 + assert "Atom Indices" not in df.columns + + connectList = ["bonds", "angles", "dihedrals", "impropers"] + checkList = [2, 2, 3, 1] + for connect, check in zip(connectList, checkList): + dfDict = to_dataframeDict( + benzeneTopology, connect, format="remove_duplicates" + ) + df = dfDict[connect] + assert len(df.index) == check + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def test_dataframe_unyts(self, typed_ethane): + dfDict = to_dataframeDict( + typed_ethane, "all", format="publication", handle_unyts="with_data" + ) + df = dfDict["sites"] + assert isinstance(df["charge"].loc[0], u.unyt_quantity) + + dfDict = to_dataframeDict( + typed_ethane, "all", format="publication", handle_unyts="no_unyts" + ) + df = dfDict["sites"] + assert isinstance(df["charge"].loc[0], float) + + @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") + def 
test_multi_topology_dataframe(self, benzeneTopology, spce_water): + dfDict = multi_topology_dataframe( + [benzeneTopology, spce_water, benzeneTopology] + ) + connectList = ["sites", "bonds", "angles", "dihedrals", "impropers"] + checkList = [4, 3, 3, 3, 1] + for connect, check in zip(connectList, checkList): + df = dfDict.get(connect) + if df is None: + assert df == check + else: + assert len(df.index) == check diff --git a/gmso/tests/test_topology.py b/gmso/tests/test_topology.py index 49e246b76..8fd8ed634 100644 --- a/gmso/tests/test_topology.py +++ b/gmso/tests/test_topology.py @@ -21,7 +21,7 @@ from gmso.exceptions import GMSOError from gmso.external.convert_parmed import from_parmed from gmso.tests.base_test import BaseTest -from gmso.utils.io import get_fn, has_pandas, has_parmed, import_ +from gmso.utils.io import get_fn, has_parmed, import_ from gmso.utils.units import GMSO_UnitRegistry as UnitReg if has_parmed: @@ -715,92 +715,6 @@ def test_topology_set_scaling_factors_none(self): with pytest.raises(ValueError): top.set_scaling_factors(None, None) - @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") - def test_to_dataframe(self, typed_ethane): - assert len(typed_ethane.to_dataframe()) == 8 - assert len(typed_ethane.to_dataframe(parameter="bonds")) == 7 - assert len(typed_ethane.to_dataframe(parameter="angles")) == 12 - assert len(typed_ethane.to_dataframe(parameter="dihedrals")) == 9 - assert np.isclose( - float( - typed_ethane.to_dataframe(site_attrs=["charge", "position"])[ - "charge (e)" - ][0] - ), - typed_ethane.sites[0] - .charge.in_units( - u.Unit("elementary_charge", registry=UnitReg.default_reg()) - ) - .to_value(), - ) - assert ( - typed_ethane.to_dataframe(site_attrs=["atom_type.name"])["atom_type.name"][ - 0 - ] - == "opls_135" - ) - assert np.allclose( - float(typed_ethane.to_dataframe(site_attrs=["charge", "position"])["x"][0]), - 0, - ) - assert np.allclose( - float( - typed_ethane.to_dataframe( - parameter="bonds", 
site_attrs=["charge", "position"] - )["charge Atom0 (e)"][0] - ), - typed_ethane.bonds[0] - .connection_members[0] - .charge.in_units( - u.Unit("elementary_charge", registry=UnitReg.default_reg()) - ) - .to_value(), - ) - with pytest.raises(AttributeError) as e: - typed_ethane.to_dataframe(site_attrs=["missingattr"]) - assert str(e.value) == "The attribute missingattr is not in this gmso object." - with pytest.raises(AttributeError) as e: - typed_ethane.to_dataframe(site_attrs=["missingattr.missingattr"]) - assert ( - str(e.value) - == "The attribute missingattr.missingattr is not in this gmso object." - ) - with pytest.raises(AttributeError) as e: - typed_ethane.to_dataframe(site_attrs=["missingattr.attr"]) - assert ( - str(e.value) == "The attribute missingattr.attr is not in this gmso object." - ) - with pytest.raises(AttributeError) as e: - typed_ethane.to_dataframe(parameter="bonds", site_attrs=["missingattr"]) - assert str(e.value) == "The attribute missingattr is not in this gmso object." - with pytest.raises(AttributeError) as e: - typed_ethane.to_dataframe( - parameter="bonds", site_attrs=["missingattr.attr"] - ) - assert ( - str(e.value) == "The attribute missingattr.attr is not in this gmso object." - ) - with pytest.raises(GMSOError) as e: - top = Topology() - top.to_dataframe(parameter="bonds") - assert ( - str(e.value) - == "There arent any bonds in the topology. The dataframe would be empty." 
- ) - - @pytest.mark.skipif(not has_pandas, reason="Pandas is not installed") - def test_pandas_from_parameters(self, typed_ethane): - pd = import_("pandas") - df = pd.DataFrame() - assert np.allclose( - float( - typed_ethane._pandas_from_parameters(df, "bonds", ["positions"])[ - "x Atom1 (nm)" - ][6] - ), - -0.03570001, - ) - def test_is_typed_check(self, typed_chloroethanol): groups = [ "sites", From b364e48c43ff478a795da16cf2491f83fbb761c3 Mon Sep 17 00:00:00 2001 From: CalCraven Date: Sun, 16 Jun 2024 08:57:49 -0500 Subject: [PATCH 2/2] Fix bug with finding hoomd minor version when checking for gaff PeriodicImpropers --- gmso/external/convert_hoomd.py | 2 +- gmso/tests/test_hoomd.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gmso/external/convert_hoomd.py b/gmso/external/convert_hoomd.py index 6779dafef..59a40db53 100644 --- a/gmso/external/convert_hoomd.py +++ b/gmso/external/convert_hoomd.py @@ -1275,7 +1275,7 @@ def _parse_improper_forces( base_units, ) - if int(hoomd_version[0]) >= 4 and int(hoomd_version[1]) >= 5: + if int(hoomd_version[0]) + float(hoomd_version[1]) * 0.1 >= 4.5: itype_group_map = { "HarmonicImproperPotential": { "container": hoomd.md.improper.Harmonic, diff --git a/gmso/tests/test_hoomd.py b/gmso/tests/test_hoomd.py index f3032871e..5b2c74082 100644 --- a/gmso/tests/test_hoomd.py +++ b/gmso/tests/test_hoomd.py @@ -388,7 +388,8 @@ def test_zero_charges(self): @pytest.mark.skipif(not has_hoomd, reason="hoomd is not installed") @pytest.mark.skipif(not has_mbuild, reason="mbuild not installed") @pytest.mark.skipif( - int(hoomd_version[0]) <= 3.8, reason="Deprecated features in HOOMD 4" + int(hoomd_version[0]) + float(hoomd_version[1]) * 0.1 < 4.5, + reason="Feature added in HOOMD 4.5", ) def test_gaff_sim(self, gaff_forcefield): base_units = {