I tried to read an ATUS extract that I requested on the IPUMS website using ipumspy, following the suggested read code. These are my exact outputs.
codebook = readers.read_ipums_ddi("/Users/marcusesteban/Documents/Data/atus_00001_ddi.xml")
/opt/miniconda3/envs/default/lib/python3.11/site-packages/ipumspy/readers.py:49: CitationWarning: Use of data from IPUMS is subject to conditions including that users should cite the data appropriately.
See the `ipums_conditions` attribute of this codebook for terms of use.
See the `ipums_citation` attribute of this codebook for the appropriate citation.
warnings.warn(
atus_df = readers.read_microdata_chunked(codebook, filename="/Users/marcusesteban/Downloads/atus_00001.dat.gz", chunksize=1000)
ab_df = pd.concat([ab_df[ab_df['STATEFIP'].isin([8, 30, 48])] for ab_df in atus_df])
Traceback (most recent call last):
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/arrays/integer.py:51 in _safe_cast
return values.astype(dtype, casting=“safe”, copy=copy)
TypeError: Cannot cast array data from dtype(‘float64’) to dtype(‘int64’) according to the rule ‘safe’
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
Cell In[60], line 1
ab_df = pd.concat([ab_df[ab_df[‘STATEFIP’].isin([8, 30, 48])] for ab_df in atus_df])
Cell In[60], line 1 in <listcomp>
ab_df = pd.concat([ab_df[ab_df[‘STATEFIP’].isin([8, 30, 48])] for ab_df in atus_df])
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/ipumspy/readers.py:413 in read_microdata_chunked
yield from _read_microdata(
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/ipumspy/readers.py:167 in _read_microdata
yield from (_fix_decimal_expansion(df).astype(dtype) for df in data)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/ipumspy/readers.py:167 in <genexpr>
yield from (_fix_decimal_expansion(df).astype(dtype) for df in data)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/generic.py:6226 in astype
res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/generic.py:6240 in astype
new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/internals/managers.py:448 in astype
return self.apply(“astype”, dtype=dtype, copy=copy, errors=errors)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/internals/managers.py:352 in apply
applied = getattr(b, f)(**kwargs)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/internals/blocks.py:526 in astype
new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:299 in astype_array_safe
new_values = astype_array(values, dtype, copy=copy)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:230 in astype_array
values = astype_nansafe(values, dtype, copy=copy)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:95 in astype_nansafe
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/arrays/masked.py:132 in _from_sequence
values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/arrays/numeric.py:258 in _coerce_to_array
values, mask, _, _ = _coerce_to_data_and_mask(
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/arrays/numeric.py:214 in _coerce_to_data_and_mask
values = dtype_cls._safe_cast(values, dtype, copy=False)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/arrays/integer.py:57 in _safe_cast
raise TypeError(
TypeError: cannot safely cast non-equivalent float64 to int64
I then ran the same line again, and this happened:
ab_df = pd.concat([ab_df[ab_df['STATEFIP'].isin([8, 30, 48])] for ab_df in atus_df])
Traceback (most recent call last):
Cell In[61], line 1
ab_df = pd.concat([ab_df[ab_df[‘STATEFIP’].isin([8, 30, 48])] for ab_df in atus_df])
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/util/_decorators.py:331 in wrapper
return func(*args, **kwargs)
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/reshape/concat.py:368 in concat
op = _Concatenator(
File /opt/miniconda3/envs/default/lib/python3.11/site-packages/pandas/core/reshape/concat.py:425 in __init__
raise ValueError(“No objects to concatenate”)
ValueError: No objects to concatenate