Question

1 Approved Answer

Posted on Sep 25, 2024

i am trying to solve the following questions but i get an error when trying to merge the dataframes that i separate, the error is

i am trying to solve the following questions but i get an error when trying to merge the dataframes that i separate, the error is at the bottom, i am using python:

import matplotlib.pyplot as plt

import scipy.stats as stats

import statsmodels.api as sm

from statsmodels.formula.api import ols

import statsmodels.stats.multicomp as mc

import pandas as pd

pd.set_option('display.width', 1000)

# 1. In our dataset, there is a predation index (1 is minimum, 5 is maximum). Use ANOVA to determine if there is a significant difference in body weight (response) of all mammal species based on predation index (independent). Assuming there is a difference, conduct Tukey's HSD test to determine which species of mammals are significantly different from each other? Interpret the results of your tests.

# Define the column names as a list

sleep_colummn_names = ['Species', 'body_weight', 'brain_weight', 'nondream_sleep', 'dream_sleep', 'total_sleep', 'max_life_span', 'gestation_time', 'predation_risk', 'sleep_exposure', 'danger_index']

# Read in the csv file with the defined column names

sleep_dataframe = pd.read_csv('sleep.csv', names=sleep_colummn_names, header=None, skiprows=1)

# Print the first few rows of the dataframe to check that it was created correctly

print(sleep_dataframe.head())

# check dataframe datatypes

print(" Dataframe data types: ", sleep_dataframe.dtypes)

# check for null values

print(" Checking for null values in dataframe: ", sleep_dataframe.isnull())

# Choose only needed columns for each question

q1_data = sleep_dataframe[['Species', 'body_weight', 'predation_risk']]

print(" Dataframe for question one: ", q1_data)

q2_data = sleep_dataframe[['Species', 'body_weight', 'sleep_exposure']]

print(" Dataframe for question two: ", q2_data)

q3_data = sleep_dataframe[['Species', 'body_weight', 'brain_weight']]

print(" Dataframe for question three: ", q3_data)

q4_data = sleep_dataframe[['Species', 'total_sleep', 'max_life_span']]

print(" Dataframe for question four: ", q4_data)

print(q1_data.columns)

print(q2_data.columns)

print(q3_data.columns)

print(q4_data.columns)

# get all sub dataframes and merge into one

merged_data = q1_data.merge(q2_data, on='Species', validate='one_to_one').merge(q3_data, on=['Species', 'body_weight'], validate='one_to_one').merge(q4_data, on=['Species', 'total_sleep', 'max_life_span'], validate='one_to_one')

print(merged_data)

Traceback (most recent call last): File "C:\Python39\lib unpy.py", line 197, in _run_module_as_main return _run_code(code, main_globals, None, File "C:\Python39\lib unpy.py", line 87, in _run_code exec(code, run_globals) File "c:\Users\pimpd\.vscode\extensions\ms-python.python-2023.2.0\pythonFiles\lib\python\debugpy\__main__.py", line 39, in cli.main() File "c:\Users\pimpd\.vscode\extensions\ms-python.python-2023.2.0\pythonFiles\lib\python\debugpy/..\debugpy\server\cli.py", line 430, in main run() File "c:\Users\pimpd\.vscode\extensions\ms-python.python-2023.2.0\pythonFiles\lib\python\debugpy/..\debugpy\server\cli.py", line 284, in run_file runpy.run_path(target, run_name="__main__") File "c:\Users\pimpd\.vscode\extensions\ms-python.python-2023.2.0\pythonFiles\lib\python\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 321, in run_path return _run_module_code(code, init_globals, run_name, File "c:\Users\pimpd\.vscode\extensions\ms-python.python-2023.2.0\pythonFiles\lib\python\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 135, in _run_module_code _run_code(code, mod_globals, init_globals, File "c:\Users\pimpd\.vscode\extensions\ms-python.python-2023.2.0\pythonFiles\lib\python\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 124, in _run_code exec(code, run_globals) File "c:\myEnvforClass\homework4.py", line 153, in merged_data = q1_data.merge(q2_data, on='Species', validate='one_to_one').merge(q3_data, on=['Species', 'body_weight'], validate='one_to_one').merge(q4_data, on=['Species', 'total_sleep', 'max_life_span'], validate='one_to_one') File "c:\myEnvforClass\lib\site-packages\pandas\core\frame.py", line 10093, in merge return merge( File "c:\myEnvforClass\lib\site-packages\pandas\core eshape\merge.py", line 110, in merge op = _MergeOperation( File "c:\myEnvforClass\lib\site-packages\pandas\core eshape\merge.py", line 703, in __init__ ) = self._get_merge_keys() File "c:\myEnvforClass\lib\site-packages\pandas\core eshape\merge.py", line 1179, in _get_merge_keys left_keys.append(left._get_label_or_level_values(lk)) File "c:\myEnvforClass\lib\site-packages\pandas\core\generic.py", line 1850, in _get_label_or_level_values raise KeyError(key) KeyError: 'body_weight'