Skip to content
Star type, Temperature, Luminosity, a galactic 3D-map and more
# Start coding here...
Use the hipparcos mission data to plot the Hertzsprung Russell
diagram and from it estimate the mass, radius, luminosity, effective temperature. Also make a 3D-map of the bodies covered in this catalogue, map the density distribution, map the velocity profile, map the energy density, estimate the distribution of star-type per quadrant.
I am a novice and this is my first publicly available
data science project. As the project is big, I decided to split it into at least two parts, this being the first one.
Here I shall download the .csv.gz file from the ESA repository
untar it in a .csv file, get the relevant parameters, calculate the interest variables, estimate new variables from this calculated variables, plot the 3D-map given two distance limits, make some plots to understand the data
# libraries
import gzip
import urllib.request
import re
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import math
from scipy.optimize import curve_fit
import scipy.stats as stats
#----------------------------------------------------------------
### useful functions
def poly(x, vec):
'''Return polinomial function
______________________
Takes: the number which the function is calculated(x: a float) and
the vector of coefficients (vec: an array)
______________________
Returns: the value of the polynimial at the point x (a float) '''
temp = 0
for coef in range(len(vec)):
temp = temp + vec[coef]*x**coef
return temp
def point_distance(p_1, p_2):
'''Calculate the distance between two points in a 2d environment
______________________
Takes: two tuples containig (x, y) locations of each
______________________
Returns: the distance between these points(a float) '''
x_1, y_1 = p_1
x_2, y_2 = p_2
return np.sqrt((x_1 - x_2)**2 + (y_1 + y_2)**2)
def weigths(x , y, limits):
'''Calculate the weigths (weigthed probability) of the star types as the inverse
of the distanceto the star type lines in the pre-defined limits
______________________
Takes: the coordinates of the point and the star type lines limits
______________________
Returns: a normalized vector of the distance of the point to each star type
lines (a float), which may be interpreted as probabilities '''
distances = []
for limit in limits:
x_il, x_sl, coef = limit
if x >= x_il and x <= x_sl :
distances.append(1/max(abs(y - poly(x, vec=coef)), 0.01))
elif x < x_il:
distances.append(1/max(point_distance((x, y), (x_il, poly(x_il, vec=coef))), 0.01))
elif x > x_sl:
distances.append(1/max(point_distance((x, y), (x_sl, poly(x_sl, vec=coef))), 0.01))
return distances/np.sum(distances)
#----------------------------------------------------------------------
Libraries and some useful functions loaded, let us write some functions to download and process the data
####### functions to import the data from the ESA website and preprocessing ########
def web_download(url_file, filename):
"""Download the file named filename
____________
Args (str): the url addres of the file
____________
Returns (str): name pf the dowloaded data
"""
try:
urllib.request.urlretrieve(url_file, filename)
return(filename)
except:
print('NonExistentFile')
return False
def gz_data_extraction(url, output):
"""Extract a .gz data from the web apri uncompress it
____________
Args (str): its url adress
____________
Returns (str): None, bu it saves the file in another file
"""
# Download archive
try:
# Read the file inside the .gz archive located at url
with urllib.request.urlopen(url) as response:
with gzip.GzipFile(fileobj=response) as uncompressed:
file_content = uncompressed.read()
# write to file in binary mode 'wb'
with open(output, 'wb') as f:
f.write(file_content)
return 0
except Exception as e:
print(e)
return None
def dat_to_dataframe(filename):
"""Eliminate the empty spaces of each line and import the data as a dataframe
____________
Args (str): the name of the file
____________
Returns (str): returns a dataframe containing the data in the file
"""
file = open(filename, "r")
file_lines = file.readlines()
new = []
for i in range(len(file_lines)):
temp_line = re.sub("\s+", ",", file_lines[i].strip())
values = temp_line.split(',')
#according to the documentation, the lines beyond the 27-th are entries of a matrix, which we will not use
new.append(values[0:27])
file.close()
# data taken from the Read me file
names = ['HIP', 'Sn', 'So', 'Nc', 'RArad', 'DErad', 'Plx', 'pmRA', 'pmDE', 'e_RArad', \
'e_DErad', 'e_Plx', 'e_pmRA', 'e_pmDE', 'Ntr', 'F2', 'F1', 'var', 'ic', 'Vmag', \
'e_Hpmag', 'sHp', 'VA', 'B-V', 'e_B-V', 'V-I', 'UW']
return pd.DataFrame(new, columns = names)
def preprocessing():
"""bundle togheter the preprocessing steps
____________
Args (str): None
____________
Returns (str): a dataframe of processed data
"""
# main file download
gz_data_extraction('https://cdsarc.cds.unistra.fr/ftp/I/311/hip2.dat.gz', 'hip2.dat')
print('gz downloaded from the web')
# read me download
web_download('https://cdsarc.cds.unistra.fr/ftp/I/311/ReadMe', "ReadMe_hipparcos_2")
print('Read me downloaded from the web')
df = dat_to_dataframe("./hip2.dat")
print('data transformed to dataframe')
df = df[['B-V','Vmag', 'Plx', 'RArad', 'DErad']]
df = df.astype(float)
print('B-V','Vmag', 'Plx', 'RArad', 'DErad features selected')
temp_size = df.shape[0]
df = df[abs(df['Plx']) >= 0.0001]
per_of_del_data = (1 - df.shape[0]/temp_size)*100
print("Far away objects were deleted (Plx approx 0). A total of {var}% of the data was lost due this procedure.".format(var=per_of_del_data))
df = df.reset_index()
return df
#-----------------------------------------------------------------------
To check the data processing, let me define some easy to use data visualization functions to check the sanity of the data
#-----------------------------------------------------------------------
### plot histograms
def plot_histograms(df, variables, n_rows, n_cols) :
"""Plot histograms of the data
____________
Args (str): the dataframe (df), the variables to plot (variables) the number of rows
and columns to plot (n_rows, n_columns)
____________
Returns (str): None
"""
fig=plt.figure()
for i, var_name in enumerate(variables):
ax=fig.add_subplot(n_rows,n_cols,i+1)
df[var_name].hist(bins=80,ax=ax, color='black')
ax.set_title(var_name+" Distribution")
fig.tight_layout() # Improves appearance a bit.
plt.show()
return None
def print_NaNs(df) :
"""Plot histograms of the relative number of NaNs. The plot presents a horizintal line
at the heigth 5% so that it is ok the delete the data if the bar is below this line
____________
Args: the dataframe (df)
____________
Returns : None
"""
NaSum = df.isna().mean().values
p = sns.barplot(y=NaSum, x=df.columns, color='black')
p.axhline(y=0.05, linestyle = '--', color = 'red' )
plt.xticks(rotation=90)
plt.show()
return None
#-----------------------------------------------------------------------
Now, functions to calculate the features that can be taken from the data alone. Also is present a function to plot the HR diagram, a function to interpolate the typical region of each star type and a function to get the coefficients of a polynomial interpolation to those regions.
#-----------------------------------------------------------------------
### Add calculable properties
def add_distance(df) :
"""Calculate the distances from the object using the formula
D = 1/plx * 1000 * 3,26156 (in ly) and include
in the dataframe
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column
"""
df['Distance'] = abs(3.26156*1000/np.array(df["Plx"], dtype=float))
return df
def add_cartesian_coordinates(df) :
"""Calculate the cartesian coordinates of the objects for posterior plotting
____________
Args: the dataframe (df)
____________
Returns: the dataframe with three additinal columns (the x,y,z coordinates)
"""
r = np.array(df['Distance'])
cos_theta = np.array(np.cos(df['DErad']))
sin_theta = np.array(np.sin(df['DErad']))
cos_phi = np.array(np.cos((df['RArad'])))
sin_phi = np.array(np.sin(df['RArad']))
df['x_pos'] = r * cos_theta * sin_phi
df['y_pos'] = r * sin_theta * sin_phi
df['z_pos'] = r * cos_phi
return df
def add_abs_magnitude(df) :
"""Calculate the absolute magnitude using the formula M = m + 5 - 5LOG_10(d(pc))
notice the distance in parsecs
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column, the absolute magnitude
"""
df['AbsMag'] = np.array(df["Vmag"]) + 2.43287314 -5*np.log10(np.array(df["Distance"])/3.26156)
return df
#-------------------------------------------------------------------------
### Plot the HR-Diagram
def HR_diagram(Magnitude_B_V, Magnitude_absoluta) :
"""Plot the HR-diagram
____________
Args: the B_V magnitude feature and the absolute magnitude feature
____________
Returns: None
"""
data = pd.DataFrame({'X': Magnitude_B_V, 'Y': Magnitude_absoluta})
F1 = {"family": "serif", "weight": "bold", "color": "black", "size": 18}
F2 = {"family": "serif", "weight": "bold", "color": "black", "size": 20}
norm = mpl.colors.Normalize(vmin=Magnitude_B_V.min(), vmax=Magnitude_B_V.max())
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(data['X'], data['Y'], c=data['X'], cmap = "YlOrRd",\
marker = '.', s = 2, linewidth = 0.0001, norm=norm)
plt.xlabel("Color Index (B-V)", fontdict = F1)
plt.ylabel("Absolute Magnitude (Mv)", fontdict = F1)
plt.title("Hertzsprung-Russell diagram", fontdict = F2)
plt.xlim(Magnitude_B_V.min(), Magnitude_B_V.max())
plt.ylim(Magnitude_absoluta.max(), Magnitude_absoluta.min())
ax.xaxis.set_minor_locator(AutoMinorLocator())
ax.yaxis.set_minor_locator(AutoMinorLocator())
Color_BC = plt.gca()
Color_BC.set_facecolor("darkblue")
plt.tight_layout()
plt.show()
def HR_diagram_regions(sc_x, sc_y) :
"""Plot the HR-diagram, plot the limits of the regions considered to each star type
in the HR diagram, plot a polynomial regression of each subset considering the limits
and return those regressions cefficients
____________
Args: The B_V magnitude and the Absolute magnitude (the same used to plot the HR diagram)
____________
Returns: 7 arrays, each containing the parameters of the polynomial coefficients to fit
the HR diagram and define the Star type regions
"""
### plot the points
data = pd.DataFrame({'X': sc_x, 'Y': sc_y})
F1 = {"family": "serif", "weight": "bold", "color": "black", "size": 18}
F2 = {"family": "serif", "weight": "bold", "color": "black", "size": 20}
norm = mpl.colors.Normalize(vmin=sc_x.min(), vmax=sc_x.max())
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(data['X'], data['Y'], c=data['X'], cmap = "YlOrRd",\
marker = '.', s = 2, linewidth = 0.0001, norm=norm)
plt.xlabel("Color Index (B-V)", fontdict = F1)
plt.ylabel("Absolute Magnitude (Mv)", fontdict = F1)
plt.title("Hertzsprung-Russell diagram", fontdict = F2)
plt.xlim(sc_x.min(), sc_x.max())
plt.ylim(sc_y.max(), sc_y.min())
ax.xaxis.set_minor_locator(AutoMinorLocator())
ax.yaxis.set_minor_locator(AutoMinorLocator())
Color_BC = plt.gca()
Color_BC.set_facecolor("darkblue")
plt.tight_layout()
# useful polynomials
def func_2(x, a_0, a_1, a_2):
return a_0 + a_1*x + a_2*x**2
def func_3(x, a_0, a_1, a_2, a_3):
return a_0 + a_1*x + a_2*x**2 + a_3*x**3
def func_4(x, a_0, a_1, a_2, a_3, a_4):
return a_0 + a_1*x + a_2*x**2 + a_3*x**3 + a_4*x**4
def func_10(x, a_0, a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10):
return a_0 + a_1*x + a_2*x**2 + a_3*x**3 + a_4*x**4 + a_5*x**5 + a_6*x**6 +\
a_7*x**7 + a_8*x**8+ a_9*x**9 + a_10*x**10
### fitting degree 2 polynomial on the white dwarfs region
f_test = lambda x: 5*x + 6
test = pd.DataFrame({'X' : np.linspace(-0.4, 0.8, num = 50)})
test['Y'] = test['X'].map(f_test)
#sns.lineplot(x=test['X'], y=test['Y'],color='red', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit'] = new_df['x_0'].map(f_test)
new_df = new_df[new_df['y_0']>new_df['limit']]
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], marker='*', color='gold')
popt, pcov = curve_fit(func_2, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[0] + popt[1]*x + popt[2]*x**2)
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_wd = popt.copy()
### fitting degree 4 polynomial on the brown dwarfs region
test = pd.DataFrame({'X' : np.linspace(1.35, 2.5, num = 50)})
test['Y_0'] = test['X'].map(lambda x: 15*x - 16.5)
test['Y_1'] = test['X'].map(lambda x: 4.5*x - 4)
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 15*x - 16.5)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 4.5*x - 4)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
new_df = new_df[new_df['x_0']>1.35]
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], marker='*', color='gold')
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], order = 4, ax= ax)
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[4]*x**4 + popt[3]*x**3 + popt[2]*x**2 + popt[1]*x + popt[0])
test = test[(test['X'] > 1.35)]
test = test[(test['X'] < 2.5)]
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_bd = popt.copy()
# fitting the subdwarf branch with 3th degree polynomial
test = pd.DataFrame({'X' : np.linspace(-0.4, 3.5, num = 50)})
test['Y_0'] = test['X'].map(lambda x: 5*x + 6)
test['Y_1'] = test['X'].map(lambda x: 6.5*x )
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 5*x + 6)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 6.5*x)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], marker='*', color='gold')
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], order = 3, ax= ax)
popt, pcov = curve_fit(func_3, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[3]*x**3 + popt[2]*x**2 + popt[1]*x + popt[0])
test = test[(test['X'] < 1.4)]
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_sd = popt.copy()
# fitting the Main sequence stars to a 10-th order polynomial
test = pd.DataFrame({'X' : np.linspace(-0.4, 3.5, num = 50)})
test['Y_0'] = test['X'].map(lambda x: 6.5*x )
test['Y_1'] = test['X'].map(lambda x: 6.5*x - 4 )
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 6.5*x)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 6.5*x - 4)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
new_df = new_df[new_df['x_0']>-0.3]
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], marker='*', color='gold')
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], line_kws={'color':'black'}, scatter_kws={'s':1}, order=10)
popt, pcov = curve_fit(func_10, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[10]*x**10 + popt[9]*x**9 + popt[8]*x**8 + popt[7]*x**7 + \
popt[6]*x**6 + popt[5]*x**5 + popt[4]*x**4 + popt[3]*x**3 + \
popt[2]*x**2 + popt[1]*x + popt[0])
test = test[test['X']>-0.3]
test = test[test['X']<1.8]
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_ps = popt.copy()
# fitting the subgiant branch with 2th degree polynomial
test = pd.DataFrame({'X' : np.linspace(0.36, 2, num = 50)})
test['Y_0'] = test['X'].map(lambda x: -5*(x-1.3)**2 + 3)
test['Y_1'] = test['X'].map(lambda x: x - 2 )
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: -5*(x-1.3)**2 + 3)
new_df['limit_1'] = new_df['x_0'].map(lambda x: x - 2 )
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], marker='*', color='gold')
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], line_kws={'color':'black'}, scatter_kws={'s':1}, order=2)
popt, pcov = curve_fit(func_2, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[2]*x**2 + popt[1]*x + popt[0])
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_sG = popt.copy()
# fitting the giant branch with 4th degree polynomial
test = pd.DataFrame({'X' : np.linspace(-0.4, 3.2, num = 50)})
test['Y_0'] = test['X'].map(lambda x: 0.5*x - 6)
test['Y_1'] = test['X'].map(lambda x: 0.5*x - 3)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 0.5*x - 6)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 0.5*x - 3)
new_df = new_df[new_df['y_0']>new_df['limit_0']]
new_df = new_df[new_df['y_0']<new_df['limit_1']]
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], c='red', marker = '*', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], order = 4)
#sns.lineplot(x=test['X'], y=test['Y_2'],color='red', ax=ax)
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[4]*x**4 + popt[3]*x**3 + popt[2]*x**2 + popt[1]*x + popt[0] )
test = test[(test['X'] > 0.)]
#test = test[(test['X'] < 1.9)]
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_G = popt.copy()
# fitting a 2-th polynomial to the Bright Giant Ib branch
test = pd.DataFrame({'X' : np.linspace(-0.4, 3.5, num = 50)})
test['Y_0'] = test['X'].map(lambda x: 0.5*x - 7.)
test['Y_1'] = test['X'].map(lambda x: 0.5*x - 10.)
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], c='red', marker = '*', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 0.5*x - 7)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 0.5*x - 10)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], order = 4, line_kws={'color':'black'})
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
f_test = lambda x: popt[2]*x**2 + popt[1]*x + popt[0]
test['Z'] = test['X'].map(f_test)
test = test[(test['X'] > -0.4)]
test = test[(test['X'] < 3.0)]
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_SGb = popt.copy()
# fitting a 2-th polynomial to the Bright Giant Ia branch
test = pd.DataFrame({'X' : np.linspace(-0.4, 3.5, num = 50)})
test['Y_0'] = test['X'].map(lambda x: 0.5*x - 10)
#sns.lineplot(x=test['X'], y=test['Y_0'],color='red', ax=ax)
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 0.5*x - 10)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
#sns.lineplot(x=test['X'], y=test['Y_1'],color='red', ax=ax)
#sns.scatterplot(x=new_df['x_0'], y=new_df['y_0'], c='red', marker = '*', ax=ax)
#sns.regplot(x=new_df['x_0'], y=new_df['y_0'], order = 4)
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
test['Z'] = test['X'].map(lambda x: popt[4]*x**4 + popt[3]*x**3 + popt[2]*x**2 + popt[1]*x + popt[0] )
test = test[(test['X'] > -0.4)]
test = test[(test['X'] < 3.5)]
sns.lineplot(x=test['X'], y=test['Z'],color='white', ax=ax)
popt_SGa = popt.copy()
plt.show()
return popt_wd, popt_ps, popt_bd, popt_sd, popt_sG, popt_G, popt_SGb, popt_SGa
def find_HR_diagram(sc_x, sc_y) :
"""Based on the plot in the HR_diagram_regions(sc_x, sc_y) function, find the coefficients
of the fitted polynomials without ploting anything
____________
Args: The B_V magnitude and the Absolute magnitude (the same used to plot the HR diagram)
____________
Returns: 7 arrays, each containing the parameters of the polynomial coefficients to fit
the HR diagram and define the Star type regions
"""
# useful polynomials
def func_2(x, a_0, a_1, a_2):
return a_0 + a_1*x + a_2*x**2
def func_3(x, a_0, a_1, a_2, a_3):
return a_0 + a_1*x + a_2*x**2 + a_3*x**3
def func_4(x, a_0, a_1, a_2, a_3, a_4):
return a_0 + a_1*x + a_2*x**2 + a_3*x**3 + a_4*x**4
def func_10(x, a_0, a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10):
return a_0 + a_1*x + a_2*x**2 + a_3*x**3 + a_4*x**4 + a_5*x**5 + a_6*x**6 +\
a_7*x**7 + a_8*x**8+ a_9*x**9 + a_10*x**10
### fitting degree 2 polynomial on the white dwarfs region 8, 5, 0
f_test = lambda x: 5*x + 6
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit'] = new_df['x_0'].map(f_test)
new_df = new_df[new_df['y_0']>new_df['limit']]
popt, pcov = curve_fit(func_2, new_df['x_0'], new_df['y_0'])
popt_wd = popt.copy()
### fitting degree 4 polynomial on the brown dwarfs region
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 15*x - 16.5)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 4.5*x - 4)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
new_df = new_df[new_df['x_0']>1.35]
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
popt_bd = popt.copy()
# fitting the subdwarf branch with 3th degree polynomial
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 5*x + 6)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 6.5*x)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
popt, pcov = curve_fit(func_3, new_df['x_0'], new_df['y_0'])
popt_sd = popt.copy()
# fitting the Main sequence stars to a 10-th order polynomial
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 6.5*x)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 6.5*x - 4)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
new_df = new_df[new_df['x_0']>-0.3]
new_df = new_df[new_df['x_0']<1.8]
popt, pcov = curve_fit(func_10, new_df['x_0'], new_df['y_0'])
popt_ps = popt.copy()
# fitting the subgiant branch with 2th degree polynomial
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: -5*(x-1.3)**2 + 3)
new_df['limit_1'] = new_df['x_0'].map(lambda x: x - 2 )
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
popt, pcov = curve_fit(func_2, new_df['x_0'], new_df['y_0'])
popt_sG = popt.copy()
# fitting the giant branch with 4th degree polynomial
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 0.5*x - 6)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 0.5*x - 3)
new_df = new_df[new_df['y_0']>new_df['limit_0']]
new_df = new_df[new_df['y_0']<new_df['limit_1']]
new_df = new_df[new_df['x_0']>-0.05]
new_df = new_df[new_df['x_0']<3.5]
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
popt_G = popt.copy()
# fitting a 2-th polynomial to the Bright Giant Ib branch
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 0.5*x - 7)
new_df['limit_1'] = new_df['x_0'].map(lambda x: 0.5*x - 10)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
new_df = new_df[new_df['y_0']>new_df['limit_1']]
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
popt_SGb = popt.copy()
# fitting a 2-th polynomial to the Bright Giant Ia branch
new_df=pd.DataFrame({'x_0': sc_x, 'y_0':sc_y})
new_df['limit_0'] = new_df['x_0'].map(lambda x: 0.5*x - 10)
new_df = new_df[new_df['y_0']<new_df['limit_0']]
popt, pcov = curve_fit(func_4, new_df['x_0'], new_df['y_0'])
popt_SGa = popt.copy()
return popt_wd, popt_bd, popt_sd, popt_ps, popt_sG, popt_G, popt_SGb, popt_SGa
#--------------------------------------------------------------------------
Now, add the features that we estimate through physical arguments and the star type approximated by the distance the obct is from the characteristic regions in the HR diagram
#--------------------------------------------------------------------------
### Spectral type measurements, beggining of the estimated features
def add_luminosity(df) :
"""Estimate the luminosity using L/Ls = 2.511886432**(Ms-M), L1_sol = 1, M1_sol = 4.83
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column, the absolute magnitude
"""
df['EstimatedLuminosity'] = 2.511886432**(4.83-np.array(df['AbsMag']))
return df
def add_mass_estimative(df) :
"""Estimate the mass using mass-luminosity estimatives like
class 1 : M < 0.43 M_sun -> L/Ls = 0.23(M/Ms)**2.3
class 2 : 2 M_sun > M > 0.43 M_sun
class 3 : 55 M_sun > M > 2 M_sun
class 4 : M > 55 M_sun
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column, the absolute magnitude
"""
def mass_class_1(L) :
# L < 0.033
# L/L_sun = 0.23(M/M_sun)**2.3
mass_estimative = (4.347826087 * L)**(0.434782609)
return mass_estimative
def mass_class_2(L) :
# 16 > L > 0.033
# L/L_sun = (M/M_sun)**4
mass_estimative = L**(0.25)
return mass_estimative
def mass_class_3(L) :
# 1 727 418 > L > 16
# L/L_sun = 1.4 (M/M_sun)**3.5
mass_estimative = (0.714285714 * L)**(0.285714286)
return mass_estimative
def mass_class_4(L) :
# L > 1 727 418
# L/L_sun = 32000 (M/M_sun)
mass_estimative = 0.00003125 * L
return mass_estimative
mass_array = []
for L in df['EstimatedLuminosity'] :
if L < 0.033 :
mass_array.append(mass_class_1(L))
elif L < 16 :
mass_array.append(mass_class_2(L))
elif L < 1727418 :
mass_array.append(mass_class_3(L))
else :
mass_array.append(mass_class_4(L))
df['EstimatedMass'] = mass_array
return df
def add_radius_estimative(df) :
"""Estimate the radius of the object. For now, assuming that is a main sequence star
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column, the estimated radius
"""
def main_sequence(M):
if M < 1 :
R = M**0.57
else :
R = M**0.8
return R
radius_estimates = []
for M in df['EstimatedMass']:
radius_estimates.append(main_sequence(M))
df['EstimatedRadius'] = radius_estimates
return df
def add_effective_temperature_estimative(df) :
"""Estimate the effective temperatue using the stefan boltzman law: L = 4 pi R**2 cB T**4
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column, the estimated effective temperatue
"""
R_sun = 696340
cB = 0.0567
df['EstimatedTemperature'] = (np.array(df['EstimatedLuminosity']*3.846 * 10**26) / (4 * math.pi * (R_sun * np.array(df['EstimatedRadius']))**2 * cB))**(0.25)
return df
def add_star_type_estimation(df) :
"""Estimate the star type by its position in the HR diagram
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the additinal column, the estimative of the star type
"""
#popt_wd, popt_bd, popt_sd, popt_ps, popt_sG, popt_G, popt_SGb, popt_SGa = figure_test(temp['B-V'], temp['AbsMag'])
popt_wd, popt_bd, popt_sd, popt_ps, popt_sG, popt_G, popt_SGb, popt_SGa =\
find_HR_diagram(df['B-V'], df['AbsMag'])
test = pd.DataFrame({'X' : np.linspace(df['B-V'].min(), df['B-V'].max(), num = 100)})
test['wd_curve']= test['X'].map(lambda x: poly(x, vec=popt_wd))
test['bd_curve']= test['X'].map(lambda x: poly(x, vec=popt_bd))
test['swd_curve']= test['X'].map(lambda x: poly(x, vec=popt_sd))
test['PS_curve']= test['X'].map(lambda x: poly(x, vec=popt_ps))
test['sG_curve']= test['X'].map(lambda x: poly(x, vec=popt_sG))
test['G_curve']= test['X'].map(lambda x: poly(x, vec=popt_G))
test['SGb_curve']= test['X'].map(lambda x: poly(x, vec=popt_SGb))
test['SGa_curve']= test['X'].map(lambda x: poly(x, vec=popt_SGa))
wd_limits = (-0.4, 0.8, popt_wd)
bd_limits = (1.4, 2.5, popt_bd)
swd_limits = (-0.4, 1.4, popt_sd)
PS_limits = (-0.3, 1.8, popt_ps)
sG_limits = (0.4, 2, popt_sG)
G_limits = (0., 3.5, popt_G)
SGb_limits = (-0.4, 3.5, popt_SGb)
SGa_limits = (-0.4, 3.5, popt_SGa)
limits = [wd_limits, bd_limits, swd_limits, PS_limits, sG_limits, G_limits, SGa_limits, SGb_limits]
p_wd, p_bd, p_sd, p_PS, p_sG, p_G, p_SGb, p_SGa, t = [], [], [], [], [], [], [], [], []
StarType = ['white_dwarf', 'brown_dwarf','sub_dwarf', 'MS', 'sub_giant',\
'giant', 'super_giant_b', 'super_giant_a']
for index in range(df.shape[0]):
temp = weigths(df.loc[index, 'B-V'], df.loc[index,'AbsMag'], limits)
p_wd.append(temp[0])
p_bd.append(temp[1])
p_sd.append(temp[2])
p_PS.append(temp[3])
p_sG.append(temp[4])
p_G.append(temp[5])
p_SGb.append(temp[6])
p_SGa.append(temp[7])
i = np.where(temp==temp.max())[0][0]
t.append(StarType[i])
temp = pd.DataFrame({'p_wd':p_wd, 'p_bd':p_bd, 'p_sd':p_sd, 'p_PS':p_PS, 'p_sG':p_sG,\
'p_G':p_G, 'p_SGb':p_SGb, 'p_SGa':p_SGa, 'StarType':t})
df = pd.concat([df, temp], axis=1)
return df
def add_main_sequence_type(df) :
"""Estimate the main sequence type of a main sequence star based on the estimative of the
effective temperature
____________
Args: the dataframe (df)
____________
Returns: the dataframe with the StarType column changed to have main sequence star types
"""
for index in range(df.shape[0]):
if df.loc[index, 'StarType'] == 'MS' :
if df.loc[index, 'EstimatedTemperature'] >= 25000:
df.loc[index, 'StarType'] = 'MS_O_type'
elif df.loc[index, 'EstimatedTemperature'] >= 11000:
df.loc[index, 'StarType'] = 'MS_B_type'
elif df.loc[index, 'EstimatedTemperature'] >= 7500:
df.loc[index, 'StarType'] = 'MS_A_type'
elif df.loc[index, 'EstimatedTemperature'] >= 6000:
df.loc[index, 'StarType'] = 'MS_F_type'
elif df.loc[index, 'EstimatedTemperature'] >= 5000:
df.loc[index, 'StarType'] = 'MS_G_type'
elif df.loc[index, 'EstimatedTemperature'] >= 3500:
df.loc[index, 'StarType'] = 'MS_K_type'
else:
df.loc[index, 'StarType'] = 'MS_M_type'
return df
#-------------------------------------------------------------------------
### Add properties
def add_properties(df) :
"""bundle up the properties wich can be directely derived from the data,
such as the absolute magnitude and the distances, or estimatives made with
physical arguments, sucha as scaling relations
____________
Args: the dataframe (df)
____________
Returns: a new dataframe with the new features appended
"""
df = add_distance(df)
print('Distances added')
df = add_cartesian_coordinates(df)
print('3-dimensional cartesian coordinates added')
df = add_abs_magnitude(df)
print('Absolute magnitude added')
aux = df.shape[0]
df = df[abs(df['B-V']) > 0.000001]
print("More data was deleted due poin float errors. An amount of {var}% was deleted".format(var=round((1-df.shape[0]/aux)*100,4)))
df = df.reset_index()
df = add_luminosity(df)
print('Luminosity estimative added')
df = add_mass_estimative(df)
print('Mass estimative added')
df = add_star_type_estimation(df)
print('Gross star classification estimative added')
df = add_radius_estimative(df)
print('Radius estimative added')
df = add_effective_temperature_estimative(df)
print('Effective temperatue estimative added')
df = add_main_sequence_type(df)
print('Main sequence stars classification estimative added')
return df
#-------------------------------------------------------------------------
Plot the 3D-map and some plotting functions to make a preliminary analysis of the data
### 3d neighborhood map according to the data
def plot_map(df, lower_distance, upper_distance) :
"""Plot a 3D-map (which is navigable using the right-left mouse buttons) with
color defined by star type and temperature, size define by the star radius and the
map centered in the sun
____________
Args: the dataframe (df) and two distance limits, will be plotted all stars
distant beteween the estipulated limits
____________
Returns: None
"""
df.sort_values(by='Distance', ascending=True, inplace=True, ignore_index=True)
temp = df[df['Distance'] < upper_distance]
temp = temp[temp['Distance'] > lower_distance]
temp.reset_index(drop=True, inplace=True)
CMap = []
Marker = []
all_bool = []
for index in range(temp.shape[0]):
if temp.loc[index, 'StarType'] == 'white_dwarf':
CMap.append('lightgray')
Marker.append('v')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'brown_dwarf':
CMap.append('darkorange')
Marker.append('s')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'sub_dwarf':
CMap.append('cadetblue')
Marker.append('p')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_O_type':
CMap.append('blue')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_B_type':
CMap.append('aqua')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_A_type':
CMap.append('lightseagreen')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_F_type':
CMap.append('yellow')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_G_type':
CMap.append('orange')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_K_type':
CMap.append('orangered')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'MS_M_type':
CMap.append('red')
Marker.append('o')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'sub_giant':
CMap.append('coral')
Marker.append('D')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'giant':
CMap.append('crimson')
Marker.append('X')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'super_giant_b':
CMap.append('thistle')
Marker.append('P')
all_bool.append(True)
elif temp.loc[index, 'StarType'] == 'super_giant_a':
CMap.append('lavenderblush')
Marker.append('*')
all_bool.append(True)
else:
all_bool.append(False)
temp = temp[all_bool]
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(0.0, 0.0, 0.0, s=1, marker='*', c='white')
ax.scatter(temp['x_pos'], temp['y_pos'], temp['z_pos'], s=temp['EstimatedRadius'], \
alpha = 1, c=CMap, marker = 'o')
ax.grid(False)
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
plt.axis('off')
plt.grid(b=None)
Color_BC = plt.gca()
Color_BC.set_facecolor("black")
plt.show()
def plot_bexenplots_degree_distance(df, feat_to_plot, inf_distance, sup_distance):
"""Plot boxenplots of a object feature in a distance interval
____________
Args: the dataframe (df), the feature (to plot) name and two distance limits, will be plotted all stars
distant between the estipulated limits
____________
Returns: None
"""
df.sort_values(by='Distance', ascending=True, inplace=True, ignore_index=True)
temp = df[df['Distance'] <= sup_distance]
temp = temp[temp['Distance'] >= inf_distance]
fig, ax = plt.subplots(3,1, figsize=(16, 14))
plt.subplots_adjust(wspace=.3, hspace=.3)
g_0 = sns.boxenplot(data=temp, y=feat_to_plot, x = 'RArad', ax=ax[0], color = 'yellow')
g_1 = sns.boxenplot(data=temp, y=feat_to_plot, x = 'DErad', ax=ax[1], color = 'orange')
g_2 = sns.boxenplot(data=temp, y=feat_to_plot, x = 'Distance', ax=ax[2], color = 'violet')
ax[0].set_xticks([-math.pi, -math.pi/2 ,0, math.pi/2, math.pi, 3*math.pi/2], ['180', '270','0', '90', '180', '270'])
ax[1].set_xticks([-math.pi, -math.pi/2 ,0, math.pi/2, math.pi], ['180', '270','0', '90', '180'])
ax[0].set_xlabel('Degrees')
ax[1].set_xlabel('Degrees')
ax[2].set_xlabel('Distance (ly)')
plt.show()
return None
def plot_lineplots_degree_distance(df, feat_to_plot, inf_distance, sup_distance):
"""Plot lineplots of a numerical feature in a distance interval
____________
Args: the dataframe (df), the feature (to plot) name and two distance limits, will be plotted all stars
distant between the estipulated limits
____________
Returns: None
"""
df.sort_values(by='Distance', ascending=True, inplace=True, ignore_index=True)
temp = df[df['Distance'] <= sup_distance]
temp = temp[temp['Distance'] >= inf_distance]
fig, ax = plt.subplots(3,1, figsize=(16, 14))
plt.subplots_adjust(wspace=.3, hspace=.3)
g_0 = sns.lineplot(data=temp, y=feat_to_plot, x = 'RArad', ax=ax[0], color = 'yellow')
g_1 = sns.lineplot(data=temp, y=feat_to_plot, x = 'DErad', ax=ax[1], color = 'orange')
g_2 = sns.lineplot(data=temp, y=feat_to_plot, x = 'Distance', ax=ax[2], color = 'violet')
ax[0].set_xticks([-math.pi, -math.pi/2 ,0, math.pi/2, math.pi, 3*math.pi/2], ['180', '270','0', '90', '180', '270'])
ax[1].set_xticks([-math.pi, -math.pi/2 ,0, math.pi/2, math.pi], ['180', '270','0', '90', '180'])
ax[0].set_xlabel('Degrees')
ax[1].set_xlabel('Degrees')
ax[2].set_xlabel('Distance (ly)')
plt.show()
return None
#-------------------------------------------------------------------------