Post

Genesis Simulation 7 Fitness Analysis

Genesis Simulation 7 Fitness Analysis

Simulation 7 breakdown

Changes

Significant fitness function tuning.

Results

We finally got trees!! This was the main goal for Project Genesis. The average fitness was still trending upwards by the 50th epoch.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
import json
import seaborn as sns
import matplotlib.pyplot as plt
import math
import gzip
import os

def compareAvgMetricsByEpoch(dataframe, firstColumn, secondColumn):
    """Draw the per-epoch means of two columns as two lines on one chart.

    Args:
        dataframe: DataFrame holding an 'epoch' column and both metric columns.
        firstColumn: Name of the first column to average per epoch.
        secondColumn: Name of the second column to average per epoch.

    Returns:
        The value returned by ``sns.lineplot`` for the overlay plot.
    """
    # One small frame of per-epoch means for each requested column.
    perEpochMeans = [
        dataframe.groupby('epoch', as_index=False)[column].mean()
        for column in (firstColumn, secondColumn)
    ]

    # Put both means side by side, keyed on epoch ...
    wideMeans = pd.merge(perEpochMeans[0], perEpochMeans[1], on='epoch')
    # ... then reshape to long form so seaborn can colour one line per metric.
    longMeans = wideMeans.melt(
        id_vars='epoch',
        var_name='Metric',
        value_name='Value',
    )

    return sns.lineplot(data=longMeans, x='epoch', y='Value', hue='Metric')

def read_files_to_dataframe(basedirectory, metric):
    """Load one metric file per epoch directory into a single DataFrame.

    Every sub-directory of ``basedirectory`` is expected to contain a gzipped
    JSON-lines file named ``<metric>.txt.gz``. The integer after the last
    '-' in the sub-directory name is stored in an added 'epoch' column.

    Args:
        basedirectory: Directory whose sub-directories hold the metric files.
        metric: Base name of the file to read from each sub-directory.

    Returns:
        A DataFrame with the rows of every file concatenated under a fresh
        0..n-1 index, plus the 'epoch' column.

    Raises:
        ValueError: If no sub-directory was found, or a sub-directory name
            does not end in '-<integer>'.
        FileNotFoundError: If a sub-directory lacks the metric file.
    """
    dataframes = []
    # Sorted so the row order does not depend on the file system's listing order.
    for directory in sorted(os.listdir(basedirectory)):
        epoch_dir = os.path.join(basedirectory, directory)
        # Plain files next to the epoch directories (overview.json, .DS_Store,
        # editor leftovers, ...) are not simulation output; skip them instead
        # of failing while trying to open a file "inside" them.
        if directory == 'overview.json' or not os.path.isdir(epoch_dir):
            continue

        filepath = os.path.join(epoch_dir, metric + '.txt.gz')
        with gzip.open(filepath, 'rb') as f:
            df = pd.read_json(f, lines=True)
        df['epoch'] = int(directory.rsplit('-', 1)[-1])
        dataframes.append(df)

    if not dataframes:
        # pd.concat would raise a ValueError with an opaque message here;
        # keep the exception type but say what is actually wrong.
        raise ValueError(
            f"no epoch directories found under {basedirectory!r}"
        )
    return pd.concat(dataframes, ignore_index=True)
    
def readPopulationOverTime(file):
    """Read a JSON overview file and return organism totals for alternate worlds.

    The file must have a top-level 'worlds' entry whose items carry a 'name'
    and a 'totalOrganisms' field. The third '-'-separated piece of each name
    is parsed as the integer epoch.

    Args:
        file: Path or buffer accepted by ``pd.read_json``.

    Returns:
        A DataFrame with columns 'epoch' and 'totalOrganisms', keeping every
        second row (positions 0, 2, 4, ...) of the flattened worlds table.
    """
    worldsColumn = pd.read_json(file)['worlds']
    overview = pd.json_normalize(worldsColumn)

    # Names are split on '-'; piece index 2 holds the epoch number.
    nameParts = overview['name'].str.split('-', expand=True)
    overview['epoch'] = nameParts[2].astype(int)

    keepColumns = ['epoch', 'totalOrganisms']
    everyOtherRow = slice(None, None, 2)
    return pd.DataFrame(overview, columns=keepColumns).iloc[everyOtherRow]
Global Variables:
1
2
3
4
# Root directory of the simulation output; handed to read_files_to_dataframe,
# which reads one '<METRIC>.txt.gz' file from each entry inside it.
# NOTE(review): absolute, user-specific path -- adjust before running elsewhere.
INPUT_FILE_DIR = '/Users/luke/dev/analysis/data/simulation98'

# Base name (without '.txt.gz') of the metric file loaded from each entry.
METRIC='Performance'
# NOTE(review): not referenced by any code visible in this post; presumably a
# start-index / row-count window for a zoomed-in fitness view -- confirm.
EXPANDED_FITNESS_MAGNIFICATION = { 'startIndex': 0, 'count': 50 }
Global Computed Variables:
1
2
# Load the METRIC records of every epoch into one frame (an 'epoch' column is
# added by the loader).
fullSimulationDataDf = read_files_to_dataframe(
    basedirectory=INPUT_FILE_DIR,
    metric=METRIC,
)
# Number of rows recorded per epoch, exposed as 'totalOrganisms'.
populationOverTimeDf = (
    fullSimulationDataDf
    .value_counts(subset='epoch')
    .reset_index(name='totalOrganisms')
)
Analysis:
1
# Line chart of the per-epoch row count computed above.
(
    sns.lineplot(
        data=populationOverTimeDf,
        x="epoch",
        y="totalOrganisms",
    )
    .set_title("Population Over Time")
)
1
Text(0.5, 1.0, 'Population Over Time')

png

1
2
3
# Highest 'fitness' value recorded in each epoch.
mostFitByEpochDf = fullSimulationDataDf.groupby('epoch', as_index=False)['fitness'].max()
# Open a wider figure before drawing so the line chart lands on it.
plt.figure(figsize=(14, 8))
(
    sns.lineplot(data=mostFitByEpochDf, x="epoch", y="fitness")
    .set_title("Most Fit Organism Fitness")
)
1
Text(0.5, 1.0, 'Most Fit Organism Fitness')

png

1
2
# Mean of the 'cells' column per epoch.
avgSizeByEpoch = (
    fullSimulationDataDf
    .groupby('epoch')['cells']
    .mean()
    .reset_index()
)
(
    sns.lineplot(data=avgSizeByEpoch, x="epoch", y="cells")
    .set_title("Avg Number of Cells per Organism")
)
1
Text(0.5, 1.0, 'Avg Number of Cells per Organism')

png

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Keep the 50 highest-fitness rows of every epoch.
fitnessHeritage = (
    fullSimulationDataDf.sort_values(by='fitness', ascending=False)
    .groupby('epoch')
    .head(50)
)

# Per epoch, the share of those top rows whose parentId is 'GOD' versus any
# other parent. value_counts(normalize=True) yields fractions in [0, 1], so
# scale by 100 to make the values real percentages, matching both the
# variable name and the 'Percentage' axis label below.
percentages = (
    fitnessHeritage.assign(
        isOrigOrganism=fitnessHeritage['parentId'].eq('GOD')
    )
    .groupby('epoch')['isOrigOrganism']
    .value_counts(normalize=True)
    .unstack(fill_value=0)  # epochs missing one of the two classes get 0
    .mul(100)
    .rename(columns={True: 'Initial Organisms', False: 'Organic Organisms'})
)

# Stacked area chart: the two shares add up to 100 for every epoch.
percentages.plot(
    kind='area',
    stacked=True,
    figsize=(16, 8),
    colormap='copper',
    title='Most Fit Organisms'
)
plt.ylabel('Percentage')
plt.xlabel('Epoch')
plt.legend(title='Parent Type')
plt.show()

png

1
2
# Mean of the 'fitness' column per epoch.
avgFitnessByEpoch = (
    fullSimulationDataDf
    .groupby('epoch')['fitness']
    .mean()
    .reset_index()
)
(
    sns.lineplot(data=avgFitnessByEpoch, x="epoch", y="fitness")
    .set_title("Avg Fitness")
)
1
Text(0.5, 1.0, 'Avg Fitness')

png

1
2
# Overlay the per-epoch means of the 'fitness' and 'offspring' columns.
linePlt = compareAvgMetricsByEpoch(
    dataframe=fullSimulationDataDf,
    firstColumn='fitness',
    secondColumn='offspring',
)
# The return value of set_title is bound to a throwaway name -- presumably so
# the notebook does not echo it.
nothing = linePlt.set_title('Avg Fitness and Avg Offspring')

png

1
# Plot the 'age' column against epoch straight from the full data set.
# Bind the Axes itself to linePlt (not the Text that set_title returns) so the
# name stays usable for further plot tweaks, and park the Text in a throwaway
# name so the notebook does not echo it.
linePlt = sns.lineplot(x="epoch", y="age", data=fullSimulationDataDf)
nothing = linePlt.set_title("Population Range")

png

1
# Overlay the per-epoch means of 'fitness' and 'age', then switch the y axis
# to a logarithmic scale.
(
    compareAvgMetricsByEpoch(
        dataframe=fullSimulationDataDf,
        firstColumn='fitness',
        secondColumn='age',
    )
    .set_yscale('log')
)

png

1
1
2
3
4
5
6
7
8
9
10
11
12
# Row counts per (epoch, causeOfDeath), reshaped to one row per epoch and one
# column per cause; combinations that never occur are filled with 0.
trend = (
    fullSimulationDataDf
    .groupby(['epoch', 'causeOfDeath'])
    .size()
    .rename('count')
    .reset_index()
    .pivot(index='epoch', columns='causeOfDeath', values='count')
    .fillna(0)
)
# Transposed so epochs run along the x axis and causes along the y axis.
sns.heatmap(
    trend.transpose(),
    cmap="viridis",
    annot=False,
    fmt="g",
)
plt.xlabel("Epoch")
plt.ylabel("Cause of Death")
plt.title("Counts by Epoch and Cause of Death")
plt.show()

png

Cause-of-death key: Unknown (0), Stagnation (1), Exhaustion (2), OldAge (3).

1
2
3
4
5
# Same per-epoch cause-of-death counts, drawn as a stacked area chart.
trend.plot(kind='area', stacked=True)
plt.xlabel("Epoch")
plt.ylabel("Count")
plt.title("Cause of Death Trends (Stacked)")
plt.show()

png

1
This post is licensed under CC BY 4.0 by the author.