Film Bang Companies Analysis

Analysis and charts of data on the companies listed in Film Bang from 1976 - 2020

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
plt.style.use('seaborn')
plt.style.use('seaborn-colorblind')
plt.rcParams['figure.figsize'] = [12, 6]
In [2]:
x = str
df = pd.read_excel('../Core_Data/020321_Master_List_Film_Bang_Production Companies.xlsx')
In [3]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 300)
In [4]:
colours = sns.color_palette('colorblind', as_cmap=True)
In [5]:
df.rename(columns={2021: '2021', 2020: '2020',2019: '2019', 2018: '2018',2017: '2017', 2016: '2016', 2015: '2015', 2014: '2014', 2013: '2013', 2012: '2012', 2011: '2011', 2010: '2010', 2009: '2009', 2008: '2008', 2007: '2007', 2006: '2006', 2005: '2005', 2004: '2004', 2003: '2003', 2002: '2002', 2001: '2001', 2000: '2000'}, inplace=True)
In [6]:
df.rename(columns={1999: '1999', 1998: '1998', 1997: '1997', 1996: '1996', 1995: '1995', 1994: '1994', 1993: '1993', 1992: '1992', 1991: '1991', 1990: '1990'}, inplace=True)
In [7]:
df.rename(columns={1989:'1989', 1988:'1988', 1987:'1987', 1986:'1986', 1984: '1984', 1982: '1982', 1981:'1981', 1979: '1979', 1978: '1978', 1976: '1976'}, inplace=True)
In [8]:
df.shape
Out[8]:
(431, 53)
In [9]:
# Example
df.loc[1, ['Company Name','Website','City','Key personnel']]
Out[9]:
Company Name           15 Media
Website                     NaN
City                   Aberdeen
Key personnel    Norman Thomson
Name: 1, dtype: object
In [10]:
# Number of companies
df['Company Name'].count()
Out[10]:
431
In [11]:
# Check for duplicates
df['Company Name'].value_counts().head(5)
Out[11]:
1A Productions                                             1
Skyline Film & Television Productions                      1
Mediasport                                                 1
Gauvain Productions Ltd (from 2002 Red Eye Productions)    1
Pixall                                                     1
Name: Company Name, dtype: int64
In [12]:
df['Larger/Group'].value_counts()
Out[12]:
Y      36
Y/G    11
Name: Larger/Group, dtype: int64

Data Preparation

Cities

In [13]:
# count instanges of & to count companies with more than one location
df['City'].str.contains('&').sum()
#NOTE number is actually 3 since one instance of '&' is part of a 'prev' qualification
Out[13]:
4
In [14]:
# count instanges of / to count companies with more than one location
df['City'].str.contains('/').sum()
Out[14]:
2
In [15]:
# count instances of 'prev' to count companies that moved
df['City'].str.contains('prev').sum()
Out[15]:
25
In [16]:
# find 'mispelled entries and replace with correct string
df['City'] = df['City'].str.replace('Edinbburgh', 'Edinburgh')
In [17]:
# City :'last known' city. Split column by 'prev' and use only column 1 of the set
cities = df['City'].str.split('prev|&', expand=True,)
cities.fillna(value=np.nan, inplace=True)
df = df.join(cities)
# rename columns
df.rename(columns={0: 'City1', 1: 'City2', 2: 'City3'}, inplace=True)
In [18]:
# Replace NaN values with string to allow filtering
df['City'].replace(np.nan, 'Missing', inplace=True)
In [19]:
# remove whitespace from city columns
df['City1'] = df['City1'].str.strip()
df['City2'] = df['City2'].str.strip()
df['City3'] = df['City3'].str.strip()

Regions

In [20]:
region_numbers = df['Region'].value_counts(dropna=False).values.tolist()
region_keys = df["Region"].value_counts(dropna=False).index.tolist()
In [21]:
# Export Region Data for Datawrapper
df_regions = pd.DataFrame(columns=('Region','Number'))
df_regions['Region'] = region_keys
df_regions['Number'] = region_numbers
df_regions.to_csv('Outputs/regions.csv', index=False)

Postcodes

In [22]:
postcodes = df['Post code'].str.split('prev|Prev|(2001)|;', expand=True)
postcodes.fillna(value=np.nan, inplace=True)
df = df.join(postcodes)
df.rename(columns={0: 'P_Code1'}, inplace=True)
In [23]:
df['P_Code1'] = df['P_Code1'].str.replace(r'\;|\(','')
In [24]:
df['P_Code1'].replace(r"^ +| +$", r"", regex=True, inplace=True)
In [25]:
# Import second dataset in order to add a column with area code
df2 = pd.read_csv('../Core_Data/ukpostcodes.csv')
In [26]:
df = df.join(df2.set_index('postcode'), on='P_Code1')
In [27]:
locations = df['City1'].value_counts().index.tolist()
location_count = df['City1'].value_counts().values.tolist()
In [28]:
country = ['Scotland' for x in range(len(locations))]
In [29]:
df_locations = pd.DataFrame(columns=('Country','Location', 'Count'))
df_locations['Country'] = country
df_locations['Location'] = locations
df_locations['Count'] = location_count
df_locations.to_csv('Outputs/locations.csv', index=False)
df_locations
Out[29]:
Country Location Count
0 Scotland Glasgow 200
1 Scotland Edinburgh 118
2 Scotland Aberdeen 12
3 Scotland Inverness 5
4 Scotland Stornoway 3
5 Scotland Ayr 3
6 Scotland Dundee 3
7 Scotland Inverkeithing 2
8 Scotland North Berwick 2
9 Scotland Penicuik 2
10 Scotland Blairgowrie 2
11 Scotland Skye 2
12 Scotland Stirling 2
13 Scotland Motherwell 2
14 Scotland East Kilbride 2
15 Scotland Berwickshire 2
16 Scotland Aberdeenshire 1
17 Scotland Killearn 1
18 Scotland Tillicoutry 1
19 Scotland Borders 1
20 Scotland Angus 1
21 Scotland Markinch 1
22 Scotland South Queensferry 1
23 Scotland Glasgow (and London in 2001) 1
24 Scotland Ardgour 1
25 Scotland Tillicoultry 1
26 Scotland Ellon 1
27 Scotland Lanark 1
28 Scotland Lewis 1
29 Scotland Beith 1
30 Scotland Ninemileburn 1
31 Scotland Gourock 1
32 Scotland Belfast 1
33 Scotland Carlisle 1
34 Scotland Glasgow and London 1
35 Scotland Ayrshire 1
36 Scotland Achnasheen 1
37 Scotland Kilmacolm 1
38 Scotland Rumbling Bridge 1
39 Scotland Dunning 1
40 Scotland Livingston 1
41 Scotland Brechin 1
42 Scotland Dunblane 1
43 Scotland Balquhidder 1
44 Scotland Melrose 1
45 Scotland Bearsden 1
46 Scotland Glenrothes 1
47 Scotland Clydebank 1
48 Scotland Fort William 1
49 Scotland St Abbs 1
50 Scotland Alva 1
51 Scotland Nairn 1
52 Scotland Maybole 1
53 Scotland Kirknewton 1
54 Scotland Orkney 1
55 Scotland Renfrewshire 1
56 Scotland Strathaven 1
57 Scotland Hawick 1
58 Scotland Lochwinnoch 1
59 Scotland Edin/London 1
60 Scotland Montrose 1
61 Scotland Falkirk 1
62 Scotland Dumbarton 1
63 Scotland Newburgh 1
64 Scotland Perth 1
65 Scotland Argyll 1
66 Scotland HI 1
67 Scotland Lesmahagow 1
68 Scotland Kilmarnock 1
69 Scotland Lasswade 1
70 Scotland Glengarnock 1
71 Scotland Fife 1
72 Scotland Callander 1
73 Scotland Shandon 1
74 Scotland Barra 1
75 Scotland Cromarty 1
76 Scotland Coupar Angus 1

Add latitude & Longitude to Data

for use with other chart tools

In [30]:
lat = df['latitude']
lon = df['longitude']
df_lat_long = pd.DataFrame(columns=('latitude', 'longitude', 'count'))
df_lat_long['latitude'] = lat
df_lat_long['longitude'] = lon
df_lat_long['count'] = 1
In [31]:
df_lat_long.to_csv('Outputs/df_lat_long.csv', index=False)
df_lat_long.head(10)
Out[31]:
latitude longitude count
0 NaN NaN 1
1 NaN NaN 1
2 NaN NaN 1
3 NaN NaN 1
4 55.606248 -2.908081 1
5 NaN NaN 1
6 NaN NaN 1
7 NaN NaN 1
8 NaN NaN 1
9 57.145434 -5.799500 1

Clusters

In [32]:
df['Post code'].value_counts().head(5)
Out[32]:
EH6 6AW    4
G1 1LH     4
G3 8ND     4
G51 2LY    4
EH1 3JT    4
Name: Post code, dtype: int64
In [33]:
# Showing duplicates of key personnel. 
# Cases where the same people set up more than one production company over the years
#df['Key personnel'].value_counts().head(10)
In [34]:
# Number of cases where multiple companies fall under the same name
len(df['Key personnel'].value_counts().head(8).values.tolist())
Out[34]:
8
In [35]:
# Checking duplicate values in Website column
df['Website'].value_counts(dropna=True).head(10)
Out[35]:
Prod Co                          19
Video Prod                        3
www.g-mac.co.uk                   2
Prod Co & Video                   2
Prod                              2
www.blackbear.tv                  1
www.mnetelevision.com             1
Prod Co Video                     1
www.objectivemediagroup.co.uk     1
www.enterprisescreen.com          1
Name: Website, dtype: int64
In [36]:
# Duplicate addresses / clusters
df['Address'].value_counts(dropna=True).head(10)
Out[36]:
1103 Argyle Street                4
Film City                         3
100b Constitution Street          3
Film City, 401 Govan Road         3
69 Fergus Drive, Glasgow          2
420 Sauchiehall Street            2
227 Sauchiehall Street            2
34 Albion Street                  2
6 Queen Margaret Road, Glasgow    2
4 Picardy Place                   2
Name: Address, dtype: int64

Queries

No of Years in Film Bang

In [37]:
def year_count(range_start, range_end):
    for year in df.loc[:, range_start:range_end]:
        df['Yr Cnt '+year] = df.loc[:, year:range_end].count(axis=1)
year_count('2021', '1976')
In [38]:
df_entries = df.loc[:,'2021':'1976']
In [39]:
df['No of Yrs'] = df_entries.count(axis=1)
In [40]:
# Aggregate to 5 year bins
bins = pd.cut(df['No of Yrs'], [0, 5, 10, 15, 20, 25, 30, 35, 40, 45])
df.groupby(bins)['No of Yrs'].agg(['count'])
Out[40]:
count
No of Yrs
(0, 5] 295
(5, 10] 64
(10, 15] 34
(15, 20] 20
(20, 25] 11
(25, 30] 4
(30, 35] 0
(35, 40] 0
(40, 45] 0
In [41]:
# Aggregate around bins used in Companies Survey
bins = pd.cut(df['No of Yrs'], [0, 2, 5, 10, 20, 45])
bins_values = df.groupby(bins)['No of Yrs'].agg(['count'])
In [42]:
bins_values
Out[42]:
count
No of Yrs
(0, 2] 186
(2, 5] 109
(5, 10] 64
(10, 20] 54
(20, 45] 15
In [43]:
bin_indexes = ['1 year','2-5 years', '6-10 years', '11 - 20', '> 20 years']
In [44]:
# Drop entries with no year data (added to db in error)
# Get indexes where No of Yrs == 0
noEntriesIndexes = df[df['No of Yrs'] == 0].index
# Delete these row indexes from dataFrame
df.drop(noEntriesIndexes , inplace=True)

Film Bang Listings - Company Count

In [45]:
# Drop NFB entries in 2021 column
# Get indexes where name column has value 'NFB'
indexNames = df[(df['2021'] == 'NFB')].index
# Delete these row indexes from dataFrame
df.drop(indexNames , inplace=True)
In [46]:
indexNames
Out[46]:
Int64Index([132, 133, 150, 406], dtype='int64')
In [47]:
def company_count(output_array, dataframe, range_stop, range_start):
    for col in dataframe.loc[:, range_stop:range_start]:
        total = dataframe[col].value_counts(dropna=True).values.tolist()
        output_array.append(total[0])
    return(output_array)
In [48]:
# create output array
total_company = []
In [49]:
# call company count function
companytotal = company_count(total_company, df, '2021', '1976')
In [50]:
print(total_company)
[44, 51, 51, 49, 49, 55, 56, 61, 60, 65, 64, 62, 77, 79, 78, 70, 72, 74, 78, 81, 87, 85, 82, 81, 72, 62, 60, 65, 56, 53, 48, 39, 35, 27, 34, 29, 26, 22, 16, 15, 13, 11]
In [51]:
# create indexes
plot_years = df.loc[:, '2021':'1976'].columns.tolist()
In [52]:
df_companies = pd.DataFrame(columns=('Years','Count'))
df_companies['Years'] = plot_years
df_companies['Count'] = total_company
df_companies.to_csv('Outputs/companies_figures.csv', index=False)
#df_companies
In [53]:
x_indexes = list(range(len(plot_years)))
x_indexes.reverse()
fig, ax = plt.subplots()
ax.plot(x_indexes, companytotal, color='#2E0014', label='Companies in Film Bang 1976 - 2021', linewidth=3)
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_xticks(ticks=x_indexes)
ax.set_xticklabels(plot_years)
fig.autofmt_xdate(rotation=90)
ax.legend(fontsize=14)
ax.set_facecolor('white')
plt.tight_layout()
plt.savefig('Outputs/companies_time.png', facecolor='#ffffff')
plt.show()

Recent Companies 2019 - 2021

In [54]:
# Compile a DataFrame of companies operating from 2019 - 2021
# To filter out NFB values, change this to df_2021 = df[df'2021 == 2021]
df_2021 = df[df['2021'].notna()]
df_2020 = df[df['2020'].notna()]
df_2019 = df[df['2019'].notna()]
df_recent = pd.concat([df_2019, df_2020, df_2021])
df_recent = df_recent.drop_duplicates(subset=['Company Name'])
In [55]:
# Filter for those that set up recently (no of years < 4)
filt_yrs = (df['No of Yrs'] < 4)
df_recent = df_recent[filt_yrs]
df_recent['Region'].value_counts()
Out[55]:
Edinburgh         7
Glasgow           4
Aberdeenshire     1
East Lothian      1
Stirling          1
Fife              1
North Ayrshire    1
Name: Region, dtype: int64

Charts

In [56]:
# Simplify Data
simple_values = (bins_values.values)
value_list = [ item for elem in simple_values for item in elem]
# Plot
colors = sns.cubehelix_palette(start=2, rot=1, dark=0.3)
#colors = ['blue', 'magenta', 'orange', 'green', 'lime', 'cyan', 'yellow']
fig, ax = plt.subplots()
ax.pie(value_list,  labels=bin_indexes, labeldistance=1.2, pctdistance=0.85, textprops={'fontsize': 14}, shadow=False,
        startangle=90, autopct='%1.0f%%', colors=colours, wedgeprops={'edgecolor':'black'})
ax.set_title('Companies', fontsize=16, color='#635DC6', weight='bold')
ax.text(.0,.0,' ', fontsize=14, ha='right')
circle=plt.Circle( (0,0), 0.7, color='white')
p=plt.gcf()
p.gca().add_artist(circle)
plt.tight_layout()
plt.savefig('Outputs/companies_longevity_bins_percentages_ring.png', facecolor='#ffffff')
plt.show()
In [57]:
# Data
keys_list = df['No of Yrs'].value_counts().index.tolist()
values = df['No of Yrs'].value_counts().values
median = df['No of Yrs'].median()

# Plot
width=0.8
fig, ax = plt.subplots()
ax.bar(keys_list, values, color=colours[2], width = width, label='Companies')
ax.axvline(median, linewidth=3, color='black', label='Median')
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_title('')
ax.set_facecolor('white')
ax.set_xticks(ticks=keys_list)
ax.legend(fontsize=14)
plt.grid(True)
plt.tight_layout()
plt.savefig('Outputs/company_longevity.png', facecolor='#ffffff')
plt.show()
In [58]:
# Data
filt = (df['No of Yrs'] > 1)
df2 = df[filt]
keys_list = df2['No of Yrs'].value_counts().index.tolist()
values = df2['No of Yrs'].value_counts().values
median = df2['No of Yrs'].median()

# Plot
width=0.8
fig, ax = plt.subplots()
ax.bar(keys_list, values, color=colours[4], width = width, label='Companies')
ax.axvline(median, linewidth=3, color='black', label='Median')
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_title('')
ax.set_facecolor('white')
ax.set_xticks(ticks=keys_list)
ax.legend(fontsize=14)
plt.grid(True)
plt.tight_layout()
plt.savefig('Outputs/company_longevity>1yr.png', facecolor='#ffffff')
plt.show()

Turnover

In [59]:
years = [*range(1,43)]
df_longevity = pd.DataFrame({'Years': years})
for year in df.loc[:,'2021':'1976']:
    filtyear = df[year].notna()
    dfyear = df[filtyear]
    values_year = dfyear['Yr Cnt '+year].value_counts().array
    #print(values_year)
    keys_year = dfyear['Yr Cnt '+year].value_counts().index
    #print(keys_year)
    df_year = pd.DataFrame({f'{year} Keys':keys_year, f'{year} Values':values_year})
    #print(df_year)
    df_longevity = df_longevity.join(df_year.set_index(f'{year} Keys'), on='Years')
longevity_data = df_longevity.set_index('Years')
In [60]:
#longevity_data
In [61]:
import matplotlib.pylab as pylt

plot_years = df.loc[:, '2021':'1976']
hm_colours = sns.cubehelix_palette(start=2, rot=1, dark=0, light=.95, as_cmap=True)

pylt.figure(figsize=(22,10))
ax = sns.heatmap(longevity_data, linewidth=0.3, cmap=hm_colours, annot=True, fmt=".0f")
pylt.yticks(rotation=0)
ax.xaxis.set_ticks_position('top')
ax.set_xticklabels(plot_years,rotation=90)
ax.set_facecolor('white')
pylt.xlabel('')
pylt.ylabel('Length of Career in Years')
plt.title('Companies' , fontsize=16, color='black', weight='bold')
#plt.figtext(.0,.0,'Fig. 1.3', fontsize=14, ha='right')
plt.savefig(f"Outputs/company_longevity_basic_1.png", facecolor='#ffffff', dpi=500)
pylt.show()

Dropouts

In [62]:
df['Yr Cnt 1976 x'] = df['Yr Cnt 1976']
In [63]:
df = df[['1976', '1978', '1979', '1981', '1982',
         '1984', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
         '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002',
         '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
         '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021',
         'Yr Cnt 1976 x', 'Yr Cnt 1976', 'Yr Cnt 1978',
         'Yr Cnt 1979', 'Yr Cnt 1981', 'Yr Cnt 1982', 'Yr Cnt 1984', 'Yr Cnt 1986',
         'Yr Cnt 1987', 'Yr Cnt 1988', 'Yr Cnt 1989', 'Yr Cnt 1990', 'Yr Cnt 1991',
         'Yr Cnt 1992', 'Yr Cnt 1993', 'Yr Cnt 1994', 'Yr Cnt 1995', 'Yr Cnt 1996',
         'Yr Cnt 1997', 'Yr Cnt 1998', 'Yr Cnt 1999', 'Yr Cnt 2000', 'Yr Cnt 2001',
         'Yr Cnt 2002', 'Yr Cnt 2003', 'Yr Cnt 2004', 'Yr Cnt 2005', 'Yr Cnt 2006',
         'Yr Cnt 2007', 'Yr Cnt 2008', 'Yr Cnt 2009', 'Yr Cnt 2010', 'Yr Cnt 2011',
         'Yr Cnt 2012', 'Yr Cnt 2013', 'Yr Cnt 2014', 'Yr Cnt 2015', 'Yr Cnt 2016',
         'Yr Cnt 2017', 'Yr Cnt 2018', 'Yr Cnt 2019', 'Yr Cnt 2020', 'Yr Cnt 2021']]
#df.head(1)
In [64]:
temp = df['Yr Cnt 1976']
temp.value_counts()
Out[64]:
0    413
1     11
Name: Yr Cnt 1976, dtype: int64
In [65]:
# check those values against the duplicate column
if temp.value_counts().values.tolist() == df['Yr Cnt 1976 x'].value_counts().values.tolist(): print("Values Equal")
Values Equal
In [66]:
# create diff dataframe
df_diff = df.loc[:, 'Yr Cnt 1976 x':'Yr Cnt 2021'].diff(axis='columns')
#df_diff
In [67]:
df_diff['Yr Cnt 1976'] = temp
In [68]:
# check operation has worked
if df_diff['Yr Cnt 1976'].value_counts().values.tolist() == df['Yr Cnt 1976'].value_counts().values.tolist(): print("Values Equal")
Values Equal
In [69]:
# Multiply selected cols in dataframe by diff dataframe & show results
df_results = df.loc[:, 'Yr Cnt 1976 x':'Yr Cnt 2021'].multiply(df_diff)
#df_results.head()
In [70]:
# Get list of column names
yr_cnt_range = df_results.loc[:, 'Yr Cnt 1976':'Yr Cnt 2021'].columns.tolist()
In [71]:
# Get list of years
yr_range = df.loc[:, '1976':'2021'].columns.tolist()
In [72]:
# Get int version of years list
int_yr_range = []
for i in yr_range:
    int_yr_range.append(int(i))
#print(int_yr_range)
In [73]:
nIYR = len(int_yr_range)
for index in range(1, nIYR):
    i = int_yr_range[index]
    j = str(i)
    df_results['dropout '+ (j[2:])] = np.where((df_results[yr_cnt_range[index-1]] >
              df_results[yr_cnt_range[index]]), i, np.nan)
In [74]:
dropouts = []
In [75]:
for i in yr_range[1:]:
    try:
        dropouts.append((i, df_results['dropout '+i[2:]].value_counts().values[0]))
    except IndexError:
        dropouts.append((i, 0))
In [76]:
df_results['dropout 11'].value_counts().values.tolist()
Out[76]:
[12]
In [77]:
dropout_list = []
for i in dropouts:
    dropout_list.append(i[1])
In [78]:
# at a 0 value for 1976 in dropout_list to account for no dropouts in the first year
insert = 0 # index position
dropout_list[insert:insert] = [0]
In [79]:
# Optional put dropouts in new dataframe
df2 = pd.DataFrame({'values':dropout_list})
In [80]:
# Median number of dropouts per year
dropout_median = df2['values'].median()
In [81]:
df['1976'].value_counts().values[0]
Out[81]:
11
In [82]:
# Get totals for each year
general_count = []
for column in df.loc[:, '1976':'2021']:
    try:
        step = df[column].value_counts().values
        #print(step[0])
        general_count.append(step[0])
    except IndexError:
        general_count.append(0)
print(general_count)
[11, 13, 15, 16, 22, 26, 29, 34, 27, 35, 39, 48, 53, 56, 65, 60, 62, 72, 81, 82, 85, 87, 81, 78, 74, 72, 70, 78, 79, 77, 62, 64, 65, 60, 61, 56, 55, 49, 49, 51, 51, 44]
In [83]:
new_entries = []

for column in df_results.loc[:, 'Yr Cnt 1976':'Yr Cnt 2021']:
    current = df_results[column].isin([1]).sum(axis=0)
    new_entries.append(current)
In [84]:
df3 = pd.DataFrame({'values':new_entries})
new_entry_median = df3['values'].median()
print(new_entry_median)
10.0
In [85]:
df2 = pd.DataFrame({'values':dropout_list})
#print(df2)
In [86]:
df_dropouts = df_results.loc[:, 'dropout 78':'dropout 21']
In [87]:
df['multi dropouts'] = df_dropouts.notna().sum(axis=1)
In [88]:
dropout_tally_list = df['multi dropouts'].value_counts().tolist()
In [89]:
dropout_tally_list
Out[89]:
[341, 42, 35, 5, 1]
In [90]:
try:
    dropout_tally_list.pop(0)
except IndexError:
        print('Index Error')
try:
    dropout_tally_list.pop(0)
except IndexError:
        print('Index Error')
drop_sum = sum(dropout_tally_list)
In [91]:
total_entries = 431
d = 100 / total_entries
one_dropout = d * 341
two_dropouts = d * 42
three_dropouts = d * 42
four_dropouts = d * 5
five_dropouts = d * 1

multi_dropout_total = d * 90

print('One:' , one_dropout , ' %')
print('Two:' , two_dropouts , ' %')
print('Three:' , three_dropouts , ' %')
print('Four:' , four_dropouts , ' %')
print('Five:' , five_dropouts , ' %')
print('Multiple Dropouts:' , multi_dropout_total , ' %')
One: 79.11832946635731  %
Two: 9.744779582366588  %
Three: 9.744779582366588  %
Four: 1.160092807424594  %
Five: 0.23201856148491878  %
Multiple Dropouts: 20.88167053364269  %
In [92]:
plt.rcParams['axes.facecolor']='white'
In [93]:
plot_years = yr_range
In [94]:
fig, ax = plt.subplots()
ax.plot(plot_years, general_count, color="black", label="Total Entries", linewidth=3)
ax.plot(plot_years, dropout_list, color=colours[3], label='Dropouts', linewidth=3)
ax.plot(plot_years, new_entries, color=colours[2], label='New Entries', linewidth=3)

ax.set_xlabel('')
ax.set_ylabel('')
ax.set_title('')
ax.set_xticks(ticks=plot_years)
fig.autofmt_xdate(rotation=90)
ax.set_facecolor('white')

ax.legend(fontsize=14)
plt.grid(True)
plt.tight_layout()

plt.savefig('Outputs/turnover_new_entries_dropouts_totals.png', facecolor='#ffffff')
plt.show()
In [95]:
width = 0.5
fig, axs = plt.subplots(2, 1, sharex=True)
neg_dropout_list = [ -x for x in dropout_list]

axs[0].bar(plot_years, new_entries, width=width, color=colours[2], label='New Entries')
axs[0].set_xticks(plot_years)
axs[1].bar(plot_years, neg_dropout_list, width=width, color=colours[3], label='Dropouts')
axs[1].set_xticks(plot_years)
axs[0].legend(fontsize=14, loc=2, bbox_to_anchor=(0.0,0.9))
axs[1].legend(fontsize=14, loc=2, bbox_to_anchor=(0.0,1.8))
plt.xticks(rotation=90)
plt.tight_layout()
plt.subplots_adjust(hspace=0)
axs[1].set_xlabel('')
axs[0].set_ylabel('')
axs[0].set_title('Film Bang Companies 1976 - 2021 :  New Entries & Dropouts')

fig.set_facecolor('white')
axs[0].set_facecolor('white')
axs[1].set_facecolor('white')

ticks =  axs[1].get_yticks()
axs[1].set_yticklabels([int(abs(tick)) for tick in ticks])
axs[0].grid(False)
axs[1].grid(False)
plt.savefig('Outputs/turnover_new_entries_dropouts_mirror_bars.png', facecolor='#ffffff')

plt.show()
In [96]:
from IPython.display import IFrame
IFrame('https://datawrapper.dwcdn.net/nQITo/1/', width=700, height=900)
Out[96]:
In [ ]: