# Jupyter environment setup.
# `%matplotlib inline` shows graphs in-line in the notebook; `%pprint`
# toggles pretty-printing of lists on/off.
%matplotlib inline
%pprint
import plotly
import plotly.graph_objs as go
import numpy as np # used to generate random numbers for the examples
# Must be called before plotly can be used off-line in the notebook
# (as opposed to rendering through plotly's cloud service).
plotly.offline.init_notebook_mode()
# Basic scatter example: N random points, with x and y each drawn
# independently via np.random.randn (x is drawn first, then y).
N = 1000
random_x, random_y = (np.random.randn(N) for _ in range(2))

# One marker-only trace, wrapped in a list to form the figure data.
trace = go.Scatter(x=random_x, y=random_y, mode='markers')
data = [trace]

# Plot and embed in the notebook.
plotly.offline.iplot(data, filename='basic-scatter')
# Basic line example: two traces plotted over the same x values.
trace0 = go.Scatter(
    x=[1, 2, 3, 4],
    y=[10, 15, 13, 17],
)
trace1 = go.Scatter(
    x=[1, 2, 3, 4],
    y=[16, 5, 11, 9],
)

# A plain list of traces replaces the deprecated go.Data wrapper; this is the
# same form as the `data = [trace]` used for the basic scatter plot.
data = [trace0, trace1]
plotly.offline.iplot(data, filename='basic-line')
# Let's use NLTK's inaugural speech corpus. We will look at the use of 'America' and 'Citizen' over the years.
import nltk
from nltk.corpus import inaugural
# Show every file id available in the inaugural corpus.
inaugural.fileids()

# Word count of the first few speeches, in corpus order.
for x in inaugural.fileids()[:10]:  # remove [:10] to see all
    print(x, len(inaugural.words(x)))

# Word count of every speech, keyed by file id.
foo = {
    f: len(inaugural.words(f))
    for f in inaugural.fileids()
}

# Longest speeches first.
for x in sorted(foo, key=lambda fid: foo[fid], reverse=True)[:10]:  # remove [:10] to see all
    print(x, foo[x])
# speeches maps year -> (president, tokenized words) tuple.
# The slices take the first four characters of the file id as the year and
# everything between position 5 and the last four characters as the name
# (presumably ids look like '<year>-<name>.txt' -- verify against the corpus).
speeches = {}
for x in inaugural.fileids():
    year, pres = int(x[:4]), x[5:-4]
    speech = inaugural.words(x)
    speeches[year] = (pres, speech)

# Peek at a single entry.
speeches[1989]
# Flatten all speeches into (year, word) tuples, in the iteration order of
# `speeches`, as preparation for the conditional frequency distribution.
year2word = [
    (year, word)
    for year, (_, words) in speeches.items()
    for word in words
]
year2word[-10:]

# Build the conditional frequency distribution: one word-frequency table
# per year.
year2word_cfd = nltk.ConditionalFreqDist(year2word)

# Number of mentions of 'America' in the 2009 (Obama's) speech.
year2word_cfd[2009]['America']
# Years present in the frequency distribution, in ascending order.
years = list(year2word_cfd)
years.sort()
years

# President names, aligned index-for-index with `years`.
presidents = [name for name, _ in (speeches[y] for y in years)]
presidents
# Per-year mentions of 'America', 'American' and 'Americans' combined.
america_count = [
    sum(year2word_cfd[year][w] for w in ('America', 'American', 'Americans'))
    for year in years
]
america_count[:20]

# Per-year mentions of 'citizen'/'citizens' in both capitalizations combined.
citizen_count = [
    sum(
        year2word_cfd[year][w]
        for w in ('citizens', 'citizen', 'Citizens', 'Citizen')
    )
    for year in years
]
citizen_count[:20]
# Line chart comparing the per-year 'America' and 'citizen' counts.
# mode can be 'markers', 'lines+markers', 'lines'
trace0 = go.Scatter(
    x=years,
    y=america_count,
    name="America",
    mode="lines+markers",
)
trace1 = go.Scatter(
    x=years,
    y=citizen_count,
    name="Citizen",
    mode="lines+markers",
)

# A plain list of traces replaces the deprecated go.Data wrapper.
mydata = [trace0, trace1]
mylayout = go.Layout(
    title="Frequency of 'America' vs. 'citizen' in inaugural speeches"
)

fig = go.Figure(data=mydata, layout=mylayout)
plotly.offline.iplot(fig, filename='inaugural-america-vs-citizen')
# plotly.tools.FigureFactory is deprecated; the plotly.figure_factory module
# provides create_table under the same `ff` alias used in this notebook.
import plotly.figure_factory as ff
import pandas as pd

# Read the sample CSV directly from its URL into a data frame.
file = "https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv"
df = pd.read_csv(file)

# Turn the data frame into a table figure and show it in the notebook.
table = ff.create_table(df)
plotly.offline.iplot(table, filename='jupyter/table1')

type(df)
# Build a small pandas data frame from (column name, values) pairs.
# DataFrame.from_items was removed in pandas 1.0, so the pairs go through the
# regular constructor; passing `columns` explicitly keeps the columns in the
# order the pairs are listed.
myitems = [('a', [1, 2, 3, 4]), ('b', [2, 8, 30, 40]), ('c', [14, 3, 7, 20])]
df = pd.DataFrame(dict(myitems), columns=[name for name, _ in myitems])
# Create a table figure from the data frame.
table = ff.create_table(df) # the figure is built directly off the data frame
# Show the table in the notebook.
plotly.offline.iplot(table)
# Pair each column name with its list of values. `presidents`,
# `america_count` and `citizen_count` are each built by iterating `years`,
# so the rows line up by year.
time = ('Year', years)
pres = ('President', presidents)
amer = ('America', america_count)
citi = ('Citizen', citizen_count)
inaug_items = [time, pres, amer, citi]

# DataFrame.from_items was removed in pandas 1.0, so the pairs go through the
# regular constructor; passing `columns` explicitly keeps the columns in the
# order the pairs are listed.
df_inaug = pd.DataFrame(
    dict(inaug_items),
    columns=[name for name, _ in inaug_items],
)

# Turn the data frame into a table figure and show it in the notebook.
table_inaug = ff.create_table(df_inaug)
plotly.offline.iplot(table_inaug, filename='jupyter/table_inaug')
# Bar chart of the 'America' and 'Citizen' columns of df_inaug, by year.
trace_a = go.Bar(
    x=df_inaug['Year'],
    y=df_inaug['America'],
    name='America',
    marker=dict(color='#A2D5F2'),
)
trace_b = go.Bar(
    x=df_inaug['Year'],
    y=df_inaug['Citizen'],
    name='Citizen',
    marker=dict(color='#FFCDD2'),
)

# A plain list of traces replaces the deprecated go.Data wrapper.
# For a single-series chart, pass only one trace: data3 = [trace_a]
data3 = [trace_a, trace_b]
plotly.offline.iplot(data3, filename='jupyter/basic_bar')