Read CSV using pandas
import pandas as pd
# Parse the CSV file into a DataFrame.
dataframe1 = pd.read_csv(filepath_or_buffer='xxx.csv')
Example
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Exploratory walk-through of a SAS dataset with pandas and matplotlib.
# NOTE: the bare expressions below (e.g. `wv.shape`) only display a value in
# an interactive session (REPL / IPython / Jupyter); run as a script they are
# evaluated and discarded.

# Load the SAS file into a DataFrame.
wv = pd.read_sas(r'C:\temp\final_all.sas7bdat')

# Count how often each school name occurs and look at the first 76 entries.
schoolname = wv['School_Name'].value_counts()
schoolname[:76]

# Basic structure of the DataFrame.
type(wv)
wv.shape
wv.columns
type(wv.columns)
wv.index
type(wv.index)

# Positional slicing: first 5 rows, first 4 columns.
wv.iloc[:5, :4]
# A single row (the fifth from the end), first 5 columns.
# NOTE(review): use `wv.iloc[-5:, :5]` if the last five rows were intended.
wv.iloc[-5, :5]
wv.head(5)
wv.tail(3)
wv.info()

# Selecting one column yields a Series.
ethnicity = wv['Ethnicity']
type(ethnicity)

# Underlying NumPy arrays of the whole frame and of a single column.
np_vals = wv.values
x1 = wv['Reading_Score'].values
plt.plot(x1)
plt.show()

# Reading vs Science
wv.plot(x='Reading_Score', y='Science_Score', kind='scatter')
plt.xlabel('Reading')
plt.ylabel('Science')
plt.show()  # was `plt.show` without parentheses, which never calls it

# histogram
wv.plot(y='Science_Score', kind='hist')
plt.xlabel('Science_Score')
plt.show()

# Summary statistics of a single column.
wv['GrandTotal'].count()
wv['GrandTotal'].mean()
wv['GrandTotal'].std()
wv['GrandTotal'].median()

# entire data frame
# numeric_only=True restricts the statistics to numeric columns; without it
# pandas >= 2.0 raises on text columns such as the school name.
wv.mean(numeric_only=True)
wv.std(numeric_only=True)
wv.quantile(0.5, numeric_only=True)

# box plots
# NOTE(review): originally marked "this didn't work". `plt.show` was never
# actually called here (missing parentheses), which may have been the
# cause -- confirm.
wv.plot(kind='box')
plt.ylabel('[Reading_Score]')
plt.show()

# NOTE(review): was wv['SchoolName']; changed to match the 'School_Name'
# column used above -- confirm against wv.columns.
wv['School_Name'].describe()
Reading a text file
# Read a delimited text file into a DataFrame.
# The encoding was previously assigned but never used; pass it to read_csv
# so the file is decoded with the encoding named here.
encoding = "utf-8"
df = pd.read_csv('C:/Users/1/Documents/Python Scripts/test.csv',
                 encoding=encoding)
df
fruits = ['apple', 'banana', 'orange']

# Get the elements of fruits using a for loop, and print 'I like ___s'
for fruit in fruits:
    # The loop body must be indented, otherwise Python raises IndentationError.
    print('I like ' + fruit + 's')
Reference:
Charles Severance's Python for Informatics
IN and NOT IN
# `in` is True when the left string occurs inside the right string.
if "H" in "Hello":
    # The body of the `if` must be indented.
    print("Yes")

# `not in` is the negated membership test. This evaluates to True; the value
# is only displayed in an interactive session.
"Z" not in "HEllo"
Identify the working directory
import os
# Return the current working directory as a string; the value is only
# displayed when run in an interactive session.
os.getcwd()
Change the working directory (in an ordinary Python string each backslash of a Windows path must be doubled, \\, because a single \ starts an escape sequence).
# Make c:\temp the current working directory for later relative paths.
os.chdir(path='c:\\temp')
Read a file and print each line that contains a search word (p. 129 of Python for Informatics)
import os
import re
# Print every line of macro1.txt (in C:\temp) that contains 'Madison'.
os.chdir('C:\\temp')

# The with-statement closes the file when the block ends, even if an error
# occurs while reading; the original handle was never closed.
with open('macro1.txt') as hand:
    for line in hand:
        # Drop the trailing newline so print() does not emit a blank line.
        line = line.rstrip()
        if re.search('Madison', line):
            print(line)