pandasで要素、行、列に関数を適用するmap, applymap, apply
In [1]:
import pandas as pd
import numpy as np

# Sample 3x4 integer frame shared by the map / applymap / apply examples.
df = pd.DataFrame(dict(a=[11, 21, 31],
                       b=[12, 22, 32],
                       c=[13, 23, 33],
                       d=[14, 24, 34]))
print(df)
In [2]:
In [3]:
# All three rows carry the same sample text.
sample_text = 'PCB,MO(4) FH-3050の2次処理面 TO PCB,MO(1) FH-3050-1..'
df['col'] = [sample_text] * 3

# Keep only the part that precedes the last ' TO ' separator of each element.
s = df['col'].map(lambda text: str(text).rpartition(' TO ')[0])
print(s)
In [4]:
def drop_repeated_prefix(text, marker='の', sep='TO'):
    """Remove the repeated leading prefix that follows ``sep``.

    The prefix is everything in ``text`` before the first ``marker``.  The
    first occurrence of that prefix found at or after the first ``sep`` is
    removed together with the ``marker`` that follows it.

    Args:
        text: Value to process; it is converted with ``str()`` first.
        marker: Substring that terminates the prefix.
        sep: Substring after which the repeated prefix is searched for.

    Returns:
        The shortened string, or ``str(text)`` unchanged when ``marker``,
        ``sep`` or the repeated prefix cannot be found.
    """
    text = str(text)
    marker_at = text.find(marker)
    sep_at = text.find(sep)
    # str.find returns -1 when nothing matches; slicing with -1 would silently
    # produce a wrong result, so bail out instead.
    if marker_at <= 0 or sep_at < 0:
        return text
    prefix = text[:marker_at]
    repeat_at = text.find(prefix, sep_at)
    if repeat_at < 0:
        return text
    return text[:repeat_at] + text[repeat_at + len(prefix) + len(marker):]


s = 'PCB,MO(4) FH-3050の1次処理面 TO PCB,MO(4) FH-3050の2次処理面'
df['col'] = [s, s, s]
print(s.find('の'))
print(s[:s.find('の')])

# Apply the helper element-wise; from here on ``s`` is the resulting Series.
s = df['col'].map(drop_repeated_prefix)
print(s)
In [5]:
# Sample text placed in every row of the 'col' column.
s = 'PCB,MO(4) FH-3050の1次処理面 TO PCB,MO(1) FH-3050-1の2次処理面'
df['col'] = [s] * 3

# Delete every '次処理' fragment from each element; ``s`` becomes the Series.
s = df['col'].map(lambda value: str(value).replace('次処理', ''))
print(s)
In [6]:
df = pd.DataFrame({'a': [11, 21, 31],
                   'b': [12, 22, 32],
                   'c': [13, 23, 33],
                   'd': [14, 24, 34]})


def f_oddeven(x):
    """Return 'odd' when ``x % 2 == 1``, otherwise 'even'."""
    return 'odd' if x % 2 == 1 else 'even'


# DataFrame.applymap is deprecated in favour of DataFrame.map since pandas 2.1.
# Use whichever element-wise method this pandas version provides.
elementwise = df.map if hasattr(df, 'map') else df.applymap
print(elementwise(f_oddeven))
In [7]:
def f_maxmin(x):
    """Return the largest value of ``x`` minus its smallest value."""
    return max(x) - min(x)


# Without ``axis`` the function receives each column; axis=1 passes each row.
print(df.apply(f_maxmin))
print(df.apply(f_maxmin, axis=1))
In [8]:
# NOTE(review): f_str is not defined in any cell visible here (only a
# commented-out draft exists) — confirm where it comes from before running.

# Apply f_str to every element of column 'b'.
converted_column = df['b'].map(f_str)
df['b'] = converted_column
print(df)

# Apply f_str to every element of the row at position 2.
converted_row = df.iloc[2].map(f_str)
df.iloc[2] = converted_row
print(df)
空DataFrameの作成パターン
In [1]:
import pandas as pd

cols = ['col1', 'col2']
# Start from a frame that has the columns but no rows.
df = pd.DataFrame(index=[], columns=cols)
record = pd.Series(['hoge', 'fuga'], index=df.columns)

# DataFrame.append was removed in pandas 2.0, and growing a frame one row at a
# time copies it on every step. Collect the five rows and concatenate once.
new_rows = pd.DataFrame([record] * 5)
df = pd.concat([df, new_rows], ignore_index=True)
print(df)
In [2]:
# A zero-length positional slice keeps the columns but no rows.
df_blank = df.iloc[0:0]
print(df_blank)

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way and keeps the original index labels.
df1 = pd.concat([df_blank, df])
print(df1)
In [3]:
import pandas as pd
import numpy as np

# Source DataFrame: 100 rows of random floats in three named columns.
column_names = ['foo', 'bar', 'baz']
random_values = np.random.random((100, len(column_names)))
df = pd.DataFrame(random_values, columns=column_names)
print(df.head())
print(len(df))
In [4]:
# Create a DataFrame that carries only df's index and no columns yet.
shared_index = df.index
df_new = pd.DataFrame(columns=[], index=shared_index)
print(df_new)
In [5]:
# Add a column derived from df to the column-less frame: 'foo' squared.
foo_squared = df['foo'] ** 2
df_new['hoge'] = foo_squared
print(df_new.head())
In [ ]:
pandas.DataFrameに列を追加(assign, concat)
import pandas as pd
import numpy as np
In [2]:
%%html
<style>
/* Float tables to the left of the output area. */
table {float:left}
/* Left-align the text inside table cells. */
td {text-align:left}
</style>
In [3]:
# Three string columns A-C whose values are '<column><row number>',
# with the rows labelled ONE, TWO, THREE.
row_labels = ['ONE', 'TWO', 'THREE']
df = pd.DataFrame({col: [col + str(n) for n in (1, 2, 3)] for col in 'ABC'},
                  index=row_labels)
print(df)
In [4]:
# Item assignment with a new label adds the column to df itself.
df['new column'] = list(range(1, 4))
print(df)
In [5]:
# assign() with the name of an existing column ('A'): the result is bound to
# df_new, with every value of 'A' set to the scalar 0.
overrides = {'A': 0}
df_new = df.assign(**overrides)
print(df_new)
In [6]:
# assign() with a name that is not yet a column ('D'): the result is bound to
# df_new, with the scalar 0 as the value of the new column.
additions = {'D': 0}
df_new = df.assign(**additions)
print(df_new)
In [7]:
# A Series whose labels only partly overlap df's index (TWO, THREE vs FOUR).
s = pd.Series(data=['X2', 'X3', 'X4'],
              index=['TWO', 'THREE', 'FOUR'],
              name='X')
print(s)

# Several columns in one assign() call, passed in the original keyword order.
new_columns = {
    'C': 'XXX',              # scalar for an existing column name
    'D': 0,                  # scalar for a new column name
    'E': [0, 1, 2],          # plain list
    'F': s,                  # the Series itself
    'G': s.values,           # the Series' underlying array
    'H': df['A'] + df['B'],  # expression built from existing columns
}
df_new = df.assign(**new_columns)
print(df_new)
In [8]:
# Rebuild the three-column sample frame (rows labelled ONE, TWO, THREE).
df = pd.DataFrame(dict(A=['A1', 'A2', 'A3'],
                       B=['B1', 'B2', 'B3'],
                       C=['C1', 'C2', 'C3']),
                  index=['ONE', 'TWO', 'THREE'])
print(df)
In [9]:
# A Series whose labels only partly overlap df's index.
s = pd.Series(data=['X2', 'X3', 'X4'],
              index=['TWO', 'THREE', 'FOUR'],
              name='X')
print(s)

# Column-wise concatenation of the frame and the Series, with sort=True.
df_concat = pd.concat([df, s], sort=True, axis='columns')
print(df_concat)
In [10]:
# Column-wise concatenation again, this time with join='inner'.
pieces = [df, s]
df_concat_in = pd.concat(pieces, join='inner', axis='columns')
print(df_concat_in)
In [11]:
# Two Series that reuse df's index, named 'X' and 'Y'.
s1 = pd.Series(['X' + str(n) for n in (1, 2, 3)], index=df.index, name='X')
s2 = pd.Series(['Y' + str(n) for n in (1, 2, 3)], index=df.index, name='Y')

# Concatenate the frame and both Series column-wise in one call.
df_concat = pd.concat([df, s1, s2], axis='columns')
print(df_concat)
In [12]:
# Second frame on df's index: a scalar column and a range(3) column.
df2 = pd.DataFrame(dict(df_col1=0, df_col2=range(3)), index=df.index)
print(df2)

# Concatenate the two frames column-wise.
df_concat2 = pd.concat([df, df2], axis='columns')
print(df_concat2)
In [ ]:
Python 標準ライブラリー
In [ ]:
import os

print(os.getcwd())  # Return the current working directory

# Derive the Documents folder from the current user's home directory instead
# of hard-coding one machine's absolute path, and only switch if it exists.
documents_dir = os.path.join(os.path.expanduser('~'), 'Documents')
if os.path.isdir(documents_dir):
    os.chdir(documents_dir)  # Change current working directory
print(os.getcwd())

# os.makedirs(exist_ok=True) creates the directory without spawning a shell
# and does not fail when the cell is run a second time.
os.makedirs('today', exist_ok=True)

dir(os)   # list of the names defined by the module (not displayed here)
help(os)  # prints the module's manual page
In [ ]:
import shutil

# Copy one file's contents to another path, then move a directory.
shutil.copyfile(src='fileA', dst='fileB')
shutil.move(src='/build/executables', dst='installdir')
In [ ]:
import glob

# Paths in the current working directory that match the wildcard pattern.
glob.glob(pathname='*.py')
In [ ]:
import sys

# Show the argument list the running interpreter was started with.
command_line = sys.argv
print(command_line)
In [ ]:
# Write the warning to the standard error stream.
warning_text = 'Warning, log file not found starting a new one\n'
sys.stderr.write(warning_text)
In [31]:
import re

# Words that start with 'f'.
f_words = re.compile(r'\bf[a-z]*')
rst = f_words.findall('which foot or hand fell fastest')
print(rst)

# Collapse a word that is immediately repeated into a single occurrence.
doubled_word = re.compile(r'(\b[a-z]+) \1')
rst = doubled_word.sub(r'\1', 'cat in the the hat')
print(rst)
In [32]:
# A fixed substring swap needs only str.replace, not a regular expression.
phrase = 'tea for too'
rst = phrase.replace('too', 'two')
print(rst)
In [33]:
import math

# Cosine of pi/4 radians, then the base-2 logarithm of 1024.
quarter_pi = math.pi / 4
print(math.cos(quarter_pi))
print(math.log(1024, 2))
In [38]:
In [39]:
import statistics

data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]

# Print mean, median and sample variance of the same data, in that order.
for summarize in (statistics.mean, statistics.median, statistics.variance):
    print(summarize(data))
In [43]:
from urllib.request import urlopen

TIME_URL = 'http://tycho.usno.navy.mil/cgi-bin/timer.pl'

try:
    # The timeout keeps the cell from hanging when the host does not answer.
    with urlopen(TIME_URL, timeout=10) as response:
        for line in response:
            line = line.decode('utf-8')  # Decoding the binary data to text.
            if 'EST' in line or 'EDT' in line:  # look for Eastern Time
                print(line)
except OSError as exc:
    # URLError and socket timeouts are OSError subclasses; report the failure
    # instead of leaving the notebook with a raised exception.
    print('Could not fetch {}: {}'.format(TIME_URL, exc))
In [1]:
# ``import urllib`` alone does not load the ``urllib.request`` submodule, so
# this cell would raise AttributeError on a fresh kernel. Import it explicitly.
import urllib.request

# Route http requests made through urllib.request via the given proxy address.
proxy = urllib.request.ProxyHandler({'http': '127.0.0.1'})
opener = urllib.request.build_opener(proxy)
# Make this opener the one used by later urlopen() calls.
urllib.request.install_opener(opener)
In [ ]:
import smtplib

# Headers and body must be separated by one blank line; without it the body
# text is read as part of the header section.
message = """To: jcaesar@example.org
From: soothsayer@example.org

Beware the Ides of March.
"""

# The context manager closes the connection even if sendmail raises.
with smtplib.SMTP('localhost') as server:
    server.sendmail('soothsayer@example.org', 'jcaesar@example.org', message)
In [49]:
from datetime import date

# Today's date, printed in ISO form and then with a custom format string.
now = date.today()
print(now)
print(now.strftime("%m-%d-%y. %d %b %Y is a %A on the %d day of %B."))

# Subtracting two dates gives a timedelta; print its length in days.
birthday = date(year=1964, month=7, day=31)
age = now - birthday
print(age.days)
In [53]:
import zlib

s = b'witch which has which witches wrist watch'
print(len(s))

# Compress, then show the compressed bytes, their length, the round-tripped
# original and the CRC-32 checksum of the original.
t = zlib.compress(s)
for shown in (t, len(t), zlib.decompress(t), zlib.crc32(s)):
    print(shown)
In [54]:
from timeit import Timer

# Both snippets swap two variables and start from the same setup statement.
setup_code = 'a=1; b=2'

# Swap through a temporary variable.
t1 = Timer(stmt='t=a; a=b; b=t', setup=setup_code).timeit()
print(t1)

# Swap with tuple packing and unpacking.
t2 = Timer(stmt='a,b = b,a', setup=setup_code).timeit()
print(t2)
In [56]:
def average(values):
    """Return the arithmetic mean of a sequence of numbers.

    The sum of ``values`` is divided by their count, so an empty sequence
    raises ZeroDivisionError.

    >>> print(average([20, 30, 70]))
    40.0
    """
    total = sum(values)
    count = len(values)
    return total / count
import doctest

# Automatically validate the tests embedded in docstrings; the summary is the
# cell's displayed value.
doctest_summary = doctest.testmod()
doctest_summary
Out[56]:
In [ ]:
import unittest


class TestStatisticalFunctions(unittest.TestCase):
    """Unit tests for average()."""

    def test_average(self):
        """Check typical means and the two error cases."""
        self.assertEqual(average([20, 30, 70]), 40.0)
        self.assertEqual(round(average([1, 5, 7]), 1), 4.3)
        # An empty list divides by a zero length.
        with self.assertRaises(ZeroDivisionError):
            average([])
        # average() takes a single sequence argument, not separate numbers.
        with self.assertRaises(TypeError):
            average(20, 30, 70)


# Inside a notebook, unittest.main() would otherwise parse the kernel's own
# command-line arguments and call sys.exit() when finished. Pass a dummy argv
# and exit=False so the tests run and the kernel keeps going.
unittest.main(argv=['ignored'], exit=False)
pandasデータのイテレーション(forループ処理)
In [1]:
In [2]:
In [3]:
# Iterate over tuples of (column name, column values as a Series).
# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same pairs.
for column_name, item in df.items():
    print(column_name, type(column_name))
    print(item, type(item))
    # Three ways to read one value: by label, by position, by attribute.
    # .iloc[0] makes the positional lookup explicit, because plain item[0] on a
    # label-indexed Series is deprecated.
    print(item['Alice'], item.iloc[0], item.Alice)
    print('======\n')
In [4]:
# Iterate over tuples of (row label, row values as a Series).
for index, row in df.iterrows():
    print(index, type(index))
    print(row, type(row))
    # Three ways to read one value: by label, by position, by attribute.
    # .iloc[2] makes the positional lookup explicit, because plain row[2] on a
    # label-indexed Series is deprecated.
    print(row['point'], row.iloc[2], row.point)
    print('======\n')
In [5]:
# Iterate over one tuple per row, holding the row label and the row values.
# With the default name, each tuple is a namedtuple called 'Pandas'.
for row in df.itertuples(index=True, name='Pandas'):
    print(row, type(row))
    # Read a field by position and by attribute name.
    by_position, by_name = row[3], row.point
    print(by_position, by_name)
    print('======')
In [6]:
# Passing name=None makes itertuples return plain tuples instead.
for row in df.itertuples(index=True, name=None):
    print(row, type(row))
    # Plain tuples have no field names, so read by position only.
    by_position = row[3]
    print(by_position)
    print('======\n')
In [7]:
In [8]:
# Halve 'point' on the row object handed out by iterrows(), then print df.
# NOTE(review): presumably this demonstrates that editing the yielded row does
# not write back to df — confirm against the printed output.
for index, row in df.iterrows():
    row['point'] = row['point'] / 2
print(df)
In [9]:
# Halve 'point' by writing to df itself through .at, one row label at a time.
for index in df.index:
    df.at[index, 'point'] = df.at[index, 'point'] / 2
print(df)