Imagenette/woof training

This is an example of how to analyze the results logged by a number of fastgpu runs on Imagenette/Imagewoof.

# Root directory of the experiment files analyzed in this notebook.
path = Path('~/git/fastai/nbs/examples/exps').expanduser()
# Subdirectory that is globbed below for `*.stdout` logs.
path_out = path/'out'
# Subdirectory whose entries are parsed by `scr_params` below.
path_scr = path/'complete'
def fn_name(fn):
    "Return the part of `fn`'s file name that precedes its first dot"
    stem, _, _ = fn.name.partition('.')
    return stem

# Column names; `scr_results` zips them with the run name followed by the
# whitespace-separated fields of each result line.
hdrs = 'name epoch trn_loss val_loss top1 top5 time'.split()

def scr_results(fn):
    "Collect the result rows of log file `fn` as an `L` of dicts keyed by `hdrs`"
    rows = []
    for line in fn.readlines():
        # Only lines that start with digits followed by three spaces are result rows
        if not re.match(r'\d+   ', line): continue
        fields = [fn_name(fn)] + line.strip().split()
        rows.append(dict(zip(hdrs, fields)))
    return L(rows)
# Parse every `*.stdout` file under `path_out` and concatenate the per-file
# lists of row dicts into a single list.
all_results = L([scr_results(o) for o in path_out.glob('*.stdout')]).concat()
# `time==time` is False for NaN, so the query drops rows that have no `time`
# value; the remaining metric columns are then cast from strings to numbers.
df_results = DataFrame.from_records(all_results).query('time==time').astype(
    dict(epoch=int, trn_loss=float, val_loss=float, top5=float, top1=float))
def scr_params(fn):
    "Read the `--key value` pairs in the text of `fn` into a dict, adding the run `name`"
    # Everything before the first `--` is discarded
    chunks = fn.read_text().split('--')[1:]
    # Each chunk must split into exactly a key and a value
    params = dict(chunk.strip().split() for chunk in chunks)
    return merge(params, {'name':fn_name(fn)})
# One row of parsed parameters per entry in `path_scr`; `epochs` is cast to int
# because it is used in arithmetic further down.
df_params = DataFrame([scr_params(o) for o in path_scr.ls()]).astype({'epochs':int})
# Preview the first rows.
df_params.head()
woof lr size sqrmom mom eps epochs bs opt sa fp16 arch name pool meta runs mixup sh sym beta
0 0 8e-3 192 0.99 0.95 1e-6 5 64 ranger 1 0 xse_resnext18 02a NaN NaN NaN NaN NaN NaN NaN
1 0 8e-3 128 0.99 0.95 1e-6 20 64 ranger 1 1 xse_resnext50 xf104a MaxPool NaN NaN NaN NaN NaN NaN
2 0 8e-3 256 0.99 0.95 1e-6 5 64 ranger 1 0 xse_resnext18 03a NaN NaN NaN NaN NaN NaN NaN
3 1 8e-3 128 0.99 0.95 1e-6 5 64 ranger 1 1 xse_resnext50 x01b NaN NaN NaN NaN NaN NaN NaN
4 0 8e-3 192 0.99 0.95 1e-6 20 64 ranger 1 1 xse_resnext50 x05a NaN NaN NaN NaN NaN NaN NaN
# Left join on `name`: every parameter row is kept, even when no result rows
# were parsed for that run.
df_merge = pd.merge(df_params, df_results, 'left', on='name')
# Sanity check: no merged row may be missing `arch`.
assert df_merge[pd.isna(df_merge.arch)].empty
# Replace all remaining NaNs (parameters or results absent for a run) with 0.0.
df_merge.fillna(0.0, inplace=True)
# Configured epoch count minus the logged epoch number
# (presumably "remaining epochs" -- confirm the intended meaning).
df_merge['repoch'] = df_merge.epochs - df_merge.epoch
# Write the merged table to CSV in the current working directory.
df_merge.to_csv('imagenette.csv', index=False)
# NOTE(review): presumably IPython's `FileLink`, which renders a link to the file.
FileLink('imagenette.csv')