by paul murphy
Updated Aug 22/22
Get the raw data here: National Oceanic and Atmospheric Administration.
Use:
wget -r --level=0 -E --ignore-length -x -k -p -e robots=off -np -N https://www1.ncdc.noaa.gov/pub/data/igra/data/data-por/
on linux
Unzip the data and put the files in a subdirectory named data
#!/bin/sh
# Three-stage pipeline:
#   1. awk.split reads every file under data/ and prints one line per sounding
#   2. awk.av groups those lines by date and averages them
#   3. sort orders the result numerically on the leading column
# The final result is written to data.txt.
awk -f awk.split data/* \
  | awk -f awk.av \
  | sort -n -k1 > data.txt
where awk.split is:
# awk.split -- for each sounding, print the first level above 8000 whose
# value in columns 23-27 is greater than at the previous usable level.
#
# Input : fixed-column records; a line starting with "#" is a sounding
#         header, every other line is a level record.
#         (Presumably columns 17-21 are geopotential height and columns
#         23-27 temperature, as in the IGRA layout -- confirm against the
#         dataset's format description.)
# Output: one line per sounding: <header cols 14-23> <height> <temperature>
#
# State : d   = date text copied from the current header
#         t1  = temperature of the previous usable level (0 at sounding start)
#         x   = 1 once a level has been reported for the current sounding
BEGIN { d = 0; gph = 0; t2 = 0; x = 0; t1 = 0 }

# Header record: remember the date and reset the per-sounding state.
# The rule ends with `next` so the header's own characters in columns
# 23-27 are never stored as the "previous temperature".
/^#/ {
    d = substr($0, 14, 10)
    t1 = 0
    x = 0
    next
}

# Level record.
{
    g = substr($0, 17, 5) + 0
    t = substr($0, 23, 5) + 0

    # Values below -8000 are skipped entirely (presumably the dataset's
    # missing-value sentinels); they also do not update t1.
    if (g < -8000 || t < -8000)
        next

    # First level above 8000 that is warmer than the previous usable level.
    if (t > t1 && x == 0 && g > 8000) {
        x = 1
        gph = g
        t2 = t
        print d, " ", gph, " ", t2
        next
    }

    t1 = t
}
and awk.av is:
BEGIN {
    # Optional: this whole BEGIN rule can be omitted. It was added late to
    # pre-register every calendar date for early years with lots of blanks.
    # Each date is stored in list[] under its YYYYMMDD key.
    # Note: the END rule only prints dates whose count is non-zero, so the
    # entries created here do not by themselves add output rows.
    #
    # The year range defaults to 1905..2021 and may be overridden, e.g.
    #   awk -v first_year=1950 -v last_year=2022 -f awk.av
    if (first_year == "") first_year = 1905
    if (last_year == "")  last_year  = 2021

    # Days per month for a non-leap year; February is adjusted below.
    split("31 28 31 30 31 30 31 31 30 31 30 31", mdays, " ")

    for (i = first_year; i <= last_year; i++) {
        leap = ((i % 4 == 0 && i % 100 != 0) || i % 400 == 0)
        for (j = 1; j <= 12; j++) {
            ndays = mdays[j] + ((j == 2 && leap) ? 1 : 0)
            # k runs through the last day of the month inclusive.
            for (k = 1; k <= ndays; k++) {
                str = sprintf("%4d%02d%02d", i, j, k)
                list[str] = str
            }
        }
    }
}
#end of begin
# Per input line: fields 1-3 joined form the date key; field 4 is summed
# and the number of lines seen for that key is counted.
{
    key = $1 $2 $3
    list[key] = key
    hlist[key] += $4
    clist[key]++
}
# After all input: for every date key with at least one observation, print
# the key, the observation count, and the truncated mean of the summed field.
END {
    for (i in list) {
        if (clist[i] == 0)
            continue    # no observations for this date: nothing to print
        print list[i], clist[i], int(hlist[i] / clist[i])
    }
}