# Shell command (not Python): installs the optimuspyspark package used by the snippets below.
pip install optimuspyspark
# Put your db credentials here.
# NOTE(review): `op` is not defined in this snippet; it is assumed to be an
# already-created Optimus session object -- confirm against the preceding code.
# NOTE(review): host, user and password are hard-coded sample values; load
# real credentials from configuration or the environment, never from source.
db = op.connect(
    driver="mysql",
    host="165.227.196.70",
    database="optimus",
    user="test",
    password="test",
)

# Convert a table to a dataframe.
# `.table()` is called on the resulting dataframe (presumably to display it
# -- confirm against the Optimus API).
db.table_to_df("test_data").table()
# This is a custom function.
def func(value, arg):
    """Return a fixed marker string, ignoring both arguments.

    Args:
        value: Unused by this implementation.
        arg: Unused by this implementation.

    Returns:
        The constant string ``"this was a number"``.
    """
    return "this was a number"


# Build `new_df` by chaining transformations on `df`.
# NOTE(review): `df` is not defined in this snippet; it is assumed to be a
# dataframe created earlier -- confirm against the preceding code.
# The chain is wrapped in parentheses instead of backslash continuations so a
# stray trailing space cannot break a continuation line.
# NOTE(review): the pattern "dd-MM-YYYY" uses upper-case `YYYY`, which in
# Java-style date patterns means week-based year; `yyyy` may have been
# intended -- confirm before changing, the strings are left untouched here.
new_df = (
    df
    .rows.sort("rank", "desc")
    .withColumn('new_age', df.age)
    .cols.lower(["names", "function"])
    .cols.date_transform("date arrival", "yyyy/MM/dd", "dd-MM-YYYY")
    .cols.years_between("date arrival", "dd-MM-YYYY", output_cols="from arrival")
    .cols.remove_accents("names")
    .cols.remove_special_chars("names")
    .rows.drop(df["rank"] > 8)
    .cols.rename(str.lower)
    .cols.trim("*")
    .cols.unnest("japanese name", output_cols="other names")
    .cols.unnest("last position seen", separator=",", output_cols="pos")
    .cols.drop([
        "last position seen",
        "japanese name",
        "date arrival",
        "cybertronian",
        "nulltype",
    ])
)