diff --git a/Day2_Core_Data_Processing.md b/Day2_Core_Data_Processing.md
index 0f9303c..086d64e 100644
--- a/Day2_Core_Data_Processing.md
+++ b/Day2_Core_Data_Processing.md
@@ -46,3 +46,18 @@ Transform the raw data into structured, insightful information using Python’s
 reviews_df["sentiment_score"] = reviews_df["review_text"].apply(
     lambda text: sid.polarity_scores(text)["compound"]
 )
+
+
+
+### Aggregate sentiment data:
+
+sentiment_summary = reviews_df.groupby("product_id").agg(
+    avg_sentiment_score=("sentiment_score", "mean"),
+    num_reviews=("review_text", "count")
+).reset_index()
+
+
+### Merge with sales data:
+
+final_df = pd.merge(sales_summary, sentiment_summary, on="product_id", how="left")
+return json.loads(final_df.to_json(orient="records"))
\ No newline at end of file