:3
commit aa647ec057 (parent 03043b2e5c)
932 changed files with 145602 additions and 111 deletions
BIN utils/pageupdater/__pycache__/pages.cpython-312.pyc (Normal file)
Binary file not shown.
119 utils/pageupdater/commit_post_history.py (Normal file)
@@ -0,0 +1,119 @@
import json
import pages
import datetime
import os
import hashlib

# Create a JSON string that stores information about each page over time
# Schema:
# {
#   "last_generated": "2025-01-01 00:00:00",
#   "post_history": [
#     {
#       "2025-01-01": [
#         {
#           "/blog/test.md": {
#             "op": "add",  # add, edit, delete
#             "hash": "SHA256 HERE",
#             "char_count": 1234,
#             "word_count": 123,
#             "title": "Test",
#             "description": "This is a test"
#           }
#         }
#       ]
#     }
#   ]
# }
# pages_info is to be sourced from pages, and state is the JSON string of the previous state.
# This checks for added, removed, and edited pages compared to the state and appends the changes.
def generate_post_history(pages_info, state):
    # Load the state JSON string into a dictionary
    state_dict = json.loads(state)

    # Get the current date and time
    current_date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Initialize the post history list
    post_history = []

    # Build a dict of files and their hashes from the previous state,
    # replaying entries from least recent to most recent, including deletes
    current_files = dict()
    for entry in state_dict["post_history"]:
        for date, posts in entry.items():
            for post in posts:
                for path, page in post.items():
                    if page.get("op") == "delete":
                        if path in current_files:
                            del current_files[path]
                    else:
                        current_files[path] = page

    print(current_files)

    # Check for deleted files
    for path, page in current_files.items():
        if path not in pages_info:
            post_history.append({path: {
                "op": "delete", "hash": page["hash"], "title": page["title"]
            }})

    # Use the map of hashes to compare with the current pages_info
    # to identify adds and edits
    for path, page in pages_info.items():
        if path in current_files:
            if current_files[path]["hash"] != page["hash"]:
                post_history.append({path: {
                    "op": "edit",
                    "hash": page["hash"],
                    "char_count": page["char_count"],
                    "word_count": page["word_count"],
                    "title": page["metadata"]["title"],
                    "description": page["metadata"]["description"],
                    "tags": page["metadata"]["tags"]
                }})
        else:
            post_history.append({path: {
                "op": "add",
                "hash": page["hash"],
                "char_count": page["char_count"],
                "word_count": page["word_count"],
                "title": page["metadata"]["title"],
                "description": page["metadata"]["description"],
                "tags": page["metadata"]["tags"]
            }})

    # Append the post history list to the state dictionary
    state_dict["post_history"].append({current_date: post_history})

    # Update the last_generated field
    state_dict["last_generated"] = current_date

    # Convert the dictionary to a JSON string and return it
    return json.dumps(state_dict, indent=2)


# Get the pages info from the public/blog directory
pages_info = pages.get_pages_info("", "public/blog")

# Load the previous state from the assets/post_history.json file
try:
    with open("assets/post_history.json", "r") as f:
        state = f.read()
except FileNotFoundError:
    state = "{\"post_history\": []}"

# Generate the post history JSON string
post_history = generate_post_history(pages_info, state)

# Output to assets/post_history.json (overwriting)
with open("assets/post_history.json", "w") as f:
    f.write(post_history)
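For reference, a minimal sketch of how generate_post_history behaves on a first run. The page path, hash, and metadata below are invented for illustration, and generate_post_history is assumed to be available in scope (importing commit_post_history as a module would also run its top-level file I/O):

import json

# Hypothetical pages_info entry, mirroring the shape produced by pages.get_pages_info
fake_pages_info = {
    "public/blog/test.md": {
        "local_path": "/test.md",
        "metadata": {"title": "Test", "description": "This is a test", "tags": ["demo"]},
        "hash": "deadbeef",
        "char_count": 1234,
        "word_count": 123,
        "path": "public/blog/test.md",
    }
}

# Empty previous state, matching the FileNotFoundError fallback above
empty_state = "{\"post_history\": []}"

new_state = json.loads(generate_post_history(fake_pages_info, empty_state))
# The single page is recorded as an "add" under the current timestamp, e.g.:
# new_state["post_history"][-1] ==
#     {"2025-01-01 12:00:00": [{"public/blog/test.md": {"op": "add", ...}}]}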
59 utils/pageupdater/page_list_gen.py (Normal file)
@@ -0,0 +1,59 @@
import json
import pages
import datetime

# Output a JSON string in this schema:
# {
#   "posts": [
#     {
#       "metadata": {
#         "description": "A guide to using RomFS on the 3DS. (Old)",
#         "date": "2025-01-01",
#         "tags": ["3ds", "programming", "c", "devkitpro", "old"],
#         "previous": "old3ds_helloworld.md",
#         "next": "old3ds_touchscreen.md"
#       },
#       "id": "old3ds_romfs",
#       "url": "/blog/old3ds_romfs.md"
#     },
#     {
#       "metadata": {
#         "description": "A curated list of awesome stuff I like",
#         "date": "2024-11-26",
#         "tags": ["awesome", "curated"]
#       },
#       "id": "awesome",
#       "url": "/blog/awesome.md"
#     }
#   ]
# }
def generate_page_list(pages_info):
    # Initialize the list of pages
    page_list = []

    # Iterate over the pages_info dictionary
    for path, page in pages_info.items():
        # Create a dictionary with the metadata and path of the page
        page_dict = {
            "metadata": page["metadata"],
            "id": page["local_path"],
            "url": page["local_path"],
            "hash": page["hash"],
        }

        # Append the page dictionary to the page list
        page_list.append(page_dict)

    # Change any dates in metadata.date to a "YYYY-MM-DD" format string
    for page in page_list:
        if "date" in page["metadata"]:
            page["metadata"]["date"] = page["metadata"]["date"].strftime("%Y-%m-%d")

    # Create a dictionary with the page list
    page_list_dict = {
        "last_generated": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "posts": page_list
    }

    # Convert the dictionary to a JSON string and return it
    return json.dumps(page_list_dict, indent=2)


# Generate and print the page list
post_list = generate_page_list(pages.get_pages_info("", "public/blog"))
print(post_list)

# Output to assets/blog_list.json (overwriting)
with open("assets/blog_list.json", "w") as f:
    f.write(post_list)
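A similar sketch for generate_page_list, again with an invented pages_info entry and the function assumed to be in scope. It shows that "id" and "url" are both taken from "local_path" and that YAML dates are normalised to "YYYY-MM-DD" strings:

import datetime

fake_pages_info = {
    "public/blog/awesome.md": {
        "local_path": "/awesome.md",
        "metadata": {
            "title": "Awesome",
            "description": "A curated list of awesome stuff I like",
            "date": datetime.date(2024, 11, 26),  # frontmatter parses YAML dates to date objects
            "tags": ["awesome", "curated"],
        },
        "hash": "cafebabe",
        "char_count": 2048,
        "word_count": 300,
        "path": "public/blog/awesome.md",
    }
}

print(generate_page_list(fake_pages_info))
# Produces a "last_generated" timestamp plus one "posts" entry whose
# "metadata.date" is "2024-11-26" and whose "id" and "url" are both "/awesome.md".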
89 utils/pageupdater/pages.py (Normal file)
@@ -0,0 +1,89 @@
import os
import hashlib
from typing import Dict

# Front Matter reader
import frontmatter


def get_metadata(file_path):
    file = frontmatter.load(file_path)
    return file.metadata


def get_sha256_hash(file_path):
    # Open the file in binary mode and read its contents
    with open(file_path, "rb") as f:
        content = f.read()
    # Create a sha256 hash object, feed it the file content,
    # and return the hexadecimal representation of the hash
    sha256_hash = hashlib.sha256()
    sha256_hash.update(content)
    return sha256_hash.hexdigest()


def get_char_count(file_path):
    # Read the file and return its character count
    with open(file_path, "r") as f:
        content = f.read()
    return len(content)


def get_word_count(file_path):
    # Read the file and return its word count
    with open(file_path, "r") as f:
        content = f.read()
    return len(content.split())


# Dict of pages containing metadata (Front Matter), sha256 hash of the page content,
# page character count, page word count, and path
def get_pages_info(search_directory, root_directory):
    # Scan the search directory for .md files;
    # when seeing a directory, recursively call this function

    # Initialize the dictionary
    page_info = {}

    current_directory = root_directory + search_directory

    # Get the list of files and directories in the search directory
    files = os.listdir(current_directory)

    # Iterate over the files and directories
    for file in files:
        # Get the full path of the file
        full_path = os.path.join(current_directory, file)
        local_path = full_path.replace(root_directory, "")

        # If the file is a directory, recursively call this function
        if os.path.isdir(full_path):
            page_info.update(get_pages_info(search_directory + "/" + file, root_directory))
        # If the file is a markdown file, get the metadata
        elif file.endswith(".md"):
            # Get the metadata
            metadata = get_metadata(full_path)

            # Get the sha256 hash of the content
            sha256_hash = get_sha256_hash(full_path)

            # Get the character count of the content
            char_count = get_char_count(full_path)

            # Get the word count of the content
            word_count = get_word_count(full_path)

            # Add the metadata, sha256 hash, character count, word count, and path to the dictionary
            page_info[full_path] = {
                "local_path": local_path,
                "metadata": metadata,
                "hash": sha256_hash,
                "char_count": char_count,
                "word_count": word_count,
                "path": full_path,
            }

    return page_info
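pages.py has no top-level script code, so it can be imported directly. A small usage sketch, assuming it is run from the repository root with a public/blog directory of Markdown posts and the python-frontmatter package installed:

import pages

# Keys are full paths under public/blog; each value holds local_path, metadata,
# hash, char_count, word_count, and path, as built in get_pages_info above.
info = pages.get_pages_info("", "public/blog")
for path, page in info.items():
    print(path, page["hash"][:8], page["word_count"], page["metadata"].get("title"))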
94 utils/pageupdater/rss_xml_gen.py (Normal file)
@@ -0,0 +1,94 @@
import json
import datetime
import pages
import os
import hashlib
import xml.etree.ElementTree as ET
from xml.dom import minidom


# Key function for sorted() to order the channel children by pubDate, newest first.
# The RFC 822 date is parsed so the ordering is chronological rather than lexicographic.
def sort_func(x):
    pub_date = x.find("pubDate")
    if pub_date is not None and pub_date.text:
        return datetime.datetime.strptime(pub_date.text, "%a, %d %b %Y %H:%M:%S +0000")
    return datetime.datetime.min


# Take a post history JSON string (see commit_post_history.py for the schema)
# and generate an RSS XML string
def history_to_rss(history):
    # Load the history JSON string into a dictionary
    history_dict = json.loads(history)

    # First, iterate over the posts in order of dates, recent to oldest.
    # Then, each iteration, add the RSS item to the channel.
    # Finally, return the XML string.
    root = ET.Element("rss")
    root.set("version", "2.0")
    channel = ET.SubElement(root, "channel")
    title = ET.SubElement(channel, "title")
    title.text = "Post History"
    link = ET.SubElement(channel, "link")
    link.text = "https://thefelidae.github.io"
    description = ET.SubElement(channel, "description")
    description.text = "Post history of the site"

    for entry in history_dict["post_history"]:
        for date, posts in entry.items():
            for post in posts:
                for path, page in post.items():
                    item = ET.SubElement(channel, "item")
                    title = ET.SubElement(item, "title")
                    link = ET.SubElement(item, "link")
                    link.text = "https://thefelidae.github.io/blog/?post=" + path
                    description = ET.SubElement(item, "description")
                    print(page)
                    if "op" in page:
                        if page["op"] == "delete":
                            title.text = "Deleted article: " + page["title"]
                        elif page["op"] == "edit":
                            title.text = "Edited article: " + page["title"]
                            description.text = "Edited this article: It now sits at " + str(page["char_count"]) + " characters and " + str(page["word_count"]) + " words"
                            description.text += "\n\n" + page["description"]
                        elif page["op"] == "add":
                            title.text = "New article: " + page["title"]
                            description.text = page["description"]
                    else:
                        title.text = "Article: " + page["title"]
                        description.text = page["description"]

                    guid = ET.SubElement(item, "guid")
                    # Take the current contents of the XML tree, hash it,
                    # and set the guid to the hash
                    guid.text = hashlib.sha256(ET.tostring(root, encoding="unicode").encode()).hexdigest()
                    # It's not a permalink
                    guid.set("isPermaLink", "false")
                    pubDate = ET.SubElement(item, "pubDate")
                    # Must be formatted as RFC 822 - the stored format is 2025-01-01 00:00:00
                    pubDate.text = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S").strftime("%a, %d %b %Y %H:%M:%S +0000")

                    # Add a category for each tag (delete entries carry no tags)
                    for tag in page.get("tags", []):
                        category = ET.SubElement(item, "category")
                        category.text = tag

    # Ensure the XML <item>s are sorted by date, newest first.
    # This is done by sorting the children of the channel element
    # based on the pubDate element.
    channel[:] = sorted(channel, key=sort_func, reverse=True)

    # Return the pretty-printed XML string
    return minidom.parseString(ET.tostring(root)).toprettyxml()


# Generate the RSS XML string from assets/post_history.json and print it
with open("assets/post_history.json", "r") as f:
    post_history = f.read()

rss_xml = history_to_rss(post_history)
print(rss_xml)

# Write the RSS XML string to public/rss.xml
with open("public/rss.xml", "w") as f:
    f.write(rss_xml)
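A minimal end-to-end sketch of history_to_rss on an invented one-entry history. As with the earlier sketches, the function is assumed to be in scope, since importing rss_xml_gen as a module would also run its top-level file I/O:

import json

# Hypothetical history, shaped like the output of commit_post_history.py
fake_history = json.dumps({
    "last_generated": "2025-01-01 00:00:00",
    "post_history": [
        {"2025-01-01 00:00:00": [
            {"public/blog/test.md": {
                "op": "add", "hash": "deadbeef",
                "char_count": 1234, "word_count": 123,
                "title": "Test", "description": "This is a test",
                "tags": ["demo"],
            }}
        ]}
    ],
})

print(history_to_rss(fake_history))
# Yields one <item> with title "New article: Test", a link ending in
# "?post=public/blog/test.md", one <category>demo</category>, and a pubDate of
# "Wed, 01 Jan 2025 00:00:00 +0000".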
@@ -1,87 +0,0 @@
#!/bin/sh

# This is meant to track the contents of /public/blog/*.md
# to maintain an up-to-date list of blog posts.
#
# It outputs JSON to stdout:
# Given a file /public/blog/2019-01-01-foo-bar.md of contents:
#
# ---
# title: Foo Bar
# date: 2019-01-01
# arbitrary_key: arbitrary_value
# ---
# # Foo Bar
#
# it will output:
# {
#   posts: [
#     {
#       id: "2019-01-01-foo-bar",
#       title: "Foo Bar",
#       date: "2019-01-01",
#       arbitrary_key: "arbitrary_value",
#       url: "/blog/2019-01-01-foo-bar"
#     }
#   ]
# }

# It should also read the YAML Front Matter of each post
# and place all the keys in the JSON output.

# The script should be run from the root of the project.

# SCRIPT ENTRY
echo "{"
echo " \"posts\": ["

# Front-Matter Extraction (procedure)
# Given data, extract the YAML Front Matter header
# and output it as JSON.
extract_front_matter() {
    local data="$1"

    # Remove everything after the second '---';
    # everything between the first and second '---' is the front matter
    # (extracted with awk)
    local front_matter=$(echo "$data" | awk '/---/ && !f {f=1; next} f; /---/ {exit}')

    local processed=$(echo "$front_matter" | sed '1d;$d' | sed 's/^/ "/' | sed 's/: /": "/' | sed 's/$/"/' | tr '\n' ',' | sed 's/,$//' | sed 's/"tags": "\[\(.*\)\]"/"tags": \[\1\]/g' | sed "s/'/\"/g")

    echo "$processed"
}

# Find all files in /public/blog/*.md,
# process and extract the front matter of each file,
# and output the JSON representation of the front matter.
extract_files() {
    local files=$(find public/blog -type f -name "*.md")
    for file in $files; do
        # Enter, create {} for each file
        echo " {"

        echo " \"metadata\": {"

        local data=$(cat $file)
        local front_matter=$(extract_front_matter "$data")
        echo "$front_matter"

        echo " },"

        # Add the id and url
        local id=$(echo "$file" | sed 's/public\/blog\///' | sed 's/\.md//')
        echo " \"id\": \"$id\","
        echo " \"url\": \"/blog/$id.md\""

        # Exit, close {} for each file
        echo " },"
    done
}

# Process all files, dropping the trailing comma after the last entry
extract_files | sed '$s/,$//'

# SCRIPT EXIT
echo " ]"
echo "}"