:3
commit aa647ec057 (parent 03043b2e5c)
932 changed files with 145602 additions and 111 deletions
BIN utils/pageupdater/__pycache__/pages.cpython-312.pyc (Normal file)
Binary file not shown.
119 utils/pageupdater/commit_post_history.py (Normal file)
@@ -0,0 +1,119 @@
import json
import pages
import datetime
import os
import hashlib

# Create a JSON string that stores information about each page over time
# Schema:
# {
#   "last_generated": "2025-01-01 00:00:00",
#   "post_history": [
#     {
#       "2025-01-01": [
#         {
#           "/blog/test.md": {
#             "op": "add",  # add, edit, delete
#             "hash": "SHA256 HERE",
#             "char_count": 1234,
#             "word_count": 123,
#             "title": "Test",
#             "description": "This is a test"
#           }
#         }
#       ]
#     }
#   ]
# }
# pages_info is to be sourced from pages, and state is the JSON string of the previous state.
# This checks for added, removed, and edited pages compared to the state and appends the changes.
def generate_post_history(pages_info, state):
    # Load the state JSON string into a dictionary
    state_dict = json.loads(state)

    # Get the current date and time
    current_date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Initialize the post history list
    post_history = []

    # Build a dict of files and their hashes from the previous state,
    # replaying entries from least recent to most recent, including deletes
    current_files = dict()
    for entry in state_dict["post_history"]:
        for date, posts in entry.items():
            for post in posts:
                for path, page in post.items():
                    if page.get("op") == "delete":
                        if path in current_files:
                            del current_files[path]
                    else:
                        current_files[path] = page

    print(current_files)

    # Check for deleted files
    for path, page in current_files.items():
        if path not in pages_info:
            post_history.append({path: {
                "op": "delete", "hash": page["hash"], "title": page["title"]
            }})

    # Use the map of hashes to compare with the current pages_info
    # to identify adds and edits
    for path, page in pages_info.items():
        if path in current_files:
            if current_files[path]["hash"] != page["hash"]:
                post_history.append({path: {
                    "op": "edit",
                    "hash": page["hash"],
                    "char_count": page["char_count"],
                    "word_count": page["word_count"],
                    "title": page["metadata"]["title"],
                    "description": page["metadata"]["description"],
                    "tags": page["metadata"]["tags"]
                }})
        else:
            post_history.append({path: {
                "op": "add",
                "hash": page["hash"],
                "char_count": page["char_count"],
                "word_count": page["word_count"],
                "title": page["metadata"]["title"],
                "description": page["metadata"]["description"],
                "tags": page["metadata"]["tags"]
            }})

    # Append the post history list to the state dictionary
    state_dict["post_history"].append({current_date: post_history})

    # Update the last_generated field
    state_dict["last_generated"] = current_date

    # Convert the dictionary to a JSON string and return it
    return json.dumps(state_dict, indent=2)


# Get the pages info from the public/blog directory
pages_info = pages.get_pages_info("", "public/blog")

# Load the previous state from the assets/post_history.json file
try:
    with open("assets/post_history.json", "r") as f:
        state = f.read()
except FileNotFoundError:
    state = "{\"post_history\": []}"

# Generate the post history JSON string
post_history = generate_post_history(pages_info, state)

# Output to assets/post_history.json (overwriting)
with open("assets/post_history.json", "w") as f:
    f.write(post_history)
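For reference, a minimal sketch of how generate_post_history behaves on a first run. The page path, hash, and metadata below are invented for illustration, and generate_post_history is assumed to be available in scope (importing commit_post_history as a module would also run its top-level file I/O):

import json

# Hypothetical pages_info entry, mirroring the shape produced by pages.get_pages_info
fake_pages_info = {
    "public/blog/test.md": {
        "local_path": "/test.md",
        "metadata": {"title": "Test", "description": "This is a test", "tags": ["demo"]},
        "hash": "deadbeef",
        "char_count": 1234,
        "word_count": 123,
        "path": "public/blog/test.md",
    }
}

# Empty previous state, matching the FileNotFoundError fallback above
empty_state = "{\"post_history\": []}"

new_state = json.loads(generate_post_history(fake_pages_info, empty_state))
# The single page is recorded as an "add" under the current timestamp, e.g.:
# new_state["post_history"][-1] ==
#     {"2025-01-01 12:00:00": [{"public/blog/test.md": {"op": "add", ...}}]}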
59 utils/pageupdater/page_list_gen.py (Normal file)
@@ -0,0 +1,59 @@
import json
import pages
import datetime

# Output a JSON string in this schema:
# {
#   "posts": [
#     {
#       "metadata": {
#         "description": "A guide to using RomFS on the 3DS. (Old)",
#         "date": "2025-01-01",
#         "tags": ["3ds", "programming", "c", "devkitpro", "old"],
#         "previous": "old3ds_helloworld.md",
#         "next": "old3ds_touchscreen.md"
#       },
#       "id": "old3ds_romfs",
#       "url": "/blog/old3ds_romfs.md"
#     },
#     {
#       "metadata": {
#         "description": "A curated list of awesome stuff I like",
#         "date": "2024-11-26",
#         "tags": ["awesome", "curated"]
#       },
#       "id": "awesome",
#       "url": "/blog/awesome.md"
#     }
#   ]
# }
def generate_page_list(pages_info):
    # Initialize the list of pages
    page_list = []

    # Iterate over the pages_info dictionary
    for path, page in pages_info.items():
        # Create a dictionary with the metadata and path of the page
        page_dict = {
            "metadata": page["metadata"],
            "id": page["local_path"],
            "url": page["local_path"],
            "hash": page["hash"],
        }

        # Append the page dictionary to the page list
        page_list.append(page_dict)

    # Change any dates in metadata.date to a "YYYY-MM-DD" format string
    for page in page_list:
        if "date" in page["metadata"]:
            page["metadata"]["date"] = page["metadata"]["date"].strftime("%Y-%m-%d")

    # Create a dictionary with the page list
    page_list_dict = {
        "last_generated": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "posts": page_list
    }

    # Convert the dictionary to a JSON string and return it
    return json.dumps(page_list_dict, indent=2)


# Generate and print the page list
post_list = generate_page_list(pages.get_pages_info("", "public/blog"))
print(post_list)

# Output to assets/blog_list.json (overwriting)
with open("assets/blog_list.json", "w") as f:
    f.write(post_list)
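A similar sketch for generate_page_list, again with an invented pages_info entry and the function assumed to be in scope. It shows that "id" and "url" are both taken from "local_path" and that YAML dates are normalised to "YYYY-MM-DD" strings:

import datetime

fake_pages_info = {
    "public/blog/awesome.md": {
        "local_path": "/awesome.md",
        "metadata": {
            "title": "Awesome",
            "description": "A curated list of awesome stuff I like",
            "date": datetime.date(2024, 11, 26),  # frontmatter parses YAML dates to date objects
            "tags": ["awesome", "curated"],
        },
        "hash": "cafebabe",
        "char_count": 2048,
        "word_count": 300,
        "path": "public/blog/awesome.md",
    }
}

print(generate_page_list(fake_pages_info))
# Produces a "last_generated" timestamp plus one "posts" entry whose
# "metadata.date" is "2024-11-26" and whose "id" and "url" are both "/awesome.md".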
89 utils/pageupdater/pages.py (Normal file)
@@ -0,0 +1,89 @@
import os
import hashlib
from typing import Dict

# Front Matter reader
import frontmatter


def get_metadata(file_path):
    file = frontmatter.load(file_path)
    return file.metadata


def get_sha256_hash(file_path):
    # Open the file in binary mode and read its contents
    with open(file_path, "rb") as f:
        content = f.read()
    # Create a sha256 hash object, feed it the file content,
    # and return the hexadecimal representation of the hash
    sha256_hash = hashlib.sha256()
    sha256_hash.update(content)
    return sha256_hash.hexdigest()


def get_char_count(file_path):
    # Read the file and return its character count
    with open(file_path, "r") as f:
        content = f.read()
    return len(content)


def get_word_count(file_path):
    # Read the file and return its word count
    with open(file_path, "r") as f:
        content = f.read()
    return len(content.split())


# Dict of pages containing metadata (Front Matter), sha256 hash of the page content,
# page character count, page word count, and path
def get_pages_info(search_directory, root_directory):
    # Scan the search directory for .md files;
    # when seeing a directory, recursively call this function

    # Initialize the dictionary
    page_info = {}

    current_directory = root_directory + search_directory

    # Get the list of files and directories in the search directory
    files = os.listdir(current_directory)

    # Iterate over the files and directories
    for file in files:
        # Get the full path of the file
        full_path = os.path.join(current_directory, file)
        local_path = full_path.replace(root_directory, "")

        # If the file is a directory, recursively call this function
        if os.path.isdir(full_path):
            page_info.update(get_pages_info(search_directory + "/" + file, root_directory))
        # If the file is a markdown file, get the metadata
        elif file.endswith(".md"):
            # Get the metadata
            metadata = get_metadata(full_path)

            # Get the sha256 hash of the content
            sha256_hash = get_sha256_hash(full_path)

            # Get the character count of the content
            char_count = get_char_count(full_path)

            # Get the word count of the content
            word_count = get_word_count(full_path)

            # Add the metadata, sha256 hash, character count, word count, and path to the dictionary
            page_info[full_path] = {
                "local_path": local_path,
                "metadata": metadata,
                "hash": sha256_hash,
                "char_count": char_count,
                "word_count": word_count,
                "path": full_path,
            }

    return page_info
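pages.py has no top-level script code, so it can be imported directly. A small usage sketch, assuming it is run from the repository root with a public/blog directory of Markdown posts and the python-frontmatter package installed:

import pages

# Keys are full paths under public/blog; each value holds local_path, metadata,
# hash, char_count, word_count, and path, as built in get_pages_info above.
info = pages.get_pages_info("", "public/blog")
for path, page in info.items():
    print(path, page["hash"][:8], page["word_count"], page["metadata"].get("title"))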
94 utils/pageupdater/rss_xml_gen.py (Normal file)
@@ -0,0 +1,94 @@
import json
import datetime
import pages
import os
import hashlib
import xml.etree.ElementTree as ET
from xml.dom import minidom


# Key function for sorted() to order the channel children by pubDate, newest first.
# The RFC 822 date is parsed so the ordering is chronological rather than lexicographic.
def sort_func(x):
    pub_date = x.find("pubDate")
    if pub_date is not None and pub_date.text:
        return datetime.datetime.strptime(pub_date.text, "%a, %d %b %Y %H:%M:%S +0000")
    return datetime.datetime.min


# Take a post history JSON string (see commit_post_history.py for the schema)
# and generate an RSS XML string
def history_to_rss(history):
    # Load the history JSON string into a dictionary
    history_dict = json.loads(history)

    # First, iterate over the posts in order of dates, recent to oldest.
    # Then, each iteration, add the RSS item to the channel.
    # Finally, return the XML string.
    root = ET.Element("rss")
    root.set("version", "2.0")
    channel = ET.SubElement(root, "channel")
    title = ET.SubElement(channel, "title")
    title.text = "Post History"
    link = ET.SubElement(channel, "link")
    link.text = "https://thefelidae.github.io"
    description = ET.SubElement(channel, "description")
    description.text = "Post history of the site"

    for entry in history_dict["post_history"]:
        for date, posts in entry.items():
            for post in posts:
                for path, page in post.items():
                    item = ET.SubElement(channel, "item")
                    title = ET.SubElement(item, "title")
                    link = ET.SubElement(item, "link")
                    link.text = "https://thefelidae.github.io/blog/?post=" + path
                    description = ET.SubElement(item, "description")
                    print(page)
                    if "op" in page:
                        if page["op"] == "delete":
                            title.text = "Deleted article: " + page["title"]
                        elif page["op"] == "edit":
                            title.text = "Edited article: " + page["title"]
                            description.text = "Edited this article: It now sits at " + str(page["char_count"]) + " characters and " + str(page["word_count"]) + " words"
                            description.text += "\n\n" + page["description"]
                        elif page["op"] == "add":
                            title.text = "New article: " + page["title"]
                            description.text = page["description"]
                    else:
                        title.text = "Article: " + page["title"]
                        description.text = page["description"]

                    guid = ET.SubElement(item, "guid")
                    # Take the current contents of the XML tree, hash it,
                    # and set the guid to the hash
                    guid.text = hashlib.sha256(ET.tostring(root, encoding="unicode").encode()).hexdigest()
                    # It's not a permalink
                    guid.set("isPermaLink", "false")
                    pubDate = ET.SubElement(item, "pubDate")
                    # Must be formatted as RFC 822 - the stored format is 2025-01-01 00:00:00
                    pubDate.text = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S").strftime("%a, %d %b %Y %H:%M:%S +0000")

                    # Add a category for each tag (delete entries carry no tags)
                    for tag in page.get("tags", []):
                        category = ET.SubElement(item, "category")
                        category.text = tag

    # Ensure the XML <item>s are sorted by date, newest first.
    # This is done by sorting the children of the channel element
    # based on the pubDate element.
    channel[:] = sorted(channel, key=sort_func, reverse=True)

    # Return the pretty-printed XML string
    return minidom.parseString(ET.tostring(root)).toprettyxml()


# Generate the RSS XML string from assets/post_history.json and print it
with open("assets/post_history.json", "r") as f:
    post_history = f.read()

rss_xml = history_to_rss(post_history)
print(rss_xml)

# Write the RSS XML string to public/rss.xml
with open("public/rss.xml", "w") as f:
    f.write(rss_xml)
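A minimal end-to-end sketch of history_to_rss on an invented one-entry history. As with the earlier sketches, the function is assumed to be in scope, since importing rss_xml_gen as a module would also run its top-level file I/O:

import json

# Hypothetical history, shaped like the output of commit_post_history.py
fake_history = json.dumps({
    "last_generated": "2025-01-01 00:00:00",
    "post_history": [
        {"2025-01-01 00:00:00": [
            {"public/blog/test.md": {
                "op": "add", "hash": "deadbeef",
                "char_count": 1234, "word_count": 123,
                "title": "Test", "description": "This is a test",
                "tags": ["demo"],
            }}
        ]}
    ],
})

print(history_to_rss(fake_history))
# Yields one <item> with title "New article: Test", a link ending in
# "?post=public/blog/test.md", one <category>demo</category>, and a pubDate of
# "Wed, 01 Jan 2025 00:00:00 +0000".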
@@ -1,87 +0,0 @@
#!/bin/sh

# This is meant to track the contents of /public/blog/*.md
# to maintain an up-to-date list of blog posts.
#
# It outputs JSON to stdout:
# Given a file /public/blog/2019-01-01-foo-bar.md of contents:
#
# ---
# title: Foo Bar
# date: 2019-01-01
# arbitrary_key: arbitrary_value
# ---
# # Foo Bar
#
# it will output:
# {
#   posts: [
#     {
#       id: "2019-01-01-foo-bar",
#       title: "Foo Bar",
#       date: "2019-01-01",
#       arbitrary_key: "arbitrary_value",
#       url: "/blog/2019-01-01-foo-bar"
#     }
#   ]
# }

# It should also read the YAML Front Matter of each post
# and place all the keys in the JSON output.

# The script should be run from the root of the project.

# SCRIPT ENTRY
echo "{"
echo " \"posts\": ["

# Front-Matter Extraction (procedure)
# Given data, extract the YAML Front Matter header
# and output it as JSON.
extract_front_matter() {
    local data="$1"

    # Remove everything after the second '---';
    # everything between the first and second '---' is the front matter
    # (extracted with awk)
    local front_matter=$(echo "$data" | awk '/---/ && !f {f=1; next} f; /---/ {exit}')

    local processed=$(echo "$front_matter" | sed '1d;$d' | sed 's/^/ "/' | sed 's/: /": "/' | sed 's/$/"/' | tr '\n' ',' | sed 's/,$//' | sed 's/"tags": "\[\(.*\)\]"/"tags": \[\1\]/g' | sed "s/'/\"/g")

    echo "$processed"
}

# Find all files in /public/blog/*.md,
# process and extract the front matter of each file,
# and output the JSON representation of the front matter.
extract_files() {
    local files=$(find public/blog -type f -name "*.md")
    for file in $files; do
        # Enter, create {} for each file
        echo " {"

        echo " \"metadata\": {"

        local data=$(cat $file)
        local front_matter=$(extract_front_matter "$data")
        echo "$front_matter"

        echo " },"

        # Add the id and url
        local id=$(echo "$file" | sed 's/public\/blog\///' | sed 's/\.md//')
        echo " \"id\": \"$id\","
        echo " \"url\": \"/blog/$id.md\""

        # Exit, close {} for each file
        echo " },"
    done
}

# Process all files, dropping the trailing comma after the last entry
extract_files | sed '$s/,$//'

# SCRIPT EXIT
echo " ]"
echo "}"