memrise

Memrise Upload Auto with TTS Local

Required Library

python -m pip install pyttsx3
python -m pip install pydantic
python -m pip install requests

How to use the pydantic

Let create new a file json with the following content:

myjson.json

{"courses": [{"id": 6131624, "name": "Dummy", "slug": "dummy", "url": "/course/6131624/dummy/", "description": "", "photo": "https://static.memrise.com/garden/img/placeholders/course-4.png", "photo_small": "https://static.memrise.com/garden/img/placeholders/course-4.png", "photo_large": "https://static.memrise.com/garden/img/placeholders/course-4.png", "num_things": 3, "num_levels": 2, "num_learners": 1, "source": {"id": 474, "slug": "vietnamese", "name": "Vietnamese", "photo": "https://static.memrise.com/uploads/category_photos/vietnamese.png", "parent_id": 614, "index": 966, "language_code": "vi"}, "target": {"id": 6, "slug": "english", "name": "English", "photo": "https://static.memrise.com/uploads/category_photos/en.png", "parent_id": 578, "index": 1051, "language_code": "en"}, "learned": 0, "review": 0, "ignored": 0, "ltm": 0, "difficult": 0, "category": {"name": "English", "photo": "https://static.memrise.com/uploads/category_photos/en.png"}, "next_session": {"recommendation_id": "3d0d34b4-41f1-454b-a019-150d67787e87", "next_session": {"session_type": "learn", "is_enabled": true, "counter": 0, "url": "/aprender/learn?course_id=6131624?recommendation_id=3d0d34b4-41f1-454b-a019-150d67787e87", "is_pro": false}, "selector": [], "is_unlocked": false}, "percent_complete": 0}], "to_review_total": 0, "has_more_courses": true}

The json file has 3 major informations:

  • courses : this informations has list datatype
  • to_review_total : has the integer type
  • has_more_courses : has the boolean type

Therefore, we will implement a class with as the following

import pydantic
import json


class MyCourse(pydantic.BaseModel):
    courses: list
    to_review_total: int
    has_more_courses: bool
    
if __name__ == "__main__":
    with open('myjson.json') as file:
        data = json.load(file) # convert to dictionary
        mycourse: MyCourse = MyCourse(**data)
    
    # Now figure it out
    # First information
    print('First information:',mycourse.courses)
    # Second information
    print('Second information:',mycourse.to_review_total)
    # Final information
    print('Final information:',mycourse.has_more_courses)

First information: [{'id': 6131624, 'name': 'Dummy', 'slug': 'dummy', 'url': '/course/6131624/dummy/', 'description': '', 'photo': 'https://static.memrise.com/garden/img/placeholders/course-4.png', 'photo_small': 'https://static.memrise.com/garden/img/placeholders/course-4.png', 'photo_large': 'https://static.memrise.com/garden/img/placeholders/course-4.png', 'num_things': 3, 'num_levels': 2, 'num_learners': 1, 'source': {'id': 474, 'slug': 'vietnamese', 'name': 'Vietnamese', 'photo': 'https://static.memrise.com/uploads/category_photos/vietnamese.png', 'parent_id': 614, 'index': 966, 'language_code': 'vi'}, 'target': {'id': 6, 'slug': 'english', 'name': 'English', 'photo': 'https://static.memrise.com/uploads/category_photos/en.png', 'parent_id': 578, 'index': 1051, 'language_code': 'en'}, 'learned': 0, 'review': 0, 'ignored': 0, 'ltm': 0, 'difficult': 0, 'category': {'name': 'English', 'photo': 'https://static.memrise.com/uploads/category_photos/en.png'}, 'next_session': {'recommendation_id': '3d0d34b4-41f1-454b-a019-150d67787e87', 'next_session': {'session_type': 'learn', 'is_enabled': True, 'counter': 0, 'url': '/aprender/learn?course_id=6131624?recommendation_id=3d0d34b4-41f1-454b-a019-150d67787e87', 'is_pro': False}, 'selector': [], 'is_unlocked': False}, 'percent_complete': 0}]
Second information: 0
Final information: True

More details :

As we can see that the courses information is a list of the course which has many informations inside. So, we can build the class course with the new schema as the following program:

import pydantic
import json
from typing import List

class course(pydantic.BaseModel):
    """course schema."""

    id: int
    name: str
    slug: str
    url: str
    description: str
    photo: str
    photo_small: str
    photo_large: str
    num_things: int
    num_levels: int
    num_learners: int
    source: dict
    target: dict
    learned: int
    review: int
    ignored: int
    ltm: int
    difficult: int
    category: dict
    percent_complete: int

class MyCourse(pydantic.BaseModel):
    courses: List[course]
    to_review_total: int
    has_more_courses: bool
    
if __name__ == "__main__":
    with open('myjson.json') as file:
        data = json.load(file) # convert to dictionary
        mycourse: MyCourse = MyCourse(**data)
    
    # Now figure it out
    # First information
    course = mycourse.courses[0]
    print('id:', course.id)
    print('name:', course.name)
    print('slug:', course.slug)
    print('url:', course.url)
    print('description:', course.description)
    print('photo:', course.photo)
    print('photo_small:', course.photo_small)
    print('photo_large:', course.photo_large)
    print('num_things:', course.num_things)
    print('num_levels:', course.num_levels)
    print('num_learners:', course.num_learners)

id: 6131624
name: Dummy
slug: dummy
url: /course/6131624/dummy/
description: 
photo: https://static.memrise.com/garden/img/placeholders/course-4.png
photo_small: https://static.memrise.com/garden/img/placeholders/course-4.png
photo_large: https://static.memrise.com/garden/img/placeholders/course-4.png
num_things: 3
num_levels: 2
num_learners: 1

Extract data from html type

from bs4 import BeautifulSoup

class EditStatus(BaseModel):
    """Learnable is present for vocabulary"""
    success: bool
    rendered: str
    
class MyLevelSchema(BaseModel):
    """Level schema"""
    id: int
    index: int
    kind: int
    title: str
    pool_id: int
    course_id: int
    learnable_ids: List[int]

class MyListLevel(BaseModel):
    """List of level schema"""
    levels: List[MyLevelSchema]
    version: str

with open('./myjsons/levels.json') as file:
    data = json.load(file)
    mydata: MyListLevel = MyListLevel(**data)
with open('./myjsons/editing_level.json') as file:
    data = json.load(file)
    mydata2: EditStatus = EditStatus(**data)
mydata.levels[0].learnable_ids[1]
20945127407874

Extract with BeautifulSoup

soup2 = BeautifulSoup(mydata2.rendered,'html.parser')

with open('index.html','w',encoding='utf-8') as fp:
    fp.seek(0)
    fp.write(mydata2.rendered)

# <th class="column audio" data-key="3" data-role="pool-column-header"><span class="txt">Audio</span>
# <i class="ico ico-edit ico-blue"></i></th>
# get data-key
tags = soup("th",{"class":"column audio"})
for tag in tags:
    print(tag["data-key"])
# Figure out the column which store audio
audio_col = soup.find("th",{"class":"column audio"})["data-key"]
print('Audio column:', audio_col)

3
Audio column: 3

Figure out the word_id and the number of audio existing with relugar expression

import re
tags = soup2("tr",{"class":"thing"})
for tag in tags:
    item = tag.get("data-thing-id")
    if item is not None:
        learnable_id = item
        learnable_text = tag.text.split('\n')[2].strip()
        try:
            audios = re.findall("\\d+",tag.text)
            audio_count = int(audios[-1])
        except IndexError as e:
            audio_count = 0
        print(f'{learnable_id} - {audio_count} audio files - {learnable_text}')

319506819 - 2 audio files - she studied ALL night for her CHEMISTRY final.
319506820 - 2 audio files - we WORKED for HOURS on our MATH assignment.
319506821 - 2 audio files - he FORGOT to HAND in his ESSAY.
319506822 - 2 audio files - she POINTED out a SILLY mistake I had MADE.
319506823 - 2 audio files - that's your END OF YEAR project.

Extract with the library lxml

from lxml import html
tree = html.fromstring(mydata2.rendered)

Find all the tag <tr> with the item “thing”

The characters ‘//’ to inform that the tag <tr> is hide below the others tags

learnables_html = tree.xpath("//tr[contains(@class, 'thing')]")

for learnable in learnables_html:
    learnable_id = learnable.attrib["data-thing-id"]
    learnable_text = learnable.xpath("td[2]/div/div/text()")[0]
    column_number = learnable.xpath("td[contains(@class, 'audio')]/@data-key")[0]
    audio_count = learnable.xpath(
                    "td[contains(@class, 'audio')]/div/div[contains(@class, 'dropdown-menu')]/div"
                )
    print(f'{learnable_id} - {len(audio_count)} audio files - {learnable_text}')
print('Audio column:', column_number)

319506819 - 2 audio files - she studied ALL night for her CHEMISTRY final.
319506820 - 2 audio files - we WORKED for HOURS on our MATH assignment.
319506821 - 2 audio files - he FORGOT to HAND in his ESSAY.
319506822 - 2 audio files - she POINTED out a SILLY mistake I had MADE.
319506823 - 2 audio files - that's your END OF YEAR project.
Audio column: 4

Explaining

The code below:

learnable_text = learnable.xpath("td[2]/div/div/text()")[0]

Get the text inside of the tag <div class="text">she studied ALL night for her CHEMISTRY final.</div>

<td>
    <div class="text">
    <button><div class="text">Alts</div></button> Upload file here
        <div class="text">she studied ALL night for her CHEMISTRY final.</div>
    </div>
</td>

GitHub

View Github