extract_figures.py 2.56 KB
Newer Older
Greg Wilson's avatar
Greg Wilson committed
1
2
3
4
5
6
7
#!/usr/bin/env python

# A __future__ import must precede every other statement in the module.
from __future__ import print_function

import sys
import os
import glob
from optparse import OptionParser

from util import Reporter, read_markdown, IMAGE_FILE_SUFFIX
Greg Wilson's avatar
Greg Wilson committed
10

Greg Wilson's avatar
Greg Wilson committed
11
12
13
14
15
def main():
    """Gather the images from every input file and write them to stdout.

    The command line supplies the Markdown parser path and the files to
    scan; images are reported in the order the files were given.
    """

    options = parse_args()
    collected = []
    for path in options.filenames:
        collected.extend(get_images(options.parser, path))
    save(sys.stdout, collected)


def parse_args():
    """Read the command line and return the parsed options.

    The returned object carries ``parser`` (value of ``-p``/``--parser``)
    and ``filenames`` (the positional arguments).  The program exits
    through ``require`` when the parser path is missing or when no
    filenames are given.
    """

    option_parser = OptionParser()
    option_parser.add_option(
        '-p', '--parser',
        dest='parser',
        default=None,
        help='path to Markdown parser')
    options, positional = option_parser.parse_args()

    require(options.parser is not None,
            'Path to Markdown parser not provided')
    require(positional,
            'No filenames specified')

    # Attach the positional arguments so callers get everything in one object.
    options.filenames = positional
    return options


def get_filenames(source_dir):
    """Return the paths matching ``*.md`` directly inside ``source_dir``."""

    pattern = os.path.join(source_dir, '*.md')
    return glob.glob(pattern)


def get_images(parser, filename):
    """Return the images referenced by one Markdown file.

    The file is read with ``read_markdown`` and the tree stored under its
    ``'doc'`` key is searched twice: first for image nodes, then for links
    that point at image files.  The result is a list of
    ``{'alt': ..., 'src': ...}`` dictionaries in that order.
    """

    document = read_markdown(parser, filename)['doc']
    images = []
    find_image_nodes(document, images)
    find_image_links(document, images)
    return images


def find_image_nodes(doc, result):
    """Append every image found in the tree rooted at ``doc`` to ``result``.

    A node counts as an image when its type is ``'img'`` or when it is an
    ``'html_element'`` whose value is ``'img'``.  Each image is recorded as
    ``{'alt': ..., 'src': ...}``, with a missing ``alt`` becoming ``''``.
    Nodes that are not images are searched through their ``'children'``.
    """

    kind = doc['type']
    is_image = (kind == 'img') or \
        ((kind == 'html_element') and (doc['value'] == 'img'))

    if not is_image:
        for node in doc.get('children', []):
            find_image_nodes(node, result)
        return

    attributes = doc['attr']
    result.append({'alt': attributes.get('alt', ''), 'src': attributes['src']})


Greg Wilson's avatar
Greg Wilson committed
68
69
70
def find_image_links(doc, result):
    """Append every link to an image file found under ``doc`` to ``result``.

    A node counts as a link when its type is ``'a'`` or when it is an
    ``'html_element'`` whose value is ``'a'``, and it carries an ``href``
    attribute.  If the extension of that ``href`` (compared in lower case)
    is in ``IMAGE_FILE_SUFFIX``, the link is recorded as
    ``{'alt': '', 'src': href}``.

    Nodes that are not links with an ``href`` are searched through their
    ``'children'``.  Both link forms tolerate a missing ``'attr'`` entry;
    such a node is treated like any other non-link and searched.
    """

    kind = doc['type']
    is_anchor = (kind == 'a') or \
        ((kind == 'html_element') and (doc['value'] == 'a'))
    # Apply the same guard to both link forms so an anchor without
    # attributes is skipped instead of raising KeyError.
    attributes = doc.get('attr') or {}

    if is_anchor and ('href' in attributes):
        path = attributes['href']
        if os.path.splitext(path)[1].lower() in IMAGE_FILE_SUFFIX:
            result.append({'alt': '', 'src': path})
        # A link with an href is never descended into, matching the
        # original behaviour.
    else:
        for child in doc.get('children', []):
            find_image_links(child, result)


Greg Wilson's avatar
Greg Wilson committed
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def save(stream, images):
    """Write the images to ``stream`` as HTML paragraphs.

    Each image becomes one ``<p><img ... /></p>`` line built from its
    ``'alt'`` and ``'src'`` entries; consecutive images are separated by an
    ``<hr/>`` line.  An empty list produces a single blank line.
    """

    template = '<p><img alt="{0}" src="{1}" /></p>'
    paragraphs = [template.format(image['alt'], image['src'])
                  for image in images]
    print('\n<hr/>\n'.join(paragraphs), file=stream)


def require(condition, message):
    """Exit with status 1 after printing ``message`` unless ``condition`` holds.

    The message goes to standard error.  Nothing happens when ``condition``
    is truthy.
    """

    if condition:
        return
    print(message, file=sys.stderr)
    sys.exit(1)


# Run the driver only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()