CESI: Intro Big Data, biblio + environnement démo Hadoop

This commit is contained in:
2018-08-24 17:16:57 +02:00
parent 37fccd7d8c
commit a29c349d75
9 changed files with 250 additions and 5 deletions

View File

@ -0,0 +1,41 @@
FROM ubuntu:bionic

# Build-time only (ARG, not ENV): silence apt prompts without leaking
# DEBIAN_FRONTEND into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
ARG HADOOP_VERSION=3.1.1

# ssh/rsync are needed by Hadoop's helper scripts; wget fetches the tarball
# below (it was missing from the original package list, breaking the build).
# Clean the apt lists in the same layer so they don't bloat the image.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        openjdk-8-jdk-headless \
        openssh-client \
        rsync \
        ssh \
        supervisor \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Fetch from the official Apache archive over https (stable URL for every
# released version, unlike third-party mirrors) and remove the tarball in
# the same layer.
RUN wget -O hadoop-$HADOOP_VERSION.tar.gz \
        https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz && \
    tar -xzf hadoop-$HADOOP_VERSION.tar.gz && \
    rm -f hadoop-$HADOOP_VERSION.tar.gz && \
    mkdir -p /opt && \
    mv hadoop-$HADOOP_VERSION /opt/hadoop

WORKDIR /opt/hadoop

ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

COPY ./conf/core-site.xml etc/hadoop/core-site.xml
COPY ./conf/hdfs-site.xml etc/hadoop/hdfs-site.xml

# /run/sshd is required by sshd on Ubuntu; /var/log/hadoop collects daemon logs.
RUN mkdir -p /var/log/hadoop && \
    mkdir -p /run/sshd

# Hadoop 3 refuses to launch daemons as root unless the *_USER variables are
# set explicitly (demo image only — a real deployment would use a hadoop user).
# The PATH line is single-quoted so $PATH is expanded by the login shell, not
# at build time (the original expanded an unset $HADOOP_HOME during the build).
RUN echo "export JAVA_HOME=$JAVA_HOME" >> etc/hadoop/hadoop-env.sh && \
    echo "export HDFS_DATANODE_USER=root" >> etc/hadoop/hadoop-env.sh && \
    echo "export HDFS_NAMENODE_USER=root" >> etc/hadoop/hadoop-env.sh && \
    echo "export HDFS_SECONDARYNAMENODE_USER=root" >> etc/hadoop/hadoop-env.sh && \
    echo "export YARN_RESOURCEMANAGER_USER=root" >> etc/hadoop/yarn-env.sh && \
    echo "export YARN_NODEMANAGER_USER=root" >> etc/hadoop/yarn-env.sh && \
    echo 'export PATH=$PATH:/opt/hadoop/bin' >> ~/.bashrc

# Passwordless root-to-localhost ssh, used by Hadoop's control scripts.
RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 0600 ~/.ssh/authorized_keys

COPY ./conf/supervisor.ini /etc/supervisor.d/hadoop.ini

# 9870 = NameNode web UI, 8088 = YARN ResourceManager web UI (documentation only).
EXPOSE 9870 8088

CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor.d/hadoop.ini"]

View File

@ -0,0 +1,6 @@
<configuration>
	<!-- Default filesystem URI: clients resolve relative paths against the
	     NameNode's RPC endpoint. localhost:9000 is suitable only for this
	     single-node demo container. -->
	<property>
		<name>fs.defaultFS</name>
		<value>hdfs://localhost:9000</value>
	</property>
</configuration>

View File

@ -0,0 +1,6 @@
<configuration>
	<!-- Block replication factor. 1 because this demo runs a single
	     DataNode; the default (3) would leave every block under-replicated. -->
	<property>
		<name>dfs.replication</name>
		<value>1</value>
	</property>
</configuration>

View File

@ -0,0 +1,66 @@
[supervisord]
; Run in the foreground so supervisord stays PID 1 of the container.
nodaemon=true

[program:sshd]
; -D keeps sshd in the foreground so supervisord can track it.
command = /usr/sbin/sshd -o PermitRootLogin=yes -D
directory = /
user = root
autostart = true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
priority = 0

[program:namenode-format]
; One-shot: formats HDFS then exits. -nonInteractive suppresses the
; "Re-format filesystem?" prompt that would otherwise hang the program on
; container restart when a formatted directory already exists (it exits
; non-zero instead, which is fine here).
command = bin/hdfs namenode -format -nonInteractive
directory = /opt/hadoop
; startsecs=0: a process this short-lived would otherwise be treated as a
; failed start (default startsecs=1) and respawned in a loop.
startsecs = 0
; Never restart a completed one-shot format.
autorestart = false
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autostart = true
priority = 10

; NOTE(review): priority only orders process *starts*; supervisord does not
; wait for namenode-format to finish before launching the daemons below.
; Assumed acceptable for this demo — confirm the namenode retries on an
; unformatted dir, or format in the image build instead.

[program:namenode]
command = bin/hdfs --config etc/hadoop namenode
directory = /opt/hadoop
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autostart = true
priority = 20

[program:datanode]
command = bin/hdfs --config etc/hadoop datanode
directory = /opt/hadoop
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
user = root
autostart = true
priority = 30

[program:resourcemanager]
command = bin/yarn --config etc/hadoop resourcemanager
directory = /opt/hadoop
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autostart = true
priority = 40

[program:nodemanager]
command = bin/yarn --config etc/hadoop nodemanager
directory = /opt/hadoop
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
autostart = true
; 50 (was 40, tied with resourcemanager): start the NodeManager after the
; ResourceManager it registers with.
priority = 50